diff --git a/.editorconfig b/.editorconfig index 0e6c17e7674..8cf0bd8d62a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,7 +1,14 @@ root = true +[*] +charset = utf-8 + [*.{cpp,hpp,c,h,java,cc,hh,m,mm,S,md,properties,gmk,m4,ac}] trim_trailing_whitespace = true [Makefile] trim_trailing_whitespace = true + +[src/hotspot/**.{cpp,hpp,h}] +indent_style = space +indent_size = 2 diff --git a/.gitattributes b/.gitattributes index ebb586628c3..5a18aa21d98 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,5 @@ * -text +* encoding=utf-8 *.java diff=java *.c diff=cpp *.h diff=cpp diff --git a/.github/actions/get-bundles/action.yml b/.github/actions/get-bundles/action.yml index aaec9a6d19f..270d15159a0 100644 --- a/.github/actions/get-bundles/action.yml +++ b/.github/actions/get-bundles/action.yml @@ -32,10 +32,16 @@ inputs: debug-suffix: description: 'File name suffix denoting debug level, possibly empty' required: false + static-suffix: + description: 'Static bundle file name suffix' + required: false outputs: jdk-path: description: 'Path to the installed JDK bundle' value: ${{ steps.path-name.outputs.jdk }} + static-jdk-path: + description: 'Path to the installed static JDK bundle' + value: ${{ steps.path-name.outputs.static_jdk }} symbols-path: description: 'Path to the installed symbols bundle' value: ${{ steps.path-name.outputs.symbols }} @@ -61,6 +67,15 @@ runs: path: bundles if: steps.download-bundles.outcome == 'failure' + - name: 'Download static bundles artifact' + id: download-static-bundles + uses: actions/download-artifact@v4 + with: + name: bundles-${{ inputs.platform }}${{ inputs.debug-suffix }}${{ inputs.static-suffix }} + path: bundles + continue-on-error: true + if: ${{ inputs.static-suffix == '-static' }} + - name: 'Unpack bundles' run: | if [[ -e bundles/jdk-${{ inputs.platform }}${{ inputs.debug-suffix }}.zip ]]; then @@ -75,6 +90,20 @@ runs: tar -xf bundles/jdk-${{ inputs.platform }}${{ inputs.debug-suffix }}.tar.gz -C bundles/jdk fi + if [[ '${{ inputs.static-suffix }}' == '-static' ]]; then + if [[ -e bundles/jdk-${{ inputs.platform }}${{ inputs.debug-suffix }}${{ inputs.static-suffix }}.zip ]]; then + echo 'Unpacking static jdk bundle...' + mkdir -p bundles/static-jdk + unzip -q bundles/jdk-${{ inputs.platform }}${{ inputs.debug-suffix }}${{ inputs.static-suffix }}.zip -d bundles/static-jdk + fi + + if [[ -e bundles/jdk-${{ inputs.platform }}${{ inputs.debug-suffix }}${{ inputs.static-suffix }}.tar.gz ]]; then + echo 'Unpacking static jdk bundle...' + mkdir -p bundles/static-jdk + tar -xf bundles/jdk-${{ inputs.platform }}${{ inputs.debug-suffix }}${{ inputs.static-suffix }}.tar.gz -C bundles/static-jdk + fi + fi + if [[ -e bundles/symbols-${{ inputs.platform }}${{ inputs.debug-suffix }}.tar.gz ]]; then echo 'Unpacking symbols bundle...' 
mkdir -p bundles/symbols @@ -106,4 +135,12 @@ runs: echo "jdk=$jdk_dir" >> $GITHUB_OUTPUT echo "symbols=$symbols_dir" >> $GITHUB_OUTPUT echo "tests=$tests_dir" >> $GITHUB_OUTPUT + + if [[ '${{ inputs.static-suffix }}' == '-static' ]]; then + static_jdk_dir="$GITHUB_WORKSPACE/$(dirname $(find bundles/static-jdk -name bin -type d))" + if [[ '${{ runner.os }}' == 'Windows' ]]; then + static_jdk_dir="$(cygpath $static_jdk_dir)" + fi + echo "static_jdk=$static_jdk_dir" >> $GITHUB_OUTPUT + fi shell: bash diff --git a/.github/actions/upload-bundles/action.yml b/.github/actions/upload-bundles/action.yml index 30f4ac03c1e..dfa994baac0 100644 --- a/.github/actions/upload-bundles/action.yml +++ b/.github/actions/upload-bundles/action.yml @@ -35,6 +35,9 @@ inputs: bundle-suffix: description: 'Bundle name suffix, possibly empty' required: false + static-suffix: + description: 'Static JDK bundle name suffix, possibly empty' + required: false runs: using: composite @@ -46,6 +49,8 @@ runs: # Rename bundles to consistent names jdk_bundle_zip="$(ls build/*/bundles/jdk-*_bin${{ inputs.debug-suffix }}.zip 2> /dev/null || true)" jdk_bundle_tar_gz="$(ls build/*/bundles/jdk-*_bin${{ inputs.debug-suffix }}.tar.gz 2> /dev/null || true)" + static_jdk_bundle_zip="$(ls build/*/bundles/static-jdk-*_bin${{ inputs.debug-suffix }}.zip 2> /dev/null || true)" + static_jdk_bundle_tar_gz="$(ls build/*/bundles/static-jdk-*_bin${{ inputs.debug-suffix }}.tar.gz 2> /dev/null || true)" symbols_bundle="$(ls build/*/bundles/jdk-*_bin${{ inputs.debug-suffix }}-symbols.tar.gz 2> /dev/null || true)" tests_bundle="$(ls build/*/bundles/jdk-*_bin-tests${{ inputs.debug-suffix }}.tar.gz 2> /dev/null || true)" static_libs_bundle="$(ls build/*/bundles/jdk-*_bin-static-libs${{ inputs.debug-suffix }}.tar.gz 2> /dev/null || true)" @@ -58,6 +63,12 @@ runs: if [[ "$jdk_bundle_tar_gz" != "" ]]; then mv "$jdk_bundle_tar_gz" "bundles/jdk-${{ inputs.platform }}${{ inputs.debug-suffix }}.tar.gz" fi + if [[ "$static_jdk_bundle_zip" != "" ]]; then + mv "$static_jdk_bundle_zip" "bundles/jdk-${{ inputs.platform }}${{ inputs.debug-suffix }}${{ inputs.static-suffix }}.zip" + fi + if [[ "$static_jdk_bundle_tar_gz" != "" ]]; then + mv "$static_jdk_bundle_tar_gz" "bundles/jdk-${{ inputs.platform }}${{ inputs.debug-suffix }}${{ inputs.static-suffix }}.tar.gz" + fi if [[ "$symbols_bundle" != "" ]]; then mv "$symbols_bundle" "bundles/symbols-${{ inputs.platform }}${{ inputs.debug-suffix }}.tar.gz" fi @@ -68,7 +79,7 @@ runs: mv "$static_libs_bundle" "bundles/static-libs-${{ inputs.platform }}${{ inputs.debug-suffix }}.tar.gz" fi - if [[ "$jdk_bundle_zip$jdk_bundle_tar_gz$symbols_bundle$tests_bundle$static_libs_bundle" != "" ]]; then + if [[ "$jdk_bundle_zip$jdk_bundle_tar_gz$static_jdk_bundle_zip$static_jdk_bundle_tar_gz$symbols_bundle$tests_bundle$static_libs_bundle" != "" ]]; then echo 'bundles-found=true' >> $GITHUB_OUTPUT else echo 'bundles-found=false' >> $GITHUB_OUTPUT @@ -78,7 +89,7 @@ runs: - name: 'Upload bundles artifact' uses: actions/upload-artifact@v4 with: - name: bundles-${{ inputs.platform }}${{ inputs.debug-suffix }}${{ inputs.bundle-suffix }} + name: bundles-${{ inputs.platform }}${{ inputs.debug-suffix }}${{ inputs.static-suffix }}${{ inputs.bundle-suffix }} path: bundles retention-days: 1 if: steps.bundles.outputs.bundles-found == 'true' diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index 101668b2bd5..9c991eed419 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -64,6 
+64,9 @@ on: bundle-suffix: required: false type: string + static-suffix: + required: false + type: string jobs: build-linux: @@ -143,3 +146,4 @@ jobs: platform: ${{ inputs.platform }} debug-suffix: "${{ matrix.debug-level == 'debug' && '-debug' || '' }}" bundle-suffix: ${{ inputs.bundle-suffix }} + static-suffix: ${{ inputs.static-suffix }} diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index d02ef91ad86..9bb43a8b83c 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -63,7 +63,7 @@ env: jobs: build-windows: name: build - runs-on: windows-2019 + runs-on: windows-2025 defaults: run: shell: bash @@ -102,7 +102,7 @@ jobs: id: toolchain-check run: | set +e - '/c/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/vc/auxiliary/build/vcvars64.bat' -vcvars_ver=${{ inputs.msvc-toolset-version }} + '/c/Program Files/Microsoft Visual Studio/2022/Enterprise/vc/auxiliary/build/vcvars64.bat' -vcvars_ver=${{ inputs.msvc-toolset-version }} if [ $? -eq 0 ]; then echo "Toolchain is already installed" echo "toolchain-installed=true" >> $GITHUB_OUTPUT @@ -115,7 +115,7 @@ jobs: run: | # Run Visual Studio Installer '/c/Program Files (x86)/Microsoft Visual Studio/Installer/vs_installer.exe' \ - modify --quiet --installPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' \ + modify --quiet --installPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' \ --add Microsoft.VisualStudio.Component.VC.${{ inputs.msvc-toolset-version }}.${{ inputs.msvc-toolset-architecture }} if: steps.toolchain-check.outputs.toolchain-installed != 'true' diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8dce1d214dc..0e64ad78625 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -231,16 +231,14 @@ jobs: uses: ./.github/workflows/build-linux.yml with: platform: linux-x64 - make-target: 'static-jdk-image' + make-target: 'static-jdk-bundles' # There are issues with fastdebug static build in GHA due to space limit. # Only do release build for now. debug-levels: '[ "release" ]' gcc-major-version: '10' configure-arguments: ${{ github.event.inputs.configure-arguments }} make-arguments: ${{ github.event.inputs.make-arguments }} - # It currently doesn't produce any bundles, but probably will do in - # the future. 
- bundle-suffix: "-static" + static-suffix: "-static" if: needs.prepare.outputs.linux-x64 == 'true' build-linux-x64-static-libs: @@ -312,7 +310,7 @@ jobs: uses: ./.github/workflows/build-windows.yml with: platform: windows-x64 - msvc-toolset-version: '14.29' + msvc-toolset-version: '14.43' msvc-toolset-architecture: 'x86.x64' configure-arguments: ${{ github.event.inputs.configure-arguments }} make-arguments: ${{ github.event.inputs.make-arguments }} @@ -324,7 +322,7 @@ jobs: uses: ./.github/workflows/build-windows.yml with: platform: windows-aarch64 - msvc-toolset-version: '14.29' + msvc-toolset-version: '14.43' msvc-toolset-architecture: 'arm64' make-target: 'hotspot' extra-conf-options: '--openjdk-target=aarch64-unknown-cygwin' @@ -361,6 +359,19 @@ jobs: platform: linux-x64 bootjdk-platform: linux-x64 runs-on: ubuntu-22.04 + debug-suffix: -debug + + test-linux-x64-static: + name: linux-x64-static + needs: + - build-linux-x64 + - build-linux-x64-static + uses: ./.github/workflows/test.yml + with: + platform: linux-x64 + bootjdk-platform: linux-x64 + runs-on: ubuntu-22.04 + static-suffix: "-static" test-macos-aarch64: name: macos-aarch64 @@ -372,6 +383,7 @@ jobs: bootjdk-platform: macos-aarch64 runs-on: macos-14 xcode-toolset-version: '15.4' + debug-suffix: -debug test-windows-x64: name: windows-x64 @@ -381,4 +393,5 @@ jobs: with: platform: windows-x64 bootjdk-platform: windows-x64 - runs-on: windows-2019 + runs-on: windows-2025 + debug-suffix: -debug diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 99aaf9650a0..665ae224372 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,6 +40,12 @@ on: xcode-toolset-version: required: false type: string + debug-suffix: + required: false + type: string + static-suffix: + required: false + type: string env: # These are needed to make the MSYS2 bash work properly @@ -86,35 +92,35 @@ jobs: - test-name: 'hs/tier1 common' test-suite: 'test/hotspot/jtreg/:tier1_common' - debug-suffix: -debug + debug-suffix: ${{ inputs.debug-suffix }} - test-name: 'hs/tier1 compiler part 1' test-suite: 'test/hotspot/jtreg/:tier1_compiler_1' - debug-suffix: -debug + debug-suffix: ${{ inputs.debug-suffix }} - test-name: 'hs/tier1 compiler part 2' test-suite: 'test/hotspot/jtreg/:tier1_compiler_2' - debug-suffix: -debug + debug-suffix: ${{ inputs.debug-suffix }} - test-name: 'hs/tier1 compiler part 3' test-suite: 'test/hotspot/jtreg/:tier1_compiler_3' - debug-suffix: -debug + debug-suffix: ${{ inputs.debug-suffix }} - test-name: 'hs/tier1 gc' test-suite: 'test/hotspot/jtreg/:tier1_gc' - debug-suffix: -debug + debug-suffix: ${{ inputs.debug-suffix }} - test-name: 'hs/tier1 runtime' test-suite: 'test/hotspot/jtreg/:tier1_runtime' - debug-suffix: -debug + debug-suffix: ${{ inputs.debug-suffix }} - test-name: 'hs/tier1 serviceability' test-suite: 'test/hotspot/jtreg/:tier1_serviceability' - debug-suffix: -debug + debug-suffix: ${{ inputs.debug-suffix }} - test-name: 'lib-test/tier1' test-suite: 'test/lib-test/:tier1' - debug-suffix: -debug + debug-suffix: ${{ inputs.debug-suffix }} steps: - name: 'Checkout the JDK source' @@ -140,6 +146,7 @@ jobs: with: platform: ${{ inputs.platform }} debug-suffix: ${{ matrix.debug-suffix }} + static-suffix: ${{ inputs.static-suffix }} - name: 'Install dependencies' run: | @@ -160,6 +167,21 @@ jobs: else echo "value=$PATH" >> $GITHUB_OUTPUT fi + if [[ '${{ inputs.static-suffix }}' == '-static' ]]; then + echo "static-hotspot-problemlist-path=`pwd`/test/hotspot/jtreg/ProblemList-StaticJdk.txt" >> 
$GITHUB_OUTPUT + echo "static-jdk-problemlist-path=`pwd`/test/jdk/ProblemList-StaticJdk.txt" >> $GITHUB_OUTPUT + echo "static-langtools-problemlist-path=`pwd`/test/langtools/ProblemList-StaticJdk.txt" >> $GITHUB_OUTPUT + echo "static-lib-test-problemlist-path=`pwd`/test/lib-test/ProblemList-StaticJdk.txt" >> $GITHUB_OUTPUT + fi + + - name: 'Set Extra Options' + id: extra-options + run: | + if [[ '${{ inputs.static-suffix }}' == '-static' ]]; then + echo "test-jdk=JDK_UNDER_TEST=${{ steps.bundles.outputs.static-jdk-path }}" >> $GITHUB_OUTPUT + echo "compile-jdk=JDK_FOR_COMPILE=${{ steps.bundles.outputs.jdk-path }}" >> $GITHUB_OUTPUT + echo "extra-problem-lists=EXTRA_PROBLEM_LISTS=${{ steps.path.outputs.static-hotspot-problemlist-path }}%20${{ steps.path.outputs.static-jdk-problemlist-path }}%20${{ steps.path.outputs.static-langtools-problemlist-path }}%20${{ steps.path.outputs.static-lib-test-problemlist-path }}" >> $GITHUB_OUTPUT + fi - name: 'Run tests' id: run-tests @@ -171,7 +193,9 @@ jobs: JDK_IMAGE_DIR=${{ steps.bundles.outputs.jdk-path }} SYMBOLS_IMAGE_DIR=${{ steps.bundles.outputs.symbols-path }} TEST_IMAGE_DIR=${{ steps.bundles.outputs.tests-path }} - JTREG='JAVA_OPTIONS=-XX:-CreateCoredumpOnCrash;VERBOSE=fail,error,time;KEYWORDS=!headful' + ${{ steps.extra-options.outputs.test-jdk }} + ${{ steps.extra-options.outputs.compile-jdk }} + JTREG='JAVA_OPTIONS=-XX:-CreateCoredumpOnCrash;VERBOSE=fail,error,time;KEYWORDS=!headful;${{ steps.extra-options.outputs.extra-problem-lists }}' && bash ./.github/scripts/gen-test-summary.sh "$GITHUB_STEP_SUMMARY" "$GITHUB_OUTPUT" env: PATH: ${{ steps.path.outputs.value }} @@ -204,7 +228,7 @@ jobs: echo '::warning ::Missing test-support directory' fi - artifact_name="results-${{ inputs.platform }}-$(echo ${{ matrix.test-name }} | tr '/ ' '__')" + artifact_name="results-${{ inputs.platform }}-$(echo ${{ matrix.test-name }}${{ inputs.static-suffix }} | tr '/ ' '__')" echo "artifact-name=$artifact_name" >> $GITHUB_OUTPUT if: always() diff --git a/.gitignore b/.gitignore index 2d82e0d943c..9145a9fa67b 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ NashornProfile.txt /.gdbinit /.lldbinit **/core.[0-9]* +*.rej +*.orig diff --git a/.jcheck/conf b/.jcheck/conf index 6ab5c2d64c2..60881e74d2a 100644 --- a/.jcheck/conf +++ b/.jcheck/conf @@ -1,7 +1,7 @@ [general] project=jdk jbs=JDK -version=25 +version=26 [checks] error=author,committer,reviewers,merge,issues,executable,symlink,message,hg-tag,whitespace,problemlists,copyright diff --git a/doc/building.html b/doc/building.html index 1e6f99e97c9..da8465bc532 100644 --- a/doc/building.html +++ b/doc/building.html @@ -282,9 +282,34 @@
On Windows, if using Cygwin, extra care -must be taken to make sure the environment is consistent. It is -recommended that you follow this procedure:
+UTF-8 support is needed to compile the JDK. On Unix systems, this
+typically means that the C.UTF-8
or
+en_US.UTF-8
locale needs to be available. For Windows
+users, please see the section on Locale
+Requirements below.
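+A quick way to check this on Unix systems is to list the locales the
+system knows about (a sketch only; the exact names printed may differ
+between systems):
+locale -a | grep -iE '^(C|en_US)\.utf-?8$'
+If this prints nothing, a suitable UTF-8 locale needs to be installed or
+generated before running configure.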
On Windows, extra care must be taken to have a smooth building +experience:
+Make sure that all relevant paths have short names. Short names
+are used by the build system to create space-free alternative paths.
+Short name creation is enabled per volume. The default setting can be
+checked with the command: fsutil 8dot3name query
. If short
+name creation was turned off when a directory was created, it will not
+have a short name. Whether a short name exists can be checked by running
+dir /X
in the containing directory (in cmd.exe). If a short
+path is present you should see something like 'ASDF~1' being displayed
+in one of the columns of the output. If a directory is missing a short
+name, the safest way to get one is to enable short names for that
+particular volume with
+fsutil 8dot3name set <drive letter>: 0
(note that
+you need to run as administrator for this), and then re-create the
+particular directory. A short name should be generated automatically
+then. Another option is to manually assign a short name to the directory
+using
+fsutil file setShortName <path> <short name>
.
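+As an illustration, the following cmd.exe session ties these commands
+together (run the set command as administrator; the drive letter and the
+workspace directory are placeholders only):
+fsutil 8dot3name query C:
+fsutil 8dot3name set C: 0
+dir /X C:\workspace
+As noted above, enabling short names does not add them to existing
+directories, so re-create the directory afterwards if it still lacks one.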
If using Cygwin, you must make sure the +file permissions and attributes between Windows and Cygwin are +consistent. It is recommended that you follow this procedure:
Create the directory that is going to contain the top directory
of the JDK clone by using the mkdir
command in the Cygwin
@@ -294,6 +319,9 @@
Do not put the JDK clone in a path under your Cygwin home directory. This is especially important if your user name contains spaces and/or mixed upper and lower case letters.
Failure to follow these procedures might result in hard-to-debug +build problems.
You need to install a git client. You have two choices, Cygwin git or Git for Windows. Unfortunately there are pros and cons with each choice.
@@ -311,9 +339,7 @@core.autocrlf
to
false
(this is asked during installation).Failure to follow this procedure might result in hard-to-debug build -problems.
+The JDK is a massive project, and require machines ranging from @@ -376,7 +402,7 @@
It is recommended that you use at least macOS 13 (Ventura) and Xcode -14, but earlier versions may also work.
+It is recommended that you use at least macOS 14 and Xcode 15.4, but +earlier versions may also work.
The standard macOS environment contains the basic tooling needed to build, but for external libraries a package manager is recommended. The JDK uses homebrew in the examples, but @@ -590,11 +616,11 @@
Proposed changes should be discussed on the HotSpot Developers mailing +list. Changes are likely to be cautious and incremental, since HotSpot +coders have been using these guidelines for years.
+Substantive changes are approved by rough consensus +of the HotSpot Group +Members. The Group Lead determines whether consensus has been +reached.
+Editorial changes (changes that only affect the description of +HotSpot style, not its substance) do not require the full consensus +gathering process. The normal HotSpot pull request process may be used +for editorial changes, with the additional requirement that the +requisite reviewers are also HotSpot Group Members.
Some programmers seem to have lexers and even C preprocessors installed directly behind their eyeballs. The rest of us require code @@ -124,7 +140,7 @@
Many of the guidelines mentioned here have (sometimes widespread) counterexamples in the HotSpot code base. Finding a counterexample is not sufficient justification for new code to follow the counterexample @@ -137,20 +153,6 @@
Proposed changes should be discussed on the HotSpot Developers mailing -list. Changes are likely to be cautious and incremental, since HotSpot -coders have been using these guidelines for years.
-Substantive changes are approved by rough consensus -of the HotSpot Group -Members. The Group Lead determines whether consensus has been -reached.
-Editorial changes (changes that only affect the description of -HotSpot style, not its substance) do not require the full consensus -gathering process. The normal HotSpot pull request process may be used -for editorial changes, with the additional requirement that the -requisite reviewers are also HotSpot Group Members.
Prefer having checks inside test code.
Not only does having test logic outside, e.g. verification method, depending on asserts in product code contradict with several items above -but also decreases test’s readability and stability. It is much easier +but also decreases test's readability and stability. It is much easier to understand that a test is testing when all testing logic is located inside a test or nearby in shared test libraries. As a rule of thumb, the closer a check to a test, the better.
@@ -198,7 +198,7 @@Prefer EXPECT
over ASSERT
if possible.
This is related to the informativeness
property of tests, information for other checks can help to better
-localize a defect’s root-cause. One should use ASSERT
if it
+localize a defect's root-cause. One should use ASSERT
if it
is impossible to continue test execution or if it does not make much
sense. Later in the text, EXPECT
forms will be used to
refer to both ASSERT/EXPECT
.
eps
.
Use string special macros for C strings comparisons.
-EXPECT_EQ
just compares pointers’ values, which is
+
EXPECT_EQ
just compares pointers' values, which is
hardly what one wants comparing C strings. GoogleTest provides
EXPECT_STREQ
and EXPECT_STRNE
macros to
compare C string contents. There are also case-insensitive versions
@@ -293,7 +293,7 @@
This naming scheme helps to find tests, filter them and simplifies
test failure analysis. For example, class Foo
- test group
Foo
, compiler logging subsystem - test group
-CompilerLogging
, G1 GC — test group G1GC
, and
+CompilerLogging
, G1 GC - test group G1GC
, and
so forth.
A test file must have test_
prefix and .cpp
@@ -345,7 +345,7 @@
All test purpose friends should have either Test
or
Testable
suffix.
It greatly simplifies understanding of friendship’s purpose and +
It greatly simplifies understanding of friendship's purpose and
allows statically check that private members are not exposed
unexpectedly. Having FooTest
as a friend of
Foo
without any comments will be understood as a necessary
@@ -435,7 +435,7 @@
Restore changed flags.
It is quite common for tests to configure JVM in a certain way
-changing flags’ values. GoogleTest provides two ways to set up
+changing flags' values. GoogleTest provides two ways to set up
environment before a test and restore it afterward: using either
constructor and destructor or SetUp
and
TearDown
functions. Both ways require to use a test fixture
@@ -444,7 +444,7 @@
Caveats:
Changing a flag’s value could break the invariants between flags' +
Changing a flag's value could break the invariants between flags' values and hence could lead to unexpected/unsupported JVM state.
FLAG_SET_*
macros can change more than one flag (in
diff --git a/doc/starting-next-release.html b/doc/starting-next-release.html
new file mode 100644
index 00000000000..421229f9fbc
--- /dev/null
+++ b/doc/starting-next-release.html
@@ -0,0 +1,127 @@
+
+
+
The start of release changes, the changes that turn JDK N
+into JDK (N+1), are primarily small updates to various files
+along with new files to store symbol information to allow
+javac --release N ...
to run on JDK (N+1).
The updates include changes to files holding meta-data about the
+release, files under the src
directory for API and tooling
+updates, and incidental updates under the test
+directory.
As a matter of policy, there are a number of semantically distinct +concepts which get incremented separately at the start of a new +release:
+Runtime.version()
javax.lang.model.SourceVersion
-source
/-target
/--release
+argument recognized by javac
and related toolsThe expected file updates are listed below. Additional files may need +to be updated for a particular release.
+jcheck/conf
: update meta-data used by
+jcheck
and the Skara toolingmake/conf/version-numbers.conf
: update to meta-data
+used in the buildsrc
filessrc/hotspot/share/classfile/classFileParser.cpp
: add a
+#define
for the new versionsrc/java.base/share/classes/java/lang/classfile/ClassFile.java
:
+add a constant for the new class file format versionsrc/java.base/share/classes/java/lang/reflect/ClassFileFormatVersion.java
:
+add an enum
constant for the new class file format
+versionsrc/java.compiler/share/classes/javax/lang/model/SourceVersion.java
:
+add an enum
constant for the new source versionsrc/java.compiler/share/classes/javax/lang/model/util/*
+visitors: Update @SupportedSourceVersion
annotations to
+latest value. Note this update is done in lieu of introducing another
+set of visitors for each Java SE release.src/jdk.compiler/share/classes/com/sun/tools/javac/code/Source.java
:
+add an enum
constant for the new source version internal to
+javac
src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassFile.java
:
+add an enum
constant for the new class file format version
+internal to javac
src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/Target.java
:
+add an enum
constant for the new target version internal to
+javac
src/jdk.compiler/share/classes/com/sun/tools/javac/processing/PrintingProcessor.java
+update printing processor to support the new source version--release
is stored as new
+text files in the src/jdk.compiler/share/data/symbols
+directory, one file per module. The README file in that directory
+contains directions on how to create the files.test
filestest/langtools/tools/javac/api/TestGetSourceVersions.java
:
+add new SourceVersion
constant to test matrix.test/langtools/tools/javac/classfiles/ClassVersionChecker.java
:
+add new enum constant for the new class file versiontest/langtools/tools/javac/lib/JavacTestingAbstractProcessor.java
+update annotation processor extended by javac
tests to
+cover the new source versiontest/langtools/tools/javac/preview/classReaderTest/Client.nopreview.out
+and
+test/langtools/tools/javac/preview/classReaderTest/Client.preview.out
:
+update expected messages for preview errors and warningsjcov-test
instead of test
, e.g.
make jcov-test TEST=jdk_lang
. This will make sure the JCov
image is built, and that JCov reporting is enabled.
+To include JCov coverage for just a subset of all modules, you can
+use the --with-jcov-modules
arguments to
+configure
, e.g.
+--with-jcov-modules=jdk.compiler,java.desktop
.
For more fine-grained control, you can pass arbitrary filters to JCov
+using --with-jcov-filters
, and you can specify a specific
+JDK to instrument using --with-jcov-input-jdk
.
The JCov report is stored in
build/$BUILD/test-results/jcov-output/report
.
Please note that running with JCov reporting can be very memory @@ -589,6 +601,37 @@
For more notes about the PKCS11 tests, please refer to test/jdk/sun/security/pkcs11/README.
+One way to improve test coverage of ahead-of-time (AOT) optimizations +in the JDK is to run existing jtreg test cases in a special "AOT_JDK" +mode. Example:
+$ make test JTREG="AOT_JDK=onestep" \
+ TEST=open/test/hotspot/jtreg/runtime/invokedynamic
+In this testing mode, we first perform an AOT training run (see +https://openjdk.org/jeps/483) of a special test program (test/setup_aot/TestSetupAOT.java) +that accesses about 5,0000 classes in the JDK core libraries. +Optimization artifacts for these classes (such as pre-linked lambda +expressions, execution profiles, and pre-generated native code) are +stored into an AOT cache file, which will be used by all the JVMs +launched by the selected jtreg test cases.
+When the jtreg tests call into the core libraries classes that are in +the AOT cache, we will be able to test the AOT optimizations that were +used on those classes.
+Please note that not all existing jtreg test cases can be executed +with the AOT_JDK mode. See test/hotspot/jtreg/ProblemList-AotJdk.txt +and test/jdk/ProblemList-AotJdk.txt.
+Also, test cases that were written specifically to test AOT, such as +the tests under test/hotspot/jtreg/runtime/cds, +cannot be executed with the AOT_JDK mode.
+Valid values for AOT_JDK
are onestep
and
+twostep
. These control how the AOT cache is generated. See
+https://openjdk.org/jeps/514 for details. All other values are
+ignored.
Some security tests use a hardcoded provider for
diff --git a/doc/testing.md b/doc/testing.md
index 351690c5e60..bb56c05c295 100644
--- a/doc/testing.md
+++ b/doc/testing.md
@@ -345,6 +345,14 @@ The simplest way to run tests with JCov coverage report is to use the special
target `jcov-test` instead of `test`, e.g. `make jcov-test TEST=jdk_lang`. This
will make sure the JCov image is built, and that JCov reporting is enabled.
+To include JCov coverage for just a subset of all modules, you can use the
+`--with-jcov-modules` arguments to `configure`, e.g.
+`--with-jcov-modules=jdk.compiler,java.desktop`.
+
+For more fine-grained control, you can pass arbitrary filters to JCov using
+`--with-jcov-filters`, and you can specify a specific JDK to instrument
+using `--with-jcov-input-jdk`.
+
The JCov report is stored in `build/$BUILD/test-results/jcov-output/report`.
Please note that running with JCov reporting can be very memory intensive.
@@ -603,6 +611,43 @@ $ make test TEST="jtreg:sun/security/pkcs11/Secmod/AddTrustedCert.java" \
For more notes about the PKCS11 tests, please refer to
test/jdk/sun/security/pkcs11/README.
+### Testing Ahead-of-time Optimizations
+-------------------------------------------------------------------------------
+One way to improve test coverage of ahead-of-time (AOT) optimizations in
+the JDK is to run existing jtreg test cases in a special "AOT_JDK" mode.
+Example:
+
+```
+$ make test JTREG="AOT_JDK=onestep" \
+ TEST=open/test/hotspot/jtreg/runtime/invokedynamic
+```
+
+In this testing mode, we first perform an AOT training run
+(see https://openjdk.org/jeps/483) of a special test program
+([test/setup_aot/TestSetupAOT.java](../test/setup_aot/TestSetupAOT.java))
+that accesses about 5,000 classes in the JDK core libraries.
+Optimization artifacts for these classes (such as pre-linked
+lambda expressions, execution profiles, and pre-generated native code)
+are stored into an AOT cache file, which will be used by all the JVMs
+launched by the selected jtreg test cases.
+
+When the jtreg tests call into the core libraries classes that are in
+the AOT cache, we will be able to test the AOT optimizations that were
+used on those classes.
+
+Please note that not all existing jtreg test cases can be executed with
+the AOT_JDK mode. See
+[test/hotspot/jtreg/ProblemList-AotJdk.txt](../test/hotspot/jtreg/ProblemList-AotJdk.txt)
+and [test/jdk/ProblemList-AotJdk.txt](../test/jdk/ProblemList-AotJdk.txt).
+
+Also, test cases that were written specifically to test AOT, such as the tests
+under [test/hotspot/jtreg/runtime/cds](../test/hotspot/jtreg/runtime/cds/),
+cannot be executed with the AOT_JDK mode.
+
+Valid values for `AOT_JDK` are `onestep` and `twostep`. These control how
+the AOT cache is generated. See https://openjdk.org/jeps/514 for details.
+All other values are ignored.
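+
+The two-step mode uses the same invocation shape; only the `AOT_JDK` value
+changes. For instance, repeating the earlier example with separate training
+and cache-creation steps:
+
+```
+$ make test JTREG="AOT_JDK=twostep" \
+    TEST=open/test/hotspot/jtreg/runtime/invokedynamic
+```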
+
### Testing with alternative security providers
Some security tests use a hardcoded provider for `KeyFactory`, `Cipher`,
diff --git a/make/Bundles.gmk b/make/Bundles.gmk
index 8962b596278..ba8ec0c864b 100644
--- a/make/Bundles.gmk
+++ b/make/Bundles.gmk
@@ -174,9 +174,11 @@ else
JRE_IMAGE_HOMEDIR := $(JRE_IMAGE_DIR)
JDK_BUNDLE_SUBDIR := jdk-$(VERSION_NUMBER)
JRE_BUNDLE_SUBDIR := jre-$(VERSION_NUMBER)
+ STATIC_JDK_BUNDLE_SUBDIR := static-jdk-$(VERSION_NUMBER)
ifneq ($(DEBUG_LEVEL), release)
JDK_BUNDLE_SUBDIR := $(JDK_BUNDLE_SUBDIR)/$(DEBUG_LEVEL)
JRE_BUNDLE_SUBDIR := $(JRE_BUNDLE_SUBDIR)/$(DEBUG_LEVEL)
+ STATIC_JDK_BUNDLE_SUBDIR := $(STATIC_JDK_BUNDLE_SUBDIR)/$(DEBUG_LEVEL)
endif
# In certain situations, the JDK_IMAGE_DIR points to an image without the
# symbols and demos. If so, the symbols and demos can be found in a
@@ -500,6 +502,21 @@ ifneq ($(filter static-libs-graal-bundles, $(MAKECMDGOALS)), )
STATIC_LIBS_GRAAL_TARGETS += $(BUILD_STATIC_LIBS_GRAAL_BUNDLE)
endif
+#################################################################################
+
+ifneq ($(filter static-jdk-bundles, $(MAKECMDGOALS)), )
+ STATIC_JDK_BUNDLE_FILES := $(call FindFiles, $(STATIC_JDK_IMAGE_DIR))
+
+ $(eval $(call SetupBundleFile, BUILD_STATIC_JDK_BUNDLE, \
+ BUNDLE_NAME := $(STATIC_JDK_BUNDLE_NAME), \
+ FILES := $(STATIC_JDK_BUNDLE_FILES), \
+ BASE_DIRS := $(STATIC_JDK_IMAGE_DIR), \
+ SUBDIR := $(STATIC_JDK_BUNDLE_SUBDIR), \
+ ))
+
+ STATIC_JDK_TARGETS += $(BUILD_STATIC_JDK_BUNDLE)
+endif
+
################################################################################
product-bundles: $(PRODUCT_TARGETS)
@@ -510,11 +527,12 @@ docs-javase-bundles: $(DOCS_JAVASE_TARGETS)
docs-reference-bundles: $(DOCS_REFERENCE_TARGETS)
static-libs-bundles: $(STATIC_LIBS_TARGETS)
static-libs-graal-bundles: $(STATIC_LIBS_GRAAL_TARGETS)
+static-jdk-bundles: $(STATIC_JDK_TARGETS)
jcov-bundles: $(JCOV_TARGETS)
.PHONY: product-bundles test-bundles \
docs-jdk-bundles docs-javase-bundles docs-reference-bundles \
- static-libs-bundles static-libs-graal-bundles jcov-bundles
+ static-libs-bundles static-libs-graal-bundles static-jdk-bundles jcov-bundles
################################################################################
diff --git a/make/CompileInterimLangtools.gmk b/make/CompileInterimLangtools.gmk
index c869ea160c7..c7d1c3796f6 100644
--- a/make/CompileInterimLangtools.gmk
+++ b/make/CompileInterimLangtools.gmk
@@ -95,14 +95,16 @@ define SetupInterimModule
SRC := $(BUILDTOOLS_OUTPUTDIR)/gensrc/$1.interim \
$$(wildcard $(SUPPORT_OUTPUTDIR)/gensrc/$1) \
$(TOPDIR)/src/$1/share/classes, \
- EXCLUDES := sun javax/tools/snippet-files, \
+ EXCLUDES := sun, \
EXCLUDE_FILES := $(TOPDIR)/src/$1/share/classes/module-info.java \
$(TOPDIR)/src/$1/share/classes/javax/tools/ToolProvider.java \
$(TOPDIR)/src/$1/share/classes/com/sun/tools/javac/launcher/Main.java \
+ $(TOPDIR)/src/$1/share/classes/com/sun/tools/javac/launcher/MemoryClassLoader.java \
$(TOPDIR)/src/$1/share/classes/com/sun/tools/javac/launcher/MemoryContext.java \
$(TOPDIR)/src/$1/share/classes/com/sun/tools/javac/launcher/MemoryModuleFinder.java \
$(TOPDIR)/src/$1/share/classes/com/sun/tools/javac/launcher/SourceLauncher.java \
Standard.java, \
+ EXCLUDE_PATTERNS := -files, \
EXTRA_FILES := $(BUILDTOOLS_OUTPUTDIR)/gensrc/$1.interim/module-info.java \
$($1.interim_EXTRA_FILES), \
COPY := .gif .png .xml .css .svg .js .js.template .txt .woff .woff2 javax.tools.JavaCompilerTool, \
diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk
index b4a193dfade..1e26fb2b529 100644
--- a/make/CompileJavaModules.gmk
+++ b/make/CompileJavaModules.gmk
@@ -113,6 +113,7 @@ $(eval $(call SetupJavaCompilation, $(MODULE), \
DISABLED_WARNINGS := $(DISABLED_WARNINGS_java), \
EXCLUDES := $(EXCLUDES), \
EXCLUDE_FILES := $(EXCLUDE_FILES), \
+ EXCLUDE_PATTERNS := -files, \
KEEP_ALL_TRANSLATIONS := $(KEEP_ALL_TRANSLATIONS), \
JAVAC_FLAGS := \
$(DOCLINT) \
diff --git a/make/Coverage.gmk b/make/Coverage.gmk
index 2fd4e4ec6d4..a375c343185 100644
--- a/make/Coverage.gmk
+++ b/make/Coverage.gmk
@@ -34,21 +34,28 @@ else
JCOV_INPUT_IMAGE_DIR := $(JDK_IMAGE_DIR)
endif
+JCOV_SUPPORT_DIR := $(SUPPORT_OUTPUTDIR)/jcov
+
#moving instrumented jdk image in and out of jcov_temp because of CODETOOLS-7902299
-JCOV_TEMP := $(SUPPORT_OUTPUTDIR)/jcov_temp
+JCOV_TEMP := $(JCOV_SUPPORT_DIR)/temp
+
+ifneq ($(JCOV_MODULES), )
+ JCOV_MODULES_FILTER := $(foreach m, $(JCOV_MODULES), -include_module $m)
+endif
$(JCOV_IMAGE_DIR)/release: $(JCOV_INPUT_IMAGE_DIR)/release
$(call LogWarn, Creating instrumented jdk image with JCov)
$(call MakeDir, $(JCOV_TEMP) $(IMAGES_OUTPUTDIR))
$(RM) -r $(JCOV_IMAGE_DIR) $(JCOV_TEMP)/*
$(CP) -r $(JCOV_INPUT_IMAGE_DIR) $(JCOV_TEMP)/$(JCOV_IMAGE_SUBDIR)
- $(JAVA) -Xmx3g -jar $(JCOV_HOME)/lib/jcov.jar JREInstr \
+ $(call ExecuteWithLog, $(JCOV_SUPPORT_DIR)/run-jcov, \
+ $(JAVA) -Xmx3g -jar $(JCOV_HOME)/lib/jcov.jar JREInstr \
-t $(JCOV_TEMP)/$(JCOV_IMAGE_SUBDIR)/template.xml \
-rt $(JCOV_HOME)/lib/jcov_network_saver.jar \
-exclude 'java.lang.Object' \
-exclude jdk.test.Main -exclude '**\$Proxy*' \
- $(JCOV_FILTERS) \
- $(JCOV_TEMP)/$(JCOV_IMAGE_SUBDIR)
+ $(JCOV_MODULES_FILTER) $(JCOV_FILTERS) \
+ $(JCOV_TEMP)/$(JCOV_IMAGE_SUBDIR))
$(MV) $(JCOV_TEMP)/$(JCOV_IMAGE_SUBDIR) $(JCOV_IMAGE_DIR)
$(RMDIR) $(JCOV_TEMP)
diff --git a/make/Docs.gmk b/make/Docs.gmk
index 60c029ce8f9..1fcc8575d2c 100644
--- a/make/Docs.gmk
+++ b/make/Docs.gmk
@@ -79,8 +79,6 @@ JAVADOC_TAGS := \
-tag see \
-taglet build.tools.taglet.ExtLink \
-taglet build.tools.taglet.Incubating \
- -taglet build.tools.taglet.PreviewNote \
- --preview-note-tag previewNote \
-tagletpath $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
$(CUSTOM_JAVADOC_TAGS) \
#
@@ -96,14 +94,14 @@ JAVADOC_DISABLED_DOCLINT_PACKAGES := org.w3c.* javax.smartcardio
# The initial set of options for javadoc
JAVADOC_OPTIONS := -use -keywords -notimestamp \
- -serialwarn -encoding ISO-8859-1 -docencoding UTF-8 -breakiterator \
+ -serialwarn -encoding utf-8 -docencoding utf-8 -breakiterator \
-splitIndex --system none -javafx --expand-requires transitive \
--override-methods=summary
# The reference options must stay stable to allow for comparisons across the
# development cycle.
REFERENCE_OPTIONS := -XDignore.symbol.file=true -use -keywords -notimestamp \
- -serialwarn -encoding ISO-8859-1 -breakiterator -splitIndex --system none \
+ -serialwarn -encoding utf-8 -breakiterator -splitIndex --system none \
-html5 -javafx --expand-requires transitive
# Should we add DRAFT stamps to the generated javadoc?
@@ -262,7 +260,7 @@ define create_overview_file
$$($1_OVERVIEW): $$($1_OVERVIEW_VARDEPS_FILE)
$$(call LogInfo, Creating overview.html for $1)
$$(call MakeDir, $$(@D))
- $$(ECHO) -n '$$($1_OVERVIEW_TEXT)' > $$@
+ $$(PRINTF) "%s" '$$($1_OVERVIEW_TEXT)' > $$@
endef
################################################################################
@@ -542,7 +540,9 @@ $(eval $(call SetupApiDocsGeneration, REFERENCE_API, \
# Format: space-delimited list of names, including at most one '%' as a
# wildcard. Spec source files match if their filename or any enclosing folder
# name matches one of the items in SPEC_FILTER.
-SPEC_FILTER := %
+ifeq ($(SPEC_FILTER), )
+ SPEC_FILTER := %
+endif
ApplySpecFilter = \
$(strip $(foreach file, $(1), \
diff --git a/make/GenerateLinkOptData.gmk b/make/GenerateLinkOptData.gmk
index 5fc745ba223..6f6e1f29b4c 100644
--- a/make/GenerateLinkOptData.gmk
+++ b/make/GenerateLinkOptData.gmk
@@ -76,10 +76,14 @@ $(CLASSLIST_FILE): $(INTERIM_IMAGE_DIR)/bin/java$(EXECUTABLE_SUFFIX) $(CLASSLIST
$(call LogInfo, Generating $(patsubst $(OUTPUTDIR)/%, %, $(JLI_TRACE_FILE)))
$(FIXPATH) $(INTERIM_IMAGE_DIR)/bin/java -XX:DumpLoadedClassList=$@.raw \
$(CLASSLIST_FILE_VM_OPTS) \
+ -Xlog:aot=off \
+ -Xlog:cds=off \
-cp $(SUPPORT_OUTPUTDIR)/classlist.jar \
build.tools.classlist.HelloClasslist $(LOG_DEBUG)
$(GREP) -v HelloClasslist $@.raw > $@.interim
$(FIXPATH) $(INTERIM_IMAGE_DIR)/bin/java -Xshare:dump \
+ -Xlog:aot=off \
+ -Xlog:cds=off \
-XX:SharedClassListFile=$@.interim -XX:SharedArchiveFile=$@.jsa \
-Xmx128M -Xms128M $(LOG_INFO)
$(FIXPATH) $(INTERIM_IMAGE_DIR)/bin/java -XX:DumpLoadedClassList=$@.raw.2 \
@@ -87,6 +91,8 @@ $(CLASSLIST_FILE): $(INTERIM_IMAGE_DIR)/bin/java$(EXECUTABLE_SUFFIX) $(CLASSLIST
-Djava.lang.invoke.MethodHandle.TRACE_RESOLVE=true \
$(CLASSLIST_FILE_VM_OPTS) \
--module-path $(SUPPORT_OUTPUTDIR)/classlist.jar \
+ -Xlog:aot=off \
+ -Xlog:cds=off \
-cp $(SUPPORT_OUTPUTDIR)/classlist.jar \
build.tools.classlist.HelloClasslist \
2> $(LINK_OPT_DIR)/stderr > $(JLI_TRACE_FILE) \
@@ -100,6 +106,8 @@ $(CLASSLIST_FILE): $(INTERIM_IMAGE_DIR)/bin/java$(EXECUTABLE_SUFFIX) $(CLASSLIST
$(GREP) -v HelloClasslist $@.raw.2 > $@.raw.3
$(GREP) -v @cp $@.raw.3 > $@.raw.4
$(FIXPATH) $(INTERIM_IMAGE_DIR)/bin/java \
+ -Xlog:aot=off \
+ -Xlog:cds=off \
-cp $(SUPPORT_OUTPUTDIR)/classlist.jar \
build.tools.classlist.SortClasslist $@.raw.4 > $@
diff --git a/make/Images.gmk b/make/Images.gmk
index 3f2b34bc9c6..22e3e43cb1f 100644
--- a/make/Images.gmk
+++ b/make/Images.gmk
@@ -162,7 +162,7 @@ define CreateCDSArchive
endif
ifeq ($(DEBUG_CDS_ARCHIVE), true)
- $1_$2_CDS_DUMP_FLAGS += -Xlog:cds+map*=trace:file=$$(JDK_IMAGE_DIR)/$$($1_$2_CDS_ARCHIVE).cdsmap:none:filesize=0
+ $1_$2_CDS_DUMP_FLAGS += -Xlog:aot+map*=trace:file=$$(JDK_IMAGE_DIR)/$$($1_$2_CDS_ARCHIVE).cdsmap:none:filesize=0
endif
$$(eval $$(call SetupExecute, $1_$2_gen_cds_archive_jdk, \
diff --git a/make/Init.gmk b/make/Init.gmk
index 5dd1a71dd9a..38959323628 100644
--- a/make/Init.gmk
+++ b/make/Init.gmk
@@ -37,10 +37,6 @@ include MakeFileStart.gmk
include $(TOPDIR)/make/InitSupport.gmk
include LogUtils.gmk
-# Force early generation of module-deps.gmk
-GENERATE_MODULE_DEPS_FILE := true
-include Modules.gmk
-
# Inclusion of this pseudo-target will cause make to execute this file
# serially, regardless of -j.
.NOTPARALLEL:
@@ -114,7 +110,18 @@ reconfigure:
CUSTOM_CONFIG_DIR="$(CUSTOM_CONFIG_DIR)" \
$(RECONFIGURE_COMMAND) )
-.PHONY: print-modules print-targets print-tests print-configuration reconfigure
+# Create files that are needed to run most targets in Main.gmk
+create-make-helpers:
+ ( cd $(TOPDIR) && \
+ $(MAKE) $(MAKE_ARGS) -j 1 -f make/GenerateFindTests.gmk \
+ $(USER_MAKE_VARS) )
+ ( cd $(TOPDIR) && \
+ $(MAKE) $(MAKE_ARGS) -j 1 -f make/Main.gmk $(USER_MAKE_VARS) \
+ UPDATE_MODULE_DEPS=true NO_RECIPES=true \
+ create-main-targets-include )
+
+.PHONY: print-modules print-targets print-tests print-configuration \
+ reconfigure create-make-helpers
##############################################################################
# The main target. This will delegate all other targets into Main.gmk.
@@ -134,7 +141,7 @@ TARGET_DESCRIPTION := target$(if $(word 2, $(MAIN_TARGETS)),s) \
# variables are explicitly propagated using $(USER_MAKE_VARS).
main: MAKEOVERRIDES :=
-main: $(INIT_TARGETS)
+main: $(INIT_TARGETS) create-make-helpers
ifneq ($(SEQUENTIAL_TARGETS)$(PARALLEL_TARGETS), )
$(call RotateLogFiles)
$(ECHO) "Building $(TARGET_DESCRIPTION)" $(BUILD_LOG_PIPE_SIMPLE)
@@ -144,6 +151,9 @@ main: $(INIT_TARGETS)
( cd $(TOPDIR) && \
$(MAKE) $(MAKE_ARGS) -j 1 -f make/Main.gmk $(USER_MAKE_VARS) \
$(SEQUENTIAL_TARGETS) )
+ # We might have cleaned away essential files, recreate them.
+ ( cd $(TOPDIR) && \
+ $(MAKE) $(MAKE_ARGS) -j 1 -f make/Init.gmk create-make-helpers )
endif
ifneq ($(PARALLEL_TARGETS), )
$(call PrepareFailureLogs)
diff --git a/make/Main.gmk b/make/Main.gmk
index eda3b79265a..d0568509a4e 100644
--- a/make/Main.gmk
+++ b/make/Main.gmk
@@ -42,6 +42,12 @@ include MakeFileStart.gmk
include $(TOPDIR)/make/MainSupport.gmk
include FindTests.gmk
+
+ifeq ($(UPDATE_MODULE_DEPS), true)
+ # Update module-deps.gmk if requested. This is read in Modules.gmk.
+ GENERATE_MODULE_DEPS_FILE := true
+endif
+
include Modules.gmk
# Are we requested to ignore dependencies?
@@ -411,12 +417,14 @@ $(eval $(call SetupTarget, create-source-revision-tracker, \
))
BOOTCYCLE_TARGET := product-images
+BOOTCYCLE_SPEC := $(dir $(SPEC))bootcycle-spec.gmk
+
bootcycle-images:
ifneq ($(COMPILE_TYPE), cross)
$(call LogWarn, Boot cycle build step 2: Building a new JDK image using previously built image)
$(call MakeDir, $(OUTPUTDIR)/bootcycle-build)
+$(MAKE) $(MAKE_ARGS) -f $(TOPDIR)/make/Init.gmk PARALLEL_TARGETS=$(BOOTCYCLE_TARGET) \
- LOG_PREFIX="[bootcycle] " JOBS= SPEC=$(dir $(SPEC))bootcycle-spec.gmk main
+ LOG_PREFIX="[bootcycle] " JOBS= SPEC=$(BOOTCYCLE_SPEC) main
else
$(call LogWarn, Boot cycle build disabled when cross compiling)
endif
@@ -875,6 +883,12 @@ $(eval $(call SetupTarget, static-libs-graal-bundles, \
DEPS := static-libs-graal-image, \
))
+$(eval $(call SetupTarget, static-jdk-bundles, \
+ MAKEFILE := Bundles, \
+ TARGET := static-jdk-bundles, \
+ DEPS := static-jdk-image, \
+))
+
ifeq ($(JCOV_ENABLED), true)
$(eval $(call SetupTarget, jcov-bundles, \
MAKEFILE := Bundles, \
diff --git a/make/MainSupport.gmk b/make/MainSupport.gmk
index ae4858c35af..d8dc894c1e9 100644
--- a/make/MainSupport.gmk
+++ b/make/MainSupport.gmk
@@ -57,7 +57,7 @@ define SetupTargetBody
endef
define CleanDocs
- @$(ECHO) -n "Cleaning docs ..."
+ @$(PRINTF) "Cleaning docs ..."
@$(ECHO) "" $(LOG_DEBUG)
$(RM) -r $(SUPPORT_OUTPUTDIR)/docs
$(RM) -r $(SUPPORT_OUTPUTDIR)/javadoc
@@ -67,28 +67,28 @@ endef
# Cleans the dir given as $1
define CleanDir
- @$(ECHO) -n "Cleaning $(strip $1) build artifacts ..."
+ @$(PRINTF) "Cleaning %s build artifacts ..." "$(strip $1)"
@$(ECHO) "" $(LOG_DEBUG)
($(CD) $(OUTPUTDIR) && $(RM) -r $1)
@$(ECHO) " done"
endef
define CleanSupportDir
- @$(ECHO) -n "Cleaning$(strip $1) build artifacts ..."
+ @$(PRINTF) "Cleaning %s build artifacts ..." "$(strip $1)"
@$(ECHO) "" $(LOG_DEBUG)
$(RM) -r $(SUPPORT_OUTPUTDIR)/$(strip $1)
@$(ECHO) " done"
endef
define CleanMakeSupportDir
- @$(ECHO) -n "Cleaning $(strip $1) make support artifacts ..."
+ @$(PRINTF) "Cleaning %s make support artifacts ..." "$(strip $1)"
@$(ECHO) "" $(LOG_DEBUG)
$(RM) -r $(MAKESUPPORT_OUTPUTDIR)/$(strip $1)
@$(ECHO) " done"
endef
define CleanTest
- @$(ECHO) -n "Cleaning test $(strip $1) ..."
+ @$(PRINTF) "Cleaning test %s ..." "$(strip $1)"
@$(ECHO) "" $(LOG_DEBUG)
$(RM) -r $(SUPPORT_OUTPUTDIR)/test/$(strip $(subst -,/,$1))
# Remove as much of the test directory structure as is empty
@@ -97,25 +97,25 @@ define CleanTest
endef
define Clean-gensrc
- @$(ECHO) -n "Cleaning gensrc $(if $1,for $(strip $1) )..."
+ @$(PRINTF) "Cleaning gensrc %s..." "$(if $1,for $(strip $1) )"
@$(ECHO) "" $(LOG_DEBUG)
$(RM) -r $(SUPPORT_OUTPUTDIR)/gensrc/$(strip $1)
@$(ECHO) " done"
endef
define Clean-java
- @$(ECHO) -n "Cleaning java $(if $1,for $(strip $1) )..."
+ @$(PRINTF) "Cleaning java %s..." "$(if $1,for $(strip $1) )"
@$(ECHO) "" $(LOG_DEBUG)
$(RM) -r $(JDK_OUTPUTDIR)/modules/$(strip $1)
$(RM) -r $(SUPPORT_OUTPUTDIR)/special_classes/$(strip $1)
$(ECHO) " done"
- $(ECHO) -n "Cleaning headers $(if $1,for $(strip $1) )..."
+ $(PRINTF) "Cleaning headers %s..." "$(if $1,for $(strip $1) )"
$(RM) -r $(SUPPORT_OUTPUTDIR)/headers/$(strip $1)
@$(ECHO) " done"
endef
define Clean-native
- @$(ECHO) -n "Cleaning native $(if $1,for $(strip $1) )..."
+ @$(PRINTF) "Cleaning native %s..." "$(if $1,for $(strip $1) )"
@$(ECHO) "" $(LOG_DEBUG)
$(RM) -r $(SUPPORT_OUTPUTDIR)/native/$(strip $1)
$(RM) -r $(SUPPORT_OUTPUTDIR)/modules_libs/$(strip $1)
@@ -124,7 +124,7 @@ define Clean-native
endef
define Clean-include
- @$(ECHO) -n "Cleaning include $(if $1,for $(strip $1) )..."
+ @$(PRINTF) "Cleaning include %s..." "$(if $1,for $(strip $1) )"
@$(ECHO) "" $(LOG_DEBUG)
$(RM) -r $(SUPPORT_OUTPUTDIR)/modules_include/$(strip $1)
@$(ECHO) " done"
diff --git a/make/PreInit.gmk b/make/PreInit.gmk
index a01971e845d..3df44308dd9 100644
--- a/make/PreInit.gmk
+++ b/make/PreInit.gmk
@@ -50,7 +50,8 @@ include $(TOPDIR)/make/Global.gmk
# Targets provided by Init.gmk.
ALL_INIT_TARGETS := print-modules print-targets print-configuration \
- print-tests reconfigure pre-compare-build post-compare-build
+ print-tests reconfigure pre-compare-build post-compare-build \
+ create-make-helpers
# CALLED_TARGETS is the list of targets that the user provided,
# or "default" if unspecified.
@@ -161,19 +162,19 @@ ifneq ($(SKIP_SPEC), true)
( cd $(TOPDIR) && \
$(foreach spec, $(SPECS), \
$(MAKE) $(MAKE_INIT_ARGS) -j 1 -f $(TOPDIR)/make/Init.gmk \
- SPEC=$(spec) $(MAKE_INIT_MAIN_TARGET_ARGS) \
- main && \
+ SPEC=$(spec) TOPDIR_ALT=$(TOPDIR) \
+ $(MAKE_INIT_MAIN_TARGET_ARGS) main && \
$(if $(and $(COMPARE_BUILD), $(PARALLEL_TARGETS)), \
$(MAKE) $(MAKE_INIT_ARGS) -f $(TOPDIR)/make/Init.gmk \
- SPEC=$(spec) \
+ SPEC=$(spec) TOPDIR_ALT=$(TOPDIR) \
COMPARE_BUILD="$(COMPARE_BUILD)" \
pre-compare-build && \
$(MAKE) $(MAKE_INIT_ARGS) -j 1 -f $(TOPDIR)/make/Init.gmk \
- SPEC=$(spec) $(MAKE_INIT_MAIN_TARGET_ARGS) \
+ SPEC=$(spec) TOPDIR_ALT=$(TOPDIR) \
COMPARE_BUILD="$(COMPARE_BUILD):NODRYRUN=true" \
- main && \
+ $(MAKE_INIT_MAIN_TARGET_ARGS) main && \
$(MAKE) $(MAKE_INIT_ARGS) -f $(TOPDIR)/make/Init.gmk \
- SPEC=$(spec) \
+ SPEC=$(spec) TOPDIR_ALT=$(TOPDIR) \
COMPARE_BUILD="$(COMPARE_BUILD):NODRYRUN=true" \
post-compare-build && \
) \
diff --git a/make/PreInitSupport.gmk b/make/PreInitSupport.gmk
index 668363d8725..660e1214b5b 100644
--- a/make/PreInitSupport.gmk
+++ b/make/PreInitSupport.gmk
@@ -250,13 +250,14 @@ endef
# Param 1: FORCE = force generation of main-targets.gmk or LAZY = do not force.
# Param 2: The SPEC file to use.
define DefineMainTargets
+ SPEC_FILE := $(strip $2)
# We will start by making sure the main-targets.gmk file is removed, if
# make has not been restarted. By the -include, we will trigger the
# rule for generating the file (which is never there since we removed it),
# thus generating it fresh, and make will restart, incrementing the restart
# count.
- main_targets_file := $$(dir $(strip $2))make-support/main-targets.gmk
+ main_targets_file := $$(dir $$(SPEC_FILE))make-support/main-targets.gmk
ifeq ($$(MAKE_RESTARTS), )
# Only do this if make has not been restarted, and if we do not force it.
@@ -267,11 +268,12 @@ define DefineMainTargets
$$(main_targets_file):
@( cd $$(TOPDIR) && \
- $$(MAKE) $$(MAKE_LOG_FLAGS) -r -R -f $$(TOPDIR)/make/GenerateFindTests.gmk \
- -I $$(TOPDIR)/make/common SPEC=$(strip $2) )
+ $$(MAKE) $$(MAKE_LOG_FLAGS) -s -r -R -f $$(TOPDIR)/make/GenerateFindTests.gmk \
+ -I $$(TOPDIR)/make/common SPEC=$$(SPEC_FILE) TOPDIR_ALT=$$(TOPDIR))
@( cd $$(TOPDIR) && \
- $$(MAKE) $$(MAKE_LOG_FLAGS) -r -R -f $$(TOPDIR)/make/Main.gmk \
- -I $$(TOPDIR)/make/common SPEC=$(strip $2) NO_RECIPES=true \
+ $$(MAKE) $$(MAKE_LOG_FLAGS) -s -r -R -f $$(TOPDIR)/make/Main.gmk \
+ -I $$(TOPDIR)/make/common SPEC=$$(SPEC_FILE) TOPDIR_ALT=$$(TOPDIR) \
+ UPDATE_MODULE_DEPS=true NO_RECIPES=true \
$$(MAKE_LOG_VARS) \
create-main-targets-include )
diff --git a/make/RunTests.gmk b/make/RunTests.gmk
index 80c1ff99b2e..60ae1bd4763 100644
--- a/make/RunTests.gmk
+++ b/make/RunTests.gmk
@@ -115,6 +115,7 @@ JTREG_COV_OPTIONS :=
ifeq ($(TEST_OPTS_JCOV), true)
JCOV_OUTPUT_DIR := $(TEST_RESULTS_DIR)/jcov-output
+ JCOV_SUPPORT_DIR := $(TEST_SUPPORT_DIR)/jcov-support
JCOV_GRABBER_LOG := $(JCOV_OUTPUT_DIR)/grabber.log
JCOV_RESULT_FILE := $(JCOV_OUTPUT_DIR)/result.xml
JCOV_REPORT := $(JCOV_OUTPUT_DIR)/report
@@ -582,6 +583,8 @@ define SetMicroValue
else
ifneq ($3, )
$1_$2 := $3
+ else
+ $1_$2 :=
endif
endif
endef
@@ -708,6 +711,8 @@ define SetJtregValue
else
ifneq ($3, )
$1_$2 := $3
+ else
+ $1_$2 :=
endif
endif
endif
@@ -720,6 +725,7 @@ endef
# Parameter 1 is the name of the rule.
#
# Remaining parameters are named arguments.
+# TRAINING The AOT training mode: onestep or twostep
# VM_OPTIONS List of JVM arguments to use when creating AOT cache
#
# After calling this, the following variables are defined
@@ -748,23 +754,39 @@ define SetupAOTBody
$$($1_AOT_JDK_CACHE): $$(JDK_IMAGE_DIR)/release
$$(call MakeDir, $$($1_AOT_JDK_OUTPUT_DIR))
- $$(call LogWarn, AOT: Create cache configuration) \
- $$(call ExecuteWithLog, $$($1_AOT_JDK_OUTPUT_DIR), ( \
- cd $$($1_AOT_JDK_OUTPUT_DIR); \
- $(JAR) --extract --file $(TEST_IMAGE_DIR)/setup_aot/TestSetupAOT.jar; \
- $$(FIXPATH) $(JDK_UNDER_TEST)/bin/java $$($1_VM_OPTIONS) \
- -Xlog:class+load,cds,cds+class=debug:file=$$($1_AOT_JDK_CONF).log -Xlog:cds*=error \
- -XX:AOTMode=record -XX:AOTConfiguration=$$($1_AOT_JDK_CONF) \
- TestSetupAOT $$($1_AOT_JDK_OUTPUT_DIR) > $$($1_AOT_JDK_LOG) \
- ))
-
- $$(call LogWarn, AOT: Generate AOT cache $$($1_AOT_JDK_CACHE) with flags: $$($1_VM_OPTIONS))
- $$(call ExecuteWithLog, $$($1_AOT_JDK_OUTPUT_DIR), ( \
- $$(FIXPATH) $(JDK_UNDER_TEST)/bin/java \
- $$($1_VM_OPTIONS) -Xlog:cds,cds+class=debug:file=$$($1_AOT_JDK_CACHE).log -Xlog:cds*=error \
- -XX:ExtraSharedClassListFile=$(JDK_UNDER_TEST)/lib/classlist \
- -XX:AOTMode=create -XX:AOTConfiguration=$$($1_AOT_JDK_CONF) -XX:AOTCache=$$($1_AOT_JDK_CACHE) \
- ))
+ ifeq ($$($1_TRAINING), onestep)
+
+ $$(call LogWarn, AOT: Create AOT cache $$($1_AOT_JDK_CACHE) in one step with flags: $$($1_VM_OPTIONS)) \
+ $$(call ExecuteWithLog, $$($1_AOT_JDK_OUTPUT_DIR), ( \
+ cd $$($1_AOT_JDK_OUTPUT_DIR); \
+ $(JAR) --extract --file $(TEST_IMAGE_DIR)/setup_aot/TestSetupAOT.jar; \
+ $$(FIXPATH) $(JDK_UNDER_TEST)/bin/java $$($1_VM_OPTIONS) \
+ -Xlog:class+load,aot,aot+class=debug:file=$$($1_AOT_JDK_CACHE).log -Xlog:cds*=error -Xlog:aot*=error \
+ -XX:AOTMode=record -XX:AOTCacheOutput=$$($1_AOT_JDK_CACHE) \
+ TestSetupAOT $$($1_AOT_JDK_OUTPUT_DIR) > $$($1_AOT_JDK_LOG) \
+ ))
+
+ else
+
+ $$(call LogWarn, AOT: Create cache configuration) \
+ $$(call ExecuteWithLog, $$($1_AOT_JDK_OUTPUT_DIR), ( \
+ cd $$($1_AOT_JDK_OUTPUT_DIR); \
+ $(JAR) --extract --file $(TEST_IMAGE_DIR)/setup_aot/TestSetupAOT.jar; \
+ $$(FIXPATH) $(JDK_UNDER_TEST)/bin/java $$($1_VM_OPTIONS) \
+ -Xlog:class+load,aot,aot+class=debug:file=$$($1_AOT_JDK_CONF).log -Xlog:cds*=error -Xlog:aot*=error \
+ -XX:AOTMode=record -XX:AOTConfiguration=$$($1_AOT_JDK_CONF) \
+ TestSetupAOT $$($1_AOT_JDK_OUTPUT_DIR) > $$($1_AOT_JDK_LOG) \
+ ))
+
+ $$(call LogWarn, AOT: Generate AOT cache $$($1_AOT_JDK_CACHE) with flags: $$($1_VM_OPTIONS))
+ $$(call ExecuteWithLog, $$($1_AOT_JDK_OUTPUT_DIR), ( \
+ $$(FIXPATH) $(JDK_UNDER_TEST)/bin/java \
+ $$($1_VM_OPTIONS) -Xlog:aot,aot+class=debug:file=$$($1_AOT_JDK_CACHE).log -Xlog:cds*=error -Xlog:aot*=error \
+ -XX:ExtraSharedClassListFile=$(JDK_UNDER_TEST)/lib/classlist \
+ -XX:AOTMode=create -XX:AOTConfiguration=$$($1_AOT_JDK_CONF) -XX:AOTCache=$$($1_AOT_JDK_CACHE) \
+ ))
+
+ endif
$1_AOT_TARGETS += $$($1_AOT_JDK_CACHE)
@@ -830,7 +852,7 @@ define SetupRunJtregTestBody
JTREG_RETRY_COUNT ?= 0
JTREG_REPEAT_COUNT ?= 0
JTREG_REPORT ?= files
- JTREG_AOT_JDK ?= false
+ JTREG_AOT_JDK ?= none
ifneq ($$(JTREG_RETRY_COUNT), 0)
ifneq ($$(JTREG_REPEAT_COUNT), 0)
@@ -840,6 +862,12 @@ define SetupRunJtregTestBody
endif
endif
+ ifeq ($$(JTREG_RUN_PROBLEM_LISTS), true)
+ JTREG_PROBLEM_LIST_PREFIX := -match:
+ else
+ JTREG_PROBLEM_LIST_PREFIX := -exclude:
+ endif
+
ifneq ($$(JTREG_TEST_THREAD_FACTORY), )
$1_JTREG_BASIC_OPTIONS += -testThreadFactoryPath:$$(JTREG_TEST_THREAD_FACTORY_JAR)
$1_JTREG_BASIC_OPTIONS += -testThreadFactory:$$(JTREG_TEST_THREAD_FACTORY)
@@ -868,7 +896,7 @@ define SetupRunJtregTestBody
# version of the JDK.
$1_JTREG_BASIC_OPTIONS += -$$($1_JTREG_TEST_MODE) \
-verbose:$$(JTREG_VERBOSE) -retain:$$(JTREG_RETAIN) \
- -concurrency:$$($1_JTREG_JOBS) -timeoutFactor:$$(JTREG_TIMEOUT_FACTOR) \
+ -concurrency:$$($1_JTREG_JOBS) \
-vmoption:-XX:MaxRAMPercentage=$$($1_JTREG_MAX_RAM_PERCENTAGE) \
-vmoption:-Dtest.boot.jdk="$$(BOOT_JDK)" \
-vmoption:-Djava.io.tmpdir="$$($1_TEST_TMP_DIR)"
@@ -901,12 +929,6 @@ define SetupRunJtregTestBody
$1_JTREG_BASIC_OPTIONS += -nativepath:$$($1_JTREG_NATIVEPATH)
endif
- ifeq ($$(JTREG_RUN_PROBLEM_LISTS), true)
- JTREG_PROBLEM_LIST_PREFIX := -match:
- else
- JTREG_PROBLEM_LIST_PREFIX := -exclude:
- endif
-
ifneq ($$($1_JTREG_PROBLEM_LIST), )
$1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$($1_JTREG_PROBLEM_LIST))
endif
@@ -929,6 +951,11 @@ define SetupRunJtregTestBody
JTREG_AUTO_PROBLEM_LISTS += ProblemList-shenandoah.txt
endif
+ ifneq ($$(findstring --enable-preview, $$(JTREG_ALL_OPTIONS)), )
+ JTREG_AUTO_PROBLEM_LISTS += ProblemList-enable-preview.txt
+ endif
+
+
ifneq ($$(JTREG_EXTRA_PROBLEM_LISTS), )
# Accept both absolute paths as well as relative to the current test root.
$1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$(wildcard \
@@ -965,12 +992,12 @@ define SetupRunJtregTestBody
endif
endif
- ifeq ($$(JTREG_AOT_JDK), true)
+ ifneq ($$(filter $$(JTREG_AOT_JDK), onestep twostep), )
$$(call LogWarn, Add AOT target for $1)
- $$(eval $$(call SetupAOT, $1, VM_OPTIONS := $$(JTREG_ALL_OPTIONS) ))
-
+ $$(eval $$(call SetupAOT, $1, \
+ TRAINING := $$(JTREG_AOT_JDK), \
+ VM_OPTIONS := $$(JTREG_ALL_OPTIONS) ))
$$(call LogWarn, AOT_JDK_CACHE=$$($1_AOT_JDK_CACHE))
-
$1_JTREG_BASIC_OPTIONS += -vmoption:-XX:AOTCache="$$($1_AOT_JDK_CACHE)"
endif
@@ -988,6 +1015,7 @@ define SetupRunJtregTestBody
endif
JTREG_TIMEOUT_FACTOR ?= $$(JTREG_AUTO_TIMEOUT_FACTOR)
+ $1_JTREG_BASIC_OPTIONS += -timeoutFactor:$$(JTREG_TIMEOUT_FACTOR)
clean-outputdirs-$1:
$$(call LogWarn, Clean up dirs for $1)
@@ -1338,12 +1366,14 @@ TARGETS += run-all-tests pre-run-test post-run-test run-test-report run-test
ifeq ($(TEST_OPTS_JCOV), true)
+ JCOV_VM_OPTS := -Xmx4g -Djdk.xml.totalEntitySizeLimit=0 -Djdk.xml.maxGeneralEntitySizeLimit=0
+
jcov-do-start-grabber:
$(call MakeDir, $(JCOV_OUTPUT_DIR))
if $(JAVA) -jar $(JCOV_HOME)/lib/jcov.jar GrabberManager -status 1>/dev/null 2>&1 ; then \
$(JAVA) -jar $(JCOV_HOME)/lib/jcov.jar GrabberManager -stop -stoptimeout 3600 ; \
fi
- $(JAVA) -Xmx4g -jar $(JCOV_HOME)/lib/jcov.jar Grabber -v -t \
+ $(JAVA) $(JCOV_VM_OPTS) -jar $(JCOV_HOME)/lib/jcov.jar Grabber -v -t \
$(JCOV_IMAGE_DIR)/template.xml -o $(JCOV_RESULT_FILE) \
1>$(JCOV_GRABBER_LOG) 2>&1 &
@@ -1356,6 +1386,10 @@ ifeq ($(TEST_OPTS_JCOV), true)
$(JAVA) -jar $(JCOV_HOME)/lib/jcov.jar GrabberManager -stop -stoptimeout 3600
JCOV_REPORT_TITLE := JDK code coverage report
+ ifneq ($(JCOV_MODULES), )
+ JCOV_MODULES_FILTER := $(foreach m, $(JCOV_MODULES), -include_module $m)
+ JCOV_REPORT_TITLE += Included modules: $(JCOV_MODULES)
+ endif
ifneq ($(JCOV_FILTERS), )
JCOV_REPORT_TITLE += Code filters: $(JCOV_FILTERS)
endif
@@ -1363,11 +1397,12 @@ ifeq ($(TEST_OPTS_JCOV), true)
jcov-gen-report: jcov-stop-grabber
$(call LogWarn, Generating JCov report ...)
- $(JAVA) -Xmx4g -jar $(JCOV_HOME)/lib/jcov.jar RepGen -sourcepath \
+ $(call ExecuteWithLog, $(JCOV_SUPPORT_DIR)/run-jcov-repgen, \
+ $(JAVA) $(JCOV_VM_OPTS) -jar $(JCOV_HOME)/lib/jcov.jar RepGen -sourcepath \
`$(ECHO) $(TOPDIR)/src/*/share/classes/ | $(TR) ' ' ':'` -fmt html \
- $(JCOV_FILTERS) \
+ $(JCOV_MODULES_FILTER) $(JCOV_FILTERS) \
-mainReportTitle "$(JCOV_REPORT_TITLE)" \
- -o $(JCOV_REPORT) $(JCOV_RESULT_FILE)
+ -o $(JCOV_REPORT) $(JCOV_RESULT_FILE))
TARGETS += jcov-do-start-grabber jcov-start-grabber jcov-stop-grabber \
jcov-gen-report
@@ -1387,7 +1422,7 @@ ifeq ($(TEST_OPTS_JCOV), true)
jcov-gen-diffcoverage: jcov-stop-grabber
$(call LogWarn, Generating diff coverage with changeset $(TEST_OPTS_JCOV_DIFF_CHANGESET) ... )
$(DIFF_COMMAND)
- $(JAVA) -Xmx4g -jar $(JCOV_HOME)/lib/jcov.jar \
+ $(JAVA) $(JCOV_VM_OPTS) -jar $(JCOV_HOME)/lib/jcov.jar \
DiffCoverage -replaceDiff "src/.*/classes/:" -all \
$(JCOV_RESULT_FILE) $(JCOV_SOURCE_DIFF) > \
$(JCOV_DIFF_COVERAGE_REPORT)
diff --git a/make/RunTestsPrebuilt.gmk b/make/RunTestsPrebuilt.gmk
index ba9731789b0..f5fe1d33830 100644
--- a/make/RunTestsPrebuilt.gmk
+++ b/make/RunTestsPrebuilt.gmk
@@ -217,9 +217,9 @@ else ifeq ($(OPENJDK_TARGET_OS), macosx)
else ifeq ($(OPENJDK_TARGET_OS), windows)
NUM_CORES := $(NUMBER_OF_PROCESSORS)
MEMORY_SIZE := $(shell \
- $(EXPR) `wmic computersystem get totalphysicalmemory -value \
- | $(GREP) = | $(SED) 's/\\r//g' \
- | $(CUT) -d "=" -f 2-` / 1024 / 1024 \
+ $(EXPR) `powershell -Command \
+ "(Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory" \
+ | $(SED) 's/\\r//g' ` / 1024 / 1024 \
)
endif
ifeq ($(NUM_CORES), )
diff --git a/make/autoconf/basic.m4 b/make/autoconf/basic.m4
index 6daba35547b..2d3e071dd52 100644
--- a/make/autoconf/basic.m4
+++ b/make/autoconf/basic.m4
@@ -134,17 +134,33 @@ AC_DEFUN_ONCE([BASIC_SETUP_BUILD_ENV],
)
AC_SUBST(BUILD_ENV)
+ AC_MSG_CHECKING([for locale to use])
if test "x$LOCALE" != x; then
# Check if we actually have C.UTF-8; if so, use it
if $LOCALE -a | $GREP -q -E "^C\.(utf8|UTF-8)$"; then
LOCALE_USED=C.UTF-8
+ AC_MSG_RESULT([C.UTF-8 (recommended)])
+ elif $LOCALE -a | $GREP -q -E "^en_US\.(utf8|UTF-8)$"; then
+ LOCALE_USED=en_US.UTF-8
+ AC_MSG_RESULT([en_US.UTF-8 (acceptable fallback)])
else
- AC_MSG_WARN([C.UTF-8 locale not found, using C locale])
- LOCALE_USED=C
+ # As a fallback, check if the user's locale is UTF-8. USER_LOCALE was saved
+ # by the wrapper configure script before autoconf messed up LC_ALL.
+ if $ECHO $USER_LOCALE | $GREP -q -E "\.(utf8|UTF-8)$"; then
+ LOCALE_USED=$USER_LOCALE
+ AC_MSG_RESULT([$USER_LOCALE (untested fallback)])
+ AC_MSG_WARN([Could not find C.UTF-8 or en_US.UTF-8 locale. This is not supported, and the build might fail unexpectedly.])
+ else
+ AC_MSG_RESULT([no UTF-8 locale found])
+ AC_MSG_WARN([No UTF-8 locale found. This is not supported. Proceeding with the C locale, but the build might fail unexpectedly.])
+ LOCALE_USED=C
+ fi
+ AC_MSG_NOTICE([The recommended locale is C.UTF-8, but en_US.UTF-8 is also accepted.])
fi
else
- AC_MSG_WARN([locale command not not found, using C locale])
- LOCALE_USED=C
+ LOCALE_USED=C.UTF-8
+ AC_MSG_RESULT([C.UTF-8 (default)])
+ AC_MSG_WARN([locale command not found, using C.UTF-8 locale])
fi
export LC_ALL=$LOCALE_USED
@@ -399,11 +415,21 @@ AC_DEFUN_ONCE([BASIC_SETUP_OUTPUT_DIR],
[ CONF_NAME=${with_conf_name} ])
# Test from where we are running configure, in or outside of src root.
+ if test "x$OPENJDK_BUILD_OS" = xwindows || test "x$OPENJDK_BUILD_OS" = "xmacosx"; then
+ # These systems have case insensitive paths, so convert them to lower case.
+ [ cmp_configure_start_dir=`$ECHO $CONFIGURE_START_DIR | $TR '[:upper:]' '[:lower:]'` ]
+ [ cmp_topdir=`$ECHO $TOPDIR | $TR '[:upper:]' '[:lower:]'` ]
+ [ cmp_custom_root=`$ECHO $CUSTOM_ROOT | $TR '[:upper:]' '[:lower:]'` ]
+ else
+ cmp_configure_start_dir="$CONFIGURE_START_DIR"
+ cmp_topdir="$TOPDIR"
+ cmp_custom_root="$CUSTOM_ROOT"
+ fi
AC_MSG_CHECKING([where to store configuration])
- if test "x$CONFIGURE_START_DIR" = "x$TOPDIR" \
- || test "x$CONFIGURE_START_DIR" = "x$CUSTOM_ROOT" \
- || test "x$CONFIGURE_START_DIR" = "x$TOPDIR/make/autoconf" \
- || test "x$CONFIGURE_START_DIR" = "x$TOPDIR/make" ; then
+ if test "x$cmp_configure_start_dir" = "x$cmp_topdir" \
+ || test "x$cmp_configure_start_dir" = "x$cmp_custom_root" \
+ || test "x$cmp_configure_start_dir" = "x$cmp_topdir/make/autoconf" \
+ || test "x$cmp_configure_start_dir" = "x$cmp_topdir/make" ; then
# We are running configure from the src root.
# Create a default ./build/target-variant-debuglevel output root.
if test "x${CONF_NAME}" = x; then
@@ -424,7 +450,12 @@ AC_DEFUN_ONCE([BASIC_SETUP_OUTPUT_DIR],
# If configuration is situated in normal build directory, just use the build
# directory name as configuration name, otherwise use the complete path.
if test "x${CONF_NAME}" = x; then
- CONF_NAME=`$ECHO $CONFIGURE_START_DIR | $SED -e "s!^${TOPDIR}/build/!!"`
+ [ if [[ "$cmp_configure_start_dir" =~ ^${cmp_topdir}/build/[^/]+$ ||
+ "$cmp_configure_start_dir" =~ ^${cmp_custom_root}/build/[^/]+$ ]]; then ]
+ CONF_NAME="${CONFIGURE_START_DIR##*/}"
+ else
+ CONF_NAME="$CONFIGURE_START_DIR"
+ fi
fi
OUTPUTDIR="$CONFIGURE_START_DIR"
AC_MSG_RESULT([in current directory])
diff --git a/make/autoconf/basic_windows.m4 b/make/autoconf/basic_windows.m4
index fb6fc526bfa..dac6ec15db6 100644
--- a/make/autoconf/basic_windows.m4
+++ b/make/autoconf/basic_windows.m4
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -159,7 +159,7 @@ AC_DEFUN([BASIC_SETUP_PATHS_WINDOWS],
else
WINENV_PREFIX_ARG="$WINENV_PREFIX"
fi
- FIXPATH_ARGS="-e $PATHTOOL -p $WINENV_PREFIX_ARG -r ${WINENV_ROOT//\\/\\\\} -t $WINENV_TEMP_DIR -c $CMD -q"
+ FIXPATH_ARGS="-e $PATHTOOL -p $WINENV_PREFIX_ARG -r ${WINENV_ROOT//\\/\\\\} -t $WINENV_TEMP_DIR -c $CMD"
FIXPATH_BASE="$BASH $FIXPATH_DIR/fixpath.sh $FIXPATH_ARGS"
FIXPATH="$FIXPATH_BASE exec"
@@ -215,7 +215,7 @@ AC_DEFUN([BASIC_WINDOWS_FINALIZE_FIXPATH],
if test "x$OPENJDK_BUILD_OS" = xwindows; then
FIXPATH_CMDLINE=". $TOPDIR/make/scripts/fixpath.sh -e $PATHTOOL \
-p $WINENV_PREFIX_ARG -r ${WINENV_ROOT//\\/\\\\} -t $WINENV_TEMP_DIR \
- -c $CMD -q"
+ -c $CMD"
$ECHO > $OUTPUTDIR/fixpath '#!/bin/bash'
$ECHO >> $OUTPUTDIR/fixpath export PATH='"[$]PATH:'$PATH'"'
$ECHO >> $OUTPUTDIR/fixpath $FIXPATH_CMDLINE '"[$]@"'
diff --git a/make/autoconf/boot-jdk.m4 b/make/autoconf/boot-jdk.m4
index d39e6e75a94..feb16c7d179 100644
--- a/make/autoconf/boot-jdk.m4
+++ b/make/autoconf/boot-jdk.m4
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -180,11 +180,13 @@ AC_DEFUN([BOOTJDK_CHECK_JAVA_HOME],
# Test: Is there a java or javac in the PATH, which is a symlink to the JDK?
AC_DEFUN([BOOTJDK_CHECK_JAVA_IN_PATH_IS_SYMLINK],
[
- UTIL_LOOKUP_PROGS(JAVAC_CHECK, javac, , NOFIXPATH)
- UTIL_LOOKUP_PROGS(JAVA_CHECK, java, , NOFIXPATH)
- BINARY="$JAVAC_CHECK"
- if test "x$JAVAC_CHECK" = x; then
- BINARY="$JAVA_CHECK"
+ UTIL_LOOKUP_PROGS(JAVAC_CHECK, javac)
+ UTIL_GET_EXECUTABLE(JAVAC_CHECK) # Will setup JAVAC_CHECK_EXECUTABLE
+ UTIL_LOOKUP_PROGS(JAVA_CHECK, java)
+ UTIL_GET_EXECUTABLE(JAVA_CHECK) # Will setup JAVA_CHECK_EXECUTABLE
+ BINARY="$JAVAC_CHECK_EXECUTABLE"
+ if test "x$JAVAC_CHECK_EXECUTABLE" = x; then
+ BINARY="$JAVA_CHECK_EXECUTABLE"
fi
if test "x$BINARY" != x; then
# So there is a java(c) binary, it might be part of a JDK.
diff --git a/make/autoconf/build-performance.m4 b/make/autoconf/build-performance.m4
index 4414ea0d93c..10e86e75199 100644
--- a/make/autoconf/build-performance.m4
+++ b/make/autoconf/build-performance.m4
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -75,7 +75,8 @@ AC_DEFUN([BPERF_CHECK_MEMORY_SIZE],
FOUND_MEM=yes
elif test "x$OPENJDK_BUILD_OS" = xwindows; then
# Windows, but without cygwin
- MEMORY_SIZE=`wmic computersystem get totalphysicalmemory -value | grep = | cut -d "=" -f 2-`
+ MEMORY_SIZE=`powershell -Command \
+ "(Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory" | $SED 's/\\r//g' `
MEMORY_SIZE=`expr $MEMORY_SIZE / 1024 / 1024`
FOUND_MEM=yes
fi
diff --git a/make/autoconf/configure b/make/autoconf/configure
index 6fa0aacfbc9..443a37bae77 100644
--- a/make/autoconf/configure
+++ b/make/autoconf/configure
@@ -49,7 +49,9 @@ fi
export CONFIG_SHELL=$BASH
export _as_can_reexec=no
-# Make sure all shell commands are executed with the C locale
+# Save user's current locale, but make sure all future shell commands are
+# executed with the C locale
+export USER_LOCALE=$LC_ALL
export LC_ALL=C
if test "x$CUSTOM_CONFIG_DIR" != x; then
diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4
index bafedddf04f..e80d9a98957 100644
--- a/make/autoconf/flags-cflags.m4
+++ b/make/autoconf/flags-cflags.m4
@@ -573,12 +573,20 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_HELPER],
TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK -fvisibility=hidden -fstack-protector"
elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
- # The -utf-8 option sets source and execution character sets to UTF-8 to enable correct
- # compilation of all source files regardless of the active code page on Windows.
- TOOLCHAIN_CFLAGS_JVM="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -utf-8 -MP"
- TOOLCHAIN_CFLAGS_JDK="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -utf-8 -Zc:wchar_t-"
+ TOOLCHAIN_CFLAGS_JVM="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -MP"
+ TOOLCHAIN_CFLAGS_JDK="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -Zc:wchar_t-"
fi
+ # Set character encoding in source
+ if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
+ CHARSET_CFLAGS="-finput-charset=utf-8"
+ elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
+ # The -utf-8 option sets both source and execution character sets
+ CHARSET_CFLAGS="-utf-8 -validate-charset"
+ fi
+ TOOLCHAIN_CFLAGS_JVM="$TOOLCHAIN_CFLAGS_JVM $CHARSET_CFLAGS"
+ TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK $CHARSET_CFLAGS"
+
# CFLAGS C language level for JDK sources (hotspot only uses C++)
if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
LANGSTD_CFLAGS="-std=c11"
@@ -724,8 +732,7 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_CPU_DEP],
$1_CFLAGS_CPU_JVM="-mno-multiple -mno-string"
if test "x$FLAGS_CPU" = xppc64; then
# -mminimal-toc fixes `relocation truncated to fit' error for gcc 4.1.
- # Use ppc64 instructions, but schedule for power5
- $1_CFLAGS_CPU="-mcpu=powerpc64 -mtune=power5"
+ $1_CFLAGS_CPU="-mcpu=power8 -mtune=power8"
$1_CFLAGS_CPU_JVM="${$1_CFLAGS_CPU_JVM} -mminimal-toc"
elif test "x$FLAGS_CPU" = xppc64le; then
# Little endian machine uses ELFv2 ABI.
diff --git a/make/autoconf/help.m4 b/make/autoconf/help.m4
index 93796d27f06..d8c0b2ffaef 100644
--- a/make/autoconf/help.m4
+++ b/make/autoconf/help.m4
@@ -292,12 +292,12 @@ AC_DEFUN_ONCE([HELP_PRINT_SUMMARY_AND_WARNINGS],
$ECHO "* Debug level: $DEBUG_LEVEL"
$ECHO "* HS debug level: $HOTSPOT_DEBUG_LEVEL"
$ECHO "* JVM variants: $JVM_VARIANTS"
- $ECHO -n "* JVM features: "
+ $PRINTF "* JVM features: "
for variant in $JVM_VARIANTS; do
features_var_name=JVM_FEATURES_$variant
JVM_FEATURES_FOR_VARIANT=${!features_var_name}
- $ECHO -n "$variant: '$JVM_FEATURES_FOR_VARIANT' "
+ $PRINTF "%s: \'%s\' " "$variant" "$JVM_FEATURES_FOR_VARIANT"
done
$ECHO ""
diff --git a/make/autoconf/jdk-options.m4 b/make/autoconf/jdk-options.m4
index 79e44dd4ad1..289ed935fdf 100644
--- a/make/autoconf/jdk-options.m4
+++ b/make/autoconf/jdk-options.m4
@@ -405,10 +405,19 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_CODE_COVERAGE],
JCOV_FILTERS="$with_jcov_filters"
fi
fi
+
+ UTIL_ARG_WITH(NAME: jcov-modules, TYPE: string,
+ DEFAULT: [], RESULT: JCOV_MODULES_COMMMA_SEPARATED,
+ DESC: [which modules to include in jcov (comma-separated)],
+ OPTIONAL: true)
+
+ # Replace "," with " ".
+ JCOV_MODULES=${JCOV_MODULES_COMMMA_SEPARATED//,/ }
AC_SUBST(JCOV_ENABLED)
AC_SUBST(JCOV_HOME)
AC_SUBST(JCOV_INPUT_JDK)
AC_SUBST(JCOV_FILTERS)
+ AC_SUBST(JCOV_MODULES)
])
################################################################################
@@ -520,8 +529,21 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_UNDEFINED_BEHAVIOR_SANITIZER],
# Silence them for now.
UBSAN_CHECKS="-fsanitize=undefined -fsanitize=float-divide-by-zero -fno-sanitize=shift-base -fno-sanitize=alignment \
$ADDITIONAL_UBSAN_CHECKS"
- UBSAN_CFLAGS="$UBSAN_CHECKS -Wno-stringop-truncation -Wno-format-overflow -Wno-array-bounds -Wno-stringop-overflow -fno-omit-frame-pointer -DUNDEFINED_BEHAVIOR_SANITIZER"
+ UBSAN_CFLAGS="$UBSAN_CHECKS -Wno-array-bounds -fno-omit-frame-pointer -DUNDEFINED_BEHAVIOR_SANITIZER"
+ if test "x$TOOLCHAIN_TYPE" = "xgcc"; then
+ UBSAN_CFLAGS="$UBSAN_CFLAGS -Wno-format-overflow -Wno-stringop-overflow -Wno-stringop-truncation"
+ fi
UBSAN_LDFLAGS="$UBSAN_CHECKS"
+ # On AIX, the llvm_symbolizer is not found out of the box, so we have to provide the
+ # fully qualified llvm_symbolizer path in the __ubsan_default_options() function in
+ # make/data/ubsan/ubsan_default_options.c. To get it there we compile our sources
+ # with an additional define LLVM_SYMBOLIZER, which we set here.
+ # To calculate the correct llvm_symbolizer path we can use the location of the compiler, because
+ # the relative path between them is fixed.
+ if test "x$TOOLCHAIN_TYPE" = "xclang" && test "x$OPENJDK_TARGET_OS" = "xaix"; then
+ UBSAN_CFLAGS="$UBSAN_CFLAGS -fno-sanitize=function,vptr -DLLVM_SYMBOLIZER=$(dirname $(dirname $CC))/tools/ibm-llvm-symbolizer"
+ UBSAN_LDFLAGS="$UBSAN_LDFLAGS -fno-sanitize=function,vptr -Wl,-bbigtoc"
+ fi
UTIL_ARG_ENABLE(NAME: ubsan, DEFAULT: false, RESULT: UBSAN_ENABLED,
DESC: [enable UndefinedBehaviorSanitizer],
CHECK_AVAILABLE: [
diff --git a/make/autoconf/lib-tests.m4 b/make/autoconf/lib-tests.m4
index d2a4fcbb191..9eb5ee5a046 100644
--- a/make/autoconf/lib-tests.m4
+++ b/make/autoconf/lib-tests.m4
@@ -28,7 +28,7 @@
################################################################################
# Minimum supported versions
-JTREG_MINIMUM_VERSION=7.5.1
+JTREG_MINIMUM_VERSION=7.5.2
GTEST_MINIMUM_VERSION=1.14.0
################################################################################
diff --git a/make/autoconf/spec.gmk.template b/make/autoconf/spec.gmk.template
index 907a60290ec..e720916d88a 100644
--- a/make/autoconf/spec.gmk.template
+++ b/make/autoconf/spec.gmk.template
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -454,6 +454,7 @@ JCOV_ENABLED := @JCOV_ENABLED@
JCOV_HOME := @JCOV_HOME@
JCOV_INPUT_JDK := @JCOV_INPUT_JDK@
JCOV_FILTERS := @JCOV_FILTERS@
+JCOV_MODULES := @JCOV_MODULES@
# AddressSanitizer
ASAN_ENABLED := @ASAN_ENABLED@
@@ -846,10 +847,12 @@ SVE_CFLAGS := @SVE_CFLAGS@
JDK_IMAGE_SUBDIR := jdk
JRE_IMAGE_SUBDIR := jre
JCOV_IMAGE_SUBDIR := jdk-jcov
+STATIC_JDK_IMAGE_SUBDIR := static-jdk
# Colon left out to be able to override output dir for bootcycle-images
JDK_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(JDK_IMAGE_SUBDIR)
JRE_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(JRE_IMAGE_SUBDIR)
+STATIC_JDK_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(STATIC_JDK_IMAGE_SUBDIR)
JCOV_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(JCOV_IMAGE_SUBDIR)
# Test image, as above
@@ -929,6 +932,7 @@ DOCS_JAVASE_BUNDLE_NAME := javase-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
DOCS_REFERENCE_BUNDLE_NAME := jdk-reference-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
STATIC_LIBS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-static-libs$(DEBUG_PART).tar.gz
STATIC_LIBS_GRAAL_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-static-libs-graal$(DEBUG_PART).tar.gz
+STATIC_JDK_BUNDLE_NAME := static-jdk-$(BASE_NAME)_bin$(DEBUG_PART).$(JDK_BUNDLE_EXTENSION)
JCOV_BUNDLE_NAME := jdk-jcov-$(BASE_NAME)_bin$(DEBUG_PART).$(JDK_BUNDLE_EXTENSION)
JDK_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JDK_BUNDLE_NAME)
@@ -939,6 +943,7 @@ TEST_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(TEST_BUNDLE_NAME)
DOCS_JDK_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_JDK_BUNDLE_NAME)
DOCS_JAVASE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_JAVASE_BUNDLE_NAME)
DOCS_REFERENCE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_REFERENCE_BUNDLE_NAME)
+STATIC_JDK_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(STATIC_JDK_BUNDLE_NAME)
JCOV_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JCOV_BUNDLE_NAME)
# This macro is called to allow inclusion of closed source counterparts.
diff --git a/make/autoconf/util_paths.m4 b/make/autoconf/util_paths.m4
index 9e3e5472c9e..40864680aad 100644
--- a/make/autoconf/util_paths.m4
+++ b/make/autoconf/util_paths.m4
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -58,21 +58,32 @@ AC_DEFUN([UTIL_PREPEND_TO_PATH],
# 2) The path will be absolute, and it will be in unix-style (on
# cygwin).
# $1: The name of the variable to fix
-# $2: if NOFAIL, errors will be silently ignored
+# $2: if NOFAIL, if the path cannot be resolved then errors will not be
+# reported and an empty path will be set
AC_DEFUN([UTIL_FIXUP_PATH],
[
# Only process if variable expands to non-empty
path="[$]$1"
if test "x$path" != x; then
if test "x$OPENJDK_BUILD_OS" = "xwindows"; then
- if test "x$2" = "xNOFAIL"; then
- quiet_option="-q"
+ imported_path=`$FIXPATH_BASE -q import "$path"`
+ if test $? -ne 0 || test ! -e $imported_path; then
+ if test "x$2" != "xNOFAIL"; then
+ AC_MSG_NOTICE([The path of $1, which is given as "$path", cannot be properly resolved.])
+ AC_MSG_NOTICE([Please see the section "Special Considerations" in building.md.])
+ AC_MSG_NOTICE([This is the error message given by fixpath:])
+ # Rerun fixpath without -q to get an error message
+ $FIXPATH_BASE import "$path"
+ AC_MSG_ERROR([Cannot continue])
+ else
+ imported_path=""
+ fi
fi
- imported_path=`$FIXPATH_BASE $quiet_option import "$path"`
- $FIXPATH_BASE verify "$imported_path"
+
+ $FIXPATH_BASE -q verify "$imported_path"
if test $? -ne 0; then
if test "x$2" != "xNOFAIL"; then
- AC_MSG_ERROR([The path of $1, which resolves as "$path", could not be imported.])
+ AC_MSG_ERROR([The path of $1, which resolves as "$path", could not be verified.])
else
imported_path=""
fi
@@ -83,7 +94,7 @@ AC_DEFUN([UTIL_FIXUP_PATH],
if test "x$imported_path_lower" != "x$orig_path_lower"; then
$1="$imported_path"
fi
- else
+ else # non-Windows
[ if [[ "$path" =~ " " ]]; then ]
if test "x$2" != "xNOFAIL"; then
AC_MSG_NOTICE([The path of $1, which resolves as "$path", is invalid.])
@@ -186,7 +197,6 @@ AC_DEFUN([UTIL_CHECK_WINENV_EXEC_TYPE],
# it need to be in the PATH.
# $1: The name of the variable to fix
# $2: Where to look for the command (replaces $PATH)
-# $3: set to NOFIXPATH to skip prefixing FIXPATH, even if needed on platform
AC_DEFUN([UTIL_FIXUP_EXECUTABLE],
[
input="[$]$1"
@@ -233,15 +243,19 @@ AC_DEFUN([UTIL_FIXUP_EXECUTABLE],
# This is a path with slashes, don't look at $PATH
if test "x$OPENJDK_BUILD_OS" = "xwindows"; then
# fixpath.sh import will do all heavy lifting for us
- new_path=`$FIXPATH_BASE import "$path"`
+ new_path=`$FIXPATH_BASE -q import "$path"`
- if test ! -e $new_path; then
+ if test $? -ne 0 || test ! -e $new_path; then
# It failed, but maybe spaces were part of the path and not separating
# the command and argument. Retry using that assumption.
- new_path=`$FIXPATH_BASE import "$input"`
- if test ! -e $new_path; then
- AC_MSG_NOTICE([The command for $1, which resolves as "$input", can not be found.])
- AC_MSG_ERROR([Cannot locate $input])
+ new_path=`$FIXPATH_BASE -q import "$input"`
+ if test $? -ne 0 || test ! -e $new_path; then
+ AC_MSG_NOTICE([The command for $1, which is given as "$input", cannot be properly resolved.])
+ AC_MSG_NOTICE([Please see the section "Special Considerations" in building.md.])
+ AC_MSG_NOTICE([This is the error message given by fixpath:])
+ # Rerun fixpath without -q to get an error message
+ $FIXPATH_BASE import "$input"
+ AC_MSG_ERROR([Cannot continue])
fi
# It worked, clear all "arguments"
arguments=""
@@ -282,10 +296,6 @@ AC_DEFUN([UTIL_FIXUP_EXECUTABLE],
fi
fi
- if test "x$3" = xNOFIXPATH; then
- fixpath_prefix=""
- fi
-
# Now join together the path and the arguments once again
new_complete="$fixpath_prefix$new_path$arguments"
$1="$new_complete"
@@ -353,7 +363,15 @@ AC_DEFUN([UTIL_SETUP_TOOL],
else
# Otherwise we believe it is a complete path. Use it as it is.
if test ! -x "$tool_command" && test ! -x "${tool_command}.exe"; then
- AC_MSG_ERROR([User supplied tool $1="$tool_command" does not exist or is not executable])
+ # Maybe the path had spaces in it; try again with the entire argument
+ if test ! -x "$tool_override" && test ! -x "${tool_override}.exe"; then
+ AC_MSG_ERROR([User supplied tool $1="$tool_override" does not exist or is not executable])
+ else
+ # We successfully located the executable assuming the spaces were part of the path.
+ # We can't combine paths containing spaces with arguments, so assume tool_args is empty.
+ tool_command="$tool_override"
+ tool_args=""
+ fi
fi
if test ! -x "$tool_command"; then
tool_command="${tool_command}.exe"
@@ -379,7 +397,6 @@ AC_DEFUN([UTIL_SETUP_TOOL],
# $1: variable to set
# $2: executable name (or list of names) to look for
# $3: [path]
-# $4: set to NOFIXPATH to skip prefixing FIXPATH, even if needed on platform
AC_DEFUN([UTIL_LOOKUP_PROGS],
[
UTIL_SETUP_TOOL($1, [
@@ -421,10 +438,8 @@ AC_DEFUN([UTIL_LOOKUP_PROGS],
# If we have FIXPATH enabled, strip all instances of it and prepend
# a single one, to avoid double fixpath prefixing.
- if test "x$4" != xNOFIXPATH; then
- [ if [[ $FIXPATH != "" && $result =~ ^"$FIXPATH " ]]; then ]
- result="\$FIXPATH ${result#"$FIXPATH "}"
- fi
+ [ if [[ $FIXPATH != "" && $result =~ ^"$FIXPATH " ]]; then ]
+ result="\$FIXPATH ${result#"$FIXPATH "}"
fi
AC_MSG_RESULT([$result])
break 2;
@@ -515,6 +530,24 @@ AC_DEFUN([UTIL_ADD_FIXPATH],
fi
])
+################################################################################
+# Return a path to the executable binary from a command line, stripping away
+# any FIXPATH prefix or arguments. The resulting value can be checked for
+# existence using "test -e". The result is returned in a variable named
+# "$1_EXECUTABLE".
+#
+# $1: variable describing the command to get the binary for
+AC_DEFUN([UTIL_GET_EXECUTABLE],
+[
+ # Strip the FIXPATH prefix, if any
+ fixpath_stripped="[$]$1"
+ [ if [[ $FIXPATH != "" && $fixpath_stripped =~ ^"$FIXPATH " ]]; then ]
+ fixpath_stripped="${fixpath_stripped#"$FIXPATH "}"
+ fi
+ # Remove any arguments following the binary
+ $1_EXECUTABLE="${fixpath_stripped%% *}"
+])
+
################################################################################
AC_DEFUN([UTIL_REMOVE_SYMBOLIC_LINKS],
[
diff --git a/make/common/FindTests.gmk b/make/common/FindTests.gmk
index 41cf08d9e48..517bb2973f4 100644
--- a/make/common/FindTests.gmk
+++ b/make/common/FindTests.gmk
@@ -59,14 +59,14 @@ ifeq ($(GENERATE_FIND_TESTS_FILE), true)
$(call MakeTargetDir)
( $(foreach root, $(JTREG_TESTROOTS), \
$(ECHO) ""; \
- $(ECHO) -n "$(root)_JTREG_TEST_GROUPS := "; \
+ $(PRINTF) "\n%s_JTREG_TEST_GROUPS := " "$(root)"; \
$(SED) -n -e 's/^\#.*//g' -e 's/\([^ ]*\)\w*=.*/\1/gp' \
$($(root)_JTREG_GROUP_FILES) \
| $(SORT) -u | $(TR) '\n' ' ' ; \
) \
) > $@
$(ECHO) "" >> $@
- $(ECHO) -n "MAKE_TEST_TARGETS := " >> $@
+ $(PRINTF) "MAKE_TEST_TARGETS := " >> $@
$(MAKE) -s --no-print-directory $(MAKE_ARGS) \
SPEC=$(SPEC) -f $(TOPDIR)/test/make/TestMake.gmk print-targets \
TARGETS_FILE=$@
diff --git a/make/common/JavaCompilation.gmk b/make/common/JavaCompilation.gmk
index 70b3557baea..c5a74413de1 100644
--- a/make/common/JavaCompilation.gmk
+++ b/make/common/JavaCompilation.gmk
@@ -80,15 +80,13 @@ endef
#
# The sed expression does this:
# 1. Add a backslash before any :, = or ! that do not have a backslash already.
-# 2. Apply the file unicode2x.sed which does a whole bunch of \u00XX to \xXX
-# conversions.
-# 3. Delete all lines starting with #.
-# 4. Delete empty lines.
-# 5. Append lines ending with \ with the next line.
-# 6. Remove leading and trailing white space. Note that tabs must be explicit
+# 2. Delete all lines starting with #.
+# 3. Delete empty lines.
+# 4. Append lines ending with \ with the next line.
+# 5. Remove leading and trailing white space. Note that tabs must be explicit
# as sed on macosx does not understand '\t'.
-# 7. Replace the first \= with just =.
-# 8. Finally it's all sorted to create a stable output.
+# 6. Replace the first \= with just =.
+# 7. Finally it's all sorted to create a stable output.
#
# It is assumed that = is the character used for separating names and values.
define add_file_to_clean
@@ -108,7 +106,6 @@ define add_file_to_clean
( $(CAT) $$< && $(ECHO) "" ) \
| $(SED) -e 's/\([^\\]\):/\1\\:/g' -e 's/\([^\\]\)=/\1\\=/g' \
-e 's/\([^\\]\)!/\1\\!/g' -e 's/^[ ]*#.*/#/g' \
- | $(SED) -f "$$(TOPDIR)/make/common/support/unicode2x.sed" \
| $(SED) -e '/^#/d' -e '/^$$$$/d' \
-e :a -e '/\\$$$$/N; s/\\\n//; ta' \
-e 's/^[ ]*//;s/[ ]*$$$$//' \
@@ -155,6 +152,7 @@ endef
# INCLUDE_FILES "com/sun/SolarisFoobar.java" means only compile this file!
# EXCLUDE_FILES "com/sun/SolarisFoobar.java" means do not compile this particular file!
# "SolarisFoobar.java" means do not compile SolarisFoobar, wherever it is found.
+# EXCLUDE_PATTERNS Exclude files matching any of these substrings
# EXTRA_FILES List of extra source files to include in compilation. Can be used to
# specify files that need to be generated by other rules first.
# HEADERS path to directory where all generated c-headers are written.
@@ -265,10 +263,12 @@ define SetupJavaCompilationBody
endif
# Tell javac to do exactly as told and no more
- PARANOIA_FLAGS := -implicit:none -Xprefer:source -XDignore.symbol.file=true -encoding ascii
+ PARANOIA_FLAGS := -implicit:none -Xprefer:source -XDignore.symbol.file=true
$1_FLAGS += -g -Xlint:all $$($1_TARGET_RELEASE) $$(PARANOIA_FLAGS)
$1_FLAGS += $$($1_JAVAC_FLAGS)
+ # Set character encoding in source
+ $1_FLAGS += -encoding utf-8
ifeq ($$(JAVA_WARNINGS_AS_ERRORS), true)
$1_FLAGS += -Werror
@@ -333,6 +333,20 @@ define SetupJavaCompilationBody
$1_INCLUDE_PATTERN += $$(foreach i, $$($1_SRC), $$(addprefix $$i/, $$(addsuffix /%, $$($1_INCLUDES))))
endif
+ ifneq ($$($1_EXCLUDE_PATTERNS), )
+ # We must not match the exclude pattern against the src roots, so first
+ # strip the src prefixes from the absolute file paths in SRCS.
+ $1_SRCS_WITHOUT_ROOTS := $$(foreach i, $$($1_SRC), \
+ $$(patsubst $$i/%,%, $$(filter $$i/%, $$($1_SRCS))))
+ $1_EXCLUDE_PATTERNS_WITHOUT_ROOTS := $$(call containing, \
+ $$($1_EXCLUDE_PATTERNS), $$($1_SRCS_WITHOUT_ROOTS))
+ # Then add back all possible src prefixes; this will generate more paths
+ # than really exist, but it does not matter since we will use this as
+ # input to filter-out.
+ $1_EXCLUDE_PATTERN += $$(foreach i, $$($1_SRC), $$(addprefix $$i/, \
+ $$($1_EXCLUDE_PATTERNS_WITHOUT_ROOTS)))
+ endif
+
# Apply include/exclude patterns to java sources
ifneq ($$($1_EXCLUDE_PATTERN), )
$1_SRCS := $$(filter-out $$($1_EXCLUDE_PATTERN), $$($1_SRCS))
diff --git a/make/common/JdkNativeCompilation.gmk b/make/common/JdkNativeCompilation.gmk
index 372ad39305c..0285669ffd8 100644
--- a/make/common/JdkNativeCompilation.gmk
+++ b/make/common/JdkNativeCompilation.gmk
@@ -227,6 +227,8 @@ endef
GLOBAL_VERSION_INFO_RESOURCE := $(TOPDIR)/src/java.base/windows/native/common/version.rc
+# \xA9 is the copyright symbol in ANSI encoding (Windows-1252), which rc.exe
+# assumes the resource file is in.
JDK_RCFLAGS=$(RCFLAGS) \
-D"JDK_VERSION_STRING=$(VERSION_STRING)" \
-D"JDK_COMPANY=$(JDK_RC_COMPANY_NAME)" \
diff --git a/make/common/MakeFileStart.gmk b/make/common/MakeFileStart.gmk
index f1dd0abb792..f18c623d3e8 100644
--- a/make/common/MakeFileStart.gmk
+++ b/make/common/MakeFileStart.gmk
@@ -47,7 +47,7 @@ endif
# We need spec.gmk to get $(TOPDIR)
include $(SPEC)
-THIS_MAKEFILE := $(patsubst make/%,%,$(patsubst $(TOPDIR)/%,%,$(THIS_MAKEFILE_PATH)))
+THIS_MAKEFILE := $(patsubst make/%,%,$(patsubst $(TOPDIR_ALT)/make/%,%,$(patsubst $(TOPDIR)/%,%,$(THIS_MAKEFILE_PATH))))
ifeq ($(LOG_FLOW), true)
$(info :Enter $(THIS_MAKEFILE))
diff --git a/make/common/MakeIncludeStart.gmk b/make/common/MakeIncludeStart.gmk
index d09f027c1d3..3904633f9f2 100644
--- a/make/common/MakeIncludeStart.gmk
+++ b/make/common/MakeIncludeStart.gmk
@@ -29,7 +29,7 @@
# Get the next to last word (by prepending a padding element)
THIS_INCLUDE_PATH := $(word $(words ${MAKEFILE_LIST}),padding ${MAKEFILE_LIST})
-THIS_INCLUDE := $(patsubst $(TOPDIR)/make/%,%,$(THIS_INCLUDE_PATH))
+THIS_INCLUDE := $(patsubst $(TOPDIR_ALT)/make/%,%,$(patsubst $(TOPDIR)/make/%,%,$(THIS_INCLUDE_PATH)))
# Print an indented message, also counting the top-level makefile as a level
ifneq ($(INCLUDE_GUARD_$(THIS_INCLUDE)), true)
diff --git a/make/common/Modules.gmk b/make/common/Modules.gmk
index 663e9075cf8..725424d7618 100644
--- a/make/common/Modules.gmk
+++ b/make/common/Modules.gmk
@@ -180,7 +180,7 @@ ifeq ($(GENERATE_MODULE_DEPS_FILE), true)
$(call MakeTargetDir)
$(RM) $@
$(foreach m, $(MODULE_INFOS), \
- ( $(ECHO) -n "DEPS_$(call GetModuleNameFromModuleInfo, $m) := " && \
+ ( $(PRINTF) "DEPS_%s := " "$(call GetModuleNameFromModuleInfo, $m)" && \
$(AWK) -v MODULE=$(call GetModuleNameFromModuleInfo, $m) ' \
BEGIN { if (MODULE != "java.base") printf(" java.base"); } \
/^ *requires/ { sub(/;/, ""); \
@@ -194,7 +194,7 @@ ifeq ($(GENERATE_MODULE_DEPS_FILE), true)
gsub(/\r/, ""); \
printf(" %s", $$0) } \
END { printf("\n") }' $m && \
- $(ECHO) -n "TRANSITIVE_MODULES_$(call GetModuleNameFromModuleInfo, $m) := " && \
+ $(PRINTF) "TRANSITIVE_MODULES_%s := " "$(call GetModuleNameFromModuleInfo, $m)" && \
$(AWK) -v MODULE=$(call GetModuleNameFromModuleInfo, $m) ' \
BEGIN { if (MODULE != "java.base") printf(" java.base"); } \
/^ *requires *transitive/ { \
diff --git a/make/common/native/Paths.gmk b/make/common/native/Paths.gmk
index ee097b2e134..bdb8828eb32 100644
--- a/make/common/native/Paths.gmk
+++ b/make/common/native/Paths.gmk
@@ -128,10 +128,9 @@ define SetupSourceFiles
# Extract the C/C++ files.
ifneq ($$($1_EXCLUDE_PATTERNS), )
# We must not match the exclude pattern against the src root(s).
- $1_SRCS_WITHOUT_ROOTS := $$($1_SRCS)
- $$(foreach i, $$($1_SRC), $$(eval $1_SRCS_WITHOUT_ROOTS := $$(patsubst \
- $$i/%,%, $$($1_SRCS_WITHOUT_ROOTS))))
- $1_ALL_EXCLUDE_FILES := $$(call containing, $$($1_EXCLUDE_PATTERNS), \
+ $1_SRCS_WITHOUT_ROOTS := $$(foreach i, $$($1_SRC), \
+ $$(patsubst $$i/%,%, $$(filter $$i/%, $$($1_SRCS))))
+ $1_ALL_EXCLUDE_FILES := $$(call containing, $$($1_EXCLUDE_PATTERNS), \
$$($1_SRCS_WITHOUT_ROOTS))
endif
ifneq ($$($1_EXCLUDE_FILES), )
diff --git a/make/common/support/unicode2x.sed b/make/common/support/unicode2x.sed
deleted file mode 100644
index 5188b97fe03..00000000000
--- a/make/common/support/unicode2x.sed
+++ /dev/null
@@ -1,100 +0,0 @@
-s/\\u0020/\x20/g
-s/\\u003A/\x3A/g
-s/\\u006B/\x6B/g
-s/\\u0075/\x75/g
-s/\\u00A0/\xA0/g
-s/\\u00A3/\xA3/g
-s/\\u00B0/\xB0/g
-s/\\u00B7/\xB7/g
-s/\\u00BA/\xBA/g
-s/\\u00BF/\xBF/g
-s/\\u00C0/\xC0/g
-s/\\u00C1/\xC1/g
-s/\\u00C2/\xC2/g
-s/\\u00C4/\xC4/g
-s/\\u00C5/\xC5/g
-s/\\u00C8/\xC8/g
-s/\\u00C9/\xC9/g
-s/\\u00CA/\xCA/g
-s/\\u00CD/\xCD/g
-s/\\u00CE/\xCE/g
-s/\\u00D3/\xD3/g
-s/\\u00D4/\xD4/g
-s/\\u00D6/\xD6/g
-s/\\u00DA/\xDA/g
-s/\\u00DC/\xDC/g
-s/\\u00DD/\xDD/g
-s/\\u00DF/\xDF/g
-s/\\u00E0/\xE0/g
-s/\\u00E1/\xE1/g
-s/\\u00E2/\xE2/g
-s/\\u00E3/\xE3/g
-s/\\u00E4/\xE4/g
-s/\\u00E5/\xE5/g
-s/\\u00E6/\xE6/g
-s/\\u00E7/\xE7/g
-s/\\u00E8/\xE8/g
-s/\\u00E9/\xE9/g
-s/\\u00EA/\xEA/g
-s/\\u00EB/\xEB/g
-s/\\u00EC/\xEC/g
-s/\\u00ED/\xED/g
-s/\\u00EE/\xEE/g
-s/\\u00EF/\xEF/g
-s/\\u00F1/\xF1/g
-s/\\u00F2/\xF2/g
-s/\\u00F3/\xF3/g
-s/\\u00F4/\xF4/g
-s/\\u00F5/\xF5/g
-s/\\u00F6/\xF6/g
-s/\\u00F9/\xF9/g
-s/\\u00FA/\xFA/g
-s/\\u00FC/\xFC/g
-s/\\u0020/\x20/g
-s/\\u003f/\x3f/g
-s/\\u006f/\x6f/g
-s/\\u0075/\x75/g
-s/\\u00a0/\xa0/g
-s/\\u00a3/\xa3/g
-s/\\u00b0/\xb0/g
-s/\\u00ba/\xba/g
-s/\\u00bf/\xbf/g
-s/\\u00c1/\xc1/g
-s/\\u00c4/\xc4/g
-s/\\u00c5/\xc5/g
-s/\\u00c8/\xc8/g
-s/\\u00c9/\xc9/g
-s/\\u00ca/\xca/g
-s/\\u00cd/\xcd/g
-s/\\u00d6/\xd6/g
-s/\\u00dc/\xdc/g
-s/\\u00dd/\xdd/g
-s/\\u00df/\xdf/g
-s/\\u00e0/\xe0/g
-s/\\u00e1/\xe1/g
-s/\\u00e2/\xe2/g
-s/\\u00e3/\xe3/g
-s/\\u00e4/\xe4/g
-s/\\u00e5/\xe5/g
-s/\\u00e7/\xe7/g
-s/\\u00e8/\xe8/g
-s/\\u00e9/\xe9/g
-s/\\u00ea/\xea/g
-s/\\u00eb/\xeb/g
-s/\\u00ec/\xec/g
-s/\\u00ed/\xed/g
-s/\\u00ee/\xee/g
-s/\\u00ef/\xef/g
-s/\\u00f0/\xf0/g
-s/\\u00f1/\xf1/g
-s/\\u00f2/\xf2/g
-s/\\u00f3/\xf3/g
-s/\\u00f4/\xf4/g
-s/\\u00f5/\xf5/g
-s/\\u00f6/\xf6/g
-s/\\u00f7/\xf7/g
-s/\\u00f8/\xf8/g
-s/\\u00f9/\xf9/g
-s/\\u00fa/\xfa/g
-s/\\u00fc/\xfc/g
-s/\\u00ff/\xff/g
diff --git a/make/conf/github-actions.conf b/make/conf/github-actions.conf
index 27845ffbd7a..d2b6cd23128 100644
--- a/make/conf/github-actions.conf
+++ b/make/conf/github-actions.conf
@@ -26,7 +26,7 @@
# Versions and download locations for dependencies used by GitHub Actions (GHA)
GTEST_VERSION=1.14.0
-JTREG_VERSION=7.5.1+1
+JTREG_VERSION=7.5.2+1
LINUX_X64_BOOT_JDK_EXT=tar.gz
LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk24/1f9ff9062db4449d8ca828c504ffae90/36/GPL/openjdk-24_linux-x64_bin.tar.gz
diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js
index aa4d846280e..91876878046 100644
--- a/make/conf/jib-profiles.js
+++ b/make/conf/jib-profiles.js
@@ -241,10 +241,10 @@ var getJibProfilesCommon = function (input, data) {
// List of the main profile names used for iteration
common.main_profile_names = [
- "linux-x64", "linux-x86", "macosx-x64", "macosx-aarch64",
+ "macosx-x64", "macosx-aarch64",
"windows-x64", "windows-aarch64",
- "linux-aarch64", "linux-arm32", "linux-ppc64le", "linux-s390x",
- "linux-riscv64"
+ "linux-x64", "linux-aarch64",
+ "linux-arm32", "linux-ppc64le", "linux-s390x", "linux-riscv64"
];
// These are the base settings for all the main build profiles.
@@ -254,7 +254,6 @@ var getJibProfilesCommon = function (input, data) {
configure_args: concat(
"--with-exclude-translations=es,fr,it,ko,pt_BR,sv,ca,tr,cs,sk,ja_JP_A,ja_JP_HA,ja_JP_HI,ja_JP_I,zh_TW,zh_HK",
"--disable-jvm-feature-shenandoahgc",
- "--disable-cds-archive-coh",
versionArgs(input, common))
};
@@ -283,9 +282,6 @@ var getJibProfilesCommon = function (input, data) {
labels: "open"
};
- common.configure_args_64bit = ["--with-target-bits=64"];
- common.configure_args_32bit = ["--with-target-bits=32"];
-
/**
* Define common artifacts template for all main profiles
* @param o - Object containing data for artifacts
@@ -412,58 +408,34 @@ var getJibProfilesProfiles = function (input, common, data) {
// Main SE profiles
var profiles = {
-
- "linux-x64": {
- target_os: "linux",
- target_cpu: "x64",
- dependencies: ["devkit", "gtest", "build_devkit", "graphviz", "pandoc", "tidy"],
- configure_args: concat(
- (input.build_cpu == "x64" ? common.configure_args_64bit
- : "--openjdk-target=x86_64-linux-gnu"),
- "--with-zlib=system", "--disable-dtrace",
- (isWsl(input) ? [ "--host=x86_64-unknown-linux-gnu",
- "--build=x86_64-unknown-linux-gnu" ] : [])),
- },
-
- "linux-x86": {
- target_os: "linux",
- target_cpu: "x86",
- build_cpu: "x64",
- dependencies: ["devkit", "gtest", "libffi"],
- configure_args: concat(common.configure_args_32bit, [
- "--with-jvm-variants=minimal,server",
- "--with-zlib=system",
- "--with-libffi=" + input.get("libffi", "home_path"),
- "--enable-libffi-bundling",
- "--enable-fallback-linker"
- ])
- },
-
"macosx-x64": {
target_os: "macosx",
target_cpu: "x64",
dependencies: ["devkit", "gtest", "graphviz", "pandoc", "tidy"],
- configure_args: concat(common.configure_args_64bit, "--with-zlib=system",
+ configure_args: [
+ "--with-zlib=system",
"--with-macosx-version-max=11.00.00",
"--enable-compatible-cds-alignment",
// Use system SetFile instead of the one in the devkit as the
// devkit one may not work on Catalina.
- "SETFILE=/usr/bin/SetFile"),
+ "SETFILE=/usr/bin/SetFile"
+ ],
},
"macosx-aarch64": {
target_os: "macosx",
target_cpu: "aarch64",
dependencies: ["devkit", "gtest", "graphviz", "pandoc", "tidy"],
- configure_args: concat(common.configure_args_64bit,
- "--with-macosx-version-max=11.00.00"),
+ configure_args: [
+ "--with-macosx-version-max=11.00.00"
+ ],
},
"windows-x64": {
target_os: "windows",
target_cpu: "x64",
dependencies: ["devkit", "gtest", "pandoc"],
- configure_args: concat(common.configure_args_64bit),
+ configure_args: [],
},
"windows-aarch64": {
@@ -475,7 +447,19 @@ var getJibProfilesProfiles = function (input, common, data) {
],
},
- "linux-aarch64": {
+ "linux-x64": {
+ target_os: "linux",
+ target_cpu: "x64",
+ dependencies: ["devkit", "gtest", "build_devkit", "graphviz", "pandoc", "tidy"],
+ configure_args: concat(
+ "--with-zlib=system",
+ "--disable-dtrace",
+ (cross_compiling ? [ "--openjdk-target=x86_64-linux-gnu" ] : []),
+ (isWsl(input) ? [ "--host=x86_64-unknown-linux-gnu",
+ "--build=x86_64-unknown-linux-gnu" ] : [])),
+ },
+
+ "linux-aarch64": {
target_os: "linux",
target_cpu: "aarch64",
dependencies: ["devkit", "gtest", "build_devkit", "graphviz", "pandoc", "tidy"],
@@ -492,8 +476,10 @@ var getJibProfilesProfiles = function (input, common, data) {
build_cpu: "x64",
dependencies: ["devkit", "gtest", "build_devkit"],
configure_args: [
- "--openjdk-target=arm-linux-gnueabihf", "--with-freetype=bundled",
- "--with-abi-profile=arm-vfp-hflt", "--disable-warnings-as-errors"
+ "--openjdk-target=arm-linux-gnueabihf",
+ "--with-freetype=bundled",
+ "--with-abi-profile=arm-vfp-hflt",
+ "--disable-warnings-as-errors"
],
},
@@ -503,7 +489,8 @@ var getJibProfilesProfiles = function (input, common, data) {
build_cpu: "x64",
dependencies: ["devkit", "gtest", "build_devkit"],
configure_args: [
- "--openjdk-target=ppc64le-linux-gnu", "--with-freetype=bundled",
+ "--openjdk-target=ppc64le-linux-gnu",
+ "--with-freetype=bundled",
"--disable-warnings-as-errors"
],
},
@@ -514,7 +501,8 @@ var getJibProfilesProfiles = function (input, common, data) {
build_cpu: "x64",
dependencies: ["devkit", "gtest", "build_devkit"],
configure_args: [
- "--openjdk-target=s390x-linux-gnu", "--with-freetype=bundled",
+ "--openjdk-target=s390x-linux-gnu",
+ "--with-freetype=bundled",
"--disable-warnings-as-errors"
],
},
@@ -525,7 +513,8 @@ var getJibProfilesProfiles = function (input, common, data) {
build_cpu: "x64",
dependencies: ["devkit", "gtest", "build_devkit"],
configure_args: [
- "--openjdk-target=riscv64-linux-gnu", "--with-freetype=bundled",
+ "--openjdk-target=riscv64-linux-gnu",
+ "--with-freetype=bundled",
"--disable-warnings-as-errors"
],
},
@@ -586,24 +575,24 @@ var getJibProfilesProfiles = function (input, common, data) {
target_os: "linux",
target_cpu: "x64",
dependencies: ["devkit", "gtest", "libffi"],
- configure_args: concat(common.configure_args_64bit, [
+ configure_args: [
"--with-zlib=system",
"--with-jvm-variants=zero",
"--with-libffi=" + input.get("libffi", "home_path"),
"--enable-libffi-bundling",
- ])
+ ]
},
"linux-aarch64-zero": {
target_os: "linux",
target_cpu: "aarch64",
dependencies: ["devkit", "gtest", "libffi"],
- configure_args: concat(common.configure_args_64bit, [
+ configure_args: [
"--with-zlib=system",
"--with-jvm-variants=zero",
"--with-libffi=" + input.get("libffi", "home_path"),
"--enable-libffi-bundling"
- ])
+ ]
},
"linux-x86-zero": {
@@ -611,12 +600,13 @@ var getJibProfilesProfiles = function (input, common, data) {
target_cpu: "x86",
build_cpu: "x64",
dependencies: ["devkit", "gtest", "libffi"],
- configure_args: concat(common.configure_args_32bit, [
+ configure_args: [
+ "--with-target-bits=32",
"--with-zlib=system",
"--with-jvm-variants=zero",
"--with-libffi=" + input.get("libffi", "home_path"),
"--enable-libffi-bundling"
- ])
+ ]
}
}
profiles = concatObjects(profiles, zeroProfiles);
@@ -635,8 +625,10 @@ var getJibProfilesProfiles = function (input, common, data) {
target_os: "linux",
target_cpu: "x64",
dependencies: ["devkit", "gtest"],
- configure_args: concat(common.configure_args_64bit,
- "--with-zlib=system", "--disable-precompiled-headers"),
+ configure_args: [
+ "--with-zlib=system",
+ "--disable-precompiled-headers"
+ ],
},
};
profiles = concatObjects(profiles, noPchProfiles);
@@ -693,9 +685,6 @@ var getJibProfilesProfiles = function (input, common, data) {
"linux-x64": {
platform: "linux-x64",
},
- "linux-x86": {
- platform: "linux-x86",
- },
"macosx-x64": {
platform: "macos-x64",
jdk_subdir: "jdk-" + data.version + ".jdk/Contents/Home",
@@ -872,7 +861,8 @@ var getJibProfilesProfiles = function (input, common, data) {
profiles[cmpBaselineName].configure_args = concat(
profiles[cmpBaselineName].configure_args,
"--with-hotspot-build-time=n/a",
- "--disable-precompiled-headers");
+ "--disable-precompiled-headers",
+ "--with-source-date=version");
// Do not inherit artifact definitions from base profile
delete profiles[cmpBaselineName].artifacts;
});
@@ -1089,8 +1079,8 @@ var getJibProfilesDependencies = function (input, common) {
var devkit_platform_revisions = {
linux_x64: "gcc14.2.0-OL6.4+1.0",
- macosx: "Xcode14.3.1+1.0",
- windows_x64: "VS2022-17.6.5+1.0",
+ macosx: "Xcode15.4+1.0",
+ windows_x64: "VS2022-17.13.2+1.0",
linux_aarch64: "gcc14.2.0-OL7.6+1.0",
linux_arm: "gcc8.2.0-Fedora27+1.0",
linux_ppc64le: "gcc14.2.0-Fedora_41+1.0",
@@ -1161,10 +1151,7 @@ var getJibProfilesDependencies = function (input, common) {
organization: common.organization,
ext: "tar.gz",
module: "devkit-" + devkit_cross_prefix + devkit_platform,
- revision: devkit_platform_revisions[devkit_platform],
- environment: {
- "DEVKIT_HOME": input.get("devkit", "home_path"),
- }
+ revision: devkit_platform_revisions[devkit_platform]
},
build_devkit: {
@@ -1187,9 +1174,9 @@ var getJibProfilesDependencies = function (input, common) {
jtreg: {
server: "jpg",
product: "jtreg",
- version: "7.5.1",
+ version: "7.5.2",
build_number: "1",
- file: "bundles/jtreg-7.5.1+1.zip",
+ file: "bundles/jtreg-7.5.2+1.zip",
environment_name: "JT_HOME",
environment_path: input.get("jtreg", "home_path") + "/bin",
configure_args: "--with-jtreg=" + input.get("jtreg", "home_path"),
diff --git a/make/conf/version-numbers.conf b/make/conf/version-numbers.conf
index ce9e32315a9..38d6e42dff9 100644
--- a/make/conf/version-numbers.conf
+++ b/make/conf/version-numbers.conf
@@ -26,17 +26,17 @@
# Default version, product, and vendor information to use,
# unless overridden by configure
-DEFAULT_VERSION_FEATURE=25
+DEFAULT_VERSION_FEATURE=26
DEFAULT_VERSION_INTERIM=0
DEFAULT_VERSION_UPDATE=0
DEFAULT_VERSION_PATCH=0
DEFAULT_VERSION_EXTRA1=0
DEFAULT_VERSION_EXTRA2=0
DEFAULT_VERSION_EXTRA3=0
-DEFAULT_VERSION_DATE=2025-09-16
-DEFAULT_VERSION_CLASSFILE_MAJOR=69 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`"
+DEFAULT_VERSION_DATE=2026-03-17
+DEFAULT_VERSION_CLASSFILE_MAJOR=70 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`"
DEFAULT_VERSION_CLASSFILE_MINOR=0
DEFAULT_VERSION_DOCS_API_SINCE=11
-DEFAULT_ACCEPTABLE_BOOT_VERSIONS="24 25"
-DEFAULT_JDK_SOURCE_TARGET_VERSION=25
+DEFAULT_ACCEPTABLE_BOOT_VERSIONS="24 25 26"
+DEFAULT_JDK_SOURCE_TARGET_VERSION=26
DEFAULT_PROMOTED_VERSION_PRE=ea
diff --git a/make/data/ubsan/ubsan_default_options.c b/make/data/ubsan/ubsan_default_options.c
index 011d1a675a9..05e4722e45a 100644
--- a/make/data/ubsan/ubsan_default_options.c
+++ b/make/data/ubsan/ubsan_default_options.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -43,6 +43,18 @@
#define ATTRIBUTE_USED
#endif
+// On AIX, the llvm_symbolizer is not found out of the box, so we have to provide the
+// fully qualified llvm_symbolizer path in the __ubsan_default_options() function.
+// To get it here we compile our sources with an additional define LLVM_SYMBOLIZER
+// containing the path, which we set in make/autoconf/jdk-options.m4.
+#ifdef LLVM_SYMBOLIZER
+#define _LLVM_SYMBOLIZER(X) ",external_symbolizer_path=" X_LLVM_SYMBOLIZER(X)
+#define X_LLVM_SYMBOLIZER(X) #X
+#else
+#define LLVM_SYMBOLIZER
+#define _LLVM_SYMBOLIZER(X)
+#endif
+
// Override weak symbol exposed by UBSan to override default options. This is called by UBSan
// extremely early during library loading, before main is called. We need to override the default
// options because by default UBSan only prints a warning for each occurrence. We want jtreg tests
@@ -50,5 +62,5 @@
// thread so it is easier to track down. You can override these options by setting the environment
// variable UBSAN_OPTIONS.
ATTRIBUTE_DEFAULT_VISIBILITY ATTRIBUTE_USED const char* __ubsan_default_options() {
- return "halt_on_error=1,print_stacktrace=1";
+ return "halt_on_error=1,print_stacktrace=1" _LLVM_SYMBOLIZER(LLVM_SYMBOLIZER);
}
diff --git a/make/devkit/Tools.gmk b/make/devkit/Tools.gmk
index f4323f58638..1b9240df49c 100644
--- a/make/devkit/Tools.gmk
+++ b/make/devkit/Tools.gmk
@@ -39,6 +39,8 @@
# Fix this...
#
+uppercase = $(shell echo $1 | tr a-z A-Z)
+
$(info TARGET=$(TARGET))
$(info HOST=$(HOST))
$(info BUILD=$(BUILD))
@@ -91,99 +93,28 @@ endif
################################################################################
# Define external dependencies
-# Latest that could be made to work.
-GCC_VER := 14.2.0
-ifeq ($(GCC_VER), 14.2.0)
- gcc_ver := gcc-14.2.0
- binutils_ver := binutils-2.43
- ccache_ver := ccache-4.10.2
- CCACHE_CMAKE_BASED := 1
- mpfr_ver := mpfr-4.2.1
- gmp_ver := gmp-6.3.0
- mpc_ver := mpc-1.3.1
- gdb_ver := gdb-15.2
- REQUIRED_MIN_MAKE_MAJOR_VERSION := 4
-else ifeq ($(GCC_VER), 13.2.0)
- gcc_ver := gcc-13.2.0
- binutils_ver := binutils-2.41
- ccache_ver := ccache-3.7.12
- mpfr_ver := mpfr-4.2.0
- gmp_ver := gmp-6.3.0
- mpc_ver := mpc-1.3.1
- gdb_ver := gdb-13.2
- REQUIRED_MIN_MAKE_MAJOR_VERSION := 4
-else ifeq ($(GCC_VER), 11.3.0)
- gcc_ver := gcc-11.3.0
- binutils_ver := binutils-2.39
- ccache_ver := ccache-3.7.12
- mpfr_ver := mpfr-4.1.1
- gmp_ver := gmp-6.2.1
- mpc_ver := mpc-1.2.1
- gdb_ver := gdb-11.2
- REQUIRED_MIN_MAKE_MAJOR_VERSION := 4
-else ifeq ($(GCC_VER), 11.2.0)
- gcc_ver := gcc-11.2.0
- binutils_ver := binutils-2.37
- ccache_ver := ccache-3.7.12
- mpfr_ver := mpfr-4.1.0
- gmp_ver := gmp-6.2.1
- mpc_ver := mpc-1.2.1
- gdb_ver := gdb-11.1
- REQUIRED_MIN_MAKE_MAJOR_VERSION := 4
-else ifeq ($(GCC_VER), 10.3.0)
- gcc_ver := gcc-10.3.0
- binutils_ver := binutils-2.36.1
- ccache_ver := ccache-3.7.11
- mpfr_ver := mpfr-4.1.0
- gmp_ver := gmp-6.2.0
- mpc_ver := mpc-1.1.0
- gdb_ver := gdb-10.1
- REQUIRED_MIN_MAKE_MAJOR_VERSION := 4
-else ifeq ($(GCC_VER), 10.2.0)
- gcc_ver := gcc-10.2.0
- binutils_ver := binutils-2.35
- ccache_ver := ccache-3.7.11
- mpfr_ver := mpfr-4.1.0
- gmp_ver := gmp-6.2.0
- mpc_ver := mpc-1.1.0
- gdb_ver := gdb-9.2
- REQUIRED_MIN_MAKE_MAJOR_VERSION := 4
-else ifeq ($(GCC_VER), 9.2.0)
- gcc_ver := gcc-9.2.0
- binutils_ver := binutils-2.34
- ccache_ver := ccache-3.7.3
- mpfr_ver := mpfr-3.1.5
- gmp_ver := gmp-6.1.2
- mpc_ver := mpc-1.0.3
- gdb_ver := gdb-8.3
-else ifeq ($(GCC_VER), 8.3.0)
- gcc_ver := gcc-8.3.0
- binutils_ver := binutils-2.32
- ccache_ver := ccache-3.7.3
- mpfr_ver := mpfr-3.1.5
- gmp_ver := gmp-6.1.2
- mpc_ver := mpc-1.0.3
- gdb_ver := gdb-8.3
-else ifeq ($(GCC_VER), 7.3.0)
- gcc_ver := gcc-7.3.0
- binutils_ver := binutils-2.30
- ccache_ver := ccache-3.3.6
- mpfr_ver := mpfr-3.1.5
- gmp_ver := gmp-6.1.2
- mpc_ver := mpc-1.0.3
- gdb_ver := gdb-8.1
-else ifeq ($(GCC_VER), 4.9.2)
- gcc_ver := gcc-4.9.2
- binutils_ver := binutils-2.25
- ccache_ver := ccache-3.2.1
- mpfr_ver := mpfr-3.0.1
- gmp_ver := gmp-4.3.2
- mpc_ver := mpc-1.0.1
- gdb_ver := gdb-7.12.1
-else
- $(error Unsupported GCC version)
-endif
+gcc_ver_only := 14.2.0
+binutils_ver_only := 2.43
+ccache_ver_only := 4.10.2
+CCACHE_CMAKE_BASED := 1
+mpfr_ver_only := 4.2.1
+gmp_ver_only := 6.3.0
+mpc_ver_only := 1.3.1
+gdb_ver_only := 15.2
+
+dependencies := gcc binutils ccache mpfr gmp mpc gdb
+$(foreach dep,$(dependencies),$(eval $(dep)_ver := $(dep)-$($(dep)_ver_only)))
+
+GCC := http://ftp.gnu.org/pub/gnu/gcc/$(gcc_ver)/$(gcc_ver).tar.xz
+BINUTILS := http://ftp.gnu.org/pub/gnu/binutils/$(binutils_ver).tar.gz
+CCACHE := https://github.com/ccache/ccache/releases/download/v$(ccache_ver_only)/$(ccache_ver).tar.xz
+MPFR := https://www.mpfr.org/$(mpfr_ver)/$(mpfr_ver).tar.bz2
+GMP := http://ftp.gnu.org/pub/gnu/gmp/$(gmp_ver).tar.bz2
+MPC := http://ftp.gnu.org/pub/gnu/mpc/$(mpc_ver).tar.gz
+GDB := http://ftp.gnu.org/gnu/gdb/$(gdb_ver).tar.xz
+
+REQUIRED_MIN_MAKE_MAJOR_VERSION := 4
ifneq ($(REQUIRED_MIN_MAKE_MAJOR_VERSION),)
MAKE_MAJOR_VERSION := $(word 1,$(subst ., ,$(MAKE_VERSION)))
SUPPORTED_MAKE_VERSION := $(shell [ $(MAKE_MAJOR_VERSION) -ge $(REQUIRED_MIN_MAKE_MAJOR_VERSION) ] && echo true)
@@ -192,17 +123,6 @@ ifneq ($(REQUIRED_MIN_MAKE_MAJOR_VERSION),)
endif
endif
-ccache_ver_only := $(patsubst ccache-%,%,$(ccache_ver))
-
-
-GCC := http://ftp.gnu.org/pub/gnu/gcc/$(gcc_ver)/$(gcc_ver).tar.xz
-BINUTILS := http://ftp.gnu.org/pub/gnu/binutils/$(binutils_ver).tar.gz
-CCACHE := https://github.com/ccache/ccache/releases/download/v$(ccache_ver_only)/$(ccache_ver).tar.xz
-MPFR := https://www.mpfr.org/${mpfr_ver}/${mpfr_ver}.tar.bz2
-GMP := http://ftp.gnu.org/pub/gnu/gmp/${gmp_ver}.tar.bz2
-MPC := http://ftp.gnu.org/pub/gnu/mpc/${mpc_ver}.tar.gz
-GDB := http://ftp.gnu.org/gnu/gdb/${gdb_ver}.tar.xz
-
# RPMs used by all BASE_OS
RPM_LIST := \
$(KERNEL_HEADERS_RPM) \
@@ -297,7 +217,7 @@ define Download
endef
# Download and unpack all source packages
-$(foreach p,GCC BINUTILS CCACHE MPFR GMP MPC GDB,$(eval $(call Download,$(p))))
+$(foreach dep,$(dependencies),$(eval $(call Download,$(call uppercase,$(dep)))))
################################################################################
# Unpack RPMS
@@ -374,7 +294,7 @@ endif
################################################################################
# Define marker files for each source package to be compiled
-$(foreach t,binutils mpfr gmp mpc gcc ccache gdb,$(eval $(t) = $(TARGETDIR)/$($(t)_ver).done))
+$(foreach dep,$(dependencies),$(eval $(dep) = $(TARGETDIR)/$($(dep)_ver).done))
################################################################################
@@ -721,12 +641,12 @@ ifeq ($(TARGET), $(HOST))
ln -s $(TARGET)-$* $@
missing-links := $(addprefix $(PREFIX)/bin/, \
- addr2line ar as c++ c++filt dwp elfedit g++ gcc gcc-$(GCC_VER) gprof ld ld.bfd \
+ addr2line ar as c++ c++filt dwp elfedit g++ gcc gcc-$(gcc_ver_only) gprof ld ld.bfd \
ld.gold nm objcopy objdump ranlib readelf size strings strip)
endif
# Add link to work around "plugin needed to handle lto object" (JDK-8344272)
-$(PREFIX)/lib/bfd-plugins/liblto_plugin.so: $(PREFIX)/libexec/gcc/$(TARGET)/$(GCC_VER)/liblto_plugin.so
+$(PREFIX)/lib/bfd-plugins/liblto_plugin.so: $(PREFIX)/libexec/gcc/$(TARGET)/$(gcc_ver_only)/liblto_plugin.so
@echo 'Creating missing $(@F) soft link'
@mkdir -p $(@D)
ln -s $$(realpath -s --relative-to=$(@D) $<) $@
diff --git a/make/devkit/createAutoconfBundle.sh b/make/devkit/createAutoconfBundle.sh
index 7363b9cd8a7..ebe9c427f76 100644
--- a/make/devkit/createAutoconfBundle.sh
+++ b/make/devkit/createAutoconfBundle.sh
@@ -1,6 +1,6 @@
#!/bin/bash -e
#
-# Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -24,10 +24,21 @@
# questions.
#
-# Create a bundle in the current directory, containing what's needed to run
+# Create a bundle in the OpenJDK build folder, containing what's needed to run
# the 'autoconf' program by the OpenJDK build. To override TARGET_PLATFORM
# just set the variable before running this script.
+# This script fetches sources from the network, so make sure your proxy is set up appropriately.
+
+# colored print to highlight some of the logs
+function print_log()
+{
+ Color_Cyan='\033[1;36m' # Cyan
+ Color_Off='\033[0m' # Reset color
+ printf "${Color_Cyan}> $1${Color_Off}\n"
+}
+
+
# Autoconf depends on m4, so download and build that first.
AUTOCONF_VERSION=2.69
M4_VERSION=1.4.18
@@ -58,11 +69,12 @@ MODULE_NAME=autoconf-$TARGET_PLATFORM-$AUTOCONF_VERSION+$PACKAGE_VERSION
BUNDLE_NAME=$MODULE_NAME.tar.gz
SCRIPT_DIR="$(cd "$(dirname $0)" > /dev/null && pwd)"
-OUTPUT_ROOT="${SCRIPT_DIR}/../../build/autoconf"
+BASEDIR="$(cd "$SCRIPT_DIR/../.." > /dev/null && pwd)"
+OUTPUT_ROOT="$BASEDIR/build/autoconf"
-cd $OUTPUT_ROOT
IMAGE_DIR=$OUTPUT_ROOT/$MODULE_NAME
mkdir -p $IMAGE_DIR/usr
+cd $OUTPUT_ROOT
# Download and build m4
@@ -76,7 +88,7 @@ elif test "x$TARGET_PLATFORM" = xcygwin_x86; then
cp /usr/bin/m4 $IMAGE_DIR/usr/bin
elif test "x$TARGET_PLATFORM" = xlinux_x64; then
M4_VERSION=1.4.13-5
- wget http://yum.oracle.com/repo/OracleLinux/OL6/latest/x86_64/getPackage/m4-$M4_VERSION.el6.x86_64.rpm
+ wget https://yum.oracle.com/repo/OracleLinux/OL6/latest/x86_64/getPackage/m4-$M4_VERSION.el6.x86_64.rpm
cd $IMAGE_DIR
rpm2cpio $OUTPUT_ROOT/m4-$M4_VERSION.el6.x86_64.rpm | cpio -d -i
elif test "x$TARGET_PLATFORM" = xlinux_x86; then
@@ -85,27 +97,38 @@ elif test "x$TARGET_PLATFORM" = xlinux_x86; then
cd $IMAGE_DIR
rpm2cpio $OUTPUT_ROOT/m4-$M4_VERSION.el6.i686.rpm | cpio -d -i
else
+ print_log "m4: download"
wget https://ftp.gnu.org/gnu/m4/m4-$M4_VERSION.tar.gz
- tar xzf m4-$M4_VERSION.tar.gz
+ tar -xzf m4-$M4_VERSION.tar.gz
cd m4-$M4_VERSION
+ print_log "m4: configure"
./configure --prefix=$IMAGE_DIR/usr CFLAGS="-w -Wno-everything"
+ print_log "m4: make"
make
+ print_log "m4: make install"
make install
cd ..
fi
# Download and build autoconf
+print_log "autoconf: download"
wget https://ftp.gnu.org/gnu/autoconf/autoconf-$AUTOCONF_VERSION.tar.gz
-tar xzf autoconf-$AUTOCONF_VERSION.tar.gz
+tar -xzf autoconf-$AUTOCONF_VERSION.tar.gz
cd autoconf-$AUTOCONF_VERSION
+print_log "autoconf: configure"
./configure --prefix=$IMAGE_DIR/usr M4=$IMAGE_DIR/usr/bin/m4
+print_log "autoconf: make"
make
+print_log "autoconf: make install"
make install
cd ..
+# The resulting scripts in the installation folder use absolute paths to reference other files within that folder
+print_log "replace absolute paths in installation files with a relative ./"
perl -pi -e "s!$IMAGE_DIR/!./!" $IMAGE_DIR/usr/bin/auto* $IMAGE_DIR/usr/share/autoconf/autom4te.cfg
+print_log "creating $IMAGE_DIR/autoconf wrapper script"
cat > $IMAGE_DIR/autoconf << EOF
#!/bin/bash
# Get an absolute path to this script
@@ -123,6 +146,9 @@ PREPEND_INCLUDE="--prepend-include \$this_script_dir/usr/share/autoconf"
exec \$this_script_dir/usr/bin/autoconf \$PREPEND_INCLUDE "\$@"
EOF
+
chmod +x $IMAGE_DIR/autoconf
+
+print_log "archiving $IMAGE_DIR directory as $OUTPUT_ROOT/$BUNDLE_NAME"
cd $IMAGE_DIR
tar -cvzf $OUTPUT_ROOT/$BUNDLE_NAME *
diff --git a/make/devkit/createWindowsDevkit.sh b/make/devkit/createWindowsDevkit.sh
index 0646cb68ef4..757fb157ad4 100644
--- a/make/devkit/createWindowsDevkit.sh
+++ b/make/devkit/createWindowsDevkit.sh
@@ -56,16 +56,22 @@ BUILD_DIR="${SCRIPT_DIR}/../../build/devkit"
UNAME_SYSTEM=`uname -s`
UNAME_RELEASE=`uname -r`
+UNAME_OS=`uname -o`
# Detect cygwin or WSL
IS_CYGWIN=`echo $UNAME_SYSTEM | grep -i CYGWIN`
IS_WSL=`echo $UNAME_RELEASE | grep Microsoft`
+IS_MSYS=`echo $UNAME_OS | grep -i Msys`
+MSYS2_ARG_CONV_EXCL="*" # make "cmd.exe /c" work for msys2
+CMD_EXE="cmd.exe /c"
if test "x$IS_CYGWIN" != "x"; then
BUILD_ENV="cygwin"
+elif test "x$IS_MSYS" != "x"; then
+ BUILD_ENV="cygwin"
elif test "x$IS_WSL" != "x"; then
BUILD_ENV="wsl"
else
- echo "Unknown environment; only Cygwin and WSL are supported."
+ echo "Unknown environment; only Cygwin/MSYS2/WSL are supported."
exit 1
fi
@@ -76,7 +82,7 @@ elif test "x$BUILD_ENV" = "xwsl"; then
fi
# Work around the insanely named ProgramFiles(x86) env variable
-PROGRAMFILES_X86="$($WINDOWS_PATH_TO_UNIX_PATH "$(cmd.exe /c set | sed -n 's/^ProgramFiles(x86)=//p' | tr -d '\r')")"
+PROGRAMFILES_X86="$($WINDOWS_PATH_TO_UNIX_PATH "$(${CMD_EXE} set | sed -n 's/^ProgramFiles(x86)=//p' | tr -d '\r')")"
PROGRAMFILES="$($WINDOWS_PATH_TO_UNIX_PATH "$PROGRAMFILES")"
case $VS_VERSION in
@@ -99,13 +105,15 @@ esac
# Find Visual Studio installation dir
-VSNNNCOMNTOOLS=`cmd.exe /c echo %VS${VS_VERSION_NUM_NODOT}COMNTOOLS% | tr -d '\r'`
+VSNNNCOMNTOOLS=`${CMD_EXE} echo %VS${VS_VERSION_NUM_NODOT}COMNTOOLS% | tr -d '\r'`
+VSNNNCOMNTOOLS="$($WINDOWS_PATH_TO_UNIX_PATH "$VSNNNCOMNTOOLS")"
if [ -d "$VSNNNCOMNTOOLS" ]; then
- VS_INSTALL_DIR="$($WINDOWS_PATH_TO_UNIX_PATH "$VSNNNCOMNTOOLS/../..")"
+ VS_INSTALL_DIR="$VSNNNCOMNTOOLS/../.."
else
VS_INSTALL_DIR="${MSVC_PROGRAMFILES_DIR}/Microsoft Visual Studio/$VS_VERSION"
VS_INSTALL_DIR="$(ls -d "${VS_INSTALL_DIR}/"{Community,Professional,Enterprise} 2>/dev/null | head -n1)"
fi
+echo "VSNNNCOMNTOOLS: $VSNNNCOMNTOOLS"
echo "VS_INSTALL_DIR: $VS_INSTALL_DIR"
# Extract semantic version
@@ -180,7 +188,11 @@ cp $DEVKIT_ROOT/VC/redist/arm64/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/arm64
################################################################################
# Copy SDK files
-SDK_INSTALL_DIR="$PROGRAMFILES_X86/Windows Kits/$SDK_VERSION"
+SDK_INSTALL_DIR=`${CMD_EXE} echo %WindowsSdkDir% | tr -d '\r'`
+SDK_INSTALL_DIR="$($WINDOWS_PATH_TO_UNIX_PATH "$SDK_INSTALL_DIR")"
+if [ ! -d "$SDK_INSTALL_DIR" ]; then
+ SDK_INSTALL_DIR="$PROGRAMFILES_X86/Windows Kits/$SDK_VERSION"
+fi
echo "SDK_INSTALL_DIR: $SDK_INSTALL_DIR"
SDK_FULL_VERSION="$(ls "$SDK_INSTALL_DIR/bin" | sort -r -n | head -n1)"
diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk
index 0a897230f83..0fd1c752174 100644
--- a/make/hotspot/lib/JvmFeatures.gmk
+++ b/make/hotspot/lib/JvmFeatures.gmk
@@ -125,9 +125,11 @@ endif
ifneq ($(call check-jvm-feature, cds), true)
JVM_CFLAGS_FEATURES += -DINCLUDE_CDS=0
JVM_EXCLUDE_FILES += \
+ aotCodeCache.cpp \
classLoaderDataShared.cpp \
classLoaderExt.cpp \
- systemDictionaryShared.cpp
+ systemDictionaryShared.cpp \
+ trainingData.cpp
JVM_EXCLUDE_PATTERNS += cds/
endif
diff --git a/make/jdk/src/classes/build/tools/classlist/HelloClasslist.java b/make/jdk/src/classes/build/tools/classlist/HelloClasslist.java
index 1b930ca7527..fa1b33bb03e 100644
--- a/make/jdk/src/classes/build/tools/classlist/HelloClasslist.java
+++ b/make/jdk/src/classes/build/tools/classlist/HelloClasslist.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -59,6 +59,7 @@ public class HelloClasslist {
private static final Logger LOGGER = Logger.getLogger("Hello");
+ @SuppressWarnings("restricted")
public static void main(String ... args) throws Throwable {
FileSystems.getDefault();
@@ -141,6 +142,7 @@ public static void main(String ... args) throws Throwable {
HelloClasslist.class.getMethod("staticMethod_V").invoke(null);
var obj = HelloClasslist.class.getMethod("staticMethod_L_L", Object.class).invoke(null, instance);
HelloClasslist.class.getField("field").get(instance);
+ MethodHandles.Lookup.ClassOption.class.getEnumConstants();
// A selection of trivial and relatively common MH operations
invoke(MethodHandles.identity(double.class), 1.0);
@@ -160,6 +162,9 @@ record B(int b) { }
case B b -> b.b;
default -> 17;
};
+ // record run-time methods
+ o.equals(new B(5));
+ o.hashCode();
LOGGER.log(Level.FINE, "Value: " + value);
// The Striped64$Cell is loaded rarely only when there's a contention among
diff --git a/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java b/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java
index 8d6a703f1c9..a2f37db72d9 100644
--- a/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java
+++ b/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1190,7 +1190,7 @@ static void genTable(StringBuffer result, String name,
if (Csyntax)
result.append(" static ");
else
- result.append(" static final ");
+ result.append(" @Stable static final ");
result.append(atype);
result.append(" ").append(name).append("[");
if (Csyntax)
@@ -1347,7 +1347,7 @@ else if (bits == 64) {
}
static void genCaseMapTableDeclaration(StringBuffer result) {
- result.append(" static final char[][][] charMap;\n");
+ result.append(" @Stable static final char[][][] charMap;\n");
}
static void genCaseMapTable(StringBuffer result, SpecialCaseMap[] specialCaseMaps){
diff --git a/make/jdk/src/classes/build/tools/pandocfilter/PandocFilter.java b/make/jdk/src/classes/build/tools/pandocfilter/PandocFilter.java
index 64eeaaa36df..ebb49613e53 100644
--- a/make/jdk/src/classes/build/tools/pandocfilter/PandocFilter.java
+++ b/make/jdk/src/classes/build/tools/pandocfilter/PandocFilter.java
@@ -1,3 +1,26 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
package build.tools.pandocfilter;
import build.tools.pandocfilter.json.JSON;
diff --git a/make/jdk/src/classes/build/tools/taglet/PreviewNote.java b/make/jdk/src/classes/build/tools/taglet/PreviewNote.java
deleted file mode 100644
index ee3f9bea527..00000000000
--- a/make/jdk/src/classes/build/tools/taglet/PreviewNote.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package build.tools.taglet;
-
-import java.util.EnumSet;
-import java.util.List;
-import java.util.Set;
-
-
-import javax.lang.model.element.Element;
-import javax.tools.Diagnostic;
-
-
-import com.sun.source.doctree.DocTree;
-import com.sun.source.doctree.UnknownInlineTagTree;
-import jdk.javadoc.doclet.Doclet;
-import jdk.javadoc.doclet.DocletEnvironment;
-import jdk.javadoc.doclet.Reporter;
-import jdk.javadoc.doclet.StandardDoclet;
-import jdk.javadoc.doclet.Taglet;
-
-import static com.sun.source.doctree.DocTree.Kind.UNKNOWN_INLINE_TAG;
-
-/**
- * An inline tag to insert a note formatted as preview note.
- * The tag can be used as follows:
- *
- *
- * {@previewNote jep-number [Preview note heading]} - * Preview note content - * {@previewNote} - *- * - */ -public class PreviewNote implements Taglet { - - static final String TAG_NAME = "previewNote"; - Reporter reporter = null; - - @Override - public void init(DocletEnvironment env, Doclet doclet) { - if (doclet instanceof StandardDoclet stdoclet) { - reporter = stdoclet.getReporter(); - } - } - - /** - * Returns the set of locations in which the tag may be used. - */ - @Override - public Set
The theme mechanism is designed to allow developers to create their own themes. For an example of this, see the themes which are included with Metalworks. Note, like all of the L&F packages,
-the metal package is not yet frozen and the theme mechanism may change as 
+the metal package is not yet frozen and the theme mechanism may change as
we get developer feedback on how to improve it.
JSlider slider = new JSlider();@@ -64,7 +64,7 @@
-slider.putClientProperty("JSlider.isFilled", 
+slider.putClientProperty("JSlider.isFilled",
 Boolean.TRUE);
Many popular applications support "roll-over" effects on buttons in toolbars. +
Many popular applications support "roll-over" effects on buttons in toolbars. The Java L&F provides an easy way to do this. Here is a code snippit:
@@ -72,7 +72,7 @@
@@ -98,7 +98,7 @@
// add your buttons here
-toolbar.putClientProperty("JToolBar.isRollover", 
+toolbar.putClientProperty("JToolBar.isRollover",
 Boolean.TRUE);
Back
diff --git a/src/demo/share/jfc/Metalworks/resources/HelpFiles/metalworks.html b/src/demo/share/jfc/Metalworks/resources/HelpFiles/metalworks.html
index df9a440b273..f8b9d0cc6e3 100644
--- a/src/demo/share/jfc/Metalworks/resources/HelpFiles/metalworks.html
+++ b/src/demo/share/jfc/Metalworks/resources/HelpFiles/metalworks.html
@@ -1,6 +1,6 @@
-
+-This was a creature, more troublesom to be drawn, then any - of the rest, for I could not, for a good while, think of a way to make it +
This was a creature, more troublesom to be drawn, then any + of the rest, for I could not, for a good while, think of a way to make it suffer its body to ly quiet in a natural posture; but whil'st it was alive, - if its feet were fetter'd in Wax or Glew, it would so twist and wind its body, - that I could not any wayes get a good view of it; and if I killed it, its - body was so little, that I did often spoile the shape of it, before I could - throughly view it: for this is the nature of these minute Bodies, that as - soon, almost, as ever their life is destroy'd, their parts immediately shrivel, - and lose their beauty; and so is it also with small Plants, as I instanced + if its feet were fetter'd in Wax or Glew, it would so twist and wind its body, + that I could not any wayes get a good view of it; and if I killed it, its + body was so little, that I did often spoile the shape of it, before I could + throughly view it: for this is the nature of these minute Bodies, that as + soon, almost, as ever their life is destroy'd, their parts immediately shrivel, + and lose their beauty; and so is it also with small Plants, as I instanced before, in the description of Moss.
-And thence also is the reason of the variations in the beards - of wild Oats, and in those of Muskgrass seed, that their bodies, being exceeding - small, those small variations which are made in the surfaces of all bodies, - almost upon every change of Air, especially if the body be porous, do here - become sensible, where the whole body is so small, that it is almost nothing - but surface; for as in vegetable substances, I see no great reason to think, - that the moisture of the Aire (that, sticking to a wreath'd beard, does make - it untwist) should evaporate, or exhale away, any faster then the moisture - of other bodies, but rather that the avolation from, or access of moisture - to, the surfaces of bodies being much the same, those bodies become most - sensible of it, which have the least proportion of body to their surface. +
And thence also is the reason of the variations in the beards + of wild Oats, and in those of Muskgrass seed, that their bodies, being exceeding + small, those small variations which are made in the surfaces of all bodies, + almost upon every change of Air, especially if the body be porous, do here + become sensible, where the whole body is so small, that it is almost nothing + but surface; for as in vegetable substances, I see no great reason to think, + that the moisture of the Aire (that, sticking to a wreath'd beard, does make + it untwist) should evaporate, or exhale away, any faster then the moisture + of other bodies, but rather that the avolation from, or access of moisture + to, the surfaces of bodies being much the same, those bodies become most + sensible of it, which have the least proportion of body to their surface.
-So is it also with Animal substances; the dead body of an - Ant, or such little creature, does almost instantly shrivel and dry, and - your object shall be quite another thing, before you can half delineate - it, which proceeds not from the extraordinary exhalation, but from the small - proportion of body and juices, to the usual drying of bodies in the Air, +
So is it also with Animal substances; the dead body of an + Ant, or such little creature, does almost instantly shrivel and dry, and + your object shall be quite another thing, before you can half delineate + it, which proceeds not from the extraordinary exhalation, but from the small + proportion of body and juices, to the usual drying of bodies in the Air, especially if warm.
-For which inconvenience, where I could not otherwise remove - it, I thought of this expedient. I took the creature, I had design'd to delineate, - and put it into a drop of very well rectified spirit of Wine, this I found - would presently dispatch, as it were, the Animal, and being taken out of - it, and lay'd on a paper,the spirit of Wine would immediately fly away, - and leave the Animal dry, in its natural posture, or at least, in a constitution, - that it might easily with a pin be plac'd, in what posture you desired to - draw it, and the limbs would so remain, without either moving, or shriveling. +
For which inconvenience, where I could not otherwise remove + it, I thought of this expedient. I took the creature, I had design'd to delineate, + and put it into a drop of very well rectified spirit of Wine, this I found + would presently dispatch, as it were, the Animal, and being taken out of + it, and lay'd on a paper,the spirit of Wine would immediately fly away, + and leave the Animal dry, in its natural posture, or at least, in a constitution, + that it might easily with a pin be plac'd, in what posture you desired to + draw it, and the limbs would so remain, without either moving, or shriveling.
-And thus I dealt with this Ant, which I have here delineated, - which was one of many, of a very large kind, that inhabited under the Roots - of a Tree, from whence they would sally out in great parties, and make most - grievous havock of the Flowers and Fruits, in the ambient Garden, and return back +
And thus I dealt with this Ant, which I have here delineated, + which was one of many, of a very large kind, that inhabited under the Roots + of a Tree, from whence they would sally out in great parties, and make most + grievous havock of the Flowers and Fruits, in the ambient Garden, and return back again very expertly, by the same wayes and paths they went.
-It was more then half the bigness of an Earwig, of a dark - brown, or reddish colour, with long legs, on the hinder of which it would - stand up, and raise its head as high as it could above the ground, that it - might stare the further about it, just after the same manner as I have also - observ'd a hunting Spider to do: and putting my finger towards them, they - have at first all run towards it, till almost at it; and then they would stand - round about it, at a certain distance, and smell, as it were, and consider - whether they should any of them venture any further, till one more bold then - the rest venturing to climb it, all the rest, if I would have suffered them, - would have immediately followed : much such other seemingly rational actions - I have observ'd in this little Vermine with much pleasure, which would be - too long to be here related; those that desire more of them may satisfie +
It was more then half the bigness of an Earwig, of a dark + brown, or reddish colour, with long legs, on the hinder of which it would + stand up, and raise its head as high as it could above the ground, that it + might stare the further about it, just after the same manner as I have also + observ'd a hunting Spider to do: and putting my finger towards them, they + have at first all run towards it, till almost at it; and then they would stand + round about it, at a certain distance, and smell, as it were, and consider + whether they should any of them venture any further, till one more bold then + the rest venturing to climb it, all the rest, if I would have suffered them, + would have immediately followed : much such other seemingly rational actions + I have observ'd in this little Vermine with much pleasure, which would be + too long to be here related; those that desire more of them may satisfie their curiosity in Ligons History of the Barbadoes.
-Having insnar'd several of these into a small Box, I made - choice of the tallest grown among them, and separating it from the rest, - I gave it a Gill of Brandy, or Spirit of Wine, which after a while e'en knock'd - him down dead drunk, so that he became moveless, though at first putting - in he struggled for a pretty while very much, till at last, certain bubbles - issuing out of his mouth, it ceased to move; this (because I had before found - them quickly to recover again, if they were taken out presently) I suffered - to lye above an hour in the Spirit; and after I had taken it out, and put - its body and legs into a natural posture, remained moveless about an hour; - but then, upon a sudden, as if it had been awaken out of a drunken sleep, - it suddenly reviv'd and ran away; being caught, and serv'd as before, he - for a while continued struggling and striving, till at last there issued - several bubbles out of its mouth, and then, tanquam animam expirasset, he - remained moveless for a good while ; but at length again recovering, it was - again redipt, and suffered to lye some hours in the Spirit; notwithstanding - which, after it had layen dry some three or four hours, it again recovered - life and motion: Which kind of Experiments, if prosecuted, which they highly - deserve, seem to me of no inconsiderable use towards the invention of the - Latent Scheme, (as the Noble Ve rulam calls it) or the hidden, unknown Texture +
Having insnar'd several of these into a small Box, I made + choice of the tallest grown among them, and separating it from the rest, + I gave it a Gill of Brandy, or Spirit of Wine, which after a while e'en knock'd + him down dead drunk, so that he became moveless, though at first putting + in he struggled for a pretty while very much, till at last, certain bubbles + issuing out of his mouth, it ceased to move; this (because I had before found + them quickly to recover again, if they were taken out presently) I suffered + to lye above an hour in the Spirit; and after I had taken it out, and put + its body and legs into a natural posture, remained moveless about an hour; + but then, upon a sudden, as if it had been awaken out of a drunken sleep, + it suddenly reviv'd and ran away; being caught, and serv'd as before, he + for a while continued struggling and striving, till at last there issued + several bubbles out of its mouth, and then, tanquam animam expirasset, he + remained moveless for a good while ; but at length again recovering, it was + again redipt, and suffered to lye some hours in the Spirit; notwithstanding + which, after it had layen dry some three or four hours, it again recovered + life and motion: Which kind of Experiments, if prosecuted, which they highly + deserve, seem to me of no inconsiderable use towards the invention of the + Latent Scheme, (as the Noble Ve rulam calls it) or the hidden, unknown Texture of Bodies.
-Of what Figure this Creature appear'd through the Microscope, - the 32. Scheme (though not so carefully graven as it ought) will represent - to the eye, namely, That it had a large head A A, at the upper end of which - were two protuberant eyes, pearl'd like those of a Fly, but smaller B B; - of the Nose, or foremost part, issued two horns C C, of a shape sufficiently - differing from those of a blew Fly, though indeed they seem to be both the - same kind of Organ, and to serve for a kind of smelling; beyond these were - two indented jaws D D, which he open'd sideways, and was able to gape them - asunder very wide; and the ends of them being armed with teeth, which meeting - went between each other, it was able to grasp and hold a heavy body, three - or four times the bulk and weight of its own body: It had only six legs, - shap'd like those of a Fly, which, as I shewed before, is an Argument that - it is a winged Insect, and though I could not perceive any sign of them in - the middle part of its body (which seem'd to consist of three joints or pieces - E F G, out of which sprung two legs, yet 'tis known that there are of them +
Of what Figure this Creature appear'd through the Microscope, + the 32. Scheme (though not so carefully graven as it ought) will represent + to the eye, namely, That it had a large head A A, at the upper end of which + were two protuberant eyes, pearl'd like those of a Fly, but smaller B B; + of the Nose, or foremost part, issued two horns C C, of a shape sufficiently + differing from those of a blew Fly, though indeed they seem to be both the + same kind of Organ, and to serve for a kind of smelling; beyond these were + two indented jaws D D, which he open'd sideways, and was able to gape them + asunder very wide; and the ends of them being armed with teeth, which meeting + went between each other, it was able to grasp and hold a heavy body, three + or four times the bulk and weight of its own body: It had only six legs, + shap'd like those of a Fly, which, as I shewed before, is an Argument that + it is a winged Insect, and though I could not perceive any sign of them in + the middle part of its body (which seem'd to consist of three joints or pieces + E F G, out of which sprung two legs, yet 'tis known that there are of them that have long wings, and fly up and down in the air.
-The third and last part of its body I I I was bigger and - larger then the other two, unto which it was joyn'd by a very small middle, - and had a kind of loose shell, or another distinct part of its body H, which - seem'd to be interpos'd, and to keep the thorax and belly from touching. - The whole body was cas'd over with a very strong armour, and the belly I - I I was covered likewise with multitudes of small white shining brisles; - the legs, horns, head, and middle parts of its body were bestruck with hairs +
The third and last part of its body I I I was bigger and + larger then the other two, unto which it was joyn'd by a very small middle, + and had a kind of loose shell, or another distinct part of its body H, which + seem'd to be interpos'd, and to keep the thorax and belly from touching. + The whole body was cas'd over with a very strong armour, and the belly I + I I was covered likewise with multitudes of small white shining brisles; + the legs, horns, head, and middle parts of its body were bestruck with hairs also, but smaller and darker.
diff --git a/src/demo/share/jfc/SwingSet2/resources/bug.html b/src/demo/share/jfc/SwingSet2/resources/bug.html
index bf3b159112f..d61cdb9ad25 100644
--- a/src/demo/share/jfc/SwingSet2/resources/bug.html
+++ b/src/demo/share/jfc/SwingSet2/resources/bug.html
@@ -1,7 +1,7 @@
Untitled Document - +
@@ -10,113 +10,113 @@
Observ. LIV. Of a Louse.
-
--This is a Creature so officious, that 'twill be known to - every one at one time or other, so busie, and so impudent, that it will - be intruding it self in every ones company, and so proud and aspiring withall, - that it fears not to trample on the best, and affects nothing so much as - a Crown; feeds and lives very high, and that makes it so saucy, as to pull - any one by the ears that comes in its way, and will never be quiet till - it has drawn blood: it is troubled at nothing so much as at a man that scratches - his head, as knowing that man is plotting and contriving some mischief against - it, and that makes it oftentime sculk into some meaner and lower place, and - run behind a mans back, though it go very much against the hair; which ill - conditions of it having made it better known then trusted, would exempt me - from making any further description of it, did not my faithful Mercury, my +
++This is a Creature so officious, that 'twill be known to + every one at one time or other, so busie, and so impudent, that it will + be intruding it self in every ones company, and so proud and aspiring withall, + that it fears not to trample on the best, and affects nothing so much as + a Crown; feeds and lives very high, and that makes it so saucy, as to pull + any one by the ears that comes in its way, and will never be quiet till + it has drawn blood: it is troubled at nothing so much as at a man that scratches + his head, as knowing that man is plotting and contriving some mischief against + it, and that makes it oftentime sculk into some meaner and lower place, and + run behind a mans back, though it go very much against the hair; which ill + conditions of it having made it better known then trusted, would exempt me + from making any further description of it, did not my faithful Mercury, my Microscope, bring me other information of it.
-For this has discovered to me, by means of a very bright light - cast on it, that it is a Creature of a very odd shape ; it has a head shap'd - like that exprest in 35. Scheme marked with A, which seems almost Conical, - but is a little flatted on the upper and under sides, at the biggest part - of which, on either side behind the head (as it were, being the place where - other Creatures ears stand) are placed its two black shining goggle eyes - B B, looking backwards, and fenced round with several small cilia or hairs - that incompass it, so that it seems this Creature has no very good foresight: - It does not seem to have any eyelids, and therefore perhaps its eyes were - so placed, that it might the better cleanse them with its forelegs; and perhaps - this may be the reason, why they so much avoid and run from the light behind - them, for being made to live in the shady and dark recesses of the hair, - and thence probably their eye having a great aperture, the open and clear - light, especially that of the Sun, must needs very much offend them; to secure - these eyes from receiving any injury from the hairs through which it passes, - it has two horns that grow before it, in the place where one would have thought - the eyes should be; each of these C C have four joynts, which are fringed, - as 'twere, with small brisles, from which to the tip of its snout D, the - head seems very round and tapering, ending in a very sharp nose D, which - seems to have a small hole, and to be the passage through which he sucks +
For this has discovered to me, by means of a very bright light + cast on it, that it is a Creature of a very odd shape ; it has a head shap'd + like that exprest in 35. Scheme marked with A, which seems almost Conical, + but is a little flatted on the upper and under sides, at the biggest part + of which, on either side behind the head (as it were, being the place where + other Creatures ears stand) are placed its two black shining goggle eyes + B B, looking backwards, and fenced round with several small cilia or hairs + that incompass it, so that it seems this Creature has no very good foresight: + It does not seem to have any eyelids, and therefore perhaps its eyes were + so placed, that it might the better cleanse them with its forelegs; and perhaps + this may be the reason, why they so much avoid and run from the light behind + them, for being made to live in the shady and dark recesses of the hair, + and thence probably their eye having a great aperture, the open and clear + light, especially that of the Sun, must needs very much offend them; to secure + these eyes from receiving any injury from the hairs through which it passes, + it has two horns that grow before it, in the place where one would have thought + the eyes should be; each of these C C have four joynts, which are fringed, + as 'twere, with small brisles, from which to the tip of its snout D, the + head seems very round and tapering, ending in a very sharp nose D, which + seems to have a small hole, and to be the passage through which he sucks the blood.
-
Now whereas it if be plac'd on its back, with its belly - upwards, as it is in the 35. Scheme, it seems in several Positions to have - a resemblance of chaps, or jaws, as is represented in the Figure by E E, - yet in other postures those dark strokes disappear; and having kept several - of them in a box for two or three dayes, so that for all that time they had - nothing to feed on, I found, upon letting onecreep on my hand, that it immediately - fell to sucking, and did neither seem to thrust its nose very deep into the - skin, nor to open any kind of mouth, but I could plainly perceive a small - current of blood, which came directly from its snout, and past into its belly; - and about A there seem'd a contrivance, somewhat resembling a Pump, pair - of Bellows, or Heart, for by a very swift systole and diastole the blood +
Now whereas it if be plac'd on its back, with its belly + upwards, as it is in the 35. Scheme, it seems in several Positions to have + a resemblance of chaps, or jaws, as is represented in the Figure by E E, + yet in other postures those dark strokes disappear; and having kept several + of them in a box for two or three dayes, so that for all that time they had + nothing to feed on, I found, upon letting onecreep on my hand, that it immediately + fell to sucking, and did neither seem to thrust its nose very deep into the + skin, nor to open any kind of mouth, but I could plainly perceive a small + current of blood, which came directly from its snout, and past into its belly; + and about A there seem'd a contrivance, somewhat resembling a Pump, pair + of Bellows, or Heart, for by a very swift systole and diastole the blood seem'd drawn from the nose, and forced into the body.
-It did not seem at all, though I viewed it a good while as - it was sucking, to thrust more of its nose into the skin then the very snout - D, nor did it cause the least discernable pain, and yet the blood seem'd - to run through its head very quick and freely, so that it seems there is - no part of the skin but the blood is dispers'd into, nay, even into the - cuticula; for had it thrust its whole nose in from D to C C, it would not - have amounted to the supposed thickness of that tegument, the length of +
It did not seem at all, though I viewed it a good while as + it was sucking, to thrust more of its nose into the skin then the very snout + D, nor did it cause the least discernable pain, and yet the blood seem'd + to run through its head very quick and freely, so that it seems there is + no part of the skin but the blood is dispers'd into, nay, even into the + cuticula; for had it thrust its whole nose in from D to C C, it would not + have amounted to the supposed thickness of that tegument, the length of the nose being not more then a three hundredth part of an inch.
-It has six legs, covered with a very transparent shell, - and joynted exactly like a Crab's, or Lobster's; each leg is divided into - six parts by these joynts, and those have here and there several small hairs; - and at the end of each leg it has two claws, very properly adapted for its - peculiar use, being thereby inabled to walk very securely both on the skin - and hair; and indeed this contrivance of the feet is very curious, and could - not be made more commodiously and compendiously, for performing both these - requisite motions, of walking and climbing up the hair of a mans head, then - it is : for, by having the lesser claw (a) set so much short of the bigger - (b) when it walks on the skin the shorter touches not, and then the feet - are the same with those of a Mite, and several other small Insects, but by - means of the small joynts of the longer claw it can bend it round, and so - with both claws take hold of a hair, in the manner represented in the Figure, +
It has six legs, covered with a very transparent shell, + and joynted exactly like a Crab's, or Lobster's; each leg is divided into + six parts by these joynts, and those have here and there several small hairs; + and at the end of each leg it has two claws, very properly adapted for its + peculiar use, being thereby inabled to walk very securely both on the skin + and hair; and indeed this contrivance of the feet is very curious, and could + not be made more commodiously and compendiously, for performing both these + requisite motions, of walking and climbing up the hair of a mans head, then + it is : for, by having the lesser claw (a) set so much short of the bigger + (b) when it walks on the skin the shorter touches not, and then the feet + are the same with those of a Mite, and several other small Insects, but by + means of the small joynts of the longer claw it can bend it round, and so + with both claws take hold of a hair, in the manner represented in the Figure, the long transparent Cylinder F F F, being a Man's hair held by it.
-The Thorax seem'd cas'd with another kind of substance then - the belly, namely, with a thin transparent horny substance, which upon the fasting - of the Creature did not grow flaccid; through this I could plainly see the - blood, suck'd from my hand, to be variously distributed, and mov'd to and - fro; and about G there seem'd a pretty big white substance, which seem'd - to be moved within its thorax; besides, there appear'd very many small milk-white - vessels, which crost over the breast between the legs, out of which, on - either side, are many small branchings, these seem'd to be the veins and - arteries, for that which is analogus to blood in all Insects is milk-white. +
The Thorax seem'd cas'd with another kind of substance then + the belly, namely, with a thin transparent horny substance, which upon the fasting + of the Creature did not grow flaccid; through this I could plainly see the + blood, suck'd from my hand, to be variously distributed, and mov'd to and + fro; and about G there seem'd a pretty big white substance, which seem'd + to be moved within its thorax; besides, there appear'd very many small milk-white + vessels, which crost over the breast between the legs, out of which, on + either side, are many small branchings, these seem'd to be the veins and + arteries, for that which is analogus to blood in all Insects is milk-white.
-The belly is covered with a transparent substance likewise, - but more resembling a skin then a shell, for 'tis grain'd all over the belly - just like the skin in the palms of a man's hand, and when the belly is empty, - grows very flaccid and wrinkled ; at the upper end of this is placed the - stomach H H, and perhaps also the white spot I I may be the liver, or pancreas, - which by the peristaltick motion of the guts, is a little mov'd to and fro, - not with a systole and diastole, but rather with a thronging or justling +
The belly is covered with a transparent substance likewise, + but more resembling a skin then a shell, for 'tis grain'd all over the belly + just like the skin in the palms of a man's hand, and when the belly is empty, + grows very flaccid and wrinkled ; at the upper end of this is placed the + stomach H H, and perhaps also the white spot I I may be the liver, or pancreas, + which by the peristaltick motion of the guts, is a little mov'd to and fro, + not with a systole and diastole, but rather with a thronging or justling motion.
-Viewing one of these Creatures, after it had fasted two - dayes, all the hinder part was lank and flaccid, and the white spot I I - hardly mov'd, most of the white branchings disappear'd, and most also of - the redness or sucked blood in the guts, the peristaltick motion of which - was scarce discernable; but upon the suffering it to suck, it presently - fill'd the skin of the belly, and of the six scolop'd embosments on either side, - as full as it could be stuft ; the stomach and guts were as full as they - could hold; the peristaltick motion of the gut grew quick, and the justling - motion of I I accordingly ; multitudes of milk-white vessels seem'd quickly - filled, and turgid, which were perhaps the veins and arteries, and the Creature - was so greedy, that though it could not contain more, yet it continued sucking - as fast as ever, and as fast emptying it self behind : the digestion of this - Creature must needs be very quick, for though I perceiv'd the blood thicker - and blacker when suck'd, yet, when in the guts, it was of a very lovely - ruby colour, and that part of it, which was digested into the veins, seemed - white; whence it appears, that a further digestion of blood may make it - milk, at least of a resembling colour : What is else observable in the figure +
Viewing one of these Creatures, after it had fasted two + dayes, all the hinder part was lank and flaccid, and the white spot I I + hardly mov'd, most of the white branchings disappear'd, and most also of + the redness or sucked blood in the guts, the peristaltick motion of which + was scarce discernable; but upon the suffering it to suck, it presently + fill'd the skin of the belly, and of the six scolop'd embosments on either side, + as full as it could be stuft ; the stomach and guts were as full as they + could hold; the peristaltick motion of the gut grew quick, and the justling + motion of I I accordingly ; multitudes of milk-white vessels seem'd quickly + filled, and turgid, which were perhaps the veins and arteries, and the Creature + was so greedy, that though it could not contain more, yet it continued sucking + as fast as ever, and as fast emptying it self behind : the digestion of this + Creature must needs be very quick, for though I perceiv'd the blood thicker + and blacker when suck'd, yet, when in the guts, it was of a very lovely + ruby colour, and that part of it, which was digested into the veins, seemed + white; whence it appears, that a further digestion of blood may make it + milk, at least of a resembling colour : What is else observable in the figure of this Creature, maybe seen by the 35. Scheme.
diff --git a/src/demo/share/jfc/SwingSet2/resources/index.html b/src/demo/share/jfc/SwingSet2/resources/index.html
index 0aa73c410ef..ba7d26068a1 100644
--- a/src/demo/share/jfc/SwingSet2/resources/index.html
+++ b/src/demo/share/jfc/SwingSet2/resources/index.html
@@ -1,7 +1,7 @@
Untitled Document - +
@@ -20,21 +20,21 @@
Of a Louse
-
Images and text used by permission of Octavo +
Images and text used by permission of Octavo Corporation (www.octavo.com),
- (c) 1999 Octavo Corporation. All + (c) 1999 Octavo Corporation. All rights reserved.
- Octavo Corporation is a publisher of rare - books and manuscripts with digital tools and formats through partnerships - with libraries, museums, and individuals. Using high-resolution digital imaging - technology, Octavo releases digital rare books on CD-ROM as Adobe PDF files - which can be viewed on and printed from almost any computing platform. You - can view each page and the binding on your computer screen, zoom in to view - detail up to 800% in some cases, and search, copy and paste the "live" text - placed invisibly behind the page images which is available for selected Editions. - Also included in each edition is the work's collation and provenance, as well + Octavo Corporation is a publisher of rare + books and manuscripts with digital tools and formats through partnerships + with libraries, museums, and individuals. Using high-resolution digital imaging + technology, Octavo releases digital rare books on CD-ROM as Adobe PDF files + which can be viewed on and printed from almost any computing platform. You + can view each page and the binding on your computer screen, zoom in to view + detail up to 800% in some cases, and search, copy and paste the "live" text + placed invisibly behind the page images which is available for selected Editions. + Also included in each edition is the work's collation and provenance, as well as commentary by a noted expert in its field.
diff --git a/src/demo/share/jfc/SwingSet2/resources/king.html b/src/demo/share/jfc/SwingSet2/resources/king.html
index a6f458af419..b14f2d84b51 100644
--- a/src/demo/share/jfc/SwingSet2/resources/king.html
+++ b/src/demo/share/jfc/SwingSet2/resources/king.html
@@ -1,34 +1,34 @@
Untitled Document - + -
+
![]()
-
Do here most humbly lay this small Present at Your - Majesties Royal feet. And though it comes accompany'd with two disadvantages, - the meanness of the Author, and of the Subject; yet in both I am incouraged +Do here most humbly lay this small Present at Your + Majesties Royal feet. And though it comes accompany'd with two disadvantages, + the meanness of the Author, and of the Subject; yet in both I am incouraged by the greatness of your Mercy and your Knowledge. -By the one I am taught , that you can forgive the most presumptuous - Offendors: And by the other, that you will not esteem the least work of Nature, +
By the one I am taught , that you can forgive the most presumptuous + Offendors: And by the other, that you will not esteem the least work of Nature, or Art, unworthy your Observation.
-Amidst the many felicities that have accompani'd your Majesties - happy Restauration and Government, it is none of the least considerable, that +
Amidst the many felicities that have accompani'd your Majesties + happy Restauration and Government, it is none of the least considerable, that Philosophy and Experimental Learning have prosper'd under your Royal Patronage.
-And as the calm prosperity of your Reign has given us the - leisure to follow these Studies of quiet and retirement, so it is just, that - the Fruits of them should, by way of acknowledgement, be return'd to your - Majesty. There are, Sir, several other of your Subjects, of your Royal Society, - now busie about Nobler matters: The Improvement of Manufactures and Agriculture, - the Increase of Commerce, the Advantage of Navigation: In all which they are +
And as the calm prosperity of your Reign has given us the + leisure to follow these Studies of quiet and retirement, so it is just, that + the Fruits of them should, by way of acknowledgement, be return'd to your + Majesty. There are, Sir, several other of your Subjects, of your Royal Society, + now busie about Nobler matters: The Improvement of Manufactures and Agriculture, + the Increase of Commerce, the Advantage of Navigation: In all which they are assisted by your Majesties Incouragement and Example.
-Amidst all those greater Designs, I here presume to bring - in that which is more proportionable to the smalness of my Abilities, and - to offer some of the least of all visible things, to that Mighty King, that - has establisht an Empire over the best of all Invisible things of this World, +
Amidst all those greater Designs, I here presume to bring + in that which is more proportionable to the smalness of my Abilities, and + to offer some of the least of all visible things, to that Mighty King, that + has establisht an Empire over the best of all Invisible things of this World, the Minds o f Men.
diff --git a/src/demo/share/jfc/SwingSet2/resources/preface.html b/src/demo/share/jfc/SwingSet2/resources/preface.html
index 8138b288a18..b7c10d6eec1 100644
--- a/src/demo/share/jfc/SwingSet2/resources/preface.html
+++ b/src/demo/share/jfc/SwingSet2/resources/preface.html
@@ -1,7 +1,7 @@
Untitled Document - +
@@ -9,101 +9,101 @@
THE PREFACE
-
+-
+-It is the great prerogative of Mankind above - other Creatures, that we are not only able to behold the works of Nature, - or barely to sustein our lives by them, but we have also the power of considering, +
It is the great prerogative of Mankind above + other Creatures, that we are not only able to behold the works of Nature, + or barely to sustein our lives by them, but we have also the power of considering, comparing, altering, assisting, and improving them to various uses.
-And as this is the peculiar priviledge of humane Nature - in general, so is it capable of being so far advanced by the helps of Art, - and Experience, as to make some Men excel others in their Observations, +
And as this is the peculiar priviledge of humane Nature + in general, so is it capable of being so far advanced by the helps of Art, + and Experience, as to make some Men excel others in their Observations, and Deductions, almost as much as they do Beasts.
-By the addition of such artificial Instruments and methods, - there may be, in some manner, a reparation made for the mischiefs, and imperfection, - mankind has drawn upon itself, by negligence, and intemperance, and a wilful - and superstitious deserting the Prescripts and Rules of Nature, whereby - every man, both from a deriv'd corruption, innate and born with him, and - from his breeding and converse with men, is very subject to slip into all +
By the addition of such artificial Instruments and methods, + there may be, in some manner, a reparation made for the mischiefs, and imperfection, + mankind has drawn upon itself, by negligence, and intemperance, and a wilful + and superstitious deserting the Prescripts and Rules of Nature, whereby + every man, both from a deriv'd corruption, innate and born with him, and + from his breeding and converse with men, is very subject to slip into all sorts of errors.
-The only way which now remains for us to recover some degree - of those former perfections, seems to be, by rectifying the operations of - the Sense, the Memory, and Reason, since upon the evidence, the strength, - the integrity, and the right correspondence of all these, all the light, - by which our actions are to be guided, is to be renewed, and all our command +
The only way which now remains for us to recover some degree + of those former perfections, seems to be, by rectifying the operations of + the Sense, the Memory, and Reason, since upon the evidence, the strength, + the integrity, and the right correspondence of all these, all the light, + by which our actions are to be guided, is to be renewed, and all our command over things is to be establisht.
-It is therefore most worthy of our consideration, to recollect - their several defects, that so we may the better understand how to supply - them, and by what assistances we may inlarge their power, and secure them +
It is therefore most worthy of our consideration, to recollect + their several defects, that so we may the better understand how to supply + them, and by what assistances we may inlarge their power, and secure them in performing their particular duties.
-As for the actions of our Senses, we cannot but observe - them to be in many particulars much outdone by those of other Creatures, - and when at best, to be far short of the perfection they seem capable of - : And these infirmities of the Senses arise from a double cause, either - from the disproportion of the Object to the Organ, whereby an infinite number - of things can never enter into them, or else from error in the Perception, - that many things, which come within their reach, are not received in a right +
As for the actions of our Senses, we cannot but observe + them to be in many particulars much outdone by those of other Creatures, + and when at best, to be far short of the perfection they seem capable of + : And these infirmities of the Senses arise from a double cause, either + from the disproportion of the Object to the Organ, whereby an infinite number + of things can never enter into them, or else from error in the Perception, + that many things, which come within their reach, are not received in a right manner.
-The like frailties are to be found in the Memory; we often - let many things slip away from us, which deserve to be retain'd; and of - those which we treasure up, a great part is either frivolous or false ; - and if good, and substantial, either in tract of time obliterated, or at - best so overwhelmed and buried under more frothy notions, that when there +
The like frailties are to be found in the Memory; we often + let many things slip away from us, which deserve to be retain'd; and of + those which we treasure up, a great part is either frivolous or false ; + and if good, and substantial, either in tract of time obliterated, or at + best so overwhelmed and buried under more frothy notions, that when there is need of them, they are in vain sought for.
-The two main foundations being so deceivable, it is no wonder, - that all the succeeding works which we build upon them, of arguing, concluding, - defining, judging, and all the other degrees of Reason, are lyable to the - same imperfection, being, at best, either vain, or uncertain: So that the - errors of the understanding are answerable to the two other, being defective - both in the quantity and goodness of its knowledge; for the limits, to which - our thoughts are confind, are small in respect of the vast extent of Nature - it self; some parts of it are too large to be comprehended, and some too +
The two main foundations being so deceivable, it is no wonder, + that all the succeeding works which we build upon them, of arguing, concluding, + defining, judging, and all the other degrees of Reason, are lyable to the + same imperfection, being, at best, either vain, or uncertain: So that the + errors of the understanding are answerable to the two other, being defective + both in the quantity and goodness of its knowledge; for the limits, to which + our thoughts are confind, are small in respect of the vast extent of Nature + it self; some parts of it are too large to be comprehended, and some too little to be perceived.
-And from thence it must follow, that not having a full sensation - of the Object, we must be very lame and imperfect in our conceptions about - it, and in all the propositions which we build upon it; hence we often take - the shadow of things for the substance, small appearances for good similitudes, - similitudes for definitions; and even many of those, which we think to be - the most solid definitions, are rather expressions of our own misguided +
And from thence it must follow, that not having a full sensation + of the Object, we must be very lame and imperfect in our conceptions about + it, and in all the propositions which we build upon it; hence we often take + the shadow of things for the substance, small appearances for good similitudes, + similitudes for definitions; and even many of those, which we think to be + the most solid definitions, are rather expressions of our own misguided apprehensions then of the true nature of the things themselves.
-The effects of these imperfections are manifested in different - ways, according to the temper and disposition of the several minds of men, - some they incline to gross ignorance and stupidity, and others to a presumptuous - imposing on other mens Opinions, and a confident dogmatizing on matters, +
The effects of these imperfections are manifested in different + ways, according to the temper and disposition of the several minds of men, + some they incline to gross ignorance and stupidity, and others to a presumptuous + imposing on other mens Opinions, and a confident dogmatizing on matters, whereof there is no assurance to be given.
-Thus all the uncertainty, and mistakes of humane actions, - proceed either from the narrowness and wandring of our Senses, from the - slipperiness or delusion of our Memory, from the confinement or rashness - of our Understanding, so that 'tis no wonder, that our power over natural - causes and effects is so slowly improvd, seeing we are not only to contend - with the obscurity and difficulty of the things whereon we work and think, +
Thus all the uncertainty, and mistakes of humane actions, + proceed either from the narrowness and wandring of our Senses, from the + slipperiness or delusion of our Memory, from the confinement or rashness + of our Understanding, so that 'tis no wonder, that our power over natural + causes and effects is so slowly improvd, seeing we are not only to contend + with the obscurity and difficulty of the things whereon we work and think, but even the forces of our own minds conspire to betray us.
-These being the dangers in the process of humane Reason, - the remedies of them all can only proceed from the real, the mechanical, - the experimental Philosophy, which has this advantage over the Philosophy - of discourse and disputation, that whereas that chiefly aims at the subtilty - of its Deductions and Conclusions, without much regard to the first groundwork, - which ought to be well laid on the Sense and Memory ; so this intends the - right ordering of them all, and the making them serviceable to each other. +
These being the dangers in the process of humane Reason, + the remedies of them all can only proceed from the real, the mechanical, + the experimental Philosophy, which has this advantage over the Philosophy + of discourse and disputation, that whereas that chiefly aims at the subtilty + of its Deductions and Conclusions, without much regard to the first groundwork, + which ought to be well laid on the Sense and Memory ; so this intends the + right ordering of them all, and the making them serviceable to each other.
-The first thing to be undertaken in this weighty work, is - a watchfulness over the failings and an inlargement of the dominion, of - the Senses. To which end it is requisite, first, That there should be a - scrupulous choice, and a strict examination, of the reality, constancy, - and certainty of the Particulars that we admit: This is the first rise whereon - truth is to begin, and here the most severe, and most impartial diligence, - must be imployed ; the storing up of all, without any regard to evidence +
The first thing to be undertaken in this weighty work, is + a watchfulness over the failings and an inlargement of the dominion, of + the Senses. To which end it is requisite, first, That there should be a + scrupulous choice, and a strict examination, of the reality, constancy, + and certainty of the Particulars that we admit: This is the first rise whereon + truth is to begin, and here the most severe, and most impartial diligence, + must be imployed ; the storing up of all, without any regard to evidence or use, will only tend to darkness and confusion.
-We must not therefore esteem the riches of our Philosophical - treasure by the number only, but chiefly by the weight; the most vulgar - Instances are not to be neglected, but above all, the most instructive are - to be entertain'd: the footsteps of Nature are to be trac'd, not only in - her ordinary course,but when she seems to be put to her shifts, to make - many doublings and turnings, and to use some kind of art in indeavouring +
We must not therefore esteem the riches of our Philosophical + treasure by the number only, but chiefly by the weight; the most vulgar + Instances are not to be neglected, but above all, the most instructive are + to be entertain'd: the footsteps of Nature are to be trac'd, not only in + her ordinary course,but when she seems to be put to her shifts, to make + many doublings and turnings, and to use some kind of art in indeavouring to avoid our discovery.
diff --git a/src/demo/share/jfc/SwingSet2/resources/seaweed.html b/src/demo/share/jfc/SwingSet2/resources/seaweed.html
index fd2a9433630..f386751dd7c 100644
--- a/src/demo/share/jfc/SwingSet2/resources/seaweed.html
+++ b/src/demo/share/jfc/SwingSet2/resources/seaweed.html
@@ -1,7 +1,7 @@
Untitled Document - +
@@ -11,47 +11,47 @@
-For curiosity and beauty, I have not among all the Plants - or Vegetables I have yet observ'd, seen any one comparable to this Sea-weed - I have here describ'd, of which I am able to say very little more then what - is represented by the second Figure of the ninth Scheme: Namely, that it is - a Plant which grows upon the Rocks under the water, and increases and spreads - it self into a great tuft, which is not onely handsomely branch'd into several - leaves, but the whole surface of the Plant is cover'd over with a most curious - kind of carv'd work, which consists of a texture much resembling a Honeycomb; - for the whole surface on both sides is cover'd over with a multitude of very - small holes, being no bigger then so many holes made with the point of a small - Pinn, and rang'd in the neatest and most delicate order imaginable, they being - plac'd in the manner of a Quincunx, or very much like the rows of the eyes - of a Fly, the rows or orders being very regular, which way soever they are - observ'd: what the texture was, as it appear'd through a pretty bigg Magnifying - Microscope, I have here adjoin'd in the first Figure of the 14. Scheme. which - round Area A B C D represents a part of the surface about one eighth part - of an Inch in Diameter: Those little holes, which to the eye look'd round, - like so many little spots, here appear'd very regularly shap'd holes, representing - almost the shape of the sole of a round toed shoe, the hinder part of which, - is, as it were, trod on or cover'd by the toe of that next below it; these - holes seem'd wall'd about with a very thin and transparent substance, looking - of a pale straw-colour; from the edge of which, against the middle of each - hole, were sprouted out four small transparent straw-colour'd Thorns, which - seem'd to protect and cover those cavities, from either side two; neer the - root of this Plant, were sprouted out several small branches of a kind of +
For curiosity and beauty, I have not among all the Plants + or Vegetables I have yet observ'd, seen any one comparable to this Sea-weed + I have here describ'd, of which I am able to say very little more then what + is represented by the second Figure of the ninth Scheme: Namely, that it is + a Plant which grows upon the Rocks under the water, and increases and spreads + it self into a great tuft, which is not onely handsomely branch'd into several + leaves, but the whole surface of the Plant is cover'd over with a most curious + kind of carv'd work, which consists of a texture much resembling a Honeycomb; + for the whole surface on both sides is cover'd over with a multitude of very + small holes, being no bigger then so many holes made with the point of a small + Pinn, and rang'd in the neatest and most delicate order imaginable, they being + plac'd in the manner of a Quincunx, or very much like the rows of the eyes + of a Fly, the rows or orders being very regular, which way soever they are + observ'd: what the texture was, as it appear'd through a pretty bigg Magnifying + Microscope, I have here adjoin'd in the first Figure of the 14. Scheme. which + round Area A B C D represents a part of the surface about one eighth part + of an Inch in Diameter: Those little holes, which to the eye look'd round, + like so many little spots, here appear'd very regularly shap'd holes, representing + almost the shape of the sole of a round toed shoe, the hinder part of which, + is, as it were, trod on or cover'd by the toe of that next below it; these + holes seem'd wall'd about with a very thin and transparent substance, looking + of a pale straw-colour; from the edge of which, against the middle of each + hole, were sprouted out four small transparent straw-colour'd Thorns, which + seem'd to protect and cover those cavities, from either side two; neer the + root of this Plant, were sprouted out several small branches of a kind of bastard Coralline, curiously branch'd, though small.
-And to confirm this, having lately the opportunity of viewing - the large Plant (if I may so call it) of a Sponge petrify'd, of which I made - mention in the last Observation, I found, that each of the Branches or Figures - of it, did, by the range of its pores, exhibit just such a texture, the rows - of pores crossing one another, much after the manner as the rows of eyes do - which are describ'd in the 26. Scheme : Coralline also, and several sorts of - white Coral, I have with a Microscope observ'd very curiously shap'd. And - I doubt not, but that he that shall observe these several kinds of Plants that - grow upon Rocks, which the Sea sometimes overflows, and those heaps of others - which are vomited out of it upon the shore, may find multitudes of little - Plants, and other bodies, which like this will afford very beautifull objects - for the Microscope ; and this Specimen here is adjoin'd onely to excite their - curiosities who have opportunity of observing to examine and collect what - they find worthy their notice; for the Sea, among terrestrial bodies, is also - a prolifick mother, and affords as many Instances of spontaneous generations +
And to confirm this, having lately the opportunity of viewing + the large Plant (if I may so call it) of a Sponge petrify'd, of which I made + mention in the last Observation, I found, that each of the Branches or Figures + of it, did, by the range of its pores, exhibit just such a texture, the rows + of pores crossing one another, much after the manner as the rows of eyes do + which are describ'd in the 26. Scheme : Coralline also, and several sorts of + white Coral, I have with a Microscope observ'd very curiously shap'd. And + I doubt not, but that he that shall observe these several kinds of Plants that + grow upon Rocks, which the Sea sometimes overflows, and those heaps of others + which are vomited out of it upon the shore, may find multitudes of little + Plants, and other bodies, which like this will afford very beautifull objects + for the Microscope ; and this Specimen here is adjoin'd onely to excite their + curiosities who have opportunity of observing to examine and collect what + they find worthy their notice; for the Sea, among terrestrial bodies, is also + a prolifick mother, and affords as many Instances of spontaneous generations as either the Air or Earth.
diff --git a/src/demo/share/jfc/SwingSet2/resources/title.html b/src/demo/share/jfc/SwingSet2/resources/title.html index ea13f5cf005..0e99558ccd7 100644 --- a/src/demo/share/jfc/SwingSet2/resources/title.html +++ b/src/demo/share/jfc/SwingSet2/resources/title.html @@ -1,7 +1,7 @@
Untitled Document - + @@ -14,18 +14,18 @@
MADE BY
MAGNIFYING GLASSES.
WITH
-OBSERVATIONS and INQUIRIES +
OBSERVATIONS and INQUIRIES thereupon.
-By R. HOOKE +
By R. HOOKE, Fellow of the ROYAL SOCIETY.
---LONDON, Printed by Jo. - Martyn, and Ja. Allestry, - Printers to the ROYAL SOCIETY, and are to - be sold at their Shop at the Bell in S. Paul's Church-yard. M
+++LONDON, Printed by Jo. + Martyn, and Ja. Allestry, + Printers to the ROYAL SOCIETY, and are to + be sold at their Shop at the Bell in S. Paul's Church-yard. M D C L X V.
diff --git a/src/hotspot/.editorconfig b/src/hotspot/.editorconfig deleted file mode 100644 index 48e63362b54..00000000000 --- a/src/hotspot/.editorconfig +++ /dev/null @@ -1,3 +0,0 @@ -[*.{cpp,hpp,c,h}] -indent_style = space -indent_size = 2 diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 76e3c92ddc2..f367362b4d8 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1,5 +1,5 @@ // -// Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved. // Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // @@ -2296,6 +2296,26 @@ bool Matcher::match_rule_supported(int opcode) { return false; } break; + case Op_FmaHF: + // UseFMA flag also needs to be checked along with FEAT_FP16 + if (!UseFMA || !is_feat_fp16_supported()) { + return false; + } + break; + case Op_AddHF: + case Op_SubHF: + case Op_MulHF: + case Op_DivHF: + case Op_MinHF: + case Op_MaxHF: + case Op_SqrtHF: + // Half-precision floating point scalar operations require FEAT_FP16 + // to be available. FEAT_FP16 is enabled if both "fphp" and "asimdhp" + // features are supported. + if (!is_feat_fp16_supported()) { + return false; + } + break; } return true; // Per default match rules are supported. @@ -2306,11 +2326,11 @@ const RegMask* Matcher::predicate_reg_mask(void) { } bool Matcher::supports_vector_calling_convention(void) { - return EnableVectorSupport && UseVectorStubs; + return EnableVectorSupport; } OptoRegPair Matcher::vector_return_value(uint ideal_reg) { - assert(EnableVectorSupport && UseVectorStubs, "sanity"); + assert(EnableVectorSupport, "sanity"); int lo = V0_num; int hi = V0_H_num; if (ideal_reg == Op_VecX || ideal_reg == Op_VecA) { @@ -4599,6 +4619,15 @@ operand immF0() interface(CONST_INTER); %} +// Half Float (FP16) Immediate +operand immH() +%{ + match(ConH); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // operand immFPacked() %{ @@ -6942,6 +6971,21 @@ instruct loadConD(vRegD dst, immD con) %{ ins_pipe(fp_load_constant_d); %} +// Load Half Float Constant +// The "ldr" instruction loads a 32-bit word from the constant pool into a +// 32-bit register but only the bottom half will be populated and the top +// 16 bits are zero. 
+instruct loadConH(vRegF dst, immH con) %{ + match(Set dst con); + format %{ + "ldrs $dst, [$constantaddress]\t# load from constant table: half float=$con\n\t" + %} + ins_encode %{ + __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + ins_pipe(fp_load_constant_s); +%} + // Store Instructions // Store Byte @@ -7717,14 +7761,12 @@ instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{ effect(TEMP tmp); ins_cost(INSN_COST * 13); - format %{ "movw $src, $src\n\t" - "mov $tmp, $src\t# vector (1D)\n\t" + format %{ "fmovs $tmp, $src\t# vector (1S)\n\t" "cnt $tmp, $tmp\t# vector (8B)\n\t" "addv $tmp, $tmp\t# vector (8B)\n\t" "mov $dst, $tmp\t# vector (1D)" %} ins_encode %{ - __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0 - __ mov($tmp$$FloatRegister, __ D, 0, $src$$Register); + __ fmovs($tmp$$FloatRegister, $src$$Register); __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); __ mov($dst$$Register, $tmp$$FloatRegister, __ D, 0); @@ -8144,6 +8186,7 @@ instruct castPP(iRegPNoSp dst) instruct castII(iRegI dst) %{ + predicate(VerifyConstraintCasts == 0); match(Set dst (CastII dst)); size(0); @@ -8153,8 +8196,22 @@ instruct castII(iRegI dst) ins_pipe(pipe_class_empty); %} +instruct castII_checked(iRegI dst, rFlagsReg cr) +%{ + predicate(VerifyConstraintCasts > 0); + match(Set dst (CastII dst)); + effect(KILL cr); + + format %{ "# castII_checked of $dst" %} + ins_encode %{ + __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register, rscratch1); + %} + ins_pipe(pipe_slow); +%} + instruct castLL(iRegL dst) %{ + predicate(VerifyConstraintCasts == 0); match(Set dst (CastLL dst)); size(0); @@ -8164,6 +8221,29 @@ instruct castLL(iRegL dst) ins_pipe(pipe_class_empty); %} +instruct castLL_checked(iRegL dst, rFlagsReg cr) +%{ + predicate(VerifyConstraintCasts > 0); + match(Set dst (CastLL dst)); + effect(KILL cr); + + format %{ "# castLL_checked of $dst" %} + ins_encode %{ + __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, rscratch1); + %} + ins_pipe(pipe_slow); +%} + +instruct castHH(vRegF dst) +%{ + match(Set dst (CastHH dst)); + size(0); + format %{ "# castHH of $dst" %} + ins_encode(/* empty encoding */); + ins_cost(0); + ins_pipe(pipe_class_empty); +%} + instruct castFF(vRegF dst) %{ match(Set dst (CastFF dst)); @@ -13606,6 +13686,17 @@ instruct bits_reverse_L(iRegLNoSp dst, iRegL src) // ============================================================================ // Floating Point Arithmetic Instructions +instruct addHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (AddHF src1 src2)); + format %{ "faddh $dst, $src1, $src2" %} + ins_encode %{ + __ faddh($dst$$FloatRegister, + $src1$$FloatRegister, + $src2$$FloatRegister); + %} + ins_pipe(fp_dop_reg_reg_s); +%} + instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ match(Set dst (AddF src1 src2)); @@ -13636,6 +13727,17 @@ instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ ins_pipe(fp_dop_reg_reg_d); %} +instruct subHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (SubHF src1 src2)); + format %{ "fsubh $dst, $src1, $src2" %} + ins_encode %{ + __ fsubh($dst$$FloatRegister, + $src1$$FloatRegister, + $src2$$FloatRegister); + %} + ins_pipe(fp_dop_reg_reg_s); +%} + instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ match(Set dst (SubF src1 src2)); @@ -13666,6 +13768,17 @@ instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ ins_pipe(fp_dop_reg_reg_d); %} 
+instruct mulHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (MulHF src1 src2)); + format %{ "fmulh $dst, $src1, $src2" %} + ins_encode %{ + __ fmulh($dst$$FloatRegister, + $src1$$FloatRegister, + $src2$$FloatRegister); + %} + ins_pipe(fp_dop_reg_reg_s); +%} + instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ match(Set dst (MulF src1 src2)); @@ -13696,6 +13809,20 @@ instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ ins_pipe(fp_dop_reg_reg_d); %} +// src1 * src2 + src3 (half-precision float) +instruct maddHF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ + match(Set dst (FmaHF src3 (Binary src1 src2))); + format %{ "fmaddh $dst, $src1, $src2, $src3" %} + ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); + __ fmaddh($dst$$FloatRegister, + $src1$$FloatRegister, + $src2$$FloatRegister, + $src3$$FloatRegister); + %} + ins_pipe(pipe_class_default); +%} + // src1 * src2 + src3 instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ match(Set dst (FmaF src3 (Binary src1 src2))); @@ -13837,6 +13964,29 @@ instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zer ins_pipe(pipe_class_default); %} +// Math.max(HH)H (half-precision float) +instruct maxHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (MaxHF src1 src2)); + format %{ "fmaxh $dst, $src1, $src2" %} + ins_encode %{ + __ fmaxh($dst$$FloatRegister, + $src1$$FloatRegister, + $src2$$FloatRegister); + %} + ins_pipe(fp_dop_reg_reg_s); +%} + +// Math.min(HH)H (half-precision float) +instruct minHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (MinHF src1 src2)); + format %{ "fminh $dst, $src1, $src2" %} + ins_encode %{ + __ fminh($dst$$FloatRegister, + $src1$$FloatRegister, + $src2$$FloatRegister); + %} + ins_pipe(fp_dop_reg_reg_s); +%} // Math.max(FF)F instruct maxF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ @@ -13894,6 +14044,16 @@ instruct minD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ ins_pipe(fp_dop_reg_reg_d); %} +instruct divHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (DivHF src1 src2)); + format %{ "fdivh $dst, $src1, $src2" %} + ins_encode %{ + __ fdivh($dst$$FloatRegister, + $src1$$FloatRegister, + $src2$$FloatRegister); + %} + ins_pipe(fp_div_s); +%} instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ match(Set dst (DivF src1 src2)); @@ -14067,6 +14227,16 @@ instruct sqrtF_reg(vRegF dst, vRegF src) %{ ins_pipe(fp_div_d); %} +instruct sqrtHF_reg(vRegF dst, vRegF src) %{ + match(Set dst (SqrtHF src)); + format %{ "fsqrth $dst, $src" %} + ins_encode %{ + __ fsqrth($dst$$FloatRegister, + $src$$FloatRegister); + %} + ins_pipe(fp_div_s); +%} + // Math.rint, floor, ceil instruct roundD_reg(vRegD dst, vRegD src, immI rmode) %{ match(Set dst (RoundDoubleMode src rmode)); @@ -17116,6 +17286,64 @@ instruct expandBitsL_memcon(iRegINoSp dst, memory8 mem, immL mask, ins_pipe(pipe_slow); %} +//----------------------------- Reinterpret ---------------------------------- +// Reinterpret a half-precision float value in a floating point register to a general purpose register +instruct reinterpretHF2S(iRegINoSp dst, vRegF src) %{ + match(Set dst (ReinterpretHF2S src)); + format %{ "reinterpretHF2S $dst, $src" %} + ins_encode %{ + __ smov($dst$$Register, $src$$FloatRegister, __ H, 0); + %} + ins_pipe(pipe_slow); +%} + +// Reinterpret a half-precision float value in a general purpose register to a floating point register +instruct reinterpretS2HF(vRegF dst, iRegINoSp src) %{ + match(Set dst 
(ReinterpretS2HF src)); + format %{ "reinterpretS2HF $dst, $src" %} + ins_encode %{ + __ mov($dst$$FloatRegister, __ H, 0, $src$$Register); + %} + ins_pipe(pipe_slow); +%} + +// Without this optimization, ReinterpretS2HF (ConvF2HF src) would result in the following +// instructions (the first two are for ConvF2HF and the last instruction is for ReinterpretS2HF) - +// fcvt $tmp1_fpr, $src_fpr // Convert float to half-precision float +// mov $tmp2_gpr, $tmp1_fpr // Move half-precision float in FPR to a GPR +// mov $dst_fpr, $tmp2_gpr // Move the result from a GPR to an FPR +// The move from FPR to GPR in ConvF2HF and the move from GPR to FPR in ReinterpretS2HF +// can be omitted in this pattern, resulting in - +// fcvt $dst, $src // Convert float to half-precision float +instruct convF2HFAndS2HF(vRegF dst, vRegF src) +%{ + match(Set dst (ReinterpretS2HF (ConvF2HF src))); + format %{ "convF2HFAndS2HF $dst, $src" %} + ins_encode %{ + __ fcvtsh($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + +// Without this optimization, ConvHF2F (ReinterpretHF2S src) would result in the following +// instructions (the first one is for ReinterpretHF2S and the last two are for ConvHF2F) - +// mov $tmp1_gpr, $src_fpr // Move the half-precision float from an FPR to a GPR +// mov $tmp2_fpr, $tmp1_gpr // Move the same value from GPR to an FPR +// fcvt $dst_fpr, $tmp2_fpr // Convert the half-precision float to 32-bit float +// The move from FPR to GPR in ReinterpretHF2S and the move from GPR to FPR in ConvHF2F +// can be omitted as the input (src) is already in an FPR required for the fcvths instruction +// resulting in - +// fcvt $dst, $src // Convert half-precision float to a 32-bit float +instruct convHF2SAndHF2F(vRegF dst, vRegF src) +%{ + match(Set dst (ConvHF2F (ReinterpretHF2S src))); + format %{ "convHF2SAndHF2F $dst, $src" %} + ins_encode %{ + __ fcvths($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + // ============================================================================ // This name is KNOWN by the ADLC and cannot be changed. // The ADLC forces a 'TypeRawPtr::BOTTOM' output type diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index c7a0fc5724b..b4e6d79347f 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -1,6 +1,6 @@ // // Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. -// Copyright (c) 2020, 2024, Arm Limited. All rights reserved. +// Copyright (c) 2020, 2025, Arm Limited. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -225,6 +225,26 @@ source %{ return false; } break; + case Op_AddVHF: + case Op_SubVHF: + case Op_MulVHF: + case Op_DivVHF: + case Op_MinVHF: + case Op_MaxVHF: + case Op_SqrtVHF: + // FEAT_FP16 is enabled if both "fphp" and "asimdhp" features are supported. + // Only the Neon instructions need this check. SVE supports half-precision floats + // by default. 
+ if (UseSVE == 0 && !is_feat_fp16_supported()) { + return false; + } + break; + case Op_FmaVHF: + // UseFMA flag needs to be checked along with FEAT_FP16 + if (!UseFMA || (UseSVE == 0 && !is_feat_fp16_supported())) { + return false; + } + break; default: break; } @@ -270,6 +290,19 @@ source %{ case Op_StoreVectorScatter: opcode = Op_StoreVectorScatterMasked; break; + // Currently, the masked versions of the following 8 Float16 operations are disabled. + // When the support for Float16 vector classes is added in VectorAPI and the masked + // Float16 IR can be generated, these masked operations will be enabled and relevant + // backend support added. + case Op_AddVHF: + case Op_SubVHF: + case Op_MulVHF: + case Op_DivVHF: + case Op_MaxVHF: + case Op_MinVHF: + case Op_SqrtVHF: + case Op_FmaVHF: + return false; default: break; } @@ -583,6 +616,22 @@ instruct vaddL(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +instruct vaddHF(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (AddVHF src1 src2)); + format %{ "vaddHF $dst, $src1, $src2" %} + ins_encode %{ + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + if (VM_Version::use_neon_for_vector(length_in_bytes)) { + __ fadd($dst$$FloatRegister, get_arrangement(this), + $src1$$FloatRegister, $src2$$FloatRegister); + } else { + assert(UseSVE > 0, "must be sve"); + __ sve_fadd($dst$$FloatRegister, __ H, $src1$$FloatRegister, $src2$$FloatRegister); + } + %} + ins_pipe(pipe_slow); +%} + instruct vaddF(vReg dst, vReg src1, vReg src2) %{ match(Set dst (AddVF src1 src2)); format %{ "vaddF $dst, $src1, $src2" %} @@ -807,6 +856,22 @@ instruct vsubL(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +instruct vsubHF(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (SubVHF src1 src2)); + format %{ "vsubHF $dst, $src1, $src2" %} + ins_encode %{ + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + if (VM_Version::use_neon_for_vector(length_in_bytes)) { + __ fsub($dst$$FloatRegister, get_arrangement(this), + $src1$$FloatRegister, $src2$$FloatRegister); + } else { + assert(UseSVE > 0, "must be sve"); + __ sve_fsub($dst$$FloatRegister, __ H, $src1$$FloatRegister, $src2$$FloatRegister); + } + %} + ins_pipe(pipe_slow); +%} + instruct vsubF(vReg dst, vReg src1, vReg src2) %{ match(Set dst (SubVF src1 src2)); format %{ "vsubF $dst, $src1, $src2" %} @@ -1004,6 +1069,22 @@ instruct vmulL_sve(vReg dst_src1, vReg src2) %{ // vector mul - floating-point +instruct vmulHF(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (MulVHF src1 src2)); + format %{ "vmulHF $dst, $src1, $src2" %} + ins_encode %{ + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + if (VM_Version::use_neon_for_vector(length_in_bytes)) { + __ fmul($dst$$FloatRegister, get_arrangement(this), + $src1$$FloatRegister, $src2$$FloatRegister); + } else { + assert(UseSVE > 0, "must be sve"); + __ sve_fmul($dst$$FloatRegister, __ H, $src1$$FloatRegister, $src2$$FloatRegister); + } + %} + ins_pipe(pipe_slow); +%} + instruct vmulF(vReg dst, vReg src1, vReg src2) %{ match(Set dst (MulVF src1 src2)); format %{ "vmulF $dst, $src1, $src2" %} @@ -1102,6 +1183,28 @@ instruct vmulD_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ // vector float div +instruct vdivHF_neon(vReg dst, vReg src1, vReg src2) %{ + predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n))); + match(Set dst (DivVHF src1 src2)); + format %{ "vdivHF_neon $dst, $src1, $src2" %} + ins_encode %{ + __ fdiv($dst$$FloatRegister, get_arrangement(this), + $src1$$FloatRegister, 
$src2$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivHF_sve(vReg dst_src1, vReg src2) %{ + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n))); + match(Set dst_src1 (DivVHF dst_src1 src2)); + format %{ "vdivHF_sve $dst_src1, $dst_src1, $src2" %} + ins_encode %{ + assert(UseSVE > 0, "must be sve"); + __ sve_fdiv($dst_src1$$FloatRegister, __ H, ptrue, $src2$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + instruct vdivF_neon(vReg dst, vReg src1, vReg src2) %{ predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n))); match(Set dst (DivVF src1 src2)); @@ -1991,6 +2094,21 @@ instruct vnegD_masked(vReg dst_src, pRegGov pg) %{ // vector sqrt +instruct vsqrtHF(vReg dst, vReg src) %{ + match(Set dst (SqrtVHF src)); + format %{ "vsqrtHF $dst, $src" %} + ins_encode %{ + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + if (VM_Version::use_neon_for_vector(length_in_bytes)) { + __ fsqrt($dst$$FloatRegister, get_arrangement(this), $src$$FloatRegister); + } else { + assert(UseSVE > 0, "must be sve"); + __ sve_fsqrt($dst$$FloatRegister, __ H, ptrue, $src$$FloatRegister); + } + %} + ins_pipe(pipe_slow); +%} + instruct vsqrtF(vReg dst, vReg src) %{ match(Set dst (SqrtVF src)); format %{ "vsqrtF $dst, $src" %} @@ -2069,7 +2187,7 @@ instruct vminL_sve(vReg dst_src1, vReg src2) %{ ins_pipe(pipe_slow); %} -// vector min - B/S/I/F/D +// vector min - B/S/I/HF/F/D instruct vmin_neon(vReg dst, vReg src1, vReg src2) %{ predicate(Matcher::vector_element_basic_type(n) != T_LONG && @@ -2110,6 +2228,29 @@ instruct vmin_sve(vReg dst_src1, vReg src2) %{ ins_pipe(pipe_slow); %} +instruct vmin_HF_neon(vReg dst, vReg src1, vReg src2) %{ + predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n))); + match(Set dst (MinVHF src1 src2)); + format %{ "vmin_HF_neon $dst, $src1, $src2\t# Half float" %} + ins_encode %{ + __ fmin($dst$$FloatRegister, get_arrangement(this), + $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vmin_HF_sve(vReg dst_src1, vReg src2) %{ + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n))); + match(Set dst_src1 (MinVHF dst_src1 src2)); + format %{ "vmin_HF_sve $dst_src1, $dst_src1, $src2\t# Half float" %} + ins_encode %{ + assert(UseSVE > 0, "must be sve"); + __ sve_fmin($dst_src1$$FloatRegister, __ H, + ptrue, $src2$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + // vector min - predicated instruct vmin_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ @@ -2226,7 +2367,7 @@ instruct vmaxL_sve(vReg dst_src1, vReg src2) %{ ins_pipe(pipe_slow); %} -// vector max - B/S/I/F/D +// vector max - B/S/I/HF/F/D instruct vmax_neon(vReg dst, vReg src1, vReg src2) %{ predicate(Matcher::vector_element_basic_type(n) != T_LONG && @@ -2267,6 +2408,29 @@ instruct vmax_sve(vReg dst_src1, vReg src2) %{ ins_pipe(pipe_slow); %} +instruct vmax_HF_neon(vReg dst, vReg src1, vReg src2) %{ + predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n))); + match(Set dst (MaxVHF src1 src2)); + format %{ "vmax_HF_neon $dst, $src1, $src2\t# Half float" %} + ins_encode %{ + __ fmax($dst$$FloatRegister, get_arrangement(this), + $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vmax_HF_sve(vReg dst_src1, vReg src2) %{ + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n))); + match(Set dst_src1 (MaxVHF dst_src1 src2)); + format %{ "vmax_HF_sve $dst_src1, $dst_src1, $src2\t# Half float" %} + 
ins_encode %{ + assert(UseSVE > 0, "must be sve"); + __ sve_fmax($dst_src1$$FloatRegister, __ H, + ptrue, $src2$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + // vector max - predicated instruct vmax_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ @@ -2413,8 +2577,9 @@ instruct vmla_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ // dst_src1 = src2 * src3 + dst_src1 instruct vfmla(vReg dst_src1, vReg src2, vReg src3) %{ - match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); - match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); + match(Set dst_src1 (FmaVHF dst_src1 (Binary src2 src3))); + match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); + match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); format %{ "vfmla $dst_src1, $src2, $src3" %} ins_encode %{ assert(UseFMA, "Needs FMA instructions support."); @@ -4613,6 +4778,23 @@ instruct replicateD(vReg dst, vRegD src) %{ ins_pipe(pipe_slow); %} +// Replicate a half-precision float value held in a floating point register +instruct replicateHF(vReg dst, vRegF src) %{ + predicate(Matcher::vector_element_basic_type(n) == T_SHORT); + match(Set dst (Replicate src)); + format %{ "replicateHF $dst, $src\t# replicate half-precision float" %} + ins_encode %{ + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + if (VM_Version::use_neon_for_vector(length_in_bytes)) { + __ dup($dst$$FloatRegister, get_arrangement(this), $src$$FloatRegister); + } else { // length_in_bytes must be > 16 and SVE should be enabled + assert(UseSVE > 0, "must be sve"); + __ sve_cpy($dst$$FloatRegister, __ H, ptrue, $src$$FloatRegister); + } + %} + ins_pipe(pipe_slow); +%} + // replicate from imm instruct replicateI_imm_le128b(vReg dst, immI con) %{ @@ -4681,6 +4863,23 @@ instruct replicateL_imm8_gt128b(vReg dst, immL8_shift8 con) %{ ins_pipe(pipe_slow); %} +// Replicate a 16-bit half precision float value +instruct replicateHF_imm(vReg dst, immH con) %{ + match(Set dst (Replicate con)); + format %{ "replicateHF_imm $dst, $con\t# replicate immediate half-precision float" %} + ins_encode %{ + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + int imm = (int)($con$$constant) & 0xffff; + if (VM_Version::use_neon_for_vector(length_in_bytes)) { + __ mov($dst$$FloatRegister, get_arrangement(this), imm); + } else { // length_in_bytes must be > 16 and SVE should be enabled + assert(UseSVE > 0, "must be sve"); + __ sve_dup($dst$$FloatRegister, __ H, imm); + } + %} + ins_pipe(pipe_slow); +%} + // ------------------------------ Vector insert -------------------------------- // BYTE, SHORT, INT diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index 171bc390545..cc07e0e4076 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -1,6 +1,6 @@ // // Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. -// Copyright (c) 2020, 2024, Arm Limited. All rights reserved. +// Copyright (c) 2020, 2025, Arm Limited. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -215,6 +215,26 @@ source %{ return false; } break; + case Op_AddVHF: + case Op_SubVHF: + case Op_MulVHF: + case Op_DivVHF: + case Op_MinVHF: + case Op_MaxVHF: + case Op_SqrtVHF: + // FEAT_FP16 is enabled if both "fphp" and "asimdhp" features are supported. + // Only the Neon instructions need this check. 
SVE supports half-precision floats + // by default. + if (UseSVE == 0 && !is_feat_fp16_supported()) { + return false; + } + break; + case Op_FmaVHF: + // UseFMA flag needs to be checked along with FEAT_FP16 + if (!UseFMA || (UseSVE == 0 && !is_feat_fp16_supported())) { + return false; + } + break; default: break; } @@ -260,6 +280,19 @@ source %{ case Op_StoreVectorScatter: opcode = Op_StoreVectorScatterMasked; break; + // Currently, the masked versions of the following 8 Float16 operations are disabled. + // When the support for Float16 vector classes is added in VectorAPI and the masked + // Float16 IR can be generated, these masked operations will be enabled and relevant + // backend support added. + case Op_AddVHF: + case Op_SubVHF: + case Op_MulVHF: + case Op_DivVHF: + case Op_MaxVHF: + case Op_MinVHF: + case Op_SqrtVHF: + case Op_FmaVHF: + return false; default: break; } @@ -508,12 +541,13 @@ dnl // ------------------------------ Vector add ----------------------------------- // vector add -BINARY_OP(vaddB, AddVB, addv, sve_add, B) -BINARY_OP(vaddS, AddVS, addv, sve_add, H) -BINARY_OP(vaddI, AddVI, addv, sve_add, S) -BINARY_OP(vaddL, AddVL, addv, sve_add, D) -BINARY_OP(vaddF, AddVF, fadd, sve_fadd, S) -BINARY_OP(vaddD, AddVD, fadd, sve_fadd, D) +BINARY_OP(vaddB, AddVB, addv, sve_add, B) +BINARY_OP(vaddS, AddVS, addv, sve_add, H) +BINARY_OP(vaddI, AddVI, addv, sve_add, S) +BINARY_OP(vaddL, AddVL, addv, sve_add, D) +BINARY_OP(vaddHF, AddVHF, fadd, sve_fadd, H) +BINARY_OP(vaddF, AddVF, fadd, sve_fadd, S) +BINARY_OP(vaddD, AddVD, fadd, sve_fadd, D) // vector add - predicated BINARY_OP_PREDICATE(vaddB, AddVB, sve_add, B) @@ -532,12 +566,13 @@ VADD_IMM(L, immLAddSubV, D) // ------------------------------ Vector sub ----------------------------------- // vector sub -BINARY_OP(vsubB, SubVB, subv, sve_sub, B) -BINARY_OP(vsubS, SubVS, subv, sve_sub, H) -BINARY_OP(vsubI, SubVI, subv, sve_sub, S) -BINARY_OP(vsubL, SubVL, subv, sve_sub, D) -BINARY_OP(vsubF, SubVF, fsub, sve_fsub, S) -BINARY_OP(vsubD, SubVD, fsub, sve_fsub, D) +BINARY_OP(vsubB, SubVB, subv, sve_sub, B) +BINARY_OP(vsubS, SubVS, subv, sve_sub, H) +BINARY_OP(vsubI, SubVI, subv, sve_sub, S) +BINARY_OP(vsubL, SubVL, subv, sve_sub, D) +BINARY_OP(vsubHF, SubVHF, fsub, sve_fsub, H) +BINARY_OP(vsubF, SubVF, fsub, sve_fsub, S) +BINARY_OP(vsubD, SubVD, fsub, sve_fsub, D) // vector sub - predicated BINARY_OP_PREDICATE(vsubB, SubVB, sve_sub, B) @@ -612,8 +647,9 @@ instruct vmulL_sve(vReg dst_src1, vReg src2) %{ %} // vector mul - floating-point -BINARY_OP(vmulF, MulVF, fmul, sve_fmul, S) -BINARY_OP(vmulD, MulVD, fmul, sve_fmul, D) +BINARY_OP(vmulHF, MulVHF, fmul, sve_fmul, H) +BINARY_OP(vmulF, MulVF, fmul, sve_fmul, S) +BINARY_OP(vmulD, MulVD, fmul, sve_fmul, D) // vector mul - predicated BINARY_OP_PREDICATE(vmulB, MulVB, sve_mul, B) @@ -626,8 +662,9 @@ BINARY_OP_PREDICATE(vmulD, MulVD, sve_fmul, D) // ------------------------------ Vector float div ----------------------------- // vector float div -BINARY_OP_NEON_SVE_PAIRWISE(vdivF, DivVF, fdiv, sve_fdiv, S) -BINARY_OP_NEON_SVE_PAIRWISE(vdivD, DivVD, fdiv, sve_fdiv, D) +BINARY_OP_NEON_SVE_PAIRWISE(vdivHF, DivVHF, fdiv, sve_fdiv, H) +BINARY_OP_NEON_SVE_PAIRWISE(vdivF, DivVF, fdiv, sve_fdiv, S) +BINARY_OP_NEON_SVE_PAIRWISE(vdivD, DivVD, fdiv, sve_fdiv, D) // vector float div - predicated BINARY_OP_PREDICATE(vdivF, DivVF, sve_fdiv, S) @@ -1016,8 +1053,9 @@ UNARY_OP_PREDICATE_WITH_SIZE(vnegD, NegVD, sve_fneg, D) // ------------------------------ Vector sqrt ---------------------------------- // 
vector sqrt -UNARY_OP(vsqrtF, SqrtVF, fsqrt, sve_fsqrt, S) -UNARY_OP(vsqrtD, SqrtVD, fsqrt, sve_fsqrt, D) +UNARY_OP(vsqrtHF, SqrtVHF, fsqrt, sve_fsqrt, H) +UNARY_OP(vsqrtF, SqrtVF, fsqrt, sve_fsqrt, S) +UNARY_OP(vsqrtD, SqrtVD, fsqrt, sve_fsqrt, D) // vector sqrt - predicated UNARY_OP_PREDICATE_WITH_SIZE(vsqrtF, SqrtVF, sve_fsqrt, S) @@ -1074,6 +1112,20 @@ instruct v$1_neon(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %}')dnl dnl +dnl VMINMAX_HF_NEON($1, $2, $3 ) +dnl VMINMAX_HF_NEON(type, op_name, insn_fp) +define(`VMINMAX_HF_NEON', ` +instruct v$1_HF_neon(vReg dst, vReg src1, vReg src2) %{ + predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n))); + match(Set dst ($2 src1 src2)); + format %{ "v$1_HF_neon $dst, $src1, $src2\t# Half float" %} + ins_encode %{ + __ $3($dst$$FloatRegister, get_arrangement(this), + $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl dnl VMINMAX_SVE($1, $2, $3, $4 ) dnl VMINMAX_SVE(type, op_name, insn_fp, insn_integral) define(`VMINMAX_SVE', ` @@ -1097,6 +1149,21 @@ instruct v$1_sve(vReg dst_src1, vReg src2) %{ ins_pipe(pipe_slow); %}')dnl dnl +dnl VMINMAX_HF_SVE($1, $2, $3 ) +dnl VMINMAX_HF_SVE(type, op_name, insn_fp) +define(`VMINMAX_HF_SVE', ` +instruct v$1_HF_sve(vReg dst_src1, vReg src2) %{ + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n))); + match(Set dst_src1 ($2 dst_src1 src2)); + format %{ "v$1_HF_sve $dst_src1, $dst_src1, $src2\t# Half float" %} + ins_encode %{ + assert(UseSVE > 0, "must be sve"); + __ $3($dst_src1$$FloatRegister, __ H, + ptrue, $src2$$FloatRegister); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl dnl VMINMAX_PREDICATE($1, $2, $3, $4 ) dnl VMINMAX_PREDICATE(type, op_name, insn_fp, insn_integral) define(`VMINMAX_PREDICATE', ` @@ -1175,9 +1242,11 @@ dnl VMINMAX_L_NEON(min, MinV) VMINMAX_L_SVE(min, MinV, sve_smin) -// vector min - B/S/I/F/D +// vector min - B/S/I/HF/F/D VMINMAX_NEON(min, MinV, fmin, minv) VMINMAX_SVE(min, MinV, sve_fmin, sve_smin) +VMINMAX_HF_NEON(min, MinVHF, fmin) +VMINMAX_HF_SVE(min, MinVHF, sve_fmin) // vector min - predicated VMINMAX_PREDICATE(min, MinV, sve_fmin, sve_smin) @@ -1199,9 +1268,11 @@ VUMINMAX_PREDICATE(umin, UMinV, sve_umin) VMINMAX_L_NEON(max, MaxV) VMINMAX_L_SVE(max, MaxV, sve_smax) -// vector max - B/S/I/F/D +// vector max - B/S/I/HF/F/D VMINMAX_NEON(max, MaxV, fmax, maxv) VMINMAX_SVE(max, MaxV, sve_fmax, sve_smax) +VMINMAX_HF_NEON(max, MaxVHF, fmax) +VMINMAX_HF_SVE(max, MaxVHF, sve_fmax) // vector max - predicated VMINMAX_PREDICATE(max, MaxV, sve_fmax, sve_smax) @@ -1273,8 +1344,9 @@ instruct vmla_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ // dst_src1 = src2 * src3 + dst_src1 instruct vfmla(vReg dst_src1, vReg src2, vReg src3) %{ - match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); - match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); + match(Set dst_src1 (FmaVHF dst_src1 (Binary src2 src3))); + match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); + match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); format %{ "vfmla $dst_src1, $src2, $src3" %} ins_encode %{ assert(UseFMA, "Needs FMA instructions support."); @@ -2938,6 +3010,23 @@ REPLICATE_INT(L, iRegL) REPLICATE_FP(F, S, T_FLOAT ) REPLICATE_FP(D, D, T_DOUBLE) +// Replicate a half-precision float value held in a floating point register +instruct replicateHF(vReg dst, vRegF src) %{ + predicate(Matcher::vector_element_basic_type(n) == T_SHORT); + match(Set dst (Replicate src)); + format %{ "replicateHF $dst, $src\t# 
replicate half-precision float" %} + ins_encode %{ + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + if (VM_Version::use_neon_for_vector(length_in_bytes)) { + __ dup($dst$$FloatRegister, get_arrangement(this), $src$$FloatRegister); + } else { // length_in_bytes must be > 16 and SVE should be enabled + assert(UseSVE > 0, "must be sve"); + __ sve_cpy($dst$$FloatRegister, __ H, ptrue, $src$$FloatRegister); + } + %} + ins_pipe(pipe_slow); +%} + // replicate from imm instruct replicateI_imm_le128b(vReg dst, immI con) %{ @@ -3006,6 +3095,23 @@ instruct replicateL_imm8_gt128b(vReg dst, immL8_shift8 con) %{ ins_pipe(pipe_slow); %} +// Replicate a 16-bit half precision float value +instruct replicateHF_imm(vReg dst, immH con) %{ + match(Set dst (Replicate con)); + format %{ "replicateHF_imm $dst, $con\t# replicate immediate half-precision float" %} + ins_encode %{ + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + int imm = (int)($con$$constant) & 0xffff; + if (VM_Version::use_neon_for_vector(length_in_bytes)) { + __ mov($dst$$FloatRegister, get_arrangement(this), imm); + } else { // length_in_bytes must be > 16 and SVE should be enabled + assert(UseSVE > 0, "must be sve"); + __ sve_dup($dst$$FloatRegister, __ H, imm); + } + %} + ins_pipe(pipe_slow); +%} + // ------------------------------ Vector insert -------------------------------- // BYTE, SHORT, INT diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp index b03344f2d80..5e5d6c16b45 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp @@ -240,6 +240,19 @@ void Assembler::add_sub_immediate(Instruction_aarch64 ¤t_insn, srf(Rn, 5); } +// This method is used to generate Advanced SIMD data processing instructions +void Assembler::adv_simd_three_same(Instruction_aarch64 ¤t_insn, FloatRegister Vd, + SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, + int op1, int op2, int op3) { + assert(T == T4H || T == T8H || T == T2S || T == T4S || T == T2D, "invalid arrangement"); + int op22 = (T == T2S || T == T4S) ? 0b0 : 0b1; + int op21 = (T == T4H || T == T8H) ? 0b0 : 0b1; + int op14 = (T == T4H || T == T8H) ? 
0b00 : 0b11; + f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23); + f(op22, 22); f(op21, 21), rf(Vm, 16), f(op14, 15, 14), f(op3, 13, 10), + rf(Vn, 5), rf(Vd, 0); +} + #undef f #undef sf #undef rf @@ -456,7 +469,7 @@ void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); } bool asm_util::operand_valid_for_immediate_bits(int64_t imm, unsigned nbits) { guarantee(nbits == 8 || nbits == 12, "invalid nbits value"); - uint64_t uimm = (uint64_t)uabs((jlong)imm); + uint64_t uimm = (uint64_t)g_uabs((jlong)imm); if (uimm < (UCONST64(1) << nbits)) return true; if (uimm < (UCONST64(1) << (2 * nbits)) diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index 3db7d308844..573b451261f 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -932,7 +932,7 @@ class Assembler : public AbstractAssembler { static const uint64_t branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); static bool reachable_from_branch_at(address branch, address target) { - return uabs(target - branch) < branch_range; + return g_uabs(target - branch) < branch_range; } // Unconditional branch (immediate) @@ -2032,6 +2032,8 @@ void mvnw(Register Rd, Register Rm, INSN(fsqrtd, 0b01, 0b000011); INSN(fcvtd, 0b01, 0b000100); // Double-precision to single-precision + INSN(fsqrth, 0b11, 0b000011); // Half-precision sqrt + private: void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) { @@ -2059,37 +2061,68 @@ void mvnw(Register Rd, Register Rm, #undef INSN // Floating-point data-processing (2 source) - void data_processing(unsigned op31, unsigned type, unsigned opcode, + void data_processing(unsigned op31, unsigned type, unsigned opcode, unsigned op21, FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { starti; f(op31, 31, 29); f(0b11110, 28, 24); - f(type, 23, 22), f(1, 21), f(opcode, 15, 10); + f(type, 23, 22), f(op21, 21), f(opcode, 15, 10); rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); } -#define INSN(NAME, op31, type, opcode) \ +#define INSN(NAME, op31, type, opcode, op21) \ void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \ - data_processing(op31, type, opcode, Vd, Vn, Vm); \ - } - - INSN(fabds, 0b011, 0b10, 0b110101); - INSN(fmuls, 0b000, 0b00, 0b000010); - INSN(fdivs, 0b000, 0b00, 0b000110); - INSN(fadds, 0b000, 0b00, 0b001010); - INSN(fsubs, 0b000, 0b00, 0b001110); - INSN(fmaxs, 0b000, 0b00, 0b010010); - INSN(fmins, 0b000, 0b00, 0b010110); - INSN(fnmuls, 0b000, 0b00, 0b100010); - - INSN(fabdd, 0b011, 0b11, 0b110101); - INSN(fmuld, 0b000, 0b01, 0b000010); - INSN(fdivd, 0b000, 0b01, 0b000110); - INSN(faddd, 0b000, 0b01, 0b001010); - INSN(fsubd, 0b000, 0b01, 0b001110); - INSN(fmaxd, 0b000, 0b01, 0b010010); - INSN(fmind, 0b000, 0b01, 0b010110); - INSN(fnmuld, 0b000, 0b01, 0b100010); + data_processing(op31, type, opcode, op21, Vd, Vn, Vm); \ + } + + INSN(fmuls, 0b000, 0b00, 0b000010, 0b1); + INSN(fdivs, 0b000, 0b00, 0b000110, 0b1); + INSN(fadds, 0b000, 0b00, 0b001010, 0b1); + INSN(fsubs, 0b000, 0b00, 0b001110, 0b1); + INSN(fmaxs, 0b000, 0b00, 0b010010, 0b1); + INSN(fmins, 0b000, 0b00, 0b010110, 0b1); + INSN(fnmuls, 0b000, 0b00, 0b100010, 0b1); + + INSN(fmuld, 0b000, 0b01, 0b000010, 0b1); + INSN(fdivd, 0b000, 0b01, 0b000110, 0b1); + INSN(faddd, 0b000, 0b01, 0b001010, 0b1); + INSN(fsubd, 0b000, 0b01, 0b001110, 0b1); + INSN(fmaxd, 0b000, 0b01, 0b010010, 0b1); + INSN(fmind, 0b000, 0b01, 0b010110, 0b1); + INSN(fnmuld, 
0b000, 0b01, 0b100010, 0b1); + + // Half-precision floating-point instructions + INSN(fmulh, 0b000, 0b11, 0b000010, 0b1); + INSN(fdivh, 0b000, 0b11, 0b000110, 0b1); + INSN(faddh, 0b000, 0b11, 0b001010, 0b1); + INSN(fsubh, 0b000, 0b11, 0b001110, 0b1); + INSN(fmaxh, 0b000, 0b11, 0b010010, 0b1); + INSN(fminh, 0b000, 0b11, 0b010110, 0b1); + INSN(fnmulh, 0b000, 0b11, 0b100010, 0b1); +#undef INSN + +// Advanced SIMD scalar three same +#define INSN(NAME, U, size, opcode) \ + void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + f(0b01, 31, 30), f(U, 29), f(0b11110, 28, 24), f(size, 23, 22), f(1, 21); \ + rf(Vm, 16), f(opcode, 15, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(fabds, 0b1, 0b10, 0b11010); // Floating-point Absolute Difference (single-precision) + INSN(fabdd, 0b1, 0b11, 0b11010); // Floating-point Absolute Difference (double-precision) + +#undef INSN + +// Advanced SIMD scalar three same FP16 +#define INSN(NAME, U, a, opcode) \ + void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + f(0b01, 31, 30), f(U, 29), f(0b11110, 28, 24), f(a, 23), f(0b10, 22, 21); \ + rf(Vm, 16), f(0b00, 15, 14), f(opcode, 13, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(fabdh, 0b1, 0b1, 0b010); // Floating-point Absolute Difference (half-precision float) #undef INSN @@ -2120,6 +2153,7 @@ void mvnw(Register Rd, Register Rm, INSN(fnmaddd, 0b000, 0b01, 1, 0); INSN(fnmsub, 0b000, 0b01, 1, 1); + INSN(fmaddh, 0b000, 0b11, 0, 0); // half-precision fused multiply-add (scalar) #undef INSN // Floating-point conditional select @@ -2709,26 +2743,26 @@ template
#undef INSN -// Advanced SIMD three same -#define INSN(NAME, op1, op2, op3) \ - void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ - starti; \ - assert(T == T2S || T == T4S || T == T2D, "invalid arrangement"); \ - f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23); \ - f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0); \ - } - - INSN(fabd, 1, 1, 0b110101); - INSN(fadd, 0, 0, 0b110101); - INSN(fdiv, 1, 0, 0b111111); - INSN(faddp, 1, 0, 0b110101); - INSN(fmul, 1, 0, 0b110111); - INSN(fsub, 0, 1, 0b110101); - INSN(fmla, 0, 0, 0b110011); - INSN(fmls, 0, 1, 0b110011); - INSN(fmax, 0, 0, 0b111101); - INSN(fmin, 0, 1, 0b111101); - INSN(facgt, 1, 1, 0b111011); + // Advanced SIMD three same + void adv_simd_three_same(Instruction_aarch64 ¤t_insn, FloatRegister Vd, + SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, + int op1, int op2, int op3); +#define INSN(NAME, op1, op2, op3) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + adv_simd_three_same(current_insn, Vd, T, Vn, Vm, op1, op2, op3); \ + } + INSN(fabd, 1, 1, 0b0101); + INSN(fadd, 0, 0, 0b0101); + INSN(fdiv, 1, 0, 0b1111); + INSN(faddp, 1, 0, 0b0101); + INSN(fmul, 1, 0, 0b0111); + INSN(fsub, 0, 1, 0b0101); + INSN(fmla, 0, 0, 0b0011); + INSN(fmls, 0, 1, 0b0011); + INSN(fmax, 0, 0, 0b1101); + INSN(fmin, 0, 1, 0b1101); + INSN(facgt, 1, 1, 0b1011); #undef INSN @@ -3228,18 +3262,24 @@ template // parameter "tmask" is a 2-bit mask used to indicate which bits in the size // field are determined by the SIMD_Arrangement. The bit of "tmask" should be // set to 1 if corresponding bit marked as "x" in the ArmARM. -#define INSN(NAME, U, size, tmask, opcode) \ - void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ - starti; \ - assert((ASSERTION), MSG); \ - f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24); \ - f(size | ((int)(T >> 1) & tmask), 23, 22), f(0b10000, 21, 17); \ - f(opcode, 16, 12), f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0); \ +#define INSN(NAME, U, size, tmask, opcode) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ + starti; \ + assert((ASSERTION), MSG); \ + int op22 = (int)(T >> 1) & tmask; \ + int op19 = 0b00; \ + if (tmask == 0b01 && (T == T4H || T == T8H)) { \ + op22 = 0b1; \ + op19 = 0b11; \ + } \ + f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24); \ + f(size | op22, 23, 22), f(1, 21), f(op19, 20, 19), f(0b00, 18, 17); \ + f(opcode, 16, 12), f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0); \ } #define MSG "invalid arrangement" -#define ASSERTION (T == T2S || T == T4S || T == T2D) +#define ASSERTION (T == T4H || T == T8H || T == T2S || T == T4S || T == T2D) INSN(fsqrt, 1, 0b10, 0b01, 0b11111); INSN(fabs, 0, 0b10, 0b01, 0b01111); INSN(fneg, 1, 0b10, 0b01, 0b01111); @@ -3366,7 +3406,7 @@ template #define INSN(NAME, opcode) \ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \ starti; \ - assert(T == S || T == D, "invalid register variant"); \ + assert(T == H || T == S || T == D, "invalid register variant"); \ f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21), \ rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \ } @@ -3451,7 +3491,7 @@ template // SVE floating-point arithmetic - predicate #define INSN(NAME, op1, op2) \ void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \ - assert(T == S || T == D, "invalid register variant"); \ + assert(T == H || T == 
S || T == D, "invalid register variant"); \ sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm); \ } diff --git a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp index 2334cbdff24..2e53ecb8058 100644 --- a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp @@ -69,7 +69,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ far_call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); return; } @@ -90,7 +90,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ blr(lr); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { @@ -103,7 +103,7 @@ void PredicateFailedStub::emit_code(LIR_Assembler* ce) { __ far_call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } void DivByZeroStub::emit_code(LIR_Assembler* ce) { @@ -274,7 +274,7 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { __ far_call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } @@ -289,7 +289,7 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { } __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), rscratch2); ce->add_call_info_here(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index 6b1a5a7f1e0..afa2ddb47b4 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -72,16 +72,17 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr null_check_offset = offset(); - if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(hdr, obj); - ldrb(hdr, Address(hdr, Klass::misc_flags_offset())); - tst(hdr, KlassFlags::_misc_is_value_based_class); - br(Assembler::NE, slow_case); - } - if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(disp_hdr, obj, hdr, temp, rscratch2, slow_case); } else if (LockingMode == LM_LEGACY) { + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(hdr, obj); + ldrb(hdr, Address(hdr, Klass::misc_flags_offset())); + tst(hdr, KlassFlags::_misc_is_value_based_class); + br(Assembler::NE, slow_case); + } + Label done; // Load object header ldr(hdr, Address(obj, hdr_offset)); diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 605a05a44a7..914967e4009 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -107,7 +107,8 @@ address C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register assert(is_power_of_2(unroll_factor), "can't use this value to calculate the jump target PC"); andr(tmp2, cnt, unroll_factor - 1); adr(tmp1, BR_BASE); - sub(tmp1, tmp1, tmp2, ext::sxtw, 3); + // For Cortex-A53 offset is 4 because 2 nops are generated. + sub(tmp1, tmp1, tmp2, ext::sxtw, VM_Version::supports_a53mac() ? 
4 : 3); movw(tmp2, 0x1f); br(tmp1); @@ -115,6 +116,11 @@ address C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register for (size_t i = 0; i < unroll_factor; ++i) { load(tmp1, Address(post(ary, type2aelembytes(eltype))), eltype); maddw(result, result, tmp2, tmp1); + // maddw generates an extra nop for Cortex-A53 (see maddw definition in macroAssembler). + // Generate 2nd nop to have 4 instructions per iteration. + if (VM_Version::supports_a53mac()) { + nop(); + } } bind(BR_BASE); subsw(cnt, cnt, unroll_factor); @@ -360,7 +366,7 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist Label slow_path; if (UseObjectMonitorTable) { - // Clear cache in case fast locking succeeds. + // Clear cache in case fast locking succeeds or we need to take the slow-path. str(zr, Address(box, BasicLock::object_monitor_cache_offset_in_bytes())); } @@ -2743,3 +2749,107 @@ bool C2_MacroAssembler::in_scratch_emit_size() { } return MacroAssembler::in_scratch_emit_size(); } + +static void abort_verify_int_in_range(uint idx, jint val, jint lo, jint hi) { + fatal("Invalid CastII, idx: %u, val: %d, lo: %d, hi: %d", idx, val, lo, hi); +} + +void C2_MacroAssembler::verify_int_in_range(uint idx, const TypeInt* t, Register rval, Register rtmp) { + assert(!t->empty() && !t->singleton(), "%s", Type::str(t)); + if (t == TypeInt::INT) { + return; + } + BLOCK_COMMENT("verify_int_in_range {"); + Label L_success, L_failure; + + jint lo = t->_lo; + jint hi = t->_hi; + + if (lo != min_jint && hi != max_jint) { + subsw(rtmp, rval, lo); + br(Assembler::LT, L_failure); + subsw(rtmp, rval, hi); + br(Assembler::LE, L_success); + } else if (lo != min_jint) { + subsw(rtmp, rval, lo); + br(Assembler::GE, L_success); + } else if (hi != max_jint) { + subsw(rtmp, rval, hi); + br(Assembler::LE, L_success); + } else { + ShouldNotReachHere(); + } + + bind(L_failure); + movw(c_rarg0, idx); + mov(c_rarg1, rval); + movw(c_rarg2, lo); + movw(c_rarg3, hi); + reconstruct_frame_pointer(rtmp); + rt_call(CAST_FROM_FN_PTR(address, abort_verify_int_in_range), rtmp); + hlt(0); + + bind(L_success); + BLOCK_COMMENT("} verify_int_in_range"); +} + +static void abort_verify_long_in_range(uint idx, jlong val, jlong lo, jlong hi) { + fatal("Invalid CastLL, idx: %u, val: " JLONG_FORMAT ", lo: " JLONG_FORMAT ", hi: " JLONG_FORMAT, idx, val, lo, hi); +} + +void C2_MacroAssembler::verify_long_in_range(uint idx, const TypeLong* t, Register rval, Register rtmp) { + assert(!t->empty() && !t->singleton(), "%s", Type::str(t)); + if (t == TypeLong::LONG) { + return; + } + BLOCK_COMMENT("verify_long_in_range {"); + Label L_success, L_failure; + + jlong lo = t->_lo; + jlong hi = t->_hi; + + if (lo != min_jlong && hi != max_jlong) { + subs(rtmp, rval, lo); + br(Assembler::LT, L_failure); + subs(rtmp, rval, hi); + br(Assembler::LE, L_success); + } else if (lo != min_jlong) { + subs(rtmp, rval, lo); + br(Assembler::GE, L_success); + } else if (hi != max_jlong) { + subs(rtmp, rval, hi); + br(Assembler::LE, L_success); + } else { + ShouldNotReachHere(); + } + + bind(L_failure); + movw(c_rarg0, idx); + mov(c_rarg1, rval); + mov(c_rarg2, lo); + mov(c_rarg3, hi); + reconstruct_frame_pointer(rtmp); + rt_call(CAST_FROM_FN_PTR(address, abort_verify_long_in_range), rtmp); + hlt(0); + + bind(L_success); + BLOCK_COMMENT("} verify_long_in_range"); +} + +void C2_MacroAssembler::reconstruct_frame_pointer(Register rtmp) { + const int framesize = Compile::current()->output()->frame_size_in_bytes(); + if (PreserveFramePointer) { + // frame pointer 
is valid +#ifdef ASSERT + // Verify frame pointer value in rfp. + add(rtmp, sp, framesize - 2 * wordSize); + Label L_success; + cmp(rfp, rtmp); + br(Assembler::EQ, L_success); + stop("frame pointer mismatch"); + bind(L_success); +#endif // ASSERT + } else { + add(rfp, sp, framesize - 2 * wordSize); + } +} diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp index e0eaa0b76e6..70e4265c7cc 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp @@ -188,4 +188,9 @@ void vector_signum_sve(FloatRegister dst, FloatRegister src, FloatRegister zero, FloatRegister one, FloatRegister vtmp, PRegister pgtmp, SIMD_RegVariant T); + void verify_int_in_range(uint idx, const TypeInt* t, Register val, Register tmp); + void verify_long_in_range(uint idx, const TypeLong* t, Register val, Register tmp); + + void reconstruct_frame_pointer(Register rtmp); + #endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp b/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp index 0c2d9a32c8c..3874c8cd54e 100644 --- a/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp @@ -70,7 +70,7 @@ static char* reserve_at_eor_compatible_address(size_t size, bool aslr) { const uint64_t immediate = ((uint64_t)immediates[index]) << 32; assert(immediate > 0 && Assembler::operand_valid_for_logical_immediate(/*is32*/false, immediate), "Invalid immediate %d " UINT64_FORMAT, index, immediate); - result = os::attempt_reserve_memory_at((char*)immediate, size, false); + result = os::attempt_reserve_memory_at((char*)immediate, size, mtNone); if (result == nullptr) { log_trace(metaspace, map)("Failed to attach at " UINT64_FORMAT_X, immediate); } @@ -114,7 +114,7 @@ char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size if (result == nullptr) { constexpr size_t alignment = nth_bit(32); log_debug(metaspace, map)("Trying to reserve at a 32-bit-aligned address"); - result = os::reserve_memory_aligned(size, alignment, false); + result = os::reserve_memory_aligned(size, alignment, mtNone); } return result; diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.cpp b/src/hotspot/cpu/aarch64/frame_aarch64.cpp index b07fa2fa9df..7ffba17bab3 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp @@ -828,7 +828,6 @@ void JavaFrameAnchor::make_walkable() { // already walkable? if (walkable()) return; vmassert(last_Java_sp() != nullptr, "not called from Java code?"); - vmassert(last_Java_pc() == nullptr, "already walkable"); _last_Java_pc = (address)_last_Java_sp[-1]; vmassert(walkable(), "something went wrong"); } diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp index d22442db0d7..47ae93a4932 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -35,6 +35,53 @@ // Inline functions for AArch64 frames: +#if INCLUDE_JFR + +// Static helper routines + +inline address frame::interpreter_bcp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<address>(fp[frame::interpreter_frame_bcp_offset]); +} + +inline address frame::interpreter_return_address(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<address>(fp[frame::return_addr_offset]); +} + +inline intptr_t* frame::interpreter_sender_sp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(fp[frame::interpreter_frame_sender_sp_offset]); +} + +inline bool frame::is_interpreter_frame_setup_at(const intptr_t* fp, const void* sp) { + assert(fp != nullptr, "invariant"); + assert(sp != nullptr, "invariant"); + return sp <= fp + frame::interpreter_frame_initial_sp_offset; +} + +inline intptr_t* frame::sender_sp(intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return fp + frame::sender_sp_offset; +} + +inline intptr_t* frame::link(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(fp[frame::link_offset]); +} + +inline address frame::return_address(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast<address>(sp[-1]); +} + +inline intptr_t* frame::fp(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(sp[-2]); +} + +#endif // INCLUDE_JFR + // Constructors: inline frame::frame() { diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp index e33ef47cf3c..e4db8a9ab1f 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp @@ -27,9 +27,9 @@ #include "c1/c1_MacroAssembler.hpp" #include "compiler/compilerDefinitions.inline.hpp" #include "gc/shared/gc_globals.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" #define __ masm->masm()-> diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index ac22b43faaf..a2b3f44c68b 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -23,6 +23,8 @@ * */ +#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" +#include "gc/shenandoah/mode/shenandoahMode.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" #include "gc/shenandoah/shenandoahForwarding.hpp" @@ -30,10 +32,8 @@ #include "gc/shenandoah/shenandoahHeapRegion.hpp" #include "gc/shenandoah/shenandoahRuntime.hpp" #include "gc/shenandoah/shenandoahThreadLocalData.hpp" -#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" -#include "gc/shenandoah/mode/shenandoahMode.hpp" -#include "interpreter/interpreter.hpp" #include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" #include "runtime/javaThread.hpp" #include "runtime/sharedRuntime.hpp" #ifdef COMPILER1 diff --git a/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp index
20e37528c04..7008615ed43 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp @@ -21,8 +21,8 @@ * questions. */ -#include "gc/shared/gcLogPrecious.hpp" #include "gc/shared/gc_globals.hpp" +#include "gc/shared/gcLogPrecious.hpp" #include "gc/z/zAddress.hpp" #include "gc/z/zBarrierSetAssembler.hpp" #include "gc/z/zGlobals.hpp" diff --git a/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp b/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp index faf635dc332..948ba97aa22 100644 --- a/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -46,7 +46,7 @@ const bool CCallingConventionRequiresIntsAsLongs = false; #define DEFAULT_CACHE_LINE_SIZE 64 // The default padding size for data structures to avoid false sharing. -#define DEFAULT_PADDING_SIZE DEFAULT_CACHE_LINE_SIZE +#define DEFAULT_PADDING_SIZE (2*DEFAULT_CACHE_LINE_SIZE) // According to the ARMv8 ARM, "Concurrent modification and execution // of instructions can lead to the resulting instruction performing diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp index 800e7718921..b316103d656 100644 --- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, 2019, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -83,8 +83,6 @@ define_pd_global(intx, InlineSmallCode, 1000); range, \ constraint) \ \ - product(bool, NearCpool, true, \ - "constant pool is close to instructions") \ product(bool, UseCRC32, false, \ "Use CRC32 instructions for CRC32 computation") \ product(bool, UseCryptoPmullForCRC32, false, \ @@ -97,6 +95,8 @@ define_pd_global(intx, InlineSmallCode, 1000); "Use simplest and shortest implementation for array equals") \ product(bool, UseSIMDForBigIntegerShiftIntrinsics, true, \ "Use SIMD instructions for left/right shift of BigInteger") \ + product(bool, UseSIMDForSHA3Intrinsic, true, \ + "Use SIMD SHA3 instructions for SHA3 intrinsic") \ product(bool, AvoidUnalignedAccesses, false, \ "Avoid generating unaligned memory accesses") \ product(bool, UseLSE, false, \ diff --git a/src/hotspot/cpu/aarch64/icache_aarch64.cpp b/src/hotspot/cpu/aarch64/icache_aarch64.cpp index 311f3a7de1f..a942406f45e 100644 --- a/src/hotspot/cpu/aarch64/icache_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/icache_aarch64.cpp @@ -31,4 +31,4 @@ void ICacheStubGenerator::generate_icache_flush( *flush_icache_stub = nullptr; } -void ICache::initialize() {} +void ICache::initialize(int phase) {} diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp index d5ba85da989..276fdd013db 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp @@ -458,9 +458,10 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { // remove activation // -// Apply stack watermark barrier. // Unlock the receiver if this is a synchronized method. // Unlock any Java monitors from synchronized blocks. +// Apply stack watermark barrier. +// Notify JVMTI. // Remove the activation from the stack. // // If there are locked Java monitors @@ -470,30 +471,14 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { // installs IllegalMonitorStateException // Else // no error processing -void InterpreterMacroAssembler::remove_activation( - TosState state, - bool throw_monitor_exception, - bool install_monitor_exception, - bool notify_jvmdi) { +void InterpreterMacroAssembler::remove_activation(TosState state, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { // Note: Registers r3 xmm0 may be in use for the // result check if synchronized method Label unlocked, unlock, no_unlock; - // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, - // that would normally not be safe to use. Such bad returns into unsafe territory of - // the stack, will call InterpreterRuntime::at_unwind. - Label slow_path; - Label fast_path; - safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); - br(Assembler::AL, fast_path); - bind(slow_path); - push(state); - set_last_Java_frame(esp, rfp, (address)pc(), rscratch1); - super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread); - reset_last_Java_frame(true); - pop(state); - bind(fast_path); - // get the value of _do_not_unlock_if_synchronized into r3 const Address do_not_unlock_if_synchronized(rthread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); @@ -611,7 +596,24 @@ void InterpreterMacroAssembler::remove_activation( bind(no_unlock); - // jvmti support + JFR_ONLY(enter_jfr_critical_section();) + + // The below poll is for the stack watermark barrier. 
It allows fixing up frames lazily, + // that would normally not be safe to use. Such bad returns into unsafe territory of + // the stack, will call InterpreterRuntime::at_unwind. + Label slow_path; + Label fast_path; + safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); + br(Assembler::AL, fast_path); + bind(slow_path); + push(state); + set_last_Java_frame(esp, rfp, pc(), rscratch1); + super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread); + reset_last_Java_frame(true); + pop(state); + bind(fast_path); + + // JVMTI support. Make sure the safepoint poll test is issued prior. if (notify_jvmdi) { notify_method_exit(state, NotifyJVMTI); // preserve TOSCA } else { @@ -638,6 +640,8 @@ void InterpreterMacroAssembler::remove_activation( cmp(rscratch2, rscratch1); br(Assembler::LS, no_reserved_zone_enabling); + JFR_ONLY(leave_jfr_critical_section();) + call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), rthread); call_VM(noreg, CAST_FROM_FN_PTR(address, @@ -647,10 +651,14 @@ void InterpreterMacroAssembler::remove_activation( bind(no_reserved_zone_enabling); } - // restore sender esp - mov(esp, rscratch2); // remove frame anchor leave(); + + JFR_ONLY(leave_jfr_critical_section();) + + // restore sender esp + mov(esp, rscratch2); + // If we're returning to interpreted code we will shortly be // adjusting SP to allow some space for ESP. If we're returning to // compiled code the saved sender SP was saved in sender_sp, so this @@ -658,6 +666,19 @@ void InterpreterMacroAssembler::remove_activation( andr(sp, esp, -16); } +#if INCLUDE_JFR +void InterpreterMacroAssembler::enter_jfr_critical_section() { + const Address sampling_critical_section(rthread, in_bytes(SAMPLING_CRITICAL_SECTION_OFFSET_JFR)); + mov(rscratch1, true); + strb(rscratch1, sampling_critical_section); +} + +void InterpreterMacroAssembler::leave_jfr_critical_section() { + const Address sampling_critical_section(rthread, in_bytes(SAMPLING_CRITICAL_SECTION_OFFSET_JFR)); + strb(zr, sampling_critical_section); +} +#endif // INCLUDE_JFR + // Lock object // // Args: @@ -693,17 +714,18 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) // Load object pointer into obj_reg %c_rarg3 ldr(obj_reg, Address(lock_reg, obj_offset)); - if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(tmp, obj_reg); - ldrb(tmp, Address(tmp, Klass::misc_flags_offset())); - tst(tmp, KlassFlags::_misc_is_value_based_class); - br(Assembler::NE, slow_case); - } - if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(lock_reg, obj_reg, tmp, tmp2, tmp3, slow_case); b(done); } else if (LockingMode == LM_LEGACY) { + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp, obj_reg); + ldrb(tmp, Address(tmp, Klass::misc_flags_offset())); + tst(tmp, KlassFlags::_misc_is_value_based_class); + br(Assembler::NE, slow_case); + } + // Load (object->mark() | 1) into swap_reg ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); orr(swap_reg, rscratch1, 1); @@ -904,60 +926,26 @@ void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, - int constant, - bool decrement) { - increment_mdp_data_at(mdp_in, noreg, constant, decrement); + int constant) { + increment_mdp_data_at(mdp_in, noreg, constant); } void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, - Register reg, - int constant, - bool decrement) { + Register index, + int constant) { 
assert(ProfileInterpreter, "must be profiling interpreter"); - // %%% this does 64bit counters at best it is wasting space - // at worst it is a rare bug when counters overflow - assert_different_registers(rscratch2, rscratch1, mdp_in, reg); + assert_different_registers(rscratch2, rscratch1, mdp_in, index); Address addr1(mdp_in, constant); - Address addr2(rscratch2, reg, Address::lsl(0)); + Address addr2(rscratch2, index, Address::lsl(0)); Address &addr = addr1; - if (reg != noreg) { + if (index != noreg) { lea(rscratch2, addr1); addr = addr2; } - if (decrement) { - // Decrement the register. Set condition codes. - // Intel does this - // addptr(data, (int32_t) -DataLayout::counter_increment); - // If the decrement causes the counter to overflow, stay negative - // Label L; - // jcc(Assembler::negative, L); - // addptr(data, (int32_t) DataLayout::counter_increment); - // so we do this - ldr(rscratch1, addr); - subs(rscratch1, rscratch1, (unsigned)DataLayout::counter_increment); - Label L; - br(Assembler::LO, L); // skip store if counter underflow - str(rscratch1, addr); - bind(L); - } else { - assert(DataLayout::counter_increment == 1, - "flow-free idiom only works with 1"); - // Intel does this - // Increment the register. Set carry flag. - // addptr(data, DataLayout::counter_increment); - // If the increment causes the counter to overflow, pull back by 1. - // sbbptr(data, (int32_t)0); - // so we do this - ldr(rscratch1, addr); - adds(rscratch1, rscratch1, DataLayout::counter_increment); - Label L; - br(Assembler::CS, L); // skip store if counter overflow - str(rscratch1, addr); - bind(L); - } + increment(addr, DataLayout::counter_increment); } void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp index 059d79c3cb9..447d4f8244e 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -247,11 +247,8 @@ class InterpreterMacroAssembler: public MacroAssembler { void verify_method_data_pointer(); void set_mdp_data_at(Register mdp_in, int constant, Register value); - void increment_mdp_data_at(Address data, bool decrement = false); - void increment_mdp_data_at(Register mdp_in, int constant, - bool decrement = false); - void increment_mdp_data_at(Register mdp_in, Register reg, int constant, - bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant); + void increment_mdp_data_at(Register mdp_in, Register index, int constant); void increment_mask_and_jump(Address counter_addr, int increment, Address mask, Register scratch, Register scratch2, @@ -310,6 +307,9 @@ class InterpreterMacroAssembler: public MacroAssembler { void notify_method_entry(); void notify_method_exit(TosState state, NotifyMethodExitMode mode); + JFR_ONLY(void enter_jfr_critical_section();) + JFR_ONLY(void leave_jfr_critical_section();) + virtual void _call_Unimplemented(address call_site) { save_bcp(); set_last_Java_frame(esp, rfp, (address) pc(), rscratch1); diff --git a/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp b/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp index 3015206dadc..071dd2c4179 100644 --- a/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp @@ -180,6 +180,7 @@ bool CodeInstaller::pd_relocate(address pc, jint mark) { case POLL_RETURN_FAR: _instructions->relocate(pc, relocInfo::poll_return_type); return true; +#if INCLUDE_ZGC case Z_BARRIER_RELOCATION_FORMAT_LOAD_GOOD_BEFORE_TB_X: _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatLoadGoodBeforeTbX); return true; @@ -192,6 +193,7 @@ bool CodeInstaller::pd_relocate(address pc, jint mark) { case Z_BARRIER_RELOCATION_FORMAT_STORE_BAD_BEFORE_MOV: _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatStoreBadBeforeMov); return true; +#endif } return false; diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 1e226c70420..a277a689280 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -675,6 +675,9 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, } static inline bool target_needs_far_branch(address addr) { + if (AOTCodeCache::is_on_for_dump()) { + return true; + } // codecache size <= 128M if (!MacroAssembler::far_branches()) { return false; @@ -859,6 +862,9 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in // Check the entry target is always reachable from any branch. 
static bool is_always_within_branch_range(Address entry) { + if (AOTCodeCache::is_on_for_dump()) { + return false; + } const address target = entry.target(); if (!CodeCache::contains(target)) { @@ -1003,9 +1009,6 @@ void MacroAssembler::c2bool(Register x) { address MacroAssembler::ic_call(address entry, jint method_index) { RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); - // address const_ptr = long_constant((jlong)Universe::non_oop_word()); - // uintptr_t offset; - // ldr_constant(rscratch2, const_ptr); movptr(rscratch2, (intptr_t)Universe::non_oop_word()); return trampoline_call(Address(entry, rh)); } @@ -2041,7 +2044,7 @@ void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_f // Fast path check: class is fully initialized lea(scratch, Address(klass, InstanceKlass::init_state_offset())); ldarb(scratch, scratch); - subs(zr, scratch, InstanceKlass::fully_initialized); + cmp(scratch, InstanceKlass::fully_initialized); br(Assembler::EQ, *L_fast_path); // Fast path check: current thread is initializer thread @@ -2157,7 +2160,7 @@ void MacroAssembler::call_VM_leaf_base(address entry_point, stp(rscratch1, rmethod, Address(pre(sp, -2 * wordSize))); - mov(rscratch1, entry_point); + mov(rscratch1, RuntimeAddress(entry_point)); blr(rscratch1); if (retaddr) bind(*retaddr); @@ -3234,9 +3237,13 @@ void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Regis } void MacroAssembler::stop(const char* msg) { - BLOCK_COMMENT(msg); + // Skip AOT caching C strings in scratch buffer. + const char* str = (code_section()->scratch_emit()) ? msg : AOTCodeCache::add_C_string(msg); + BLOCK_COMMENT(str); + // load msg into r0 so we can access it from the signal handler + // ExternalAddress enables saving and restoring via the code cache + lea(c_rarg0, ExternalAddress((address) str)); dcps1(0xdeae); - emit_int64((uintptr_t)msg); } void MacroAssembler::unimplemented(const char* what) { @@ -3270,7 +3277,7 @@ void MacroAssembler::wrap_add_sub_imm_insn(Register Rd, Register Rn, uint64_t im if (fits) { (this->*insn1)(Rd, Rn, imm); } else { - if (uabs(imm) < (1 << 24)) { + if (g_uabs(imm) < (1 << 24)) { (this->*insn1)(Rd, Rn, imm & -(1 << 12)); (this->*insn1)(Rd, Rd, imm & ((1 << 12)-1)); } else { @@ -5333,7 +5340,41 @@ bool MacroAssembler::set_klass_decode_mode(address base, int shift, const size_t return _klass_decode_mode != KlassDecodeNone; } +static Register pick_different_tmp(Register dst, Register src) { + auto tmps = RegSet::of(r0, r1, r2) - RegSet::of(src, dst); + return *tmps.begin(); +} + +void MacroAssembler::encode_klass_not_null_for_aot(Register dst, Register src) { + // we have to load the klass base from the AOT constants area but + // not the shift because it is not allowed to change + int shift = CompressedKlassPointers::shift(); + assert(shift >= 0 && shift <= CompressedKlassPointers::max_shift(), "unexpected compressed klass shift!"); + if (dst != src) { + // we can load the base into dst, subtract it from the src and shift down + lea(dst, ExternalAddress(CompressedKlassPointers::base_addr())); + ldr(dst, dst); + sub(dst, src, dst); + lsr(dst, dst, shift); + } else { + // we need an extra register in order to load the coop base + Register tmp = pick_different_tmp(dst, src); + RegSet regs = RegSet::of(tmp); + push(regs, sp); + lea(tmp, ExternalAddress(CompressedKlassPointers::base_addr())); + ldr(tmp, tmp); + sub(dst, src, tmp); + lsr(dst, dst, shift); + pop(regs, sp); + } +} + void MacroAssembler::encode_klass_not_null(Register dst,
Register src) { + if (AOTCodeCache::is_on_for_dump()) { + encode_klass_not_null_for_aot(dst, src); + return; + } + switch (klass_decode_mode()) { case KlassDecodeZero: if (CompressedKlassPointers::shift() != 0) { @@ -5370,9 +5411,36 @@ void MacroAssembler::encode_klass_not_null(Register r) { encode_klass_not_null(r, r); } +void MacroAssembler::decode_klass_not_null_for_aot(Register dst, Register src) { + // we have to load the klass base from the AOT constants area but + // not the shift because it is not allowed to change + int shift = CompressedKlassPointers::shift(); + assert(shift >= 0 && shift <= CompressedKlassPointers::max_shift(), "unexpected compressed klass shift!"); + if (dst != src) { + // we can load the base into dst then add the offset with a suitable shift + lea(dst, ExternalAddress(CompressedKlassPointers::base_addr())); + ldr(dst, dst); + add(dst, dst, src, LSL, shift); + } else { + // we need an extra register in order to load the coop base + Register tmp = pick_different_tmp(dst, src); + RegSet regs = RegSet::of(tmp); + push(regs, sp); + lea(tmp, ExternalAddress(CompressedKlassPointers::base_addr())); + ldr(tmp, tmp); + add(dst, tmp, src, LSL, shift); + pop(regs, sp); + } +} + void MacroAssembler::decode_klass_not_null(Register dst, Register src) { assert (UseCompressedClassPointers, "should only be used for compressed headers"); + if (AOTCodeCache::is_on_for_dump()) { + decode_klass_not_null_for_aot(dst, src); + return; + } + switch (klass_decode_mode()) { case KlassDecodeZero: if (CompressedKlassPointers::shift() != 0) { @@ -5520,9 +5588,8 @@ void MacroAssembler::movoop(Register dst, jobject obj) { mov(dst, Address((address)obj, rspec)); } else { address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address - ldr_constant(dst, Address(dummy, rspec)); + ldr(dst, Address(dummy, rspec)); } - } // Move a metadata address into a register. @@ -6648,7 +6715,7 @@ void MacroAssembler::get_thread(Register dst) { protect_return_address(); push(saved_regs, sp); - mov(lr, CAST_FROM_FN_PTR(address, JavaThread::aarch64_get_thread_helper)); + mov(lr, ExternalAddress(CAST_FROM_FN_PTR(address, JavaThread::aarch64_get_thread_helper))); blr(lr); if (dst != c_rarg0) { mov(dst, c_rarg0); @@ -7034,10 +7101,17 @@ void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Registe ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); if (UseObjectMonitorTable) { - // Clear cache in case fast locking succeeds. + // Clear cache in case fast locking succeeds or we need to take the slow-path. str(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes())))); } + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(t1, obj); + ldrb(t1, Address(t1, Klass::misc_flags_offset())); + tst(t1, KlassFlags::_misc_is_value_based_class); + br(Assembler::NE, slow); + } + // Check if the lock-stack is full. ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset())); cmpw(top, (unsigned)LockStack::end_offset()); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 11d1985e50b..d77bc92875f 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2024, Red Hat Inc. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -27,6 +27,7 @@ #define CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP #include "asm/assembler.inline.hpp" +#include "code/aotCodeCache.hpp" #include "code/vmreg.hpp" #include "metaprogramming/enableIf.hpp" #include "oops/compressedOops.hpp" @@ -322,6 +323,27 @@ class MacroAssembler: public Assembler { extr(Rd, Rn, Rn, imm); } + inline void rolw(Register Rd, Register Rn, unsigned imm) { + extrw(Rd, Rn, Rn, (32 - imm)); + } + + inline void rol(Register Rd, Register Rn, unsigned imm) { + extr(Rd, Rn, Rn, (64 - imm)); + } + + using Assembler::rax1; + using Assembler::eor3; + + inline void rax1(Register Rd, Register Rn, Register Rm) { + eor(Rd, Rn, Rm, ROR, 63); // Rd = Rn ^ rol(Rm, 1) + } + + inline void eor3(Register Rd, Register Rn, Register Rm, Register Rk) { + assert(Rd != Rn, "Use tmp register"); + eor(Rd, Rm, Rk); + eor(Rd, Rd, Rn); + } + inline void sxtbw(Register Rd, Register Rn) { sbfmw(Rd, Rn, 0, 7); } @@ -934,6 +956,8 @@ class MacroAssembler: public Assembler { void set_narrow_oop(Register dst, jobject obj); + void decode_klass_not_null_for_aot(Register dst, Register src); + void encode_klass_not_null_for_aot(Register dst, Register src); void encode_klass_not_null(Register r); void decode_klass_not_null(Register r); void encode_klass_not_null(Register dst, Register src); @@ -1315,6 +1339,10 @@ class MacroAssembler: public Assembler { // Check if branches to the non nmethod section require a far jump static bool codestub_branch_needs_far_jump() { + if (AOTCodeCache::is_on_for_dump()) { + // To calculate far_codestub_branch_size correctly. + return true; + } return CodeCache::max_distance_to_non_nmethod() > branch_range; } @@ -1472,16 +1500,6 @@ class MacroAssembler: public Assembler { public: - void ldr_constant(Register dest, const Address &const_addr) { - if (NearCpool) { - ldr(dest, const_addr); - } else { - uint64_t offset; - adrp(dest, InternalAddress(const_addr.target()), offset); - ldr(dest, Address(dest, offset)); - } - } - address read_polling_page(Register r, relocInfo::relocType rtype); void get_polling_page(Register dest, relocInfo::relocType rtype); @@ -1611,11 +1629,15 @@ class MacroAssembler: public Assembler { void aes_round(FloatRegister input, FloatRegister subkey); // ChaCha20 functions support block - void cc20_quarter_round(FloatRegister aVec, FloatRegister bVec, - FloatRegister cVec, FloatRegister dVec, FloatRegister scratch, - FloatRegister tbl); - void cc20_shift_lane_org(FloatRegister bVec, FloatRegister cVec, - FloatRegister dVec, bool colToDiag); + void cc20_qr_add4(FloatRegister (&addFirst)[4], + FloatRegister (&addSecond)[4]); + void cc20_qr_xor4(FloatRegister (&firstElem)[4], + FloatRegister (&secondElem)[4], FloatRegister (&result)[4]); + void cc20_qr_lrot4(FloatRegister (&sourceReg)[4], + FloatRegister (&destReg)[4], int bits, FloatRegister table); + void cc20_set_qr_registers(FloatRegister (&vectorSet)[4], + const FloatRegister (&stateVectors)[16], int idx1, int idx2, + int idx3, int idx4); // Place an ISB after code may have been modified due to a safepoint. 
void safepoint_isb(); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64_chacha.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64_chacha.cpp index 1f7bb8f46f6..083e81af5d9 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64_chacha.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64_chacha.cpp @@ -28,60 +28,119 @@ #include "runtime/stubRoutines.hpp" /** - * Perform the quarter round calculations on values contained within - * four SIMD registers. + * Perform the vectorized add for a group of 4 quarter round operations. + * In the ChaCha20 quarter round, there are two add ops: a += b and c += d. + * Each parameter is a set of 4 registers representing the 4 registers + * for each addend in the add operation for each of the quarter rounds. + * (e.g. for "a" it would consist of v0/v1/v2/v3). The result of the add + * is placed into the vectors in the "addFirst" array. * - * @param aVec the SIMD register containing only the "a" values - * @param bVec the SIMD register containing only the "b" values - * @param cVec the SIMD register containing only the "c" values - * @param dVec the SIMD register containing only the "d" values - * @param scratch scratch SIMD register used for 12 and 7 bit left rotations - * @param table the SIMD register used as a table for 8 bit left rotations + * @param addFirst array of SIMD registers representing the first addend. + * @param addSecond array of SIMD registers representing the second addend. */ -void MacroAssembler::cc20_quarter_round(FloatRegister aVec, FloatRegister bVec, - FloatRegister cVec, FloatRegister dVec, FloatRegister scratch, - FloatRegister table) { +void MacroAssembler::cc20_qr_add4(FloatRegister (&addFirst)[4], + FloatRegister (&addSecond)[4]) { + for (int i = 0; i < 4; i++) { + addv(addFirst[i], T4S, addFirst[i], addSecond[i]); + } +} + + +/** + * Perform the vectorized XOR for a group of 4 quarter round operations. + * In the ChaCha20 quarter round, there are two XOR ops: d ^= a and b ^= c + * Each parameter is a set of 4 registers representing the 4 registers + * for each element in the xor operation for each of the quarter rounds. + * (e.g. for "a" it would consist of v0/v1/v2/v3) + * Note: because the b ^= c ops precede a non-byte-aligned left-rotation, + * there is a third parameter which can take a set of scratch registers + * for the result, which facilitates doing the subsequent operations for + * the left rotation. + * + * @param firstElem array of SIMD registers representing the first element. + * @param secondElem array of SIMD registers representing the second element. + * @param result array of SIMD registers representing the destination. + * May be the same as firstElem or secondElem, or a separate array. + */ +void MacroAssembler::cc20_qr_xor4(FloatRegister (&firstElem)[4], + FloatRegister (&secondElem)[4], FloatRegister (&result)[4]) { + for (int i = 0; i < 4; i++) { + eor(result[i], T16B, firstElem[i], secondElem[i]); + } +} + +/** + * Perform the vectorized left-rotation on 32-bit lanes for a group of + * 4 quarter round operations. + * Each parameter is a set of 4 registers representing the 4 registers + * for each element in the source and destination for each of the quarter + * rounds (e.g. for "d" it would consist of v12/v13/v14/v15 on columns and + * v15/v12/v13/v14 on diagonal alignments).
+ * + * @param sourceReg array of SIMD registers representing the source + * @param destReg array of SIMD registers representing the destination + * @param bits the distance of the rotation in bits, must be 16/12/8/7 per + * the ChaCha20 specification. + */ +void MacroAssembler::cc20_qr_lrot4(FloatRegister (&sourceReg)[4], + FloatRegister (&destReg)[4], int bits, FloatRegister table) { + switch (bits) { + case 16: // reg <<<= 16, in-place swap of half-words + for (int i = 0; i < 4; i++) { + rev32(destReg[i], T8H, sourceReg[i]); + } + break; - // a += b, d ^= a, d <<<= 16 - addv(aVec, T4S, aVec, bVec); - eor(dVec, T16B, dVec, aVec); - rev32(dVec, T8H, dVec); + case 7: // reg <<<= (12 || 7) + case 12: // r-shift src -> dest, l-shift src & ins to dest + for (int i = 0; i < 4; i++) { + ushr(destReg[i], T4S, sourceReg[i], 32 - bits); + } - // c += d, b ^= c, b <<<= 12 - addv(cVec, T4S, cVec, dVec); - eor(scratch, T16B, bVec, cVec); - ushr(bVec, T4S, scratch, 20); - sli(bVec, T4S, scratch, 12); + for (int i = 0; i < 4; i++) { + sli(destReg[i], T4S, sourceReg[i], bits); + } + break; - // a += b, d ^= a, d <<<= 8 - addv(aVec, T4S, aVec, bVec); - eor(dVec, T16B, dVec, aVec); - tbl(dVec, T16B, dVec, 1, table); + case 8: // reg <<<= 8, simulate left rotation with table reorg + for (int i = 0; i < 4; i++) { + tbl(destReg[i], T16B, sourceReg[i], 1, table); + } + break; - // c += d, b ^= c, b <<<= 7 - addv(cVec, T4S, cVec, dVec); - eor(scratch, T16B, bVec, cVec); - ushr(bVec, T4S, scratch, 25); - sli(bVec, T4S, scratch, 7); + default: + // The caller shouldn't be sending bit rotation values outside + // of the 16/12/8/7 as defined in the specification. + ShouldNotReachHere(); + } } /** - * Shift the b, c, and d vectors between columnar and diagonal representations. - * Note that the "a" vector does not shift. + * Set the FloatRegisters for a 4-vector register set. These will be used + * during various quarter round transformations (adds, xors and left-rotations). + * This method itself does not result in the output of any assembly + * instructions. It just organizes the vectors so they can be in columnar or + * diagonal alignments. * - * @param bVec the SIMD register containing only the "b" values - * @param cVec the SIMD register containing only the "c" values - * @param dVec the SIMD register containing only the "d" values - * @param colToDiag true if moving columnar to diagonal, false if - * moving diagonal back to columnar. + * @param vectorSet a 4-vector array to be altered into a new alignment + * @param stateVectors the 16-vector array that represents the current + * working state. The indices of this array match up with the + * organization of the ChaCha20 state per RFC 7539 (e.g. stateVectors[12] + * would contain the vector that holds the 32-bit counter, etc.) + * @param idx1 the index of the stateVectors array to be assigned to the + * first vectorSet element. + * @param idx2 the index of the stateVectors array to be assigned to the + * second vectorSet element. + * @param idx3 the index of the stateVectors array to be assigned to the + * third vectorSet element. + * @param idx4 the index of the stateVectors array to be assigned to the + * fourth vectorSet element. */ -void MacroAssembler::cc20_shift_lane_org(FloatRegister bVec, FloatRegister cVec, - FloatRegister dVec, bool colToDiag) { - int bShift = colToDiag ? 4 : 12; - int cShift = 8; - int dShift = colToDiag ? 
12 : 4; - - ext(bVec, T16B, bVec, bVec, bShift); - ext(cVec, T16B, cVec, cVec, cShift); - ext(dVec, T16B, dVec, dVec, dShift); +void MacroAssembler::cc20_set_qr_registers(FloatRegister (&vectorSet)[4], + const FloatRegister (&stateVectors)[16], int idx1, int idx2, + int idx3, int idx4) { + vectorSet[0] = stateVectors[idx1]; + vectorSet[1] = stateVectors[idx2]; + vectorSet[2] = stateVectors[idx3]; + vectorSet[3] = stateVectors[idx4]; } diff --git a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp index a6cd0557758..0fbc2ef141e 100644 --- a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -200,4 +200,8 @@ return false; } + // Is FEAT_FP16 supported for this CPU? + static bool is_feat_fp16_supported() { + return (VM_Version::supports_fphp() && VM_Version::supports_asimdhp()); + } #endif // CPU_AARCH64_MATCHER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp index 588b8898d2d..cdf67e3423f 100644 --- a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp @@ -93,14 +93,60 @@ void MethodHandles::verify_klass(MacroAssembler* _masm, void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } +void MethodHandles::verify_method(MacroAssembler* _masm, Register method, vmIntrinsics::ID iid) { + BLOCK_COMMENT("verify_method {"); + __ verify_method_ptr(method); + if (VerifyMethodHandles) { + Label L_ok; + assert_different_registers(method, rscratch1, rscratch2); + const Register method_holder = rscratch1; + __ load_method_holder(method_holder, method); + + switch (iid) { + case vmIntrinsicID::_invokeBasic: + // Require compiled LambdaForm class to be fully initialized. + __ lea(rscratch2, Address(method_holder, InstanceKlass::init_state_offset())); + __ ldarb(rscratch2, rscratch2); + __ cmp(rscratch2, InstanceKlass::fully_initialized); + __ br(Assembler::EQ, L_ok); + break; + + case vmIntrinsicID::_linkToStatic: + __ clinit_barrier(method_holder, rscratch2, &L_ok); + break; + + case vmIntrinsicID::_linkToVirtual: + case vmIntrinsicID::_linkToSpecial: + case vmIntrinsicID::_linkToInterface: + // Class initialization check is too strong here. Just ensure that class initialization has been initiated. + __ lea(rscratch2, Address(method_holder, InstanceKlass::init_state_offset())); + __ ldarb(rscratch2, rscratch2); + __ cmp(rscratch2, InstanceKlass::being_initialized); + __ br(Assembler::GE, L_ok); + + // init_state check failed, but it may be an abstract interface method + __ ldrh(rscratch2, Address(method, Method::access_flags_offset())); + __ tbnz(rscratch2, exact_log2(JVM_ACC_ABSTRACT), L_ok); + break; + + default: + fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); + } + + // Method holder init state check failed for a concrete method. 
+ __ stop("Method holder klass is not initialized"); + __ bind(L_ok); + } + BLOCK_COMMENT("} verify_method"); +} #endif //ASSERT void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, - bool for_compiler_entry) { + bool for_compiler_entry, vmIntrinsics::ID iid) { assert(method == rmethod, "interpreter calling convention"); Label L_no_such_method; __ cbz(rmethod, L_no_such_method); - __ verify_method_ptr(method); + verify_method(_masm, method, iid); if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { Label run_compiled_code; @@ -160,7 +206,7 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, __ BIND(L); } - jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry, vmIntrinsics::_invokeBasic); BLOCK_COMMENT("} jump_to_lambda_form"); } @@ -447,8 +493,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, // After figuring out which concrete method to call, jump into it. // Note that this works in the interpreter with no data motion. // But the compiled version will require that r2_recv be shifted out. - __ verify_method_ptr(rmethod); - jump_from_method_handle(_masm, rmethod, temp1, for_compiler_entry); + jump_from_method_handle(_masm, rmethod, temp1, for_compiler_entry, iid); if (iid == vmIntrinsics::_linkToInterface) { __ bind(L_incompatible_class_change_error); __ far_jump(RuntimeAddress(SharedRuntime::throw_IncompatibleClassChangeError_entry())); diff --git a/src/hotspot/cpu/aarch64/methodHandles_aarch64.hpp b/src/hotspot/cpu/aarch64/methodHandles_aarch64.hpp index bd36f3e84c2..e82f4d6237e 100644 --- a/src/hotspot/cpu/aarch64/methodHandles_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/methodHandles_aarch64.hpp @@ -39,6 +39,8 @@ enum /* platform_dependent_constants */ { Register obj, vmClassID klass_id, const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + static void verify_method(MacroAssembler* _masm, Register method, vmIntrinsics::ID iid) NOT_DEBUG_RETURN; + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), "reference is a MH"); @@ -49,7 +51,7 @@ enum /* platform_dependent_constants */ { // Similar to InterpreterMacroAssembler::jump_from_interpreted. // Takes care of special dispatch from single stepping too. 
static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, - bool for_compiler_entry); + bool for_compiler_entry, vmIntrinsics::ID iid); static void jump_to_lambda_form(MacroAssembler* _masm, Register recv, Register method_temp, diff --git a/src/hotspot/cpu/aarch64/runtime_aarch64.cpp b/src/hotspot/cpu/aarch64/runtime_aarch64.cpp index 83e43c3ebd2..3fcb0e70b57 100644 --- a/src/hotspot/cpu/aarch64/runtime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/runtime_aarch64.cpp @@ -26,6 +26,7 @@ #ifdef COMPILER2 #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "code/aotCodeCache.hpp" #include "code/vmreg.hpp" #include "interpreter/interpreter.hpp" #include "opto/runtime.hpp" @@ -60,11 +61,19 @@ class SimpleRuntimeFrame { //------------------------------generate_uncommon_trap_blob-------------------- UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() { + const char* name = OptoRuntime::stub_name(OptoStubId::uncommon_trap_id); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::C2Blob, (uint)OptoStubId::uncommon_trap_id, name); + if (blob != nullptr) { + return blob->as_uncommon_trap_blob(); + } + // Allocate space for the code ResourceMark rm; // Setup code generation tools - const char* name = OptoRuntime::stub_name(OptoStubId::uncommon_trap_id); CodeBuffer buffer(name, 2048, 1024); + if (buffer.blob() == nullptr) { + return nullptr; + } MacroAssembler* masm = new MacroAssembler(&buffer); assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); @@ -243,8 +252,10 @@ UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() { // Make sure all code is generated masm->flush(); - return UncommonTrapBlob::create(&buffer, oop_maps, - SimpleRuntimeFrame::framesize >> 1); + UncommonTrapBlob *ut_blob = UncommonTrapBlob::create(&buffer, oop_maps, + SimpleRuntimeFrame::framesize >> 1); + AOTCodeCache::store_code_blob(*ut_blob, AOTCodeEntry::C2Blob, (uint)OptoStubId::uncommon_trap_id, name); + return ut_blob; } //------------------------------generate_exception_blob--------------------------- @@ -280,11 +291,19 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() { assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + const char* name = OptoRuntime::stub_name(OptoStubId::exception_id); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::C2Blob, (uint)OptoStubId::exception_id, name); + if (blob != nullptr) { + return blob->as_exception_blob(); + } + // Allocate space for the code ResourceMark rm; // Setup code generation tools - const char* name = OptoRuntime::stub_name(OptoStubId::exception_id); CodeBuffer buffer(name, 2048, 1024); + if (buffer.blob() == nullptr) { + return nullptr; + } MacroAssembler* masm = new MacroAssembler(&buffer); // TODO check various assumptions made here @@ -378,7 +397,9 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() { masm->flush(); // Set exception blob - return ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); + ExceptionBlob* ex_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); + AOTCodeCache::store_code_blob(*ex_blob, AOTCodeEntry::C2Blob, (uint)OptoStubId::exception_id, name); + return ex_blob; } #endif // COMPILER2 diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index 967984b8821..51f18cb1bbe 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ 
b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -26,6 +26,7 @@ #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "code/aotCodeCache.hpp" #include "code/codeCache.hpp" #include "code/compiledIC.hpp" #include "code/debugInfoRec.hpp" @@ -557,40 +558,6 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // If this happens, control eventually transfers back to the compiled // caller, but with an uncorrected stack, causing delayed havoc. - if (VerifyAdapterCalls && - (Interpreter::code() != nullptr || StubRoutines::final_stubs_code() != nullptr)) { -#if 0 - // So, let's test for cascading c2i/i2c adapters right now. - // assert(Interpreter::contains($return_addr) || - // StubRoutines::contains($return_addr), - // "i2c adapter must return to an interpreter frame"); - __ block_comment("verify_i2c { "); - Label L_ok; - if (Interpreter::code() != nullptr) { - range_check(masm, rax, r11, - Interpreter::code()->code_start(), Interpreter::code()->code_end(), - L_ok); - } - if (StubRoutines::initial_stubs_code() != nullptr) { - range_check(masm, rax, r11, - StubRoutines::initial_stubs_code()->code_begin(), - StubRoutines::initial_stubs_code()->code_end(), - L_ok); - } - if (StubRoutines::final_stubs_code() != nullptr) { - range_check(masm, rax, r11, - StubRoutines::final_stubs_code()->code_begin(), - StubRoutines::final_stubs_code()->code_end(), - L_ok); - } - const char* msg = "i2c adapter must return to an interpreter frame"; - __ block_comment(msg); - __ stop(msg); - __ bind(L_ok); - __ block_comment("} verify_i2ce "); -#endif - } - // Cut-out for having no stack args. int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; if (comp_args_on_stack) { @@ -711,12 +678,12 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } // --------------------------------------------------------------- -AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterHandlerEntry* handler) { address i2c_entry = __ pc(); gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); @@ -777,7 +744,8 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + handler->set_entry_points(i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + return; } static int c_calling_convention_priv(const BasicType *sig_bt, @@ -2017,6 +1985,23 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ leave(); + #if INCLUDE_JFR + // We need to do a poll test after unwind in case the sampler + // managed to sample the native frame after returning to Java. 
+ Label L_return; + __ ldr(rscratch1, Address(rthread, JavaThread::polling_word_offset())); + address poll_test_pc = __ pc(); + __ relocate(relocInfo::poll_return_type); + __ tbz(rscratch1, log2i_exact(SafepointMechanism::poll_bit()), L_return); + assert(SharedRuntime::polling_page_return_handler_blob() != nullptr, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + __ adr(rscratch1, InternalAddress(poll_test_pc)); + __ str(rscratch1, Address(rthread, JavaThread::saved_exception_pc_offset())); + __ far_jump(RuntimeAddress(stub)); + __ bind(L_return); +#endif // INCLUDE_JFR + // Any exception pending? __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); __ cbnz(rscratch1, exception_pending); @@ -2217,6 +2202,12 @@ void SharedRuntime::generate_deopt_blob() { } #endif const char* name = SharedRuntime::stub_name(SharedStubId::deopt_id); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, (uint)SharedStubId::deopt_id, name); + if (blob != nullptr) { + _deopt_blob = blob->as_deoptimization_blob(); + return; + } + CodeBuffer buffer(name, 2048+pad, 1024); MacroAssembler* masm = new MacroAssembler(&buffer); int frame_size_in_words; @@ -2580,6 +2571,8 @@ void SharedRuntime::generate_deopt_blob() { _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); } #endif + + AOTCodeCache::store_code_blob(*_deopt_blob, AOTCodeEntry::SharedBlob, (uint)SharedStubId::deopt_id, name); } // Number of stack slots between incoming argument block and the start of @@ -2608,12 +2601,16 @@ VMReg SharedRuntime::thread_register() { SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address call_ptr) { assert(is_polling_page_id(id), "expected a polling page stub id"); + // Allocate space for the code. Setup code generation tools. + const char* name = SharedRuntime::stub_name(id); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, (uint)id, name); + if (blob != nullptr) { + return blob->as_safepoint_blob(); + } + ResourceMark rm; OopMapSet *oop_maps = new OopMapSet(); OopMap* map; - - // Allocate space for the code. Setup code generation tools. 
- const char* name = SharedRuntime::stub_name(id); CodeBuffer buffer(name, 2048, 1024); MacroAssembler* masm = new MacroAssembler(&buffer); @@ -2722,7 +2719,10 @@ SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address cal masm->flush(); // Fill-out other meta info - return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); + SafepointBlob* sp_blob = SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); + + AOTCodeCache::store_code_blob(*sp_blob, AOTCodeEntry::SharedBlob, (uint)id, name); + return sp_blob; } // @@ -2737,10 +2737,14 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address desti assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before"); assert(is_resolve_id(id), "expected a resolve stub id"); + const char* name = SharedRuntime::stub_name(id); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, (uint)id, name); + if (blob != nullptr) { + return blob->as_runtime_stub(); + } + // allocate space for the code ResourceMark rm; - - const char* name = SharedRuntime::stub_name(id); CodeBuffer buffer(name, 1000, 512); MacroAssembler* masm = new MacroAssembler(&buffer); @@ -2813,7 +2817,10 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address desti // return the blob // frame_size_words or bytes?? - return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); + RuntimeStub* rs_blob = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); + + AOTCodeCache::store_code_blob(*rs_blob, AOTCodeEntry::SharedBlob, (uint)id, name); + return rs_blob; } // Continuation point for throwing of implicit exceptions that are @@ -2853,10 +2860,15 @@ RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address ru int insts_size = 512; int locs_size = 64; - ResourceMark rm; const char* timer_msg = "SharedRuntime generate_throw_exception"; TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime)); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, (uint)id, name); + if (blob != nullptr) { + return blob->as_runtime_stub(); + } + + ResourceMark rm; CodeBuffer code(name, insts_size, locs_size); OopMapSet* oop_maps = new OopMapSet(); MacroAssembler* masm = new MacroAssembler(&code); @@ -2883,7 +2895,7 @@ RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address ru __ mov(c_rarg0, rthread); BLOCK_COMMENT("call runtime_entry"); - __ mov(rscratch1, runtime_entry); + __ lea(rscratch1, RuntimeAddress(runtime_entry)); __ blr(rscratch1); // Generate oop map @@ -2916,6 +2928,8 @@ RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address ru frame_complete, (framesize >> (LogBytesPerWord - LogBytesPerInt)), oop_maps, false); + AOTCodeCache::store_code_blob(*stub, AOTCodeEntry::SharedBlob, (uint)id, name); + return stub; } diff --git a/src/hotspot/cpu/aarch64/stubDeclarations_aarch64.hpp b/src/hotspot/cpu/aarch64/stubDeclarations_aarch64.hpp index c8b215b6eb4..e0ca01ba6ce 100644 --- a/src/hotspot/cpu/aarch64/stubDeclarations_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/stubDeclarations_aarch64.hpp @@ -44,7 +44,7 @@ do_arch_blob, \ do_arch_entry, \ do_arch_entry_init) \ - do_arch_blob(compiler, 65000 ZGC_ONLY(+5000)) \ + do_arch_blob(compiler, 70000) \ do_stub(compiler, vector_iota_indices) \ do_arch_entry(aarch64, compiler, vector_iota_indices, \ vector_iota_indices, vector_iota_indices) \ @@ -109,7 +109,7 @@ 
do_arch_blob, \ do_arch_entry, \ do_arch_entry_init) \ - do_arch_blob(final, 20000 ZGC_ONLY(+60000)) \ + do_arch_blob(final, 20000 ZGC_ONLY(+85000)) \ do_stub(final, copy_byte_f) \ do_arch_entry(aarch64, final, copy_byte_f, copy_byte_f, \ copy_byte_f) \ diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index 467505ed337..a0d1e22ff96 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -1162,7 +1162,7 @@ class StubGenerator: public StubCodeGenerator { void copy_memory_small(DecoratorSet decorators, BasicType type, Register s, Register d, Register count, int step) { bool is_backwards = step < 0; - size_t granularity = uabs(step); + size_t granularity = g_uabs(step); int direction = is_backwards ? -1 : 1; Label Lword, Lint, Lshort, Lbyte; @@ -1221,7 +1221,7 @@ class StubGenerator: public StubCodeGenerator { Register s, Register d, Register count, int step) { copy_direction direction = step < 0 ? copy_backwards : copy_forwards; bool is_backwards = step < 0; - unsigned int granularity = uabs(step); + unsigned int granularity = g_uabs(step); const Register t0 = r3, t1 = r4; // <= 80 (or 96 for SIMD) bytes do inline. Direction doesn't matter because we always @@ -2566,6 +2566,123 @@ class StubGenerator: public StubCodeGenerator { return start; } + address generate_unsafecopy_common_error_exit() { + address start_pc = __ pc(); + __ leave(); + __ mov(r0, 0); + __ ret(lr); + return start_pc; + } + + // + // Generate 'unsafe' set memory stub + // Though just as safe as the other stubs, it takes an unscaled + // size_t (# bytes) argument instead of an element count. + // + // This fill operation is atomicity preserving: as long as the + // address supplied is sufficiently aligned, all writes of up to 64 + // bits in size are single-copy atomic. + // + // Input: + // c_rarg0 - destination array address + // c_rarg1 - byte count (size_t) + // c_rarg2 - byte value + // + address generate_unsafe_setmemory() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, StubGenStubId::unsafe_setmemory_id); + address start = __ pc(); + + Register dest = c_rarg0, count = c_rarg1, value = c_rarg2; + Label tail; + + UnsafeMemoryAccessMark umam(this, true, false); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ dup(v0, __ T16B, value); + + if (AvoidUnalignedAccesses) { + __ cmp(count, (u1)16); + __ br(__ LO, tail); + + __ mov(rscratch1, 16); + __ andr(rscratch2, dest, 15); + __ sub(rscratch1, rscratch1, rscratch2); // Bytes needed to 16-align dest + __ strq(v0, Address(dest)); + __ sub(count, count, rscratch1); + __ add(dest, dest, rscratch1); + } + + __ subs(count, count, (u1)64); + __ br(__ LO, tail); + { + Label again; + __ bind(again); + __ stpq(v0, v0, Address(dest)); + __ stpq(v0, v0, Address(dest, 32)); + + __ subs(count, count, 64); + __ add(dest, dest, 64); + __ br(__ HS, again); + } + + __ bind(tail); + // The count of bytes is off by 64, but we don't need to correct + // it because we're only going to use the least-significant few + // count bits from here on. 
+ // __ add(count, count, 64); + + { + Label dont; + __ tbz(count, exact_log2(32), dont); + __ stpq(v0, v0, __ post(dest, 32)); + __ bind(dont); + } + { + Label dont; + __ tbz(count, exact_log2(16), dont); + __ strq(v0, __ post(dest, 16)); + __ bind(dont); + } + { + Label dont; + __ tbz(count, exact_log2(8), dont); + __ strd(v0, __ post(dest, 8)); + __ bind(dont); + } + + Label finished; + __ tst(count, 7); + __ br(__ EQ, finished); + + { + Label dont; + __ tbz(count, exact_log2(4), dont); + __ strs(v0, __ post(dest, 4)); + __ bind(dont); + } + { + Label dont; + __ tbz(count, exact_log2(2), dont); + __ bfi(value, value, 8, 8); + __ strh(value, __ post(dest, 2)); + __ bind(dont); + } + { + Label dont; + __ tbz(count, exact_log2(1), dont); + __ strb(value, Address(dest)); + __ bind(dont); + } + + __ bind(finished); + __ leave(); + __ ret(lr); + + return start; + } + address generate_data_cache_writeback() { const Register line = c_rarg0; // address of line to write back @@ -2615,6 +2732,9 @@ class StubGenerator: public StubCodeGenerator { address entry_jlong_arraycopy; address entry_checkcast_arraycopy; + address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeMemoryAccess::set_common_exit_stub_pc(ucm_common_error_exit); + generate_copy_longs(StubGenStubId::copy_byte_f_id, IN_HEAP | IS_ARRAY, copy_f, r0, r1, r15); generate_copy_longs(StubGenStubId::copy_byte_b_id, IN_HEAP | IS_ARRAY, copy_b, r0, r1, r15); @@ -4405,89 +4525,44 @@ class StubGenerator: public StubCodeGenerator { return start; } - /** - * Arguments: - * - * Inputs: - * c_rarg0 - int crc - * c_rarg1 - byte* buf - * c_rarg2 - int length - * - * Output: - * rax - int crc result - */ - address generate_updateBytesCRC32() { - assert(UseCRC32Intrinsics, "what are we doing here?"); - - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::updateBytesCRC32_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - const Register crc = c_rarg0; // crc - const Register buf = c_rarg1; // source java byte array address - const Register len = c_rarg2; // length - const Register table0 = c_rarg3; // crc_table address - const Register table1 = c_rarg4; - const Register table2 = c_rarg5; - const Register table3 = c_rarg6; - const Register tmp3 = c_rarg7; - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - - __ kernel_crc32(crc, buf, len, - table0, table1, table2, table3, rscratch1, rscratch2, tmp3); - - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(lr); - - return start; - } - - // ChaCha20 block function. This version parallelizes 4 quarter - // round operations at a time. It uses 16 SIMD registers to - // produce 4 blocks of key stream. + // ChaCha20 block function. This version parallelizes the 32-bit + // state elements on each of 16 vectors, producing 4 blocks of + // keystream at a time. // // state (int[16]) = c_rarg0 // keystream (byte[256]) = c_rarg1 - // return - number of bytes of keystream (always 256) - // - // In this approach, we load the 512-bit start state sequentially into - // 4 128-bit vectors. We then make 4 4-vector copies of that starting - // state, with each successive set of 4 vectors having a +1 added into - // the first 32-bit lane of the 4th vector in that group (the counter). - // By doing this, we can perform the block function on 4 512-bit blocks - // within one run of this intrinsic. 
- // The alignment of the data across the 4-vector group is such that at - // the start it is already aligned for the first round of each two-round - // loop iteration. In other words, the corresponding lanes of each vector - // will contain the values needed for that quarter round operation (e.g. - // elements 0/4/8/12, 1/5/9/13, 2/6/10/14, etc.). - // In between each full round, a lane shift must occur. Within a loop - // iteration, between the first and second rounds, the 2nd, 3rd, and 4th - // vectors are rotated left 32, 64 and 96 bits, respectively. The result - // is effectively a diagonal orientation in columnar form. After the - // second full round, those registers are left-rotated again, this time - // 96, 64, and 32 bits - returning the vectors to their columnar organization. - // After all 10 iterations, the original state is added to each 4-vector - // working state along with the add mask, and the 4 vector groups are - // sequentially written to the memory dedicated for the output key stream. + // return - number of bytes of produced keystream (always 256) // - // For a more detailed explanation, see Goll and Gueron, "Vectorization of - // ChaCha Stream Cipher", 2014 11th Int. Conf. on Information Technology: - // New Generations, Las Vegas, NV, USA, April 2014, DOI: 10.1109/ITNG.2014.33 - address generate_chacha20Block_qrpar() { - Label L_Q_twoRounds, L_Q_cc20_const; + // This implementation takes each 32-bit integer from the state + // array and broadcasts it across all 4 32-bit lanes of a vector register + // (e.g. state[0] is replicated on all 4 lanes of v4, state[1] to all 4 lanes + // of v5, etc.). Once all 16 elements have been broadcast onto 16 vectors, + // the quarter round schedule is implemented as outlined in RFC 7539 section + // 2.3. However, instead of sequentially processing the 3 quarter round + // operations represented by one QUARTERROUND function, we instead stack all + // the adds, xors and left-rotations from the first 4 quarter rounds together + // and then do the same for the second set of 4 quarter rounds. This removes + // some latency that would otherwise be incurred by waiting for an add to + // complete before performing an xor (which depends on the result of the + // add), etc. An adjustment happens between the first and second groups of 4 + // quarter rounds, but this is done only in the inputs to the macro functions + // that generate the assembly instructions - these adjustments themselves are + // not part of the resulting assembly. + // The 4 registers v0-v3 are used during the quarter round operations as + // scratch registers. Once the 20 rounds are complete, these 4 scratch + // registers become the vectors involved in adding the start state back onto + // the post-QR working state. After the adds are complete, each of the 16 + // vectors write their first lane back to the keystream buffer, followed + // by the second lane from all vectors and so on. + address generate_chacha20Block_blockpar() { + Label L_twoRounds, L_cc20_const; // The constant data is broken into two 128-bit segments to be loaded - // onto SIMD registers. The first 128 bits are a counter add overlay - // that adds +1/+0/+0/+0 to the vectors holding replicated state[12]. + // onto FloatRegisters. The first 128 bits are a counter add overlay + // that adds +0/+1/+2/+3 to the vector holding replicated state[12]. // The second 128-bits is a table constant used for 8-bit left rotations. - // on 32-bit lanes within a SIMD register. 
- __ BIND(L_Q_cc20_const); - __ emit_int64(0x0000000000000001UL); - __ emit_int64(0x0000000000000000UL); + __ BIND(L_cc20_const); + __ emit_int64(0x0000000100000000UL); + __ emit_int64(0x0000000300000002UL); __ emit_int64(0x0605040702010003UL); __ emit_int64(0x0E0D0C0F0A09080BUL); @@ -4497,144 +4572,142 @@ class StubGenerator: public StubCodeGenerator { address start = __ pc(); __ enter(); + int i, j; const Register state = c_rarg0; const Register keystream = c_rarg1; const Register loopCtr = r10; const Register tmpAddr = r11; + const FloatRegister ctrAddOverlay = v28; + const FloatRegister lrot8Tbl = v29; + + // Organize SIMD registers in an array that facilitates + // putting repetitive opcodes into loop structures. It is + // important that each grouping of 4 registers is monotonically + // increasing to support the requirements of multi-register + // instructions (e.g. ld4r, st4, etc.) + const FloatRegister workSt[16] = { + v4, v5, v6, v7, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27 + }; - const FloatRegister aState = v0; - const FloatRegister bState = v1; - const FloatRegister cState = v2; - const FloatRegister dState = v3; - const FloatRegister a1Vec = v4; - const FloatRegister b1Vec = v5; - const FloatRegister c1Vec = v6; - const FloatRegister d1Vec = v7; - // Skip the callee-saved registers v8 - v15 - const FloatRegister a2Vec = v16; - const FloatRegister b2Vec = v17; - const FloatRegister c2Vec = v18; - const FloatRegister d2Vec = v19; - const FloatRegister a3Vec = v20; - const FloatRegister b3Vec = v21; - const FloatRegister c3Vec = v22; - const FloatRegister d3Vec = v23; - const FloatRegister a4Vec = v24; - const FloatRegister b4Vec = v25; - const FloatRegister c4Vec = v26; - const FloatRegister d4Vec = v27; - const FloatRegister scratch = v28; - const FloatRegister addMask = v29; - const FloatRegister lrot8Tbl = v30; - - // Load the initial state in the first 4 quadword registers, - // then copy the initial state into the next 4 quadword registers - // that will be used for the working state. - __ ld1(aState, bState, cState, dState, __ T16B, Address(state)); - - // Load the index register for 2 constant 128-bit data fields. - // The first represents the +1/+0/+0/+0 add mask. The second is - // the 8-bit left rotation. - __ adr(tmpAddr, L_Q_cc20_const); - __ ldpq(addMask, lrot8Tbl, Address(tmpAddr)); - - __ mov(a1Vec, __ T16B, aState); - __ mov(b1Vec, __ T16B, bState); - __ mov(c1Vec, __ T16B, cState); - __ mov(d1Vec, __ T16B, dState); - - __ mov(a2Vec, __ T16B, aState); - __ mov(b2Vec, __ T16B, bState); - __ mov(c2Vec, __ T16B, cState); - __ addv(d2Vec, __ T4S, d1Vec, addMask); - - __ mov(a3Vec, __ T16B, aState); - __ mov(b3Vec, __ T16B, bState); - __ mov(c3Vec, __ T16B, cState); - __ addv(d3Vec, __ T4S, d2Vec, addMask); - - __ mov(a4Vec, __ T16B, aState); - __ mov(b4Vec, __ T16B, bState); - __ mov(c4Vec, __ T16B, cState); - __ addv(d4Vec, __ T4S, d3Vec, addMask); - - // Set up the 10 iteration loop + // Pull in constant data. The first 16 bytes are the add overlay + // which is applied to the vector holding the counter (state[12]). + // The second 16 bytes is the index register for the 8-bit left + // rotation tbl instruction. + __ adr(tmpAddr, L_cc20_const); + __ ldpq(ctrAddOverlay, lrot8Tbl, Address(tmpAddr)); + + // Load from memory and interlace across 16 SIMD registers, + // With each word from memory being broadcast to all lanes of + // each successive SIMD register. + // Addr(0) -> All lanes in workSt[i] + // Addr(4) -> All lanes workSt[i + 1], etc. 
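+    // For reference only (not generated code): with this layout, each 32-bit
+    // lane independently evaluates the scalar RFC 7539 quarter round that the
+    // cc20_qr_* sequences below implement (rotl32 = 32-bit left rotation):
+    //   a += b; d ^= a; d = rotl32(d, 16);
+    //   c += d; b ^= c; b = rotl32(b, 12);
+    //   a += b; d ^= a; d = rotl32(d, 8);
+    //   c += d; b ^= c; b = rotl32(b, 7);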
+ __ mov(tmpAddr, state); + for (i = 0; i < 16; i += 4) { + __ ld4r(workSt[i], workSt[i + 1], workSt[i + 2], workSt[i + 3], __ T4S, + __ post(tmpAddr, 16)); + } + __ addv(workSt[12], __ T4S, workSt[12], ctrAddOverlay); // Add ctr overlay + + // Before entering the loop, create 5 4-register arrays. These + // will hold the 4 registers that represent the a/b/c/d fields + // in the quarter round operation. For instance the "b" field + // for the first 4 quarter round operations is the set of v16/v17/v18/v19, + // but in the second 4 quarter rounds it gets adjusted to v17/v18/v19/v16 + // since it is part of a diagonal organization. The aSet and scratch + // register sets are defined at declaration time because they do not change + // organization at any point during the 20-round processing. + FloatRegister aSet[4] = { v4, v5, v6, v7 }; + FloatRegister bSet[4]; + FloatRegister cSet[4]; + FloatRegister dSet[4]; + FloatRegister scratch[4] = { v0, v1, v2, v3 }; + + // Set up the 10 iteration loop and perform all 8 quarter round ops __ mov(loopCtr, 10); - __ BIND(L_Q_twoRounds); - - // The first set of operations on the vectors covers the first 4 quarter - // round operations: - // Qround(state, 0, 4, 8,12) - // Qround(state, 1, 5, 9,13) - // Qround(state, 2, 6,10,14) - // Qround(state, 3, 7,11,15) - __ cc20_quarter_round(a1Vec, b1Vec, c1Vec, d1Vec, scratch, lrot8Tbl); - __ cc20_quarter_round(a2Vec, b2Vec, c2Vec, d2Vec, scratch, lrot8Tbl); - __ cc20_quarter_round(a3Vec, b3Vec, c3Vec, d3Vec, scratch, lrot8Tbl); - __ cc20_quarter_round(a4Vec, b4Vec, c4Vec, d4Vec, scratch, lrot8Tbl); - - // Shuffle the b1Vec/c1Vec/d1Vec to reorganize the state vectors to - // diagonals. The a1Vec does not need to change orientation. - __ cc20_shift_lane_org(b1Vec, c1Vec, d1Vec, true); - __ cc20_shift_lane_org(b2Vec, c2Vec, d2Vec, true); - __ cc20_shift_lane_org(b3Vec, c3Vec, d3Vec, true); - __ cc20_shift_lane_org(b4Vec, c4Vec, d4Vec, true); - - // The second set of operations on the vectors covers the second 4 quarter - // round operations, now acting on the diagonals: - // Qround(state, 0, 5,10,15) - // Qround(state, 1, 6,11,12) - // Qround(state, 2, 7, 8,13) - // Qround(state, 3, 4, 9,14) - __ cc20_quarter_round(a1Vec, b1Vec, c1Vec, d1Vec, scratch, lrot8Tbl); - __ cc20_quarter_round(a2Vec, b2Vec, c2Vec, d2Vec, scratch, lrot8Tbl); - __ cc20_quarter_round(a3Vec, b3Vec, c3Vec, d3Vec, scratch, lrot8Tbl); - __ cc20_quarter_round(a4Vec, b4Vec, c4Vec, d4Vec, scratch, lrot8Tbl); - - // Before we start the next iteration, we need to perform shuffles - // on the b/c/d vectors to move them back to columnar organizations - // from their current diagonal orientation. 
-    __ cc20_shift_lane_org(b1Vec, c1Vec, d1Vec, false);
-    __ cc20_shift_lane_org(b2Vec, c2Vec, d2Vec, false);
-    __ cc20_shift_lane_org(b3Vec, c3Vec, d3Vec, false);
-    __ cc20_shift_lane_org(b4Vec, c4Vec, d4Vec, false);
+    __ BIND(L_twoRounds);
+
+    // Set to columnar organization and do the following 4 quarter-rounds:
+    // QUARTERROUND(0, 4, 8, 12)
+    // QUARTERROUND(1, 5, 9, 13)
+    // QUARTERROUND(2, 6, 10, 14)
+    // QUARTERROUND(3, 7, 11, 15)
+    __ cc20_set_qr_registers(bSet, workSt, 4, 5, 6, 7);
+    __ cc20_set_qr_registers(cSet, workSt, 8, 9, 10, 11);
+    __ cc20_set_qr_registers(dSet, workSt, 12, 13, 14, 15);
+
+    __ cc20_qr_add4(aSet, bSet);                    // a += b
+    __ cc20_qr_xor4(dSet, aSet, dSet);              // d ^= a
+    __ cc20_qr_lrot4(dSet, dSet, 16, lrot8Tbl);     // d <<<= 16
+
+    __ cc20_qr_add4(cSet, dSet);                    // c += d
+    __ cc20_qr_xor4(bSet, cSet, scratch);           // b ^= c (scratch)
+    __ cc20_qr_lrot4(scratch, bSet, 12, lrot8Tbl);  // b <<<= 12
+
+    __ cc20_qr_add4(aSet, bSet);                    // a += b
+    __ cc20_qr_xor4(dSet, aSet, dSet);              // d ^= a
+    __ cc20_qr_lrot4(dSet, dSet, 8, lrot8Tbl);      // d <<<= 8
+
+    __ cc20_qr_add4(cSet, dSet);                    // c += d
+    __ cc20_qr_xor4(bSet, cSet, scratch);           // b ^= c (scratch)
+    __ cc20_qr_lrot4(scratch, bSet, 7, lrot8Tbl);   // b <<<= 7
+
+    // Set to diagonal organization and do the next 4 quarter-rounds:
+    // QUARTERROUND(0, 5, 10, 15)
+    // QUARTERROUND(1, 6, 11, 12)
+    // QUARTERROUND(2, 7, 8, 13)
+    // QUARTERROUND(3, 4, 9, 14)
+    __ cc20_set_qr_registers(bSet, workSt, 5, 6, 7, 4);
+    __ cc20_set_qr_registers(cSet, workSt, 10, 11, 8, 9);
+    __ cc20_set_qr_registers(dSet, workSt, 15, 12, 13, 14);
+
+    __ cc20_qr_add4(aSet, bSet);                    // a += b
+    __ cc20_qr_xor4(dSet, aSet, dSet);              // d ^= a
+    __ cc20_qr_lrot4(dSet, dSet, 16, lrot8Tbl);     // d <<<= 16
+
+    __ cc20_qr_add4(cSet, dSet);                    // c += d
+    __ cc20_qr_xor4(bSet, cSet, scratch);           // b ^= c (scratch)
+    __ cc20_qr_lrot4(scratch, bSet, 12, lrot8Tbl);  // b <<<= 12
+
+    __ cc20_qr_add4(aSet, bSet);                    // a += b
+    __ cc20_qr_xor4(dSet, aSet, dSet);              // d ^= a
+    __ cc20_qr_lrot4(dSet, dSet, 8, lrot8Tbl);      // d <<<= 8
+
+    __ cc20_qr_add4(cSet, dSet);                    // c += d
+    __ cc20_qr_xor4(bSet, cSet, scratch);           // b ^= c (scratch)
+    __ cc20_qr_lrot4(scratch, bSet, 7, lrot8Tbl);   // b <<<= 7

     // Decrement and iterate
     __ sub(loopCtr, loopCtr, 1);
-    __ cbnz(loopCtr, L_Q_twoRounds);
-
-    // Once the counter reaches zero, we fall out of the loop
-    // and need to add the initial state back into the working state
-    // represented by the a/b/c/d1Vec registers. This is destructive
-    // on the dState register but we no longer will need it.
- __ addv(a1Vec, __ T4S, a1Vec, aState); - __ addv(b1Vec, __ T4S, b1Vec, bState); - __ addv(c1Vec, __ T4S, c1Vec, cState); - __ addv(d1Vec, __ T4S, d1Vec, dState); - - __ addv(a2Vec, __ T4S, a2Vec, aState); - __ addv(b2Vec, __ T4S, b2Vec, bState); - __ addv(c2Vec, __ T4S, c2Vec, cState); - __ addv(dState, __ T4S, dState, addMask); - __ addv(d2Vec, __ T4S, d2Vec, dState); - - __ addv(a3Vec, __ T4S, a3Vec, aState); - __ addv(b3Vec, __ T4S, b3Vec, bState); - __ addv(c3Vec, __ T4S, c3Vec, cState); - __ addv(dState, __ T4S, dState, addMask); - __ addv(d3Vec, __ T4S, d3Vec, dState); - - __ addv(a4Vec, __ T4S, a4Vec, aState); - __ addv(b4Vec, __ T4S, b4Vec, bState); - __ addv(c4Vec, __ T4S, c4Vec, cState); - __ addv(dState, __ T4S, dState, addMask); - __ addv(d4Vec, __ T4S, d4Vec, dState); - - // Write the final state back to the result buffer - __ st1(a1Vec, b1Vec, c1Vec, d1Vec, __ T16B, __ post(keystream, 64)); - __ st1(a2Vec, b2Vec, c2Vec, d2Vec, __ T16B, __ post(keystream, 64)); - __ st1(a3Vec, b3Vec, c3Vec, d3Vec, __ T16B, __ post(keystream, 64)); - __ st1(a4Vec, b4Vec, c4Vec, d4Vec, __ T16B, __ post(keystream, 64)); + __ cbnz(loopCtr, L_twoRounds); + + __ mov(tmpAddr, state); + + // Add the starting state back to the post-loop keystream + // state. We read/interlace the state array from memory into + // 4 registers similar to what we did in the beginning. Then + // add the counter overlay onto workSt[12] at the end. + for (i = 0; i < 16; i += 4) { + __ ld4r(v0, v1, v2, v3, __ T4S, __ post(tmpAddr, 16)); + __ addv(workSt[i], __ T4S, workSt[i], v0); + __ addv(workSt[i + 1], __ T4S, workSt[i + 1], v1); + __ addv(workSt[i + 2], __ T4S, workSt[i + 2], v2); + __ addv(workSt[i + 3], __ T4S, workSt[i + 3], v3); + } + __ addv(workSt[12], __ T4S, workSt[12], ctrAddOverlay); // Add ctr overlay + + // Write working state into the keystream buffer. This is accomplished + // by taking the lane "i" from each of the four vectors and writing + // it to consecutive 4-byte offsets, then post-incrementing by 16 and + // repeating with the next 4 vectors until all 16 vectors have been used. + // Then move to the next lane and repeat the process until all lanes have + // been written. 
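+    // (Illustrative note, not part of the generated stub: because the counter
+    // overlay added +0/+1/+2/+3 across the four lanes, lane 0 of workSt[0..15]
+    // yields keystream bytes 0..63, lane 1 bytes 64..127, lane 2 bytes 128..191,
+    // and lane 3 bytes 192..255; that is, four consecutive 64-byte ChaCha20 blocks.)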
+ for (i = 0; i < 4; i++) { + for (j = 0; j < 16; j += 4) { + __ st4(workSt[j], workSt[j + 1], workSt[j + 2], workSt[j + 3], __ S, i, + __ post(keystream, 16)); + } + } __ mov(r0, 256); // Return length of output keystream __ leave(); @@ -7008,6 +7081,407 @@ class StubGenerator: public StubCodeGenerator { return start; } + void bcax5(Register a0, Register a1, Register a2, Register a3, Register a4, + Register tmp0, Register tmp1, Register tmp2) { + __ bic(tmp0, a2, a1); // for a0 + __ bic(tmp1, a3, a2); // for a1 + __ bic(tmp2, a4, a3); // for a2 + __ eor(a2, a2, tmp2); + __ bic(tmp2, a0, a4); // for a3 + __ eor(a3, a3, tmp2); + __ bic(tmp2, a1, a0); // for a4 + __ eor(a0, a0, tmp0); + __ eor(a1, a1, tmp1); + __ eor(a4, a4, tmp2); + } + + void keccak_round_gpr(bool can_use_fp, bool can_use_r18, Register rc, + Register a0, Register a1, Register a2, Register a3, Register a4, + Register a5, Register a6, Register a7, Register a8, Register a9, + Register a10, Register a11, Register a12, Register a13, Register a14, + Register a15, Register a16, Register a17, Register a18, Register a19, + Register a20, Register a21, Register a22, Register a23, Register a24, + Register tmp0, Register tmp1, Register tmp2) { + __ eor3(tmp1, a4, a9, a14); + __ eor3(tmp0, tmp1, a19, a24); // tmp0 = a4^a9^a14^a19^a24 = c4 + __ eor3(tmp2, a1, a6, a11); + __ eor3(tmp1, tmp2, a16, a21); // tmp1 = a1^a6^a11^a16^a21 = c1 + __ rax1(tmp2, tmp0, tmp1); // d0 + { + + Register tmp3, tmp4; + if (can_use_fp && can_use_r18) { + tmp3 = rfp; + tmp4 = r18_tls; + } else { + tmp3 = a4; + tmp4 = a9; + __ stp(tmp3, tmp4, __ pre(sp, -16)); + } + + __ eor3(tmp3, a0, a5, a10); + __ eor3(tmp4, tmp3, a15, a20); // tmp4 = a0^a5^a10^a15^a20 = c0 + __ eor(a0, a0, tmp2); + __ eor(a5, a5, tmp2); + __ eor(a10, a10, tmp2); + __ eor(a15, a15, tmp2); + __ eor(a20, a20, tmp2); // d0(tmp2) + __ eor3(tmp3, a2, a7, a12); + __ eor3(tmp2, tmp3, a17, a22); // tmp2 = a2^a7^a12^a17^a22 = c2 + __ rax1(tmp3, tmp4, tmp2); // d1 + __ eor(a1, a1, tmp3); + __ eor(a6, a6, tmp3); + __ eor(a11, a11, tmp3); + __ eor(a16, a16, tmp3); + __ eor(a21, a21, tmp3); // d1(tmp3) + __ rax1(tmp3, tmp2, tmp0); // d3 + __ eor3(tmp2, a3, a8, a13); + __ eor3(tmp0, tmp2, a18, a23); // tmp0 = a3^a8^a13^a18^a23 = c3 + __ eor(a3, a3, tmp3); + __ eor(a8, a8, tmp3); + __ eor(a13, a13, tmp3); + __ eor(a18, a18, tmp3); + __ eor(a23, a23, tmp3); + __ rax1(tmp2, tmp1, tmp0); // d2 + __ eor(a2, a2, tmp2); + __ eor(a7, a7, tmp2); + __ eor(a12, a12, tmp2); + __ rax1(tmp0, tmp0, tmp4); // d4 + if (!can_use_fp || !can_use_r18) { + __ ldp(tmp3, tmp4, __ post(sp, 16)); + } + __ eor(a17, a17, tmp2); + __ eor(a22, a22, tmp2); + __ eor(a4, a4, tmp0); + __ eor(a9, a9, tmp0); + __ eor(a14, a14, tmp0); + __ eor(a19, a19, tmp0); + __ eor(a24, a24, tmp0); + } + + __ rol(tmp0, a10, 3); + __ rol(a10, a1, 1); + __ rol(a1, a6, 44); + __ rol(a6, a9, 20); + __ rol(a9, a22, 61); + __ rol(a22, a14, 39); + __ rol(a14, a20, 18); + __ rol(a20, a2, 62); + __ rol(a2, a12, 43); + __ rol(a12, a13, 25); + __ rol(a13, a19, 8) ; + __ rol(a19, a23, 56); + __ rol(a23, a15, 41); + __ rol(a15, a4, 27); + __ rol(a4, a24, 14); + __ rol(a24, a21, 2); + __ rol(a21, a8, 55); + __ rol(a8, a16, 45); + __ rol(a16, a5, 36); + __ rol(a5, a3, 28); + __ rol(a3, a18, 21); + __ rol(a18, a17, 15); + __ rol(a17, a11, 10); + __ rol(a11, a7, 6); + __ mov(a7, tmp0); + + bcax5(a0, a1, a2, a3, a4, tmp0, tmp1, tmp2); + bcax5(a5, a6, a7, a8, a9, tmp0, tmp1, tmp2); + bcax5(a10, a11, a12, a13, a14, tmp0, tmp1, tmp2); + bcax5(a15, a16, a17, a18, a19, tmp0, 
tmp1, tmp2);
+    bcax5(a20, a21, a22, a23, a24, tmp0, tmp1, tmp2);
+
+    __ ldr(tmp1, __ post(rc, 8));
+    __ eor(a0, a0, tmp1);
+
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0 - byte[] source+offset
+  //   c_rarg1 - byte[] SHA.state
+  //   c_rarg2 - int block_size
+  //   c_rarg3 - int offset
+  //   c_rarg4 - int limit
+  //
+  address generate_sha3_implCompress_gpr(StubGenStubId stub_id) {
+    bool multi_block;
+    switch (stub_id) {
+    case sha3_implCompress_id:
+      multi_block = false;
+      break;
+    case sha3_implCompressMB_id:
+      multi_block = true;
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+
+    static const uint64_t round_consts[24] = {
+      0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
+      0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
+      0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
+      0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
+      0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
+      0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
+      0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
+      0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
+    };
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, stub_id);
+    address start = __ pc();
+
+    Register buf = c_rarg0;
+    Register state = c_rarg1;
+    Register block_size = c_rarg2;
+    Register ofs = c_rarg3;
+    Register limit = c_rarg4;
+
+    // use r3..r17, r19..r28 to keep a0..a24.
+    // a0..a24 are respective locals from SHA3.java
+    Register a0 = r25,
+      a1 = r26,
+      a2 = r27,
+      a3 = r3,
+      a4 = r4,
+      a5 = r5,
+      a6 = r6,
+      a7 = r7,
+      a8 = rscratch1, // r8
+      a9 = rscratch2, // r9
+      a10 = r10,
+      a11 = r11,
+      a12 = r12,
+      a13 = r13,
+      a14 = r14,
+      a15 = r15,
+      a16 = r16,
+      a17 = r17,
+      a18 = r28,
+      a19 = r19,
+      a20 = r20,
+      a21 = r21,
+      a22 = r22,
+      a23 = r23,
+      a24 = r24;
+
+    Register tmp0 = block_size, tmp1 = buf, tmp2 = state, tmp3 = r30;
+
+    Label sha3_loop, rounds24_preloop, loop_body;
+    Label sha3_512_or_sha3_384, shake128;
+
+    bool can_use_r18 = false;
+#ifndef R18_RESERVED
+    can_use_r18 = true;
+#endif
+    bool can_use_fp = !PreserveFramePointer;
+
+    __ enter();
+
+    // save almost all yet unsaved gpr registers on stack
+    __ str(block_size, __ pre(sp, -128));
+    if (multi_block) {
+      __ stpw(ofs, limit, Address(sp, 8));
+    }
+    // 8 bytes at sp+16 will be used to keep buf
+    __ stp(r19, r20, Address(sp, 32));
+    __ stp(r21, r22, Address(sp, 48));
+    __ stp(r23, r24, Address(sp, 64));
+    __ stp(r25, r26, Address(sp, 80));
+    __ stp(r27, r28, Address(sp, 96));
+    if (can_use_r18 && can_use_fp) {
+      __ stp(r18_tls, state, Address(sp, 112));
+    } else {
+      __ str(state, Address(sp, 112));
+    }
+
+    // begin sha3 calculations: loading a0..a24 from state array
+    __ ldp(a0, a1, state);
+    __ ldp(a2, a3, Address(state, 16));
+    __ ldp(a4, a5, Address(state, 32));
+    __ ldp(a6, a7, Address(state, 48));
+    __ ldp(a8, a9, Address(state, 64));
+    __ ldp(a10, a11, Address(state, 80));
+    __ ldp(a12, a13, Address(state, 96));
+    __ ldp(a14, a15, Address(state, 112));
+    __ ldp(a16, a17, Address(state, 128));
+    __ ldp(a18, a19, Address(state, 144));
+    __ ldp(a20, a21, Address(state, 160));
+    __ ldp(a22, a23, Address(state, 176));
+    __ ldr(a24, Address(state, 192));
+
+    __ BIND(sha3_loop);
+
+    // load input
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a0, a0, tmp3);
+    __ eor(a1, a1, tmp2);
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a2, a2, tmp3);
+    __ eor(a3, a3, tmp2);
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a4, a4, tmp3);
+    __ eor(a5, a5, tmp2);
+    __ ldr(tmp3, __ post(buf, 8));
+    __ eor(a6, a6, tmp3);
+
+    // block_size == 72, SHA3-512; block_size == 104, SHA3-384
+    __ tbz(block_size, 7, sha3_512_or_sha3_384);
+
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a7, a7, tmp3);
+    __ eor(a8, a8, tmp2);
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a9, a9, tmp3);
+    __ eor(a10, a10, tmp2);
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a11, a11, tmp3);
+    __ eor(a12, a12, tmp2);
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a13, a13, tmp3);
+    __ eor(a14, a14, tmp2);
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a15, a15, tmp3);
+    __ eor(a16, a16, tmp2);
+
+    // block_size == 136, bit4 == 0 and bit5 == 0, SHA3-256 or SHAKE256
+    __ andw(tmp2, block_size, 48);
+    __ cbzw(tmp2, rounds24_preloop);
+    __ tbnz(block_size, 5, shake128);
+    // block_size == 144, bit5 == 0, SHA3-224
+    __ ldr(tmp3, __ post(buf, 8));
+    __ eor(a17, a17, tmp3);
+    __ b(rounds24_preloop);
+
+    __ BIND(shake128);
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a17, a17, tmp3);
+    __ eor(a18, a18, tmp2);
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a19, a19, tmp3);
+    __ eor(a20, a20, tmp2);
+    __ b(rounds24_preloop); // block_size == 168, SHAKE128
+
+    __ BIND(sha3_512_or_sha3_384);
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a7, a7, tmp3);
+    __ eor(a8, a8, tmp2);
+    __ tbz(block_size, 5, rounds24_preloop); // SHA3-512
+
+    // SHA3-384
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a9, a9, tmp3);
+    __ eor(a10, a10, tmp2);
+    __ ldp(tmp3, tmp2, __ post(buf, 16));
+    __ eor(a11, a11, tmp3);
+    __ eor(a12, a12, tmp2);
+
+    __ BIND(rounds24_preloop);
+    __ fmovs(v0, 24.0); // float loop counter,
+    __ fmovs(v1, 1.0);  // exact representation
+
+    __ str(buf, Address(sp, 16));
+    __ lea(tmp3, ExternalAddress((address) round_consts));
+
+    __ BIND(loop_body);
+    keccak_round_gpr(can_use_fp, can_use_r18, tmp3,
+        a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
+        a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24,
+        tmp0, tmp1, tmp2);
+    __ fsubs(v0, v0, v1);
+    __ fcmps(v0, 0.0);
+    __ br(__ NE, loop_body);
+
+    if (multi_block) {
+      __ ldrw(block_size, sp); // block_size
+      __ ldpw(tmp2, tmp1, Address(sp, 8)); // offset, limit
+      __ addw(tmp2, tmp2, block_size);
+      __ cmpw(tmp2, tmp1);
+      __ strw(tmp2, Address(sp, 8)); // store offset in case we're jumping
+      __ ldr(buf, Address(sp, 16)); // restore buf in case we're jumping
+      __ br(Assembler::LE, sha3_loop);
+      __ movw(c_rarg0, tmp2); // return offset
+    }
+    if (can_use_fp && can_use_r18) {
+      __ ldp(r18_tls, state, Address(sp, 112));
+    } else {
+      __ ldr(state, Address(sp, 112));
+    }
+    // save calculated sha3 state
+    __ stp(a0, a1, Address(state));
+    __ stp(a2, a3, Address(state, 16));
+    __ stp(a4, a5, Address(state, 32));
+    __ stp(a6, a7, Address(state, 48));
+    __ stp(a8, a9, Address(state, 64));
+    __ stp(a10, a11, Address(state, 80));
+    __ stp(a12, a13, Address(state, 96));
+    __ stp(a14, a15, Address(state, 112));
+    __ stp(a16, a17, Address(state, 128));
+    __ stp(a18, a19, Address(state, 144));
+    __ stp(a20, a21, Address(state, 160));
+    __ stp(a22, a23, Address(state, 176));
+    __ str(a24, Address(state, 192));
+
+    // restore required registers from stack
+    __ ldp(r19, r20, Address(sp, 32));
+    __ ldp(r21, r22, Address(sp, 48));
+    __ ldp(r23, r24, Address(sp, 64));
+    __ ldp(r25, r26, Address(sp, 80));
+    __ ldp(r27, r28, Address(sp, 96));
+    if (can_use_fp && can_use_r18) {
+      __ add(rfp, sp, 128); // leave() will copy rfp to sp below
+    } // else no need to recalculate rfp, since it wasn't changed
+
+    __ leave();
+
+    __ ret(lr);
+
+    return
start; + } + + /** + * Arguments: + * + * Inputs: + * c_rarg0 - int crc + * c_rarg1 - byte* buf + * c_rarg2 - int length + * + * Output: + * rax - int crc result + */ + address generate_updateBytesCRC32() { + assert(UseCRC32Intrinsics, "what are we doing here?"); + + __ align(CodeEntryAlignment); + StubGenStubId stub_id = StubGenStubId::updateBytesCRC32_id; + StubCodeMark mark(this, stub_id); + + address start = __ pc(); + + const Register crc = c_rarg0; // crc + const Register buf = c_rarg1; // source java byte array address + const Register len = c_rarg2; // length + const Register table0 = c_rarg3; // crc_table address + const Register table1 = c_rarg4; + const Register table2 = c_rarg5; + const Register table3 = c_rarg6; + const Register tmp3 = c_rarg7; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ kernel_crc32(crc, buf, len, + table0, table1, table2, table3, rscratch1, rscratch2, tmp3); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + + return start; + } + /** * Arguments: * @@ -8115,7 +8589,8 @@ class StubGenerator: public StubCodeGenerator { __ andr(rscratch2, cnt, vf - 1); __ bind(TAIL_SHORTCUT); __ adr(rscratch1, BR_BASE); - __ sub(rscratch1, rscratch1, rscratch2, ext::uxtw, 3); + // For Cortex-A53 offset is 4 because 2 nops are generated. + __ sub(rscratch1, rscratch1, rscratch2, ext::uxtw, VM_Version::supports_a53mac() ? 4 : 3); __ movw(rscratch2, 0x1f); __ br(rscratch1); @@ -8123,6 +8598,11 @@ class StubGenerator: public StubCodeGenerator { __ load(rscratch1, Address(__ post(ary, type2aelembytes(eltype))), eltype); __ maddw(result, result, rscratch2, rscratch1); + // maddw generates an extra nop for Cortex-A53 (see maddw definition in macroAssembler). + // Generate 2nd nop to have 4 instructions per iteration. + if (VM_Version::supports_a53mac()) { + __ nop(); + } } __ bind(BR_BASE); @@ -11172,79 +11652,6 @@ class StubGenerator: public StubCodeGenerator { // } }; - void generate_vector_math_stubs() { - // Get native vector math stub routine addresses - void* libsleef = nullptr; - char ebuf[1024]; - char dll_name[JVM_MAXPATHLEN]; - if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) { - libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf); - } - if (libsleef == nullptr) { - log_info(library)("Failed to load native vector math library, %s!", ebuf); - return; - } - // Method naming convention - // All the methods are named as _ - // Where: - // is the operation name, e.g. sin - // is optional to indicate float/double - // "f/d" for vector float/double operation - // is the number of elements in the vector - // "2/4" for neon, and "x" for sve - // is the precision level - // "u10/u05" represents 1.0/0.5 ULP error bounds - // We use "u10" for all operations by default - // But for those functions do not have u10 support, we use "u05" instead - // indicates neon/sve - // "sve/advsimd" for sve/neon implementations - // e.g. sinfx_u10sve is the method for computing vector float sin using SVE instructions - // cosd2_u10advsimd is the method for computing 2 elements vector double cos using NEON instructions - // - log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef)); - - // Math vector stubs implemented with SVE for scalable vector size. 
- if (UseSVE > 0) { - for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { - int vop = VectorSupport::VECTOR_OP_MATH_START + op; - // Skip "tanh" because there is performance regression - if (vop == VectorSupport::VECTOR_OP_TANH) { - continue; - } - - // The native library does not support u10 level of "hypot". - const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10"; - - snprintf(ebuf, sizeof(ebuf), "%sfx_%ssve", VectorSupport::mathname[op], ulf); - StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf); - - snprintf(ebuf, sizeof(ebuf), "%sdx_%ssve", VectorSupport::mathname[op], ulf); - StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf); - } - } - - // Math vector stubs implemented with NEON for 64/128 bits vector size. - for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { - int vop = VectorSupport::VECTOR_OP_MATH_START + op; - // Skip "tanh" because there is performance regression - if (vop == VectorSupport::VECTOR_OP_TANH) { - continue; - } - - // The native library does not support u10 level of "hypot". - const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10"; - - snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf); - StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libsleef, ebuf); - - snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf); - StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsleef, ebuf); - - snprintf(ebuf, sizeof(ebuf), "%sd2_%sadvsimd", VectorSupport::mathname[op], ulf); - StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsleef, ebuf); - } - } - // Initialization void generate_initial_stubs() { // Generate initial stubs and initializes the entry points @@ -11332,6 +11739,8 @@ class StubGenerator: public StubCodeGenerator { } #endif + StubRoutines::_unsafe_setmemory = generate_unsafe_setmemory(); + StubRoutines::aarch64::set_completed(); // Inidicate that arraycopy and zero_blocks stubs are generated } @@ -11398,12 +11807,10 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_montgomerySquare = g.generate_multiply(); } - generate_vector_math_stubs(); - #endif // COMPILER2 if (UseChaCha20Intrinsics) { - StubRoutines::_chacha20Block = generate_chacha20Block_qrpar(); + StubRoutines::_chacha20Block = generate_chacha20Block_blockpar(); } if (UseKyberIntrinsics) { @@ -11465,9 +11872,15 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(StubGenStubId::sha512_implCompressMB_id); } if (UseSHA3Intrinsics) { - StubRoutines::_sha3_implCompress = generate_sha3_implCompress(StubGenStubId::sha3_implCompress_id); + StubRoutines::_double_keccak = generate_double_keccak(); - StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(StubGenStubId::sha3_implCompressMB_id); + if (UseSIMDForSHA3Intrinsic) { + StubRoutines::_sha3_implCompress = generate_sha3_implCompress(StubGenStubId::sha3_implCompress_id); + StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(StubGenStubId::sha3_implCompressMB_id); + } else { + StubRoutines::_sha3_implCompress = generate_sha3_implCompress_gpr(StubGenStubId::sha3_implCompress_id); + StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress_gpr(StubGenStubId::sha3_implCompressMB_id); + } } if 
(UsePoly1305Intrinsics) { diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index 2db3b435abb..710970d1ea2 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -865,6 +865,10 @@ void TemplateInterpreterGenerator::lock_method() { // rcpool: cp cache // stack_pointer: previous sp void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + // Save ConstMethod* in r5_const_method for later use to avoid loading multiple times + Register r5_const_method = r5; + __ ldr(r5_const_method, Address(rmethod, Method::const_offset())); + // initialize fixed part of activation frame if (native_call) { __ sub(esp, sp, 14 * wordSize); @@ -875,8 +879,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ stp(zr, zr, Address(sp, 12 * wordSize)); } else { __ sub(esp, sp, 12 * wordSize); - __ ldr(rscratch1, Address(rmethod, Method::const_offset())); // get ConstMethod - __ add(rbcp, rscratch1, in_bytes(ConstMethod::codes_offset())); // get codebase + __ add(rbcp, r5_const_method, in_bytes(ConstMethod::codes_offset())); // get codebase __ mov(rscratch1, frame::interpreter_frame_initial_sp_offset); __ stp(rscratch1, rbcp, Address(__ pre(sp, -12 * wordSize))); } @@ -896,9 +899,10 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ stp(rfp, lr, Address(sp, 10 * wordSize)); __ lea(rfp, Address(sp, 10 * wordSize)); - __ ldr(rcpool, Address(rmethod, Method::const_offset())); - __ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset())); - __ ldr(rcpool, Address(rcpool, ConstantPool::cache_offset())); + // Save ConstantPool* in r11_constants for later use to avoid loading multiple times + Register r11_constants = r11; + __ ldr(r11_constants, Address(r5_const_method, ConstMethod::constants_offset())); + __ ldr(rcpool, Address(r11_constants, ConstantPool::cache_offset())); __ sub(rscratch1, rlocals, rfp); __ lsr(rscratch1, rscratch1, Interpreter::logStackElementSize); // rscratch1 = rlocals - fp(); // Store relativized rlocals, see frame::interpreter_frame_locals(). @@ -908,11 +912,12 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { // leave last_sp as null __ stp(zr, r19_sender_sp, Address(sp, 8 * wordSize)); - // Get mirror - __ load_mirror(r10, rmethod, r5, rscratch2); + // Get mirror. Resolve ConstantPool* -> InstanceKlass* -> Java mirror. + __ ldr(r10, Address(r11_constants, ConstantPool::pool_holder_offset())); + __ ldr(r10, Address(r10, in_bytes(Klass::java_mirror_offset()))); + __ resolve_oop_handle(r10, rscratch1, rscratch2); if (! native_call) { - __ ldr(rscratch1, Address(rmethod, Method::const_offset())); - __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ ldrh(rscratch1, Address(r5_const_method, ConstMethod::max_stack_offset())); __ add(rscratch1, rscratch1, MAX2(3, Method::extra_stack_entries())); __ sub(rscratch1, sp, rscratch1, ext::uxtw, 3); __ andr(rscratch1, rscratch1, -16); @@ -1593,6 +1598,30 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ bind(L); } + #if INCLUDE_JFR + __ enter_jfr_critical_section(); + + // This poll test is to uphold the invariant that a JFR sampled frame + // must not return to its caller without a prior safepoint poll check. 
+ // The earlier poll check in this routine is insufficient for this purpose + // because the thread has transitioned back to Java. + + Label slow_path; + Label fast_path; + __ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); + __ br(Assembler::AL, fast_path); + __ bind(slow_path); + __ push(dtos); + __ push(ltos); + __ set_last_Java_frame(esp, rfp, __ pc(), rscratch1); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread); + __ reset_last_Java_frame(true); + __ pop(ltos); + __ pop(dtos); + __ bind(fast_path); + +#endif // INCLUDE_JFR + // jvmti support // Note: This must happen _after_ handling/throwing any exceptions since // the exception handler code notifies the runtime of method exits @@ -1615,6 +1644,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // remove frame anchor __ leave(); + JFR_ONLY(__ leave_jfr_critical_section();) + // restore sender sp __ mov(sp, esp); @@ -1862,6 +1893,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { Interpreter::_remove_activation_preserving_args_entry = __ pc(); __ empty_expression_stack(); + __ restore_bcp(); // We could have returned from deoptimizing this frame, so restore rbcp. // Set the popframe_processing bit in pending_popframe_condition // indicating that we are currently handling popframe, so that // call_VMs that may happen later do not trigger new popframe diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp index 2cc9b39983a..fcfe153a9a5 100644 --- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp @@ -1144,6 +1144,7 @@ void TemplateTable::aastore() { // Get the value we will store __ ldr(r0, at_tos()); // Now store using the appropriate barrier + // Clobbers: r10, r11, r3 do_oop_store(_masm, element_address, r0, IS_ARRAY); __ b(done); @@ -1152,6 +1153,7 @@ void TemplateTable::aastore() { __ profile_null_seen(r2); // Store a null + // Clobbers: r10, r11, r3 do_oop_store(_masm, element_address, noreg, IS_ARRAY); // Pop stack arguments @@ -1890,6 +1892,8 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) __ mov(r19, r0); // save the nmethod + JFR_ONLY(__ enter_jfr_critical_section();) + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); // r0 is OSR buffer, move it to expected parameter location @@ -1901,6 +1905,9 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // remove frame anchor __ leave(); + + JFR_ONLY(__ leave_jfr_critical_section();) + // Ensure compiled code always sees stack at proper alignment __ andr(sp, esp, -16); @@ -2877,6 +2884,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr __ pop(atos); if (!is_static) pop_and_check_object(obj); // Store into the field + // Clobbers: r10, r11, r3 do_oop_store(_masm, field, r0, IN_HEAP); if (rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); @@ -3072,12 +3080,12 @@ void TemplateTable::fast_storefield(TosState state) // access constant pool cache __ load_field_entry(r2, r1); - // R1: field offset, R2: field holder, R3: flags - load_resolved_field_entry(r2, r2, noreg, r1, r3); + // R1: field offset, R2: field holder, R5: flags + load_resolved_field_entry(r2, r2, noreg, r1, r5); { Label notVolatile; - __ tbz(r3, ResolvedFieldEntry::is_volatile_shift, 
notVolatile); + __ tbz(r5, ResolvedFieldEntry::is_volatile_shift, notVolatile); __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); __ bind(notVolatile); } @@ -3093,6 +3101,7 @@ void TemplateTable::fast_storefield(TosState state) // access field switch (bytecode()) { case Bytecodes::_fast_aputfield: + // Clobbers: r10, r11, r3 do_oop_store(_masm, field, r0, IN_HEAP); break; case Bytecodes::_fast_lputfield: @@ -3125,7 +3134,7 @@ void TemplateTable::fast_storefield(TosState state) { Label notVolatile; - __ tbz(r3, ResolvedFieldEntry::is_volatile_shift, notVolatile); + __ tbz(r5, ResolvedFieldEntry::is_volatile_shift, notVolatile); __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); __ bind(notVolatile); } diff --git a/src/hotspot/cpu/aarch64/vmStructs_aarch64.hpp b/src/hotspot/cpu/aarch64/vmStructs_aarch64.hpp index bf9c965213c..2ec901f6a2e 100644 --- a/src/hotspot/cpu/aarch64/vmStructs_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vmStructs_aarch64.hpp @@ -35,8 +35,7 @@ static_field(VM_Version, _rop_protection, bool) \ static_field(VM_Version, _pac_mask, uintptr_t) -#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type) \ - declare_toplevel_type(VM_Version) +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type) #define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant) diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp index c555e393ca5..941cb254532 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -161,6 +161,9 @@ void VM_Version::initialize() { (_model == CPU_MODEL_AMPERE_1A || _model == CPU_MODEL_AMPERE_1B)) { FLAG_SET_DEFAULT(CodeEntryAlignment, 32); } + if (FLAG_IS_DEFAULT(AlwaysMergeDMB)) { + FLAG_SET_DEFAULT(AlwaysMergeDMB, false); + } } // ThunderX @@ -376,7 +379,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA3Intrinsics, true); } } - } else if (UseSHA3Intrinsics) { + } else if (UseSHA3Intrinsics && UseSIMDForSHA3Intrinsic) { warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); } @@ -639,6 +642,7 @@ void VM_Version::initialize() { if (_model2) { os::snprintf_checked(buf + buf_used_len, sizeof(buf) - buf_used_len, "(0x%03x)", _model2); } + size_t features_offset = strnlen(buf, sizeof(buf)); #define ADD_FEATURE_IF_SUPPORTED(id, name, bit) \ do { \ if (VM_Version::supports_##name()) strcat(buf, ", " #name); \ @@ -646,7 +650,11 @@ void VM_Version::initialize() { CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) #undef ADD_FEATURE_IF_SUPPORTED - _features_string = os::strdup(buf); + _cpu_info_string = os::strdup(buf); + + _features_string = extract_features_string(_cpu_info_string, + strnlen(_cpu_info_string, sizeof(buf)), + features_offset); } #if defined(LINUX) @@ -713,7 +721,7 @@ void VM_Version::initialize_cpu_information(void) { int desc_len = snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "AArch64 "); get_compatible_board(_cpu_desc + desc_len, CPU_DETAILED_DESC_BUF_SIZE - desc_len); desc_len = (int)strlen(_cpu_desc); - snprintf(_cpu_desc + desc_len, CPU_DETAILED_DESC_BUF_SIZE - desc_len, " %s", _features_string); + snprintf(_cpu_desc + desc_len, CPU_DETAILED_DESC_BUF_SIZE - desc_len, " %s", _cpu_info_string); _initialized = true; } diff --git 
a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index 04cf9c9c2a0..373f8da5405 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -125,6 +125,8 @@ enum Ampere_CPU_Model { decl(SHA2, sha256, 6) \ decl(CRC32, crc32, 7) \ decl(LSE, lse, 8) \ + decl(FPHP, fphp, 9) \ + decl(ASIMDHP, asimdhp, 10) \ decl(DCPOP, dcpop, 16) \ decl(SHA3, sha3, 17) \ decl(SHA512, sha512, 21) \ diff --git a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp index 11ea02621d7..714904ab3df 100644 --- a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp @@ -25,6 +25,7 @@ #include "asm/assembler.inline.hpp" #include "asm/macroAssembler.inline.hpp" +#include "code/aotCodeCache.hpp" #include "code/compiledIC.hpp" #include "code/vtableStubs.hpp" #include "interp_masm_aarch64.hpp" @@ -196,7 +197,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { temp_reg, temp_reg2, itable_index, L_no_such_interface); // Reduce "estimate" such that "padding" does not drop below 8. - const ptrdiff_t estimate = 144; + const ptrdiff_t estimate = AOTCodeCache::is_on_for_dump() ? 148 : 144; const ptrdiff_t codesize = __ pc() - start_pc; slop_delta = (int)(estimate - codesize); slop_bytes += slop_delta; diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index f3b97d23ad3..4a0b557968c 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -1238,11 +1238,11 @@ encode %{ enc_class save_last_PC %{ // preserve mark address mark = __ inst_mark(); - debug_only(int off0 = __ offset()); + DEBUG_ONLY(int off0 = __ offset()); int ret_addr_offset = as_MachCall()->ret_addr_offset(); __ adr(LR, mark + ret_addr_offset); __ str(LR, Address(Rthread, JavaThread::last_Java_pc_offset())); - debug_only(int off1 = __ offset()); + DEBUG_ONLY(int off1 = __ offset()); assert(off1 - off0 == 2 * Assembler::InstructionSize, "correct size prediction"); // restore mark __ set_inst_mark(mark); @@ -1251,11 +1251,11 @@ encode %{ enc_class preserve_SP %{ // preserve mark address mark = __ inst_mark(); - debug_only(int off0 = __ offset()); + DEBUG_ONLY(int off0 = __ offset()); // FP is preserved across all calls, even compiled calls. // Use it to preserve SP in places where the callee might change the SP. 
__ mov(Rmh_SP_save, SP); - debug_only(int off1 = __ offset()); + DEBUG_ONLY(int off1 = __ offset()); assert(off1 - off0 == 4, "correct size prediction"); // restore mark __ set_inst_mark(mark); diff --git a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp index bca6c7ca30c..5683bc59d5c 100644 --- a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp +++ b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp @@ -59,7 +59,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ call(Runtime1::entry_for(C1StubId::predicate_failed_trap_id), relocInfo::runtime_call_type); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); return; } // Pass the array index on stack because all registers must be preserved @@ -91,7 +91,7 @@ void PredicateFailedStub::emit_code(LIR_Assembler* ce) { __ call(Runtime1::entry_for(C1StubId::predicate_failed_trap_id), relocInfo::runtime_call_type); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } void DivByZeroStub::emit_code(LIR_Assembler* ce) { diff --git a/src/hotspot/cpu/arm/frame_arm.hpp b/src/hotspot/cpu/arm/frame_arm.hpp index dee005b8d75..dec27554a47 100644 --- a/src/hotspot/cpu/arm/frame_arm.hpp +++ b/src/hotspot/cpu/arm/frame_arm.hpp @@ -108,6 +108,9 @@ frame(intptr_t* sp, intptr_t* fp); + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc, CodeBlob* cb, bool allow_cb_null = false); + + void setup(address pc); void init(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); // accessors for the instance variables diff --git a/src/hotspot/cpu/arm/frame_arm.inline.hpp b/src/hotspot/cpu/arm/frame_arm.inline.hpp index 92a48f22f8c..4be190f0504 100644 --- a/src/hotspot/cpu/arm/frame_arm.inline.hpp +++ b/src/hotspot/cpu/arm/frame_arm.inline.hpp @@ -27,9 +27,58 @@ #include "code/codeCache.hpp" #include "code/vmreg.inline.hpp" +#include "runtime/sharedRuntime.hpp" // Inline functions for ARM frames: +#if INCLUDE_JFR + +// Static helper routines + +inline address frame::interpreter_bcp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast(fp[frame::interpreter_frame_bcp_offset]); +} + +inline address frame::interpreter_return_address(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast(fp[frame::return_addr_offset]); +} + +inline intptr_t* frame::interpreter_sender_sp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast (fp[frame::interpreter_frame_sender_sp_offset]); +} + +inline bool frame::is_interpreter_frame_setup_at(const intptr_t* fp, const void* sp) { + assert(fp != nullptr, "invariant"); + assert(sp != nullptr, "invariant"); + return sp <= fp + frame::interpreter_frame_initial_sp_offset; +} + +inline intptr_t* frame::sender_sp(intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return fp + frame::sender_sp_offset; +} + +inline intptr_t* frame::link(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast (fp[frame::link_offset]); +} + +inline address frame::return_address(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast(sp[-1]); +} + +inline intptr_t* frame::fp(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast (sp[-2]); +} + +#endif // INCLUDE_JFR + + // Constructors: inline frame::frame() { @@ -54,21 +103,30 @@ inline void 
frame::init(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, add _fp = fp; _pc = pc; assert(pc != nullptr, "no pc?"); + _on_heap = false; + _oop_map = nullptr; _cb = CodeCache::find_blob(pc); - adjust_unextended_sp(); DEBUG_ONLY(_frame_index = -1;) + setup(pc); +} + +inline void frame::setup(address pc) { + adjust_unextended_sp(); + address original_pc = get_deopt_original_pc(); if (original_pc != nullptr) { _pc = original_pc; - assert(_cb->as_nmethod()->insts_contains_inclusive(_pc), - "original PC must be in the main code section of the compiled method (or must be immediately following it)"); _deopt_state = is_deoptimized; + assert(_cb == nullptr || _cb->as_nmethod()->insts_contains_inclusive(_pc), + "original PC must be in the main code section of the compiled method (or must be immediately following it)"); } else { - _deopt_state = not_deoptimized; + if (_cb == SharedRuntime::deopt_blob()) { + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } } - _on_heap = false; - _oop_map = nullptr; } inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { @@ -85,6 +143,22 @@ inline frame::frame(intptr_t* sp, intptr_t* fp) { init(sp, sp, fp, pc); } +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc, CodeBlob* cb, bool allow_cb_null) { + intptr_t a = intptr_t(sp); + intptr_t b = intptr_t(fp); + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != nullptr, "no pc?"); + _cb = cb; + _oop_map = nullptr; + assert(_cb != nullptr || allow_cb_null, "pc: " INTPTR_FORMAT, p2i(pc)); + _on_heap = false; + DEBUG_ONLY(_frame_index = -1;) + + setup(pc); +} // Accessors diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp index 466dcc8fe66..049477cda76 100644 --- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp @@ -26,7 +26,6 @@ #include "gc/g1/g1BarrierSet.hpp" #include "gc/g1/g1BarrierSetAssembler.hpp" #include "gc/g1/g1BarrierSetRuntime.hpp" -#include "gc/g1/g1ThreadLocalData.hpp" #include "gc/g1/g1CardTable.hpp" #include "gc/g1/g1HeapRegion.hpp" #include "gc/g1/g1ThreadLocalData.hpp" diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetNMethod_arm.cpp b/src/hotspot/cpu/arm/gc/shared/barrierSetNMethod_arm.cpp index 224a499ff54..52d71ca65c2 100644 --- a/src/hotspot/cpu/arm/gc/shared/barrierSetNMethod_arm.cpp +++ b/src/hotspot/cpu/arm/gc/shared/barrierSetNMethod_arm.cpp @@ -29,8 +29,8 @@ #include "memory/resourceArea.hpp" #include "runtime/frame.inline.hpp" #include "runtime/javaThread.hpp" -#include "runtime/sharedRuntime.hpp" #include "runtime/registerMap.hpp" +#include "runtime/sharedRuntime.hpp" #include "utilities/align.hpp" #include "utilities/debug.hpp" @@ -72,7 +72,7 @@ void NativeNMethodBarrier::verify() const { static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { address barrier_address = nm->code_begin() + nm->frame_complete_offset() - entry_barrier_bytes; NativeNMethodBarrier* barrier = reinterpret_cast (barrier_address); - debug_only(barrier->verify()); + DEBUG_ONLY(barrier->verify()); return barrier; } diff --git a/src/hotspot/cpu/arm/runtime_arm.cpp b/src/hotspot/cpu/arm/runtime_arm.cpp index 20c1bc199d3..615a63eac19 100644 --- a/src/hotspot/cpu/arm/runtime_arm.cpp +++ b/src/hotspot/cpu/arm/runtime_arm.cpp @@ -54,6 +54,9 @@ UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() { // Measured 8/7/03 at 660 in 32bit debug build CodeBuffer 
buffer(name, 2000, 512); #endif + if (buffer.blob() == nullptr) { + return nullptr; + } // bypassed when code generation useless MacroAssembler* masm = new MacroAssembler(&buffer); const Register Rublock = R6; @@ -209,6 +212,9 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() { // Measured 8/7/03 at 256 in 32bit debug build const char* name = OptoRuntime::stub_name(OptoStubId::exception_id); CodeBuffer buffer(name, 600, 512); + if (buffer.blob() == nullptr) { + return nullptr; + } MacroAssembler* masm = new MacroAssembler(&buffer); int framesize_in_words = 2; // FP + LR diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp index 6dde82daaf9..8ba847e7e32 100644 --- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp +++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp @@ -612,12 +612,12 @@ static void gen_c2i_adapter(MacroAssembler *masm, } -AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterHandlerEntry* handler) { address i2c_entry = __ pc(); gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); @@ -637,7 +637,8 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm address c2i_entry = __ pc(); gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); + handler->set_entry_points(i2c_entry, c2i_entry, c2i_unverified_entry, nullptr); + return; } diff --git a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp index 30d88a4db91..db4a5c8625c 100644 --- a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp @@ -175,6 +175,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M case Interpreter::java_lang_math_fmaD: case Interpreter::java_lang_math_fmaF: case Interpreter::java_lang_math_tanh: + case Interpreter::java_lang_math_cbrt: // TODO: Implement intrinsic break; default: diff --git a/src/hotspot/cpu/arm/vm_version_arm_32.cpp b/src/hotspot/cpu/arm/vm_version_arm_32.cpp index 148786a55da..d0941936035 100644 --- a/src/hotspot/cpu/arm/vm_version_arm_32.cpp +++ b/src/hotspot/cpu/arm/vm_version_arm_32.cpp @@ -295,7 +295,7 @@ void VM_Version::initialize() { (has_multiprocessing_extensions() ? 
", mp_ext" : "")); // buf is started with ", " or is empty - _features_string = os::strdup(buf); + _cpu_info_string = os::strdup(buf); if (has_simd()) { if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { @@ -363,6 +363,6 @@ void VM_Version::initialize_cpu_information(void) { _no_of_threads = _no_of_cores; _no_of_sockets = _no_of_cores; snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "ARM%d", _arm_arch); - snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _features_string); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _cpu_info_string); _initialized = true; } diff --git a/src/hotspot/cpu/ppc/assembler_ppc.hpp b/src/hotspot/cpu/ppc/assembler_ppc.hpp index b38c4ac5bae..314517fd56a 100644 --- a/src/hotspot/cpu/ppc/assembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2024 SAP SE. All rights reserved. + * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -535,8 +535,12 @@ class Assembler : public AbstractAssembler { // Vector-Scalar (VSX) instruction support. LXV_OPCODE = (61u << OPCODE_SHIFT | 1u ), - LXVL_OPCODE = (31u << OPCODE_SHIFT | 269u << 1), STXV_OPCODE = (61u << OPCODE_SHIFT | 5u ), + LXVX_OPCODE = (31u << OPCODE_SHIFT | 4u << 7 | 12u << 1), + STXVX_OPCODE = (31u << OPCODE_SHIFT | 396u << 1), + LXVP_OPCODE = ( 6u << OPCODE_SHIFT ), + STXVP_OPCODE = ( 6u << OPCODE_SHIFT | 1u ), + LXVL_OPCODE = (31u << OPCODE_SHIFT | 269u << 1), STXVL_OPCODE = (31u << OPCODE_SHIFT | 397u << 1), LXVD2X_OPCODE = (31u << OPCODE_SHIFT | 844u << 1), STXVD2X_OPCODE = (31u << OPCODE_SHIFT | 972u << 1), @@ -587,6 +591,10 @@ class Assembler : public AbstractAssembler { XVRDPIC_OPCODE = (60u << OPCODE_SHIFT | 235u << 2), XVRDPIM_OPCODE = (60u << OPCODE_SHIFT | 249u << 2), XVRDPIP_OPCODE = (60u << OPCODE_SHIFT | 233u << 2), + XVMINSP_OPCODE = (60u << OPCODE_SHIFT | 200u << 3), + XVMINDP_OPCODE = (60u << OPCODE_SHIFT | 232u << 3), + XVMAXSP_OPCODE = (60u << OPCODE_SHIFT | 192u << 3), + XVMAXDP_OPCODE = (60u << OPCODE_SHIFT | 224u << 3), // Deliver A Random Number (introduced with POWER9) DARN_OPCODE = (31u << OPCODE_SHIFT | 755u << 1), @@ -695,15 +703,19 @@ class Assembler : public AbstractAssembler { VMAXSB_OPCODE = (4u << OPCODE_SHIFT | 258u ), VMAXSW_OPCODE = (4u << OPCODE_SHIFT | 386u ), VMAXSH_OPCODE = (4u << OPCODE_SHIFT | 322u ), + VMAXSD_OPCODE = (4u << OPCODE_SHIFT | 450u ), VMAXUB_OPCODE = (4u << OPCODE_SHIFT | 2u ), VMAXUW_OPCODE = (4u << OPCODE_SHIFT | 130u ), VMAXUH_OPCODE = (4u << OPCODE_SHIFT | 66u ), + VMAXUD_OPCODE = (4u << OPCODE_SHIFT | 194u ), VMINSB_OPCODE = (4u << OPCODE_SHIFT | 770u ), VMINSW_OPCODE = (4u << OPCODE_SHIFT | 898u ), VMINSH_OPCODE = (4u << OPCODE_SHIFT | 834u ), + VMINSD_OPCODE = (4u << OPCODE_SHIFT | 962u ), VMINUB_OPCODE = (4u << OPCODE_SHIFT | 514u ), VMINUW_OPCODE = (4u << OPCODE_SHIFT | 642u ), VMINUH_OPCODE = (4u << OPCODE_SHIFT | 578u ), + VMINUD_OPCODE = (4u << OPCODE_SHIFT | 706u ), VCMPEQUB_OPCODE= (4u << OPCODE_SHIFT | 6u ), VCMPEQUH_OPCODE= (4u << OPCODE_SHIFT | 70u ), @@ -1243,6 +1255,11 @@ class Assembler : public AbstractAssembler { static int vsdm( int x) { return opp_u_field(x, 23, 22); } static int vsrs_dq( int x) { return opp_u_field(x & 0x1F, 10, 6) | opp_u_field((x & 0x20) >> 5, 28, 28); } static 
int vsrt_dq( int x) { return vsrs_dq(x); } + static int vsrtp( int x) { + assert((x & 1) == 0, "must be even"); + return opp_u_field((x & 0x1F) >> 1, 9, 6) | opp_u_field((x & 0x20) >> 5, 10, 10); + } + static int vsrsp( int x) { return vsrtp(x); } static int vsra( VectorSRegister r) { return vsra(r->encoding());} static int vsrb( VectorSRegister r) { return vsrb(r->encoding());} @@ -1251,6 +1268,8 @@ class Assembler : public AbstractAssembler { static int vsrt( VectorSRegister r) { return vsrt(r->encoding());} static int vsrs_dq(VectorSRegister r) { return vsrs_dq(r->encoding());} static int vsrt_dq(VectorSRegister r) { return vsrt_dq(r->encoding());} + static int vsrtp( VectorSRegister r) { return vsrtp(r->encoding());} + static int vsrsp( VectorSRegister r) { return vsrsp(r->encoding());} static int vsplt_uim( int x) { return opp_u_field(x, 15, 12); } // for vsplt* instructions static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions @@ -1997,7 +2016,7 @@ class Assembler : public AbstractAssembler { // Wait instructions for polling. Attention: May result in SIGILL. inline void wait(); - inline void waitrsv(); // >=Power7 + inline void waitrsv(); // atomics inline void lbarx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8 @@ -2005,7 +2024,6 @@ class Assembler : public AbstractAssembler { inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0); inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0); inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8 - inline bool lxarx_hint_exclusive_access(); inline void lbarx( Register d, Register a, Register b, bool hint_exclusive_access = false); inline void lharx( Register d, Register a, Register b, bool hint_exclusive_access = false); inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false); @@ -2028,7 +2046,6 @@ class Assembler : public AbstractAssembler { inline void smt_prio_low(); inline void smt_prio_medium_low(); inline void smt_prio_medium(); - // >= Power7 inline void smt_yield(); inline void smt_mdoio(); inline void smt_mdoom(); @@ -2293,15 +2310,19 @@ class Assembler : public AbstractAssembler { inline void vmaxsb( VectorRegister d, VectorRegister a, VectorRegister b); inline void vmaxsw( VectorRegister d, VectorRegister a, VectorRegister b); inline void vmaxsh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxsd( VectorRegister d, VectorRegister a, VectorRegister b); inline void vmaxub( VectorRegister d, VectorRegister a, VectorRegister b); inline void vmaxuw( VectorRegister d, VectorRegister a, VectorRegister b); inline void vmaxuh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxud( VectorRegister d, VectorRegister a, VectorRegister b); inline void vminsb( VectorRegister d, VectorRegister a, VectorRegister b); inline void vminsw( VectorRegister d, VectorRegister a, VectorRegister b); inline void vminsh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminsd( VectorRegister d, VectorRegister a, VectorRegister b); inline void vminub( VectorRegister d, VectorRegister a, VectorRegister b); inline void vminuw( VectorRegister d, VectorRegister a, VectorRegister b); inline void vminuh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminud( VectorRegister d, VectorRegister a, VectorRegister b); inline void vcmpequb( VectorRegister d, VectorRegister a, VectorRegister b); inline void 
vcmpequh( VectorRegister d, VectorRegister a, VectorRegister b); inline void vcmpequw( VectorRegister d, VectorRegister a, VectorRegister b); @@ -2356,14 +2377,24 @@ class Assembler : public AbstractAssembler { inline void mfvscr( VectorRegister d); // Vector-Scalar (VSX) instructions. - inline void lxv( VectorSRegister d, int si16, Register a); - inline void stxv( VectorSRegister d, int si16, Register a); - inline void lxvl( VectorSRegister d, Register a, Register b); - inline void stxvl( VectorSRegister d, Register a, Register b); + // Power8 inline void lxvd2x( VectorSRegister d, Register a); inline void lxvd2x( VectorSRegister d, Register a, Register b); inline void stxvd2x( VectorSRegister d, Register a); inline void stxvd2x( VectorSRegister d, Register a, Register b); + + // Power9 + inline void lxv( VectorSRegister d, int si16, Register a); + inline void stxv( VectorSRegister d, int si16, Register a); + inline void lxvx( VectorSRegister d, Register a, Register b); + inline void stxvx( VectorSRegister d, Register a, Register b); + inline void lxvl( VectorSRegister d, Register a, Register b); + inline void stxvl( VectorSRegister d, Register a, Register b); + + // Power10 + inline void lxvp( VectorSRegister d, int si16, Register a); + inline void stxvp( VectorSRegister d, int si16, Register a); + inline void mtvrwz( VectorRegister d, Register a); inline void mfvrwz( Register a, VectorRegister d); inline void mtvrd( VectorRegister d, Register a); @@ -2416,6 +2447,12 @@ class Assembler : public AbstractAssembler { inline void xvrdpim( VectorSRegister d, VectorSRegister b); inline void xvrdpip( VectorSRegister d, VectorSRegister b); + // The following functions do not match exactly the Java.math semantics. + inline void xvminsp( VectorSRegister d, VectorSRegister a, VectorSRegister b); + inline void xvmindp( VectorSRegister d, VectorSRegister a, VectorSRegister b); + inline void xvmaxsp( VectorSRegister d, VectorSRegister a, VectorSRegister b); + inline void xvmaxdp( VectorSRegister d, VectorSRegister a, VectorSRegister b); + // VSX Extended Mnemonics inline void xxspltd( VectorSRegister d, VectorSRegister a, int x); inline void xxmrghd( VectorSRegister d, VectorSRegister a, VectorSRegister b); @@ -2480,6 +2517,9 @@ class Assembler : public AbstractAssembler { inline void std( Register d, int si16); inline void stdbrx( Register d, Register s2); + inline void lxvx( VectorSRegister d, Register b); + inline void stxvx(VectorSRegister d, Register b); + // PPC 2, section 3.2.1 Instruction Cache Instructions inline void icbi( Register s2); // PPC 2, section 3.2.2 Data Cache Instructions diff --git a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp index 4fb8c5c4198..792e5d6d5ad 100644 --- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp @@ -208,8 +208,7 @@ inline void Assembler::cmpldi(ConditionRegister crx, Register a, int ui16) { A inline void Assembler::cmplw( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 0, a, b); } inline void Assembler::cmpld( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 1, a, b); } -inline void Assembler::isel(Register d, Register a, Register b, int c) { guarantee(VM_Version::has_isel(), "opcode not supported on this hardware"); - emit_int32(ISEL_OPCODE | rt(d) | ra(a) | rb(b) | bc(c)); } +inline void Assembler::isel(Register d, Register a, Register b, int c) { emit_int32(ISEL_OPCODE | rt(d) | ra(a) | rb(b) | bc(c)); } // PPC 1, 
section 3.3.11, Fixed-Point Logical Instructions inline void Assembler::andi_( Register a, Register s, int ui16) { emit_int32(ANDI_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } @@ -701,12 +700,11 @@ inline void Assembler::lharx_unchecked(Register d, Register a, Register b, int e inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } -inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); } -inline void Assembler::lbarx( Register d, Register a, Register b, bool hint_exclusive_access) { lbarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } -inline void Assembler::lharx( Register d, Register a, Register b, bool hint_exclusive_access) { lharx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } -inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } -inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } -inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lbarx( Register d, Register a, Register b, bool hint_exclusive_access) { lbarx_unchecked(d, a, b, (hint_exclusive_access && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lharx( Register d, Register a, Register b, bool hint_exclusive_access) { lharx_unchecked(d, a, b, (hint_exclusive_access && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && UseExtendedLoadAndReserveInstructionsPPC64) ? 
1 : 0); } inline void Assembler::stbcx_(Register s, Register a, Register b) { emit_int32( STBCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } inline void Assembler::sthcx_(Register s, Register a, Register b) { emit_int32( STHCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } @@ -775,12 +773,9 @@ inline void Assembler::frim( FloatRegister d, FloatRegister b) { emit_int32( FRI //inline void Assembler::mffgpr( FloatRegister d, Register b) { emit_int32( MFFGPR_OPCODE | frt(d) | rb(b) | rc(0)); } //inline void Assembler::mftgpr( Register d, FloatRegister b) { emit_int32( MFTGPR_OPCODE | rt(d) | frb(b) | rc(0)); } // add cmpb and popcntb to detect ppc power version. -inline void Assembler::cmpb( Register a, Register s, Register b) { guarantee(VM_Version::has_cmpb(), "opcode not supported on this hardware"); - emit_int32( CMPB_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } -inline void Assembler::popcntb(Register a, Register s) { guarantee(VM_Version::has_popcntb(), "opcode not supported on this hardware"); - emit_int32( POPCNTB_OPCODE | rta(a) | rs(s)); }; -inline void Assembler::popcntw(Register a, Register s) { guarantee(VM_Version::has_popcntw(), "opcode not supported on this hardware"); - emit_int32( POPCNTW_OPCODE | rta(a) | rs(s)); }; +inline void Assembler::cmpb( Register a, Register s, Register b) { emit_int32( CMPB_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::popcntb(Register a, Register s) { emit_int32( POPCNTB_OPCODE | rta(a) | rs(s)); }; +inline void Assembler::popcntw(Register a, Register s) { emit_int32( POPCNTW_OPCODE | rta(a) | rs(s)); }; inline void Assembler::popcntd(Register a, Register s) { emit_int32( POPCNTD_OPCODE | rta(a) | rs(s)); }; inline void Assembler::fneg( FloatRegister d, FloatRegister b) { emit_int32( FNEG_OPCODE | frt(d) | frb(b) | rc(0)); } @@ -835,17 +830,14 @@ inline void Assembler::fctidz(FloatRegister d, FloatRegister b) { emit_int32( FC inline void Assembler::fctiw( FloatRegister d, FloatRegister b) { emit_int32( FCTIW_OPCODE | frt(d) | frb(b) | rc(0)); } inline void Assembler::fctiwz(FloatRegister d, FloatRegister b) { emit_int32( FCTIWZ_OPCODE | frt(d) | frb(b) | rc(0)); } inline void Assembler::fcfid( FloatRegister d, FloatRegister b) { emit_int32( FCFID_OPCODE | frt(d) | frb(b) | rc(0)); } -inline void Assembler::fcfids(FloatRegister d, FloatRegister b) { guarantee(VM_Version::has_fcfids(), "opcode not supported on this hardware"); - emit_int32( FCFIDS_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fcfids(FloatRegister d, FloatRegister b) { emit_int32( FCFIDS_OPCODE | frt(d) | frb(b) | rc(0)); } // PPC 1, section 4.6.7 Floating-Point Compare Instructions inline void Assembler::fcmpu( ConditionRegister crx, FloatRegister a, FloatRegister b) { emit_int32( FCMPU_OPCODE | bf(crx) | fra(a) | frb(b)); } // PPC 1, section 5.2.1 Floating-Point Arithmetic Instructions -inline void Assembler::fsqrt( FloatRegister d, FloatRegister b) { guarantee(VM_Version::has_fsqrt(), "opcode not supported on this hardware"); - emit_int32( FSQRT_OPCODE | frt(d) | frb(b) | rc(0)); } -inline void Assembler::fsqrts(FloatRegister d, FloatRegister b) { guarantee(VM_Version::has_fsqrts(), "opcode not supported on this hardware"); - emit_int32( FSQRTS_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fsqrt( FloatRegister d, FloatRegister b) { emit_int32( FSQRT_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void 
Assembler::fsqrts(FloatRegister d, FloatRegister b) { emit_int32( FSQRTS_OPCODE | frt(d) | frb(b) | rc(0)); } // Vector instructions for >= Power6. inline void Assembler::lvebx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEBX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } @@ -862,8 +854,12 @@ inline void Assembler::lvsl( VectorRegister d, Register s1, Register s2) { emit inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } // Vector-Scalar (VSX) instructions. -inline void Assembler::lxv( VectorSRegister d, int ui16, Register a) { assert(is_aligned(ui16, 16), "displacement must be a multiple of 16"); emit_int32( LXV_OPCODE | vsrt_dq(d) | ra0mem(a) | uimm(ui16, 16)); } -inline void Assembler::stxv( VectorSRegister d, int ui16, Register a) { assert(is_aligned(ui16, 16), "displacement must be a multiple of 16"); emit_int32( STXV_OPCODE | vsrs_dq(d) | ra0mem(a) | uimm(ui16, 16)); } +inline void Assembler::lxv( VectorSRegister d, int si16, Register a) { assert(is_aligned(si16, 16), "displacement must be a multiple of 16"); emit_int32( LXV_OPCODE | vsrt_dq(d) | ra0mem(a) | simm(si16, 16)); } +inline void Assembler::stxv( VectorSRegister d, int si16, Register a) { assert(is_aligned(si16, 16), "displacement must be a multiple of 16"); emit_int32( STXV_OPCODE | vsrs_dq(d) | ra0mem(a) | simm(si16, 16)); } +inline void Assembler::lxvx( VectorSRegister d, Register a, Register b) { emit_int32( LXVX_OPCODE | vsrt(d) | ra0mem(a) | rb(b)); } +inline void Assembler::stxvx( VectorSRegister d, Register a, Register b) { emit_int32( STXVX_OPCODE | vsrs(d) | ra0mem(a) | rb(b)); } +inline void Assembler::lxvp( VectorSRegister d, int si16, Register a) { assert(is_aligned(si16, 16), "displacement must be a multiple of 16"); emit_int32( LXVP_OPCODE | vsrtp(d) | ra0mem(a) | simm(si16, 16)); } +inline void Assembler::stxvp( VectorSRegister d, int si16, Register a) { assert(is_aligned(si16, 16), "displacement must be a multiple of 16"); emit_int32( STXVP_OPCODE | vsrsp(d) | ra0mem(a) | simm(si16, 16)); } inline void Assembler::lxvl( VectorSRegister d, Register s1, Register b) { emit_int32( LXVL_OPCODE | vsrt(d) | ra0mem(s1) | rb(b)); } inline void Assembler::stxvl( VectorSRegister d, Register s1, Register b) { emit_int32( STXVL_OPCODE | vsrt(d) | ra0mem(s1) | rb(b)); } inline void Assembler::lxvd2x( VectorSRegister d, Register s1) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); } @@ -912,6 +908,11 @@ inline void Assembler::xvrdpic( VectorSRegister d, VectorSRegister b) inline void Assembler::xvrdpim( VectorSRegister d, VectorSRegister b) { emit_int32( XVRDPIM_OPCODE | vsrt(d) | vsrb(b)); } inline void Assembler::xvrdpip( VectorSRegister d, VectorSRegister b) { emit_int32( XVRDPIP_OPCODE | vsrt(d) | vsrb(b)); } +inline void Assembler::xvminsp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMINSP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); } +inline void Assembler::xvmindp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMINDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); } +inline void Assembler::xvmaxsp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMAXSP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); } +inline void Assembler::xvmaxdp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMAXDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); } + inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | 
ra(a)); } inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); } inline void Assembler::mtvrwz( VectorRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); } @@ -1026,15 +1027,19 @@ inline void Assembler::vavguh( VectorRegister d, VectorRegister a, VectorRegist inline void Assembler::vmaxsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSB_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vmaxsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSW_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vmaxsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxsd( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSD_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vmaxub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vmaxuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vmaxuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxud( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUD_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vminsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSB_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vminsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSW_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vminsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminsd( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSD_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vminub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vminuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vminuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminud( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUD_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vcmpequb(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } inline void Assembler::vcmpequh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } inline void Assembler::vcmpequw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } @@ -1053,8 +1058,7 @@ inline void Assembler::vcmpgtsw_(VectorRegister d,VectorRegister a, VectorRegist inline void Assembler::vcmpgtub_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } inline void Assembler::vcmpgtuh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUH_OPCODE | vrt(d) | vra(a) | vrb(b) | 
vcmp_rc(1)); } inline void Assembler::vcmpgtuw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } -inline void Assembler::vand( VectorRegister d, VectorRegister a, VectorRegister b) { guarantee(VM_Version::has_vand(), "opcode not supported on this hardware"); - emit_int32( VAND_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vand( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAND_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vandc( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VANDC_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vnor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VNOR_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VOR_OPCODE | vrt(d) | vra(a) | vrb(b)); } @@ -1140,6 +1144,10 @@ inline void Assembler::std( Register d, int si16 ) { emit_int32( STD_OPCODE inline void Assembler::stdx( Register d, Register s2) { emit_int32( STDX_OPCODE | rs(d) | rb(s2));} inline void Assembler::stdbrx(Register d, Register s2){ emit_int32(STDBRX_OPCODE| rs(d) | rb(s2));} +inline void Assembler::lxvx( VectorSRegister d, Register b) { emit_int32( LXVX_OPCODE | vsrt(d) | rb(b)); } +inline void Assembler::stxvx(VectorSRegister d, Register b) { emit_int32( STXVX_OPCODE | vsrs(d) | rb(b)); } + + // ra0 version inline void Assembler::icbi( Register s2) { emit_int32( ICBI_OPCODE | rb(s2) ); } //inline void Assembler::dcba( Register s2) { emit_int32( DCBA_OPCODE | rb(s2) ); } @@ -1158,11 +1166,11 @@ inline void Assembler::lharx_unchecked(Register d, Register b, int eh1) inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } -inline void Assembler::lbarx( Register d, Register b, bool hint_exclusive_access){ lbarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } -inline void Assembler::lharx( Register d, Register b, bool hint_exclusive_access){ lharx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } -inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } -inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } -inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lbarx( Register d, Register b, bool hint_exclusive_access){ lbarx_unchecked(d, b, (hint_exclusive_access && UseExtendedLoadAndReserveInstructionsPPC64) ? 
1 : 0); } +inline void Assembler::lharx( Register d, Register b, bool hint_exclusive_access){ lharx_unchecked(d, b, (hint_exclusive_access && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::stbcx_(Register s, Register b) { emit_int32( STBCX_OPCODE | rs(s) | rb(b) | rc(1)); } inline void Assembler::sthcx_(Register s, Register b) { emit_int32( STHCX_OPCODE | rs(s) | rb(b) | rc(1)); } inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); } diff --git a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp index d4f5faa29a8..b1cdf38daf3 100644 --- a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp @@ -41,25 +41,8 @@ void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { if (UseSIGTRAP) { DEBUG_ONLY( __ should_not_reach_here("C1SafepointPollStub::emit_code"); ) } else { - assert(SharedRuntime::polling_page_return_handler_blob() != nullptr, - "polling page return stub not created yet"); - address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); - __ bind(_entry); - // Using pc relative address computation. - { - Label next_pc; - __ bl(next_pc); - __ bind(next_pc); - } - int current_offset = __ offset(); - __ mflr(R12); - __ add_const_optimized(R12, R12, safepoint_offset() - current_offset); - __ std(R12, in_bytes(JavaThread::saved_exception_pc_offset()), R16_thread); - - __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); - __ mtctr(R0); - __ bctr(); + __ jump_to_polling_page_return_handler_blob(safepoint_offset()); } } @@ -74,7 +57,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ bctrl(); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ illtrap()); + DEBUG_ONLY(__ illtrap()); return; } @@ -98,7 +81,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ bctrl(); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ illtrap()); + DEBUG_ONLY(__ illtrap()); } @@ -115,7 +98,7 @@ void PredicateFailedStub::emit_code(LIR_Assembler* ce) { __ bctrl(); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ illtrap()); + DEBUG_ONLY(__ illtrap()); } @@ -156,7 +139,7 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) { __ bctrl(); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ illtrap()); + DEBUG_ONLY(__ illtrap()); } @@ -179,7 +162,7 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { __ bctrl(); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ illtrap()); + DEBUG_ONLY(__ illtrap()); } @@ -193,7 +176,7 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { __ mtctr(R0); __ bctrl(); ce->add_call_info_here(_info); - debug_only( __ illtrap(); ) + DEBUG_ONLY( __ illtrap(); ) } @@ -441,7 +424,7 @@ void DeoptimizeStub::emit_code(LIR_Assembler* ce) { __ load_const_optimized(R0, _trap_request); // Pass trap 
request in R0. __ bctrl(); ce->add_call_info_here(_info); - debug_only(__ illtrap()); + DEBUG_ONLY(__ illtrap()); } diff --git a/src/hotspot/cpu/ppc/c1_FrameMap_ppc.cpp b/src/hotspot/cpu/ppc/c1_FrameMap_ppc.cpp index e4684613e25..8ce324a570b 100644 --- a/src/hotspot/cpu/ppc/c1_FrameMap_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_FrameMap_ppc.cpp @@ -189,7 +189,7 @@ LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; FloatRegister FrameMap::nr2floatreg (int rnr) { assert(_init_done, "tables not initialized"); - debug_only(fpu_range_check(rnr);) + DEBUG_ONLY(fpu_range_check(rnr);) return _fpu_regs[rnr]; } diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp index c678f409c49..7dfde40364e 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -538,48 +538,32 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { __ extsh(dst->as_register(), src->as_register()); break; } - case Bytecodes::_i2d: + case Bytecodes::_i2d:{ + FloatRegister rdst = dst->as_double_reg(); + // move src to dst register + __ mtfprwa(rdst, src->as_register()); + __ fcfid(rdst, rdst); + break; + } case Bytecodes::_l2d: { - bool src_in_memory = !VM_Version::has_mtfprd(); FloatRegister rdst = dst->as_double_reg(); - FloatRegister rsrc; - if (src_in_memory) { - rsrc = src->as_double_reg(); // via mem - } else { - // move src to dst register - if (code == Bytecodes::_i2d) { - __ mtfprwa(rdst, src->as_register()); - } else { - __ mtfprd(rdst, src->as_register_lo()); - } - rsrc = rdst; - } - __ fcfid(rdst, rsrc); + // move src to dst register + __ mtfprd(rdst, src->as_register_lo()); + __ fcfid(rdst, rdst); + break; + } + case Bytecodes::_i2f:{ + FloatRegister rdst = dst->as_float_reg(); + // move src to dst register + __ mtfprwa(rdst, src->as_register()); + __ fcfids(rdst, rdst); break; } - case Bytecodes::_i2f: case Bytecodes::_l2f: { - bool src_in_memory = !VM_Version::has_mtfprd(); FloatRegister rdst = dst->as_float_reg(); - FloatRegister rsrc; - if (src_in_memory) { - rsrc = src->as_double_reg(); // via mem - } else { - // move src to dst register - if (code == Bytecodes::_i2f) { - __ mtfprwa(rdst, src->as_register()); - } else { - __ mtfprd(rdst, src->as_register_lo()); - } - rsrc = rdst; - } - if (VM_Version::has_fcfids()) { - __ fcfids(rdst, rsrc); - } else { - assert(code == Bytecodes::_i2f, "fcfid+frsp needs fixup code to avoid rounding incompatibility"); - __ fcfid(rdst, rsrc); - __ frsp(rdst, rdst); - } + // move src to dst register + __ mtfprd(rdst, src->as_register_lo()); + __ fcfids(rdst, rdst); break; } case Bytecodes::_f2d: { @@ -592,49 +576,27 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { } case Bytecodes::_d2i: case Bytecodes::_f2i: { - bool dst_in_memory = !VM_Version::has_mtfprd(); FloatRegister rsrc = (code == Bytecodes::_d2i) ? src->as_double_reg() : src->as_float_reg(); - Address addr = dst_in_memory ? frame_map()->address_for_slot(dst->double_stack_ix()) : Address(); Label L; // Result must be 0 if value is NaN; test by comparing value to itself. 
__ fcmpu(CR0, rsrc, rsrc); - if (dst_in_memory) { - __ li(R0, 0); // 0 in case of NAN - __ std(R0, addr); - } else { - __ li(dst->as_register(), 0); - } + __ li(dst->as_register(), 0); __ bso(CR0, L); __ fctiwz(rsrc, rsrc); // USE_KILL - if (dst_in_memory) { - __ stfd(rsrc, addr.disp(), addr.base()); - } else { - __ mffprd(dst->as_register(), rsrc); - } + __ mffprd(dst->as_register(), rsrc); __ bind(L); break; } case Bytecodes::_d2l: case Bytecodes::_f2l: { - bool dst_in_memory = !VM_Version::has_mtfprd(); FloatRegister rsrc = (code == Bytecodes::_d2l) ? src->as_double_reg() : src->as_float_reg(); - Address addr = dst_in_memory ? frame_map()->address_for_slot(dst->double_stack_ix()) : Address(); Label L; // Result must be 0 if value is NaN; test by comparing value to itself. __ fcmpu(CR0, rsrc, rsrc); - if (dst_in_memory) { - __ li(R0, 0); // 0 in case of NAN - __ std(R0, addr); - } else { - __ li(dst->as_register_lo(), 0); - } + __ li(dst->as_register_lo(), 0); __ bso(CR0, L); __ fctidz(rsrc, rsrc); // USE_KILL - if (dst_in_memory) { - __ stfd(rsrc, addr.disp(), addr.base()); - } else { - __ mffprd(dst->as_register_lo(), rsrc); - } + __ mffprd(dst->as_register_lo(), rsrc); __ bind(L); break; } @@ -1581,8 +1543,7 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L default: ShouldNotReachHere(); } - // Try to use isel on >=Power7. - if (VM_Version::has_isel() && result->is_cpu_register()) { + if (result->is_cpu_register()) { bool o1_is_reg = opr1->is_cpu_register(), o2_is_reg = opr2->is_cpu_register(); const Register result_reg = result->is_single_cpu() ? result->as_register() : result->as_register_lo(); diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp index b9c8ced8ef1..815e5c83a1b 100644 --- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp @@ -714,14 +714,12 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { } case vmIntrinsics::_dsqrt: case vmIntrinsics::_dsqrt_strict: { - if (VM_Version::has_fsqrt()) { - assert(x->number_of_arguments() == 1, "wrong type"); - LIRItem value(x->argument_at(0), this); - value.load_item(); - LIR_Opr dst = rlock_result(x); - __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); - break; - } // else fallthru + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; } case vmIntrinsics::_dsin: // fall through case vmIntrinsics::_dcos: // fall through @@ -733,10 +731,6 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { address runtime_entry = nullptr; switch (x->id()) { - case vmIntrinsics::_dsqrt: - case vmIntrinsics::_dsqrt_strict: - runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); - break; case vmIntrinsics::_dsin: runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); break; @@ -819,78 +813,6 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) { // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f // _i2b, _i2c, _i2s void LIRGenerator::do_Convert(Convert* x) { - if (!VM_Version::has_mtfprd()) { - switch (x->op()) { - - // int -> float: force spill - case Bytecodes::_l2f: { - if (!VM_Version::has_fcfids()) { // fcfids is >= Power7 only - // fcfid+frsp needs fixup code to avoid rounding incompatibility. 
- address entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2f); - LIR_Opr result = call_runtime(x->value(), entry, x->type(), nullptr); - set_result(x, result); - return; - } // else fallthru - } - case Bytecodes::_l2d: { - LIRItem value(x->value(), this); - LIR_Opr reg = rlock_result(x); - value.load_item(); - LIR_Opr tmp = force_to_spill(value.result(), T_DOUBLE); - __ convert(x->op(), tmp, reg); - return; - } - case Bytecodes::_i2f: - case Bytecodes::_i2d: { - LIRItem value(x->value(), this); - LIR_Opr reg = rlock_result(x); - value.load_item(); - // Convert i2l first. - LIR_Opr tmp1 = new_register(T_LONG); - __ convert(Bytecodes::_i2l, value.result(), tmp1); - LIR_Opr tmp2 = force_to_spill(tmp1, T_DOUBLE); - __ convert(x->op(), tmp2, reg); - return; - } - - // float -> int: result will be stored - case Bytecodes::_f2l: - case Bytecodes::_d2l: { - LIRItem value(x->value(), this); - LIR_Opr reg = rlock_result(x); - value.set_destroys_register(); // USE_KILL - value.load_item(); - set_vreg_flag(reg, must_start_in_memory); - __ convert(x->op(), value.result(), reg); - return; - } - case Bytecodes::_f2i: - case Bytecodes::_d2i: { - LIRItem value(x->value(), this); - LIR_Opr reg = rlock_result(x); - value.set_destroys_register(); // USE_KILL - value.load_item(); - // Convert l2i afterwards. - LIR_Opr tmp1 = new_register(T_LONG); - set_vreg_flag(tmp1, must_start_in_memory); - __ convert(x->op(), value.result(), tmp1); - __ convert(Bytecodes::_l2i, tmp1, reg); - return; - } - - // Within same category: just register conversions. - case Bytecodes::_i2b: - case Bytecodes::_i2c: - case Bytecodes::_i2s: - case Bytecodes::_i2l: - case Bytecodes::_l2i: - case Bytecodes::_f2d: - case Bytecodes::_d2f: - break; - - default: ShouldNotReachHere(); - } - } // Register conversion. LIRItem value(x->value(), this); diff --git a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp index ac9c5984de0..77d3653aefd 100644 --- a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp @@ -83,16 +83,17 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox // Save object being locked into the BasicObjectLock... std(Roop, in_bytes(BasicObjectLock::obj_offset()), Rbox); - if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(Rscratch, Roop); - lbz(Rscratch, in_bytes(Klass::misc_flags_offset()), Rscratch); - testbitdi(CR0, R0, Rscratch, exact_log2(KlassFlags::_misc_is_value_based_class)); - bne(CR0, slow_int); - } - if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(Rbox, Roop, Rmark, Rscratch, slow_int); } else if (LockingMode == LM_LEGACY) { + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(Rscratch, Roop); + lbz(Rscratch, in_bytes(Klass::misc_flags_offset()), Rscratch); + testbitdi(CR0, R0, Rscratch, exact_log2(KlassFlags::_misc_is_value_based_class)); + bne(CR0, slow_int); + } + // ... and mark it unlocked. 
ori(Rmark, Rmark, markWord::unlocked_value); diff --git a/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp b/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp index 79b129c08ae..f1afbdd3a1d 100644 --- a/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp @@ -162,8 +162,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { Register r = as_Register(i); if (FrameMap::reg_needs_save(r)) { int sp_offset = cpu_reg_save_offsets[i]; - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset>>2), r->as_VMReg()); - oop_map->set_callee_saved(VMRegImpl::stack2reg((sp_offset>>2) + 1), r->as_VMReg()->next()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset >> 2), r->as_VMReg()); } } @@ -171,8 +170,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { for (i = 0; i < FrameMap::nof_fpu_regs; i++) { FloatRegister r = as_FloatRegister(i); int sp_offset = fpu_reg_save_offsets[i]; - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset>>2), r->as_VMReg()); - oop_map->set_callee_saved(VMRegImpl::stack2reg((sp_offset>>2) + 1), r->as_VMReg()->next()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset >> 2), r->as_VMReg()); } } diff --git a/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp index 484e0fd0196..632ad87cd4c 100644 --- a/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp +++ b/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, 2022, SAP SE. All rights reserved. + * Copyright (c) 2021, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,26 +34,8 @@ int C2SafepointPollStub::max_size() const { } void C2SafepointPollStub::emit(C2_MacroAssembler& masm) { - assert(SharedRuntime::polling_page_return_handler_blob() != nullptr, - "polling page return stub not created yet"); - address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); - __ bind(entry()); - // Using pc relative address computation. - { - Label next_pc; - __ bl(next_pc); - __ bind(next_pc); - } - int current_offset = __ offset(); // Code size should not depend on offset: see _stub_size computation in output.cpp - __ load_const32(R12, _safepoint_offset - current_offset); - __ mflr(R0); - __ add(R12, R12, R0); - __ std(R12, in_bytes(JavaThread::saved_exception_pc_offset()), R16_thread); - - __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); - __ mtctr(R0); - __ bctr(); + __ jump_to_polling_page_return_handler_blob(_safepoint_offset, true); } #undef __ diff --git a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp index cddf08eceb1..eab3df03fde 100644 --- a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp @@ -234,14 +234,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, // Save diff in case we need it for a tie-breaker. 
subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2 // if (diff > 0) { cnt1 = cnt2; } - if (VM_Version::has_isel()) { - isel(cnt1, CR0, Assembler::greater, /*invert*/ false, cnt2); - } else { - Label Lskip; - blt(CR0, Lskip); - mr(cnt1, cnt2); - bind(Lskip); - } + isel(cnt1, CR0, Assembler::greater, /*invert*/ false, cnt2); // Rename registers Register chr1 = result; @@ -626,3 +619,48 @@ void C2_MacroAssembler::count_positives(Register src, Register cnt, Register res bind(Ldone); subf(result, src, result); // Result is offset from src. } + +void C2_MacroAssembler::reduceI(int opcode, Register dst, Register iSrc, VectorRegister vSrc, + VectorRegister vTmp1, VectorRegister vTmp2) { + + auto fn_vec_op = [this](int opcode, const VectorRegister &dst, const VectorRegister &a, const VectorRegister &b) { + switch(opcode) { + case Op_AddReductionVI: vadduwm(dst, a, b); break; + case Op_MulReductionVI: vmuluwm(dst, a , b); break; + case Op_AndReductionV: vand(dst, a, b); break; + case Op_OrReductionV: vor(dst, a, b); break; + case Op_XorReductionV: vxor(dst, a, b); break; + case Op_MinReductionV: vminsw(dst, a, b); break; + case Op_MaxReductionV: vmaxsw(dst, a, b); break; + default: assert(false, "wrong opcode"); + } + }; + + auto fn_scalar_op = [this](int opcode, const Register &dst, const Register &a, const Register &b) { + switch (opcode) { + case Op_AddReductionVI: add(dst, a, b); break; + case Op_MulReductionVI: mullw(dst, a, b); break; + case Op_AndReductionV: andr(dst, a, b); break; + case Op_OrReductionV: orr(dst, a, b); break; + case Op_XorReductionV: xorr(dst, a, b); break; + case Op_MinReductionV: + cmpw(CR0, a, b); + isel(dst, CR0, Assembler::less, /*invert*/false, a, b); + break; + case Op_MaxReductionV: + cmpw(CR0, a, b); + isel(dst, CR0, Assembler::greater, /*invert*/false, a, b); + break; + default: assert(false, "wrong opcode"); + } + }; + + // vSrc = [i0,i1,i2,i3] + vsldoi(vTmp1, vSrc, vSrc, 8); // vTmp1 <- [i2,i3,i0,i1] + fn_vec_op(opcode, vTmp2, vSrc, vTmp1); // vTmp2 <- [op(i0,i2), op(i1,i3), op(i2,i0), op(i3,i1)] + vsldoi(vTmp1, vTmp2, vTmp2, 4); // vTmp1 <- [op(i1,i3), op(i2,i0), op(i3,i1), op(i0,i2)] + fn_vec_op(opcode, vTmp1, vTmp1, vTmp2); // vTmp1 <- [op(i0,i1,i2,i3), op(i0,i1,i2,i3), op(i0,i1,i2,i3), op(i0,i1,i2,i3)] + mfvsrwz(R0, vTmp1.to_vsr()); // R0 <- op(i0,i1,i2,i3) + fn_scalar_op(opcode, dst, iSrc, R0); // dst <- op(iSrc, R0) +} + diff --git a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp index 345d5a6350d..16b6d1935ba 100644 --- a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp @@ -73,4 +73,6 @@ void count_positives(Register src, Register cnt, Register result, Register tmp1, Register tmp2); + void reduceI(int opcode, Register dst, Register iSrc, VectorRegister vSrc, VectorRegister vTmp1, VectorRegister vTmp2); + #endif // CPU_PPC_C2_MACROASSEMBLER_PPC_HPP diff --git a/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp b/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp index 4d98b763078..c74cd3781a2 100644 --- a/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp +++ b/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2023, SAP SE. All rights reserved. + * Copyright (c) 2020, 2025, SAP SE. All rights reserved. * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -35,16 +35,6 @@ #define __ masm-> -bool ABIDescriptor::is_volatile_reg(Register reg) const { - return _integer_argument_registers.contains(reg) - || _integer_additional_volatile_registers.contains(reg); -} - -bool ABIDescriptor::is_volatile_reg(FloatRegister reg) const { - return _float_argument_registers.contains(reg) - || _float_additional_volatile_registers.contains(reg); -} - bool ForeignGlobals::is_foreign_linker_supported() { return true; } @@ -62,10 +52,6 @@ const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) { parse_register_array(outputStorage, StorageType::INTEGER, abi._integer_return_registers, as_Register); parse_register_array(outputStorage, StorageType::FLOAT, abi._float_return_registers, as_FloatRegister); - objArrayOop volatileStorage = jdk_internal_foreign_abi_ABIDescriptor::volatileStorage(abi_oop); - parse_register_array(volatileStorage, StorageType::INTEGER, abi._integer_additional_volatile_registers, as_Register); - parse_register_array(volatileStorage, StorageType::FLOAT, abi._float_additional_volatile_registers, as_FloatRegister); - abi._stack_alignment_bytes = jdk_internal_foreign_abi_ABIDescriptor::stackAlignment(abi_oop); abi._shadow_space_bytes = jdk_internal_foreign_abi_ABIDescriptor::shadowSpace(abi_oop); @@ -126,12 +112,7 @@ static void move_reg64(MacroAssembler* masm, int out_stk_bias, __ stw(as_Register(from_reg), -8, R1_SP); __ lfs(as_FloatRegister(to_reg), -8, R1_SP); // convert to double precision format } else { - if (VM_Version::has_mtfprd()) { - __ mtfprd(as_FloatRegister(to_reg), as_Register(from_reg)); - } else { - __ std(as_Register(from_reg), -8, R1_SP); - __ lfd(as_FloatRegister(to_reg), -8, R1_SP); - } + __ mtfprd(as_FloatRegister(to_reg), as_Register(from_reg)); } break; case StorageType::STACK: @@ -164,12 +145,7 @@ static void move_float(MacroAssembler* masm, int out_stk_bias, __ stfs(as_FloatRegister(from_reg), -8, R1_SP); // convert to single precision format __ lwa(as_Register(to_reg), -8, R1_SP); } else { - if (VM_Version::has_mtfprd()) { - __ mffprd(as_Register(to_reg), as_FloatRegister(from_reg)); - } else { - __ stfd(as_FloatRegister(from_reg), -8, R1_SP); - __ ld(as_Register(to_reg), -8, R1_SP); - } + __ mffprd(as_Register(to_reg), as_FloatRegister(from_reg)); } break; case StorageType::FLOAT: diff --git a/src/hotspot/cpu/ppc/foreignGlobals_ppc.hpp b/src/hotspot/cpu/ppc/foreignGlobals_ppc.hpp index baccdf2c9bb..b25ee28f192 100644 --- a/src/hotspot/cpu/ppc/foreignGlobals_ppc.hpp +++ b/src/hotspot/cpu/ppc/foreignGlobals_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2023 SAP SE. All rights reserved. + * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,9 +34,6 @@ struct ABIDescriptor { GrowableArray<FloatRegister> _float_argument_registers; GrowableArray<FloatRegister> _float_return_registers; - GrowableArray<Register> _integer_additional_volatile_registers; - GrowableArray<FloatRegister> _float_additional_volatile_registers; - int32_t _stack_alignment_bytes; int32_t _shadow_space_bytes; diff --git a/src/hotspot/cpu/ppc/frame_ppc.hpp b/src/hotspot/cpu/ppc/frame_ppc.hpp index 560615089fe..188015f5cd9 100644 --- a/src/hotspot/cpu/ppc/frame_ppc.hpp +++ b/src/hotspot/cpu/ppc/frame_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2024, Oracle and/or its affiliates.
All rights reserved. - * Copyright (c) 2012, 2024 SAP SE. All rights reserved. + * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -134,56 +134,6 @@ #define _native_abi_reg_args_spill(_component) \ (offset_of(frame::native_abi_reg_args_spill, _component)) - // non-volatile GPRs: - - struct spill_nonvolatiles { - uint64_t r14; - uint64_t r15; //_16 - uint64_t r16; - uint64_t r17; //_16 - uint64_t r18; - uint64_t r19; //_16 - uint64_t r20; - uint64_t r21; //_16 - uint64_t r22; - uint64_t r23; //_16 - uint64_t r24; - uint64_t r25; //_16 - uint64_t r26; - uint64_t r27; //_16 - uint64_t r28; - uint64_t r29; //_16 - uint64_t r30; - uint64_t r31; //_16 - - double f14; - double f15; - double f16; - double f17; - double f18; - double f19; - double f20; - double f21; - double f22; - double f23; - double f24; - double f25; - double f26; - double f27; - double f28; - double f29; - double f30; - double f31; - - // aligned to frame::alignment_in_bytes (16) - }; - - enum { - spill_nonvolatiles_size = sizeof(spill_nonvolatiles) - }; - - #define _spill_nonvolatiles_neg(_component) \ - (int)(-frame::spill_nonvolatiles_size + offset_of(frame::spill_nonvolatiles, _component)) // Frame layout for the Java template interpreter on PPC64. // @@ -230,6 +180,7 @@ // [callee's Java result] // [callee's locals w/o arguments] // [outgoing arguments] + // [non-volatiles] // [ENTRY_FRAME_LOCALS] // ABI for every Java frame, compiled and interpreted @@ -292,7 +243,6 @@ uint64_t result_type; uint64_t arguments_tos_address; //_16 // aligned to frame::alignment_in_bytes (16) - uint64_t r[spill_nonvolatiles_size/sizeof(uint64_t)]; }; enum { @@ -413,7 +363,7 @@ inline frame(intptr_t* sp, intptr_t* fp, address pc); inline frame(intptr_t* sp, address pc, kind knd = kind::nmethod); inline frame(intptr_t* sp, address pc, intptr_t* unextended_sp, intptr_t* fp = nullptr, CodeBlob* cb = nullptr); - inline frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc, CodeBlob* cb, const ImmutableOopMap* oop_map); + inline frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc, CodeBlob* cb, const ImmutableOopMap* oop_map = nullptr); inline frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc, CodeBlob* cb, const ImmutableOopMap* oop_map, bool on_heap); private: diff --git a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp index 19a90367353..bb711f2d053 100644 --- a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2024 SAP SE. All rights reserved. + * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -391,4 +391,43 @@ void frame::update_map_with_saved_link(RegisterMapT* map, intptr_t** link_addr) // Nothing to do. } +#if INCLUDE_JFR + +// Static helper routines +inline intptr_t* frame::sender_sp(intptr_t* fp) { return fp; } + +// Extract common_abi parts. 
+inline intptr_t* frame::fp(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*> (((common_abi*)sp)->callers_sp); +} + +inline intptr_t* frame::link(const intptr_t* fp) { return frame::fp(fp); } + +inline address frame::return_address(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast<address>(((common_abi*)sp)->lr); +} + +inline address frame::interpreter_return_address(const intptr_t* fp) { return frame::return_address(fp); } + +// Extract java interpreter state parts. +inline address frame::interpreter_bcp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<address>(*(fp + ijava_idx(bcp))); +} + +inline intptr_t* frame::interpreter_sender_sp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*> (*(fp + ijava_idx(sender_sp))); +} + +inline bool frame::is_interpreter_frame_setup_at(const intptr_t* fp, const void* sp) { + assert(fp != nullptr, "invariant"); + assert(sp != nullptr, "invariant"); + return sp <= fp - ((frame::ijava_state_size + frame::top_ijava_frame_abi_size) >> LogBytesPerWord); +} + +#endif // INCLUDE_JFR + #endif // CPU_PPC_FRAME_PPC_INLINE_HPP diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp index acf916c8c72..32a7011ac26 100644 --- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp @@ -337,15 +337,24 @@ int SaveLiveRegisters::iterate_over_register_mask(IterationAction action, int of assert(SuperwordUseVSX, "or should not reach here"); VectorSRegister vs_reg = vm_reg->as_VectorSRegister(); if (vs_reg->encoding() >= VSR32->encoding() && vs_reg->encoding() <= VSR51->encoding()) { - reg_save_index += 2; + reg_save_index += (2 + (reg_save_index & 1)); // 2 slots + alignment if needed Register spill_addr = R0; + int spill_offset = offset - reg_save_index * BytesPerWord; if (action == ACTION_SAVE) { - _masm->addi(spill_addr, R1_SP, offset - reg_save_index * BytesPerWord); - _masm->stxvd2x(vs_reg, spill_addr); + if (PowerArchitecturePPC64 >= 9) { + _masm->stxv(vs_reg, spill_offset, R1_SP); + } else { + _masm->addi(spill_addr, R1_SP, spill_offset); + _masm->stxvd2x(vs_reg, spill_addr); + } } else if (action == ACTION_RESTORE) { - _masm->addi(spill_addr, R1_SP, offset - reg_save_index * BytesPerWord); - _masm->lxvd2x(vs_reg, spill_addr); + if (PowerArchitecturePPC64 >= 9) { + _masm->lxv(vs_reg, spill_offset, R1_SP); + } else { + _masm->addi(spill_addr, R1_SP, spill_offset); + _masm->lxvd2x(vs_reg, spill_addr); + } } else { assert(action == ACTION_COUNT_ONLY, "Sanity"); } diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetNMethod_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetNMethod_ppc.cpp index 19084ed27c7..d3bb9cc3c04 100644 --- a/src/hotspot/cpu/ppc/gc/shared/barrierSetNMethod_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetNMethod_ppc.cpp @@ -23,8 +23,8 @@ */ #include "code/codeBlob.hpp" -#include "code/nmethod.hpp" #include "code/nativeInst.hpp" +#include "code/nmethod.hpp" #include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/barrierSetNMethod.hpp" @@ -108,7 +108,7 @@ static NativeNMethodBarrier* get_nmethod_barrier(nmethod* nm) { } auto barrier = reinterpret_cast<NativeNMethodBarrier*> (barrier_address); - debug_only(barrier->verify()); + DEBUG_ONLY(barrier->verify()); return barrier; } diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/c1/shenandoahBarrierSetC1_ppc.cpp
b/src/hotspot/cpu/ppc/gc/shenandoah/c1/shenandoahBarrierSetC1_ppc.cpp index 48422bc6621..5b24259103f 100644 --- a/src/hotspot/cpu/ppc/gc/shenandoah/c1/shenandoahBarrierSetC1_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shenandoah/c1/shenandoahBarrierSetC1_ppc.cpp @@ -26,9 +26,9 @@ #include "asm/macroAssembler.inline.hpp" #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" #define __ masm->masm()-> diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp index 842201e1584..b7156144d8b 100644 --- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp @@ -24,8 +24,10 @@ */ #include "asm/macroAssembler.inline.hpp" -#include "gc/shared/gcArguments.hpp" #include "gc/shared/gc_globals.hpp" +#include "gc/shared/gcArguments.hpp" +#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" +#include "gc/shenandoah/mode/shenandoahMode.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" #include "gc/shenandoah/shenandoahForwarding.hpp" @@ -34,8 +36,6 @@ #include "gc/shenandoah/shenandoahHeapRegion.hpp" #include "gc/shenandoah/shenandoahRuntime.hpp" #include "gc/shenandoah/shenandoahThreadLocalData.hpp" -#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" -#include "gc/shenandoah/mode/shenandoahMode.hpp" #include "interpreter/interpreter.hpp" #include "macroAssembler_ppc.hpp" #include "runtime/javaThread.hpp" @@ -360,13 +360,8 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb assert(markWord::lock_mask_in_place == markWord::marked_value, "marked value must equal the value obtained when all lock bits are being set"); - if (VM_Version::has_isel()) { - __ xori(tmp1, tmp1, markWord::lock_mask_in_place); - __ isel(dst, CR0, Assembler::equal, false, tmp1); - } else { - __ bne(CR0, done); - __ xori(dst, tmp1, markWord::lock_mask_in_place); - } + __ xori(tmp1, tmp1, markWord::lock_mask_in_place); + __ isel(dst, CR0, Assembler::equal, false, tmp1); __ bind(done); __ block_comment("} resolve_forward_pointer_not_null (shenandoahgc)"); diff --git a/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp b/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp index 2e3eed8ec60..20d96f6e937 100644 --- a/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp @@ -21,8 +21,8 @@ * questions. 
*/ -#include "gc/shared/gcLogPrecious.hpp" #include "gc/shared/gc_globals.hpp" +#include "gc/shared/gcLogPrecious.hpp" #include "gc/z/zAddress.inline.hpp" #include "gc/z/zGlobals.hpp" #include "runtime/globals.hpp" @@ -34,9 +34,11 @@ #include #endif // LINUX -// Default value if probing is not implemented for a certain platform: 128TB -static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; -// Minimum value returned, if probing fails: 64GB +// Default value if probing is not implemented for a certain platform +// Max address bit is restricted by implicit assumptions in the code, for instance +// the bit layout of ZForwardingEntry or Partial array entry (see ZMarkStackEntry) in mark stack +static const size_t DEFAULT_MAX_ADDRESS_BIT = 46; +// Minimum value returned, if probing fails static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; static size_t probe_valid_max_address_bit() { @@ -91,10 +93,15 @@ size_t ZPlatformAddressOffsetBits() { static const size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; +#ifdef ADDRESS_SANITIZER + // The max supported value is 44 because of other internal data structures. + return MIN2(valid_max_address_offset_bits, (size_t)44); +#else const size_t min_address_offset_bits = max_address_offset_bits - 2; const size_t address_offset = ZGlobalsPointers::min_address_offset_request(); const size_t address_offset_bits = log2i_exact(address_offset); return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); +#endif } size_t ZPlatformAddressHeapBaseShift() { diff --git a/src/hotspot/cpu/ppc/globals_ppc.hpp b/src/hotspot/cpu/ppc/globals_ppc.hpp index 7fefc856a47..f944408fe29 100644 --- a/src/hotspot/cpu/ppc/globals_ppc.hpp +++ b/src/hotspot/cpu/ppc/globals_ppc.hpp @@ -116,7 +116,7 @@ define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong); \ /* special instructions */ \ product(bool, SuperwordUseVSX, false, \ - "Use Power8 VSX instructions for superword optimization.") \ + "Use VSX instructions for superword optimization.") \ \ product(bool, UseByteReverseInstructions, false, DIAGNOSTIC, \ "Use byte reverse instructions.") \ diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp index 99ac037e4b7..d3969427db3 100644 --- a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2023 SAP SE. All rights reserved. + * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * This code is free software; you can redistribute it and/or modify it @@ -170,7 +170,11 @@ class InterpreterMacroAssembler: public MacroAssembler { void remove_activation(TosState state, bool throw_monitor_exception = true, bool install_monitor_exception = true); - void merge_frames(Register Rtop_frame_sp, Register return_pc, Register Rscratch1, Register Rscratch2); // merge top frames + JFR_ONLY(void enter_jfr_critical_section();) + JFR_ONLY(void leave_jfr_critical_section();) + void load_fp(Register fp); + void remove_top_frame_given_fp(Register fp, Register sender_sp, Register sender_fp, Register return_pc, Register temp); + void merge_frames(Register sender_sp, Register return_pc, Register temp1, Register temp2); // merge top frames void add_monitor_to_stack(bool stack_is_empty, Register Rtemp1, Register Rtemp2); diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp index 7a75dfd3de1..29fb54250c2 100644 --- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp @@ -308,21 +308,11 @@ void InterpreterMacroAssembler::push_2ptrs(Register first, Register second) { } void InterpreterMacroAssembler::move_l_to_d(Register l, FloatRegister d) { - if (VM_Version::has_mtfprd()) { - mtfprd(d, l); - } else { - std(l, 0, R15_esp); - lfd(d, 0, R15_esp); - } + mtfprd(d, l); } void InterpreterMacroAssembler::move_d_to_l(FloatRegister d, Register l) { - if (VM_Version::has_mtfprd()) { - mffprd(l, d); - } else { - stfd(d, 0, R15_esp); - ld(l, 0, R15_esp); - } + mffprd(l, d); } void InterpreterMacroAssembler::push(TosState state) { @@ -793,19 +783,27 @@ void InterpreterMacroAssembler::unlock_if_synchronized_method(TosState state, } // Support function for remove_activation & Co. -void InterpreterMacroAssembler::merge_frames(Register Rsender_sp, Register return_pc, - Register Rscratch1, Register Rscratch2) { - // Pop interpreter frame. - ld(Rscratch1, 0, R1_SP); // *SP - ld(Rsender_sp, _ijava_state_neg(sender_sp), Rscratch1); // top_frame_sp - ld(Rscratch2, 0, Rscratch1); // **SP - if (return_pc!=noreg) { - ld(return_pc, _abi0(lr), Rscratch1); // LR +void InterpreterMacroAssembler::load_fp(Register fp) { + ld(fp, _abi0(callers_sp), R1_SP); // *SP +} + +void InterpreterMacroAssembler::remove_top_frame_given_fp(Register fp, Register sender_sp, Register sender_fp, + Register return_pc, Register temp) { + assert_different_registers(sender_sp, sender_fp, return_pc, temp); + ld(sender_sp, _ijava_state_neg(sender_sp), fp); + ld(sender_fp, _abi0(callers_sp), fp); // **SP + if (return_pc != noreg) { + ld(return_pc, _abi0(lr), fp); // last usage of fp, register can be reused } + subf(temp, R1_SP, sender_sp); // sender_sp - SP + stdux(sender_fp, R1_SP, temp); // atomically set *(SP = sender_sp) = sender_fp +} - // Merge top frames. 
- subf(Rscratch1, R1_SP, Rsender_sp); // top_frame_sp - SP - stdux(Rscratch2, R1_SP, Rscratch1); // atomically set *(SP = top_frame_sp) = **SP +void InterpreterMacroAssembler::merge_frames(Register sender_sp, Register return_pc, + Register temp1, Register temp2) { + Register fp = temp1, sender_fp = temp2; + load_fp(fp); + remove_top_frame_given_fp(fp, sender_sp, sender_fp, return_pc, /* temp */ fp); } void InterpreterMacroAssembler::narrow(Register result) { @@ -864,11 +862,16 @@ void InterpreterMacroAssembler::remove_activation(TosState state, bool install_monitor_exception) { BLOCK_COMMENT("remove_activation {"); + unlock_if_synchronized_method(state, throw_monitor_exception, install_monitor_exception); + // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, // that would normally not be safe to use. Such bad returns into unsafe territory of // the stack, will call InterpreterRuntime::at_unwind. - Label slow_path; - Label fast_path; + Label slow_path, fast_path; + Register fp = R22_tmp2; + load_fp(fp); + + JFR_ONLY(enter_jfr_critical_section();) safepoint_poll(slow_path, R11_scratch1, true /* at_return */, false /* in_nmethod */); b(fast_path); bind(slow_path); @@ -880,8 +883,6 @@ void InterpreterMacroAssembler::remove_activation(TosState state, align(32); bind(fast_path); - unlock_if_synchronized_method(state, throw_monitor_exception, install_monitor_exception); - // Save result (push state before jvmti call and pop it afterwards) and notify jvmti. notify_method_exit(false, state, NotifyJVMTI, true); @@ -901,10 +902,11 @@ void InterpreterMacroAssembler::remove_activation(TosState state, // call could have a smaller SP, so that this compare succeeds for an // inner call of the method annotated with ReservedStack. ld_ptr(R0, JavaThread::reserved_stack_activation_offset(), R16_thread); - ld_ptr(R11_scratch1, _abi0(callers_sp), R1_SP); // Load frame pointer. - cmpld(CR0, R11_scratch1, R0); + cmpld(CR0, fp, R0); blt_predict_taken(CR0, no_reserved_zone_enabling); + JFR_ONLY(leave_jfr_critical_section();) + // Enable reserved zone again, throw stack overflow exception. 
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), R16_thread); call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_delayed_StackOverflowError)); @@ -916,12 +918,26 @@ void InterpreterMacroAssembler::remove_activation(TosState state, verify_oop(R17_tos, state); - merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2); + remove_top_frame_given_fp(fp, R21_sender_SP, R23_tmp3, /*return_pc*/ R0, R11_scratch1); mtlr(R0); pop_cont_fastpath(); + JFR_ONLY(leave_jfr_critical_section();) + BLOCK_COMMENT("} remove_activation"); } +#if INCLUDE_JFR +void InterpreterMacroAssembler::enter_jfr_critical_section() { + li(R0, 1); + stb(R0, in_bytes(SAMPLING_CRITICAL_SECTION_OFFSET_JFR), R16_thread); +} + +void InterpreterMacroAssembler::leave_jfr_critical_section() { + li(R0, 0); + stb(R0, in_bytes(SAMPLING_CRITICAL_SECTION_OFFSET_JFR), R16_thread); +} +#endif // INCLUDE_JFR + // Lock object // // Registers alive @@ -958,17 +974,18 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { // markWord displaced_header = obj->mark().set_unlocked(); - if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(tmp, object); - lbz(tmp, in_bytes(Klass::misc_flags_offset()), tmp); - testbitdi(CR0, R0, tmp, exact_log2(KlassFlags::_misc_is_value_based_class)); - bne(CR0, slow_case); - } - if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(monitor, object, header, tmp, slow_case); b(done); } else if (LockingMode == LM_LEGACY) { + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp, object); + lbz(tmp, in_bytes(Klass::misc_flags_offset()), tmp); + testbitdi(CR0, R0, tmp, exact_log2(KlassFlags::_misc_is_value_based_class)); + bne(CR0, slow_case); + } + // Load markWord from object into header. ld(header, oopDesc::mark_offset_in_bytes(), object); diff --git a/src/hotspot/cpu/ppc/javaFrameAnchor_ppc.hpp b/src/hotspot/cpu/ppc/javaFrameAnchor_ppc.hpp index 8b539bc8101..00a6b4cbf95 100644 --- a/src/hotspot/cpu/ppc/javaFrameAnchor_ppc.hpp +++ b/src/hotspot/cpu/ppc/javaFrameAnchor_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 SAP SE. All rights reserved. + * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -73,6 +73,8 @@ address last_Java_pc(void) { return _last_Java_pc; } + intptr_t* last_Java_fp() const { return *(intptr_t**)_last_Java_sp; } + void set_last_Java_sp(intptr_t* sp) { OrderAccess::release(); _last_Java_sp = sp; } #endif // CPU_PPC_JAVAFRAMEANCHOR_PPC_HPP diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index f82395b14fb..857911214c5 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -774,93 +774,82 @@ void MacroAssembler::clobber_carg_stack_slots(Register tmp) { } } -// Uses ordering which corresponds to ABI: -// _savegpr0_14: std r14,-144(r1) -// _savegpr0_15: std r15,-136(r1) -// _savegpr0_16: std r16,-128(r1) -void MacroAssembler::save_nonvolatile_gprs(Register dst, int offset) { - std(R14, offset, dst); offset += 8; - std(R15, offset, dst); offset += 8; - std(R16, offset, dst); offset += 8; - std(R17, offset, dst); offset += 8; - std(R18, offset, dst); offset += 8; - std(R19, offset, dst); offset += 8; - std(R20, offset, dst); offset += 8; - std(R21, offset, dst); offset += 8; - std(R22, offset, dst); offset += 8; - std(R23, offset, dst); offset += 8; - std(R24, offset, dst); offset += 8; - std(R25, offset, dst); offset += 8; - std(R26, offset, dst); offset += 8; - std(R27, offset, dst); offset += 8; - std(R28, offset, dst); offset += 8; - std(R29, offset, dst); offset += 8; - std(R30, offset, dst); offset += 8; - std(R31, offset, dst); offset += 8; - - stfd(F14, offset, dst); offset += 8; - stfd(F15, offset, dst); offset += 8; - stfd(F16, offset, dst); offset += 8; - stfd(F17, offset, dst); offset += 8; - stfd(F18, offset, dst); offset += 8; - stfd(F19, offset, dst); offset += 8; - stfd(F20, offset, dst); offset += 8; - stfd(F21, offset, dst); offset += 8; - stfd(F22, offset, dst); offset += 8; - stfd(F23, offset, dst); offset += 8; - stfd(F24, offset, dst); offset += 8; - stfd(F25, offset, dst); offset += 8; - stfd(F26, offset, dst); offset += 8; - stfd(F27, offset, dst); offset += 8; - stfd(F28, offset, dst); offset += 8; - stfd(F29, offset, dst); offset += 8; - stfd(F30, offset, dst); offset += 8; - stfd(F31, offset, dst); -} - -// Uses ordering which corresponds to ABI: -// _restgpr0_14: ld r14,-144(r1) -// _restgpr0_15: ld r15,-136(r1) -// _restgpr0_16: ld r16,-128(r1) -void MacroAssembler::restore_nonvolatile_gprs(Register src, int offset) { - ld(R14, offset, src); offset += 8; - ld(R15, offset, src); offset += 8; - ld(R16, offset, src); offset += 8; - ld(R17, offset, src); offset += 8; - ld(R18, offset, src); offset += 8; - ld(R19, offset, src); offset += 8; - ld(R20, offset, src); offset += 8; - ld(R21, offset, src); offset += 8; - ld(R22, offset, src); offset += 8; - ld(R23, offset, src); offset += 8; - ld(R24, offset, src); offset += 8; - ld(R25, offset, src); offset += 8; - ld(R26, offset, src); offset += 8; - ld(R27, offset, src); offset += 8; - ld(R28, offset, src); offset += 8; - ld(R29, offset, src); offset += 8; - ld(R30, offset, src); offset += 8; - ld(R31, offset, src); offset += 8; - - // FP registers - lfd(F14, offset, src); offset += 8; - lfd(F15, offset, src); offset += 8; - lfd(F16, offset, src); offset += 8; - lfd(F17, offset, src); offset += 8; - lfd(F18, offset, src); offset += 8; - lfd(F19, offset, src); offset += 8; - lfd(F20, offset, src); offset += 8; - lfd(F21, offset, src); offset += 8; - lfd(F22, offset, src); offset += 8; - lfd(F23, offset, src); offset += 
8; - lfd(F24, offset, src); offset += 8; - lfd(F25, offset, src); offset += 8; - lfd(F26, offset, src); offset += 8; - lfd(F27, offset, src); offset += 8; - lfd(F28, offset, src); offset += 8; - lfd(F29, offset, src); offset += 8; - lfd(F30, offset, src); offset += 8; - lfd(F31, offset, src); +void MacroAssembler::save_nonvolatile_registers(Register dst, int offset, bool include_fp_regs, bool include_vector_regs) { + BLOCK_COMMENT("save_nonvolatile_registers {"); + + for (int i = 14; i < 32; i++) { + std(as_Register(i), offset, dst); + offset += 8; + } + + if (include_fp_regs) { + for (int i = 14; i < 32; i++) { + stfd(as_FloatRegister(i), offset, dst); + offset += 8; + } + } + + if (include_vector_regs) { + assert(is_aligned(offset, StackAlignmentInBytes), "should be"); + if (PowerArchitecturePPC64 >= 10) { + for (int i = 20; i < 32; i += 2) { + stxvp(as_VectorRegister(i)->to_vsr(), offset, dst); + offset += 32; + } + } else { + for (int i = 20; i < 32; i++) { + if (PowerArchitecturePPC64 >= 9) { + stxv(as_VectorRegister(i)->to_vsr(), offset, dst); + } else { + Register spill_addr = R0; + addi(spill_addr, dst, offset); + stxvd2x(as_VectorRegister(i)->to_vsr(), spill_addr); + } + offset += 16; + } + } + } + + BLOCK_COMMENT("} save_nonvolatile_registers "); +} + +void MacroAssembler::restore_nonvolatile_registers(Register src, int offset, bool include_fp_regs, bool include_vector_regs) { + BLOCK_COMMENT("restore_nonvolatile_registers {"); + + for (int i = 14; i < 32; i++) { + ld(as_Register(i), offset, src); + offset += 8; + } + + if (include_fp_regs) { + for (int i = 14; i < 32; i++) { + lfd(as_FloatRegister(i), offset, src); + offset += 8; + } + } + + if (include_vector_regs) { + assert(is_aligned(offset, StackAlignmentInBytes), "should be"); + if (PowerArchitecturePPC64 >= 10) { + for (int i = 20; i < 32; i += 2) { + lxvp(as_VectorRegister(i)->to_vsr(), offset, src); + offset += 32; + } + } else { + for (int i = 20; i < 32; i++) { + if (PowerArchitecturePPC64 >= 9) { + lxv(as_VectorRegister(i)->to_vsr(), offset, src); + } else { + Register spill_addr = R0; + addi(spill_addr, src, offset); + lxvd2x(as_VectorRegister(i)->to_vsr(), spill_addr); + } + offset += 16; + } + } + } + + BLOCK_COMMENT("} restore_nonvolatile_registers"); } // For verify_oops. @@ -1029,13 +1018,6 @@ void MacroAssembler::push_frame_reg_args(unsigned int bytes, Register tmp) { push_frame(bytes + frame::native_abi_reg_args_size, tmp); } -// Setup up a new C frame with a spill area for non-volatile GPRs and -// additional space for local variables. -void MacroAssembler::push_frame_reg_args_nonvolatiles(unsigned int bytes, - Register tmp) { - push_frame(bytes + frame::native_abi_reg_args_size + frame::spill_nonvolatiles_size, tmp); -} - // Pop current C frame. void MacroAssembler::pop_frame() { ld(R1_SP, _abi0(callers_sp), R1_SP); @@ -1570,57 +1552,27 @@ void MacroAssembler::atomic_get_and_modify_generic(Register dest_current_value, Register addr_base, Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint, bool is_add, int size) { // Sub-word instructions are available since Power 8. - // For older processors, instruction_type != size holds, and we - // emulate the sub-word instructions by constructing a 4-byte value - // that leaves the other bytes unchanged. - const int instruction_type = VM_Version::has_lqarx() ? size : 4; Label retry; Register shift_amount = noreg, val32 = dest_current_value, modval = is_add ? 
tmp1 : exchange_value; - if (instruction_type != size) { - assert_different_registers(tmp1, tmp2, tmp3, dest_current_value, exchange_value, addr_base); - modval = tmp1; - shift_amount = tmp2; - val32 = tmp3; - // Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned. -#ifdef VM_LITTLE_ENDIAN - rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8; - clrrdi(addr_base, addr_base, 2); -#else - xori(shift_amount, addr_base, (size == 1) ? 3 : 2); - clrrdi(addr_base, addr_base, 2); - rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16; -#endif - } // atomic emulation loop bind(retry); - switch (instruction_type) { + switch (size) { case 4: lwarx(val32, addr_base, cmpxchgx_hint); break; case 2: lharx(val32, addr_base, cmpxchgx_hint); break; case 1: lbarx(val32, addr_base, cmpxchgx_hint); break; default: ShouldNotReachHere(); } - if (instruction_type != size) { - srw(dest_current_value, val32, shift_amount); - } - if (is_add) { add(modval, dest_current_value, exchange_value); } - if (instruction_type != size) { - // Transform exchange value such that the replacement can be done by one xor instruction. - xorr(modval, dest_current_value, is_add ? modval : exchange_value); - clrldi(modval, modval, (size == 1) ? 56 : 48); - slw(modval, modval, shift_amount); - xorr(modval, val32, modval); - } - switch (instruction_type) { + switch (size) { case 4: stwcx_(modval, addr_base); break; case 2: sthcx_(modval, addr_base); break; case 1: stbcx_(modval, addr_base); break; @@ -1645,51 +1597,22 @@ void MacroAssembler::atomic_get_and_modify_generic(Register dest_current_value, // Only signed types are supported with size < 4. void MacroAssembler::cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value, - Register addr_base, Register tmp1, Register tmp2, - Label &retry, Label &failed, bool cmpxchgx_hint, int size) { + Register addr_base, Label &retry, Label &failed, bool cmpxchgx_hint, int size) { // Sub-word instructions are available since Power 8. - // For older processors, instruction_type != size holds, and we - // emulate the sub-word instructions by constructing a 4-byte value - // that leaves the other bytes unchanged. - const int instruction_type = VM_Version::has_lqarx() ? size : 4; - Register shift_amount = noreg, val32 = dest_current_value, modval = exchange_value; - if (instruction_type != size) { - assert_different_registers(tmp1, tmp2, dest_current_value, compare_value.register_or_noreg(), exchange_value, addr_base); - shift_amount = tmp1; - val32 = tmp2; - modval = tmp2; - // Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned. -#ifdef VM_LITTLE_ENDIAN - rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8; - clrrdi(addr_base, addr_base, 2); -#else - xori(shift_amount, addr_base, (size == 1) ? 3 : 2); - clrrdi(addr_base, addr_base, 2); - rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16; -#endif - // Transform exchange value such that the replacement can be done by one xor instruction. - xorr(exchange_value, compare_value, exchange_value); - clrldi(exchange_value, exchange_value, (size == 1) ? 
56 : 48); - slw(exchange_value, exchange_value, shift_amount); - } - // atomic emulation loop bind(retry); - switch (instruction_type) { + switch (size) { case 4: lwarx(val32, addr_base, cmpxchgx_hint); break; case 2: lharx(val32, addr_base, cmpxchgx_hint); break; case 1: lbarx(val32, addr_base, cmpxchgx_hint); break; default: ShouldNotReachHere(); } - if (instruction_type != size) { - srw(dest_current_value, val32, shift_amount); - } if (size == 1) { extsb(dest_current_value, dest_current_value); } else if (size == 2) { @@ -1705,11 +1628,7 @@ void MacroAssembler::cmpxchg_loop_body(ConditionRegister flag, Register dest_cur // branch to done => (flag == ne), (dest_current_value != compare_value) // fall through => (flag == eq), (dest_current_value == compare_value) - if (instruction_type != size) { - xorr(modval, val32, exchange_value); - } - - switch (instruction_type) { + switch (size) { case 4: stwcx_(modval, addr_base); break; case 2: sthcx_(modval, addr_base); break; case 1: stbcx_(modval, addr_base); break; @@ -1720,8 +1639,7 @@ void MacroAssembler::cmpxchg_loop_body(ConditionRegister flag, Register dest_cur // CmpxchgX sets condition register to cmpX(current, compare). void MacroAssembler::cmpxchg_generic(ConditionRegister flag, Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value, - Register addr_base, Register tmp1, Register tmp2, - int semantics, bool cmpxchgx_hint, Register int_flag_success, + Register addr_base, int semantics, bool cmpxchgx_hint, Register int_flag_success, Label* failed_ext, bool contention_hint, bool weak, int size) { Label retry; Label failed_int; @@ -1732,8 +1650,7 @@ void MacroAssembler::cmpxchg_generic(ConditionRegister flag, Register dest_curre // result register is different from the other ones. bool use_result_reg = (int_flag_success != noreg); bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value.register_or_noreg() && - int_flag_success != exchange_value && int_flag_success != addr_base && - int_flag_success != tmp1 && int_flag_success != tmp2); + int_flag_success != exchange_value && int_flag_success != addr_base); assert(!weak || flag == CR0, "weak only supported with CR0"); assert(int_flag_success == noreg || failed_ext == nullptr, "cannot have both"); assert(size == 1 || size == 2 || size == 4, "unsupported"); @@ -1759,7 +1676,7 @@ void MacroAssembler::cmpxchg_generic(ConditionRegister flag, Register dest_curre release(); } - cmpxchg_loop_body(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2, + cmpxchg_loop_body(flag, dest_current_value, compare_value, exchange_value, addr_base, retry, failed, cmpxchgx_hint, size); if (!weak || use_result_reg || failed_ext) { if (UseStaticBranchPredictionInCompareAndSwapPPC64) { @@ -3000,7 +2917,7 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla Label slow_path; if (UseObjectMonitorTable) { - // Clear cache in case fast locking succeeds. + // Clear cache in case fast locking succeeds or we need to take the slow-path. 
li(tmp1, 0); std(tmp1, in_bytes(BasicObjectLock::lock_offset()) + BasicLock::object_monitor_cache_offset_in_bytes(), box); } @@ -3360,6 +3277,35 @@ void MacroAssembler::safepoint_poll(Label& slow_path, Register temp, bool at_ret } } +void MacroAssembler::jump_to_polling_page_return_handler_blob(int safepoint_offset, bool fixed_size) { + assert(SharedRuntime::polling_page_return_handler_blob() != nullptr, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + + // Determine saved exception pc using pc relative address computation. + { + Label next_pc; + bl(next_pc); + bind(next_pc); + } + int current_offset = offset(); + + if (fixed_size) { + // Code size must not depend on offsets. + load_const32(R12, safepoint_offset - current_offset); + mflr(R0); + add(R12, R12, R0); + } else { + mflr(R12); + add_const_optimized(R12, R12, safepoint_offset - current_offset); + } + std(R12, in_bytes(JavaThread::saved_exception_pc_offset()), R16_thread); + + add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); + mtctr(R0); + bctr(); +} + void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2, MacroAssembler::PreservationLevel preservation_level) { BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); @@ -3735,7 +3681,6 @@ void MacroAssembler::load_reverse_32(Register dst, Register src) { // Due to register shortage, setting tc3 may overwrite table. With the return offset // at hand, the original table address can be easily reconstructed. int MacroAssembler::crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3) { - assert(!VM_Version::has_vpmsumb(), "Vector version should be used instead!"); // Point to 4 byte folding tables (byte-reversed version for Big Endian) // Layout: See StubRoutines::ppc::generate_crc_constants. @@ -3868,103 +3813,6 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab xorr(crc, t0, t2); // Now crc contains the final checksum value. } -/** - * @param crc register containing existing CRC (32-bit) - * @param buf register pointing to input byte buffer (byte*) - * @param len register containing number of bytes - * @param table register pointing to CRC table - * - * uses R9..R12 as work register. Must be saved/restored by caller! - */ -void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table, - Register t0, Register t1, Register t2, Register t3, - Register tc0, Register tc1, Register tc2, Register tc3, - bool invertCRC) { - assert_different_registers(crc, buf, len, table); - - Label L_mainLoop, L_tail; - Register tmp = t0; - Register data = t0; - Register tmp2 = t1; - const int mainLoop_stepping = 4; - const int tailLoop_stepping = 1; - const int log_stepping = exact_log2(mainLoop_stepping); - const int mainLoop_alignment = 32; // InputForNewCode > 4 ? InputForNewCode : 32; - const int complexThreshold = 2*mainLoop_stepping; - - // Don't test for len <= 0 here. This pathological case should not occur anyway. - // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles - // for all well-behaved cases. The situation itself is detected and handled correctly - // within update_byteLoop_crc32. 
- assert(tailLoop_stepping == 1, "check tailLoop_stepping!"); - - BLOCK_COMMENT("kernel_crc32_1word {"); - - if (invertCRC) { - nand(crc, crc, crc); // 1s complement of crc - } - - // Check for short ( mainLoop_stepping) { - sub(len, len, tmp2); // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed). - } else { - sub(tmp, len, tmp2); // Remaining bytes for main loop. - cmpdi(CR0, tmp, mainLoop_stepping); - blt(CR0, L_tail); // For less than one mainloop_stepping left, do only tail processing - mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed). - } - update_byteLoop_crc32(crc, buf, tmp2, table, data, false); - } - - srdi(tmp2, len, log_stepping); // #iterations for mainLoop - andi(len, len, mainLoop_stepping-1); // remaining bytes for tailLoop - mtctr(tmp2); - -#ifdef VM_LITTLE_ENDIAN - Register crc_rv = crc; -#else - Register crc_rv = tmp; // Load_reverse needs separate registers to work on. - // Occupies tmp, but frees up crc. - load_reverse_32(crc_rv, crc); // Revert byte order because we are dealing with big-endian data. - tmp = crc; -#endif - - int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3); - - align(mainLoop_alignment); // Octoword-aligned loop address. Shows 2% improvement. - BIND(L_mainLoop); - update_1word_crc32(crc_rv, buf, table, 0, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3); - bdnz(L_mainLoop); - -#ifndef VM_LITTLE_ENDIAN - load_reverse_32(crc, crc_rv); // Revert byte order because we are dealing with big-endian data. - tmp = crc_rv; // Tmp uses it's original register again. -#endif - - // Restore original table address for tailLoop. - if (reconstructTableOffset != 0) { - addi(table, table, -reconstructTableOffset); - } - - // Process last few ( = Power7). // fcfid + frsp showed rounding problem when result should be 0x3f800001. - return VM_Version::has_fcfids(); + return true; } // Implements a variant of EncodeISOArrayNode that encode ASCII only diff --git a/src/hotspot/cpu/ppc/methodHandles_ppc.cpp b/src/hotspot/cpu/ppc/methodHandles_ppc.cpp index 13fb8ef79d6..803bb6bfe69 100644 --- a/src/hotspot/cpu/ppc/methodHandles_ppc.cpp +++ b/src/hotspot/cpu/ppc/methodHandles_ppc.cpp @@ -359,7 +359,9 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ? -1 // enforce receiver null check : oopDesc::klass_offset_in_bytes(); // regular null-checking behavior - __ null_check_throw(receiver_reg, klass_offset, temp1, Interpreter::throw_NullPointerException_entry()); + address NullPointerException_entry = for_compiler_entry ? SharedRuntime::throw_NullPointerException_at_call_entry() + : Interpreter::throw_NullPointerException_entry(); + __ null_check_throw(receiver_reg, klass_offset, temp1, NullPointerException_entry); if (iid != vmIntrinsics::_linkToSpecial || VerifyMethodHandles) { __ load_klass(temp1_recv_klass, receiver_reg); diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 07d681e8982..128e566d0f3 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -258,71 +258,326 @@ register %{ // Vector-Scalar Registers // ---------------------------- // 1st 32 VSRs are aliases for the FPRs which are already defined above. 
- reg_def VSR0 ( SOC, SOC, Op_VecX, 0, VMRegImpl::Bad()); - reg_def VSR1 ( SOC, SOC, Op_VecX, 1, VMRegImpl::Bad()); - reg_def VSR2 ( SOC, SOC, Op_VecX, 2, VMRegImpl::Bad()); - reg_def VSR3 ( SOC, SOC, Op_VecX, 3, VMRegImpl::Bad()); - reg_def VSR4 ( SOC, SOC, Op_VecX, 4, VMRegImpl::Bad()); - reg_def VSR5 ( SOC, SOC, Op_VecX, 5, VMRegImpl::Bad()); - reg_def VSR6 ( SOC, SOC, Op_VecX, 6, VMRegImpl::Bad()); - reg_def VSR7 ( SOC, SOC, Op_VecX, 7, VMRegImpl::Bad()); - reg_def VSR8 ( SOC, SOC, Op_VecX, 8, VMRegImpl::Bad()); - reg_def VSR9 ( SOC, SOC, Op_VecX, 9, VMRegImpl::Bad()); - reg_def VSR10 ( SOC, SOC, Op_VecX, 10, VMRegImpl::Bad()); - reg_def VSR11 ( SOC, SOC, Op_VecX, 11, VMRegImpl::Bad()); - reg_def VSR12 ( SOC, SOC, Op_VecX, 12, VMRegImpl::Bad()); - reg_def VSR13 ( SOC, SOC, Op_VecX, 13, VMRegImpl::Bad()); - reg_def VSR14 ( SOC, SOE, Op_VecX, 14, VMRegImpl::Bad()); - reg_def VSR15 ( SOC, SOE, Op_VecX, 15, VMRegImpl::Bad()); - reg_def VSR16 ( SOC, SOE, Op_VecX, 16, VMRegImpl::Bad()); - reg_def VSR17 ( SOC, SOE, Op_VecX, 17, VMRegImpl::Bad()); - reg_def VSR18 ( SOC, SOE, Op_VecX, 18, VMRegImpl::Bad()); - reg_def VSR19 ( SOC, SOE, Op_VecX, 19, VMRegImpl::Bad()); - reg_def VSR20 ( SOC, SOE, Op_VecX, 20, VMRegImpl::Bad()); - reg_def VSR21 ( SOC, SOE, Op_VecX, 21, VMRegImpl::Bad()); - reg_def VSR22 ( SOC, SOE, Op_VecX, 22, VMRegImpl::Bad()); - reg_def VSR23 ( SOC, SOE, Op_VecX, 23, VMRegImpl::Bad()); - reg_def VSR24 ( SOC, SOE, Op_VecX, 24, VMRegImpl::Bad()); - reg_def VSR25 ( SOC, SOE, Op_VecX, 25, VMRegImpl::Bad()); - reg_def VSR26 ( SOC, SOE, Op_VecX, 26, VMRegImpl::Bad()); - reg_def VSR27 ( SOC, SOE, Op_VecX, 27, VMRegImpl::Bad()); - reg_def VSR28 ( SOC, SOE, Op_VecX, 28, VMRegImpl::Bad()); - reg_def VSR29 ( SOC, SOE, Op_VecX, 29, VMRegImpl::Bad()); - reg_def VSR30 ( SOC, SOE, Op_VecX, 30, VMRegImpl::Bad()); - reg_def VSR31 ( SOC, SOE, Op_VecX, 31, VMRegImpl::Bad()); + reg_def VSR0 (SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + reg_def VSR0_H (SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + reg_def VSR0_J (SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + reg_def VSR0_K (SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + + reg_def VSR1 (SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + reg_def VSR1_H (SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + reg_def VSR1_J (SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + reg_def VSR1_K (SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + + reg_def VSR2 (SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + reg_def VSR2_H (SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + reg_def VSR2_J (SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + reg_def VSR2_K (SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + + reg_def VSR3 (SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + reg_def VSR3_H (SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + reg_def VSR3_J (SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + reg_def VSR3_K (SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + + reg_def VSR4 (SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + reg_def VSR4_H (SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + reg_def VSR4_J (SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + reg_def VSR4_K (SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + + reg_def VSR5 (SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + reg_def VSR5_H (SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + reg_def VSR5_J (SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + reg_def VSR5_K (SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + + reg_def VSR6 (SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + reg_def VSR6_H (SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + reg_def VSR6_J (SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + reg_def VSR6_K (SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + + 
reg_def VSR7 (SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + reg_def VSR7_H (SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + reg_def VSR7_J (SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + reg_def VSR7_K (SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + + reg_def VSR8 (SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + reg_def VSR8_H (SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + reg_def VSR8_J (SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + reg_def VSR8_K (SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + + reg_def VSR9 (SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + reg_def VSR9_H (SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + reg_def VSR9_J (SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + reg_def VSR9_K (SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + + reg_def VSR10 (SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + reg_def VSR10_H(SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + reg_def VSR10_J(SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + reg_def VSR10_K(SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + + reg_def VSR11 (SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + reg_def VSR11_H(SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + reg_def VSR11_J(SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + reg_def VSR11_K(SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + + reg_def VSR12 (SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + reg_def VSR12_H(SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + reg_def VSR12_J(SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + reg_def VSR12_K(SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + + reg_def VSR13 (SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + reg_def VSR13_H(SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + reg_def VSR13_J(SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + reg_def VSR13_K(SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + + reg_def VSR14 (SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + reg_def VSR14_H(SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + reg_def VSR14_J(SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + reg_def VSR14_K(SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + + reg_def VSR15 (SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + reg_def VSR15_H(SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + reg_def VSR15_J(SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + reg_def VSR15_K(SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + + reg_def VSR16 (SOC, SOC, Op_RegF, 16, VMRegImpl::Bad()); + reg_def VSR16_H(SOC, SOC, Op_RegF, 16, VMRegImpl::Bad()); + reg_def VSR16_J(SOC, SOC, Op_RegF, 16, VMRegImpl::Bad()); + reg_def VSR16_K(SOC, SOC, Op_RegF, 16, VMRegImpl::Bad()); + + reg_def VSR17 (SOC, SOC, Op_RegF, 17, VMRegImpl::Bad()); + reg_def VSR17_H(SOC, SOC, Op_RegF, 17, VMRegImpl::Bad()); + reg_def VSR17_J(SOC, SOC, Op_RegF, 17, VMRegImpl::Bad()); + reg_def VSR17_K(SOC, SOC, Op_RegF, 17, VMRegImpl::Bad()); + + reg_def VSR18 (SOC, SOC, Op_RegF, 18, VMRegImpl::Bad()); + reg_def VSR18_H(SOC, SOC, Op_RegF, 18, VMRegImpl::Bad()); + reg_def VSR18_J(SOC, SOC, Op_RegF, 18, VMRegImpl::Bad()); + reg_def VSR18_K(SOC, SOC, Op_RegF, 18, VMRegImpl::Bad()); + + reg_def VSR19 (SOC, SOC, Op_RegF, 19, VMRegImpl::Bad()); + reg_def VSR19_H(SOC, SOC, Op_RegF, 19, VMRegImpl::Bad()); + reg_def VSR19_J(SOC, SOC, Op_RegF, 19, VMRegImpl::Bad()); + reg_def VSR19_K(SOC, SOC, Op_RegF, 19, VMRegImpl::Bad()); + + reg_def VSR20 (SOC, SOC, Op_RegF, 20, VMRegImpl::Bad()); + reg_def VSR20_H(SOC, SOC, Op_RegF, 20, VMRegImpl::Bad()); + reg_def VSR20_J(SOC, SOC, Op_RegF, 20, VMRegImpl::Bad()); + reg_def VSR20_K(SOC, SOC, Op_RegF, 20, VMRegImpl::Bad()); + + reg_def VSR21 (SOC, SOC, Op_RegF, 21, VMRegImpl::Bad()); + reg_def VSR21_H(SOC, SOC, Op_RegF, 21, VMRegImpl::Bad()); + reg_def VSR21_J(SOC, SOC, Op_RegF, 21, VMRegImpl::Bad()); + reg_def 
VSR21_K(SOC, SOC, Op_RegF, 21, VMRegImpl::Bad()); + + reg_def VSR22 (SOC, SOC, Op_RegF, 22, VMRegImpl::Bad()); + reg_def VSR22_H(SOC, SOC, Op_RegF, 22, VMRegImpl::Bad()); + reg_def VSR22_J(SOC, SOC, Op_RegF, 22, VMRegImpl::Bad()); + reg_def VSR22_K(SOC, SOC, Op_RegF, 22, VMRegImpl::Bad()); + + reg_def VSR23 (SOC, SOC, Op_RegF, 23, VMRegImpl::Bad()); + reg_def VSR23_H(SOC, SOC, Op_RegF, 23, VMRegImpl::Bad()); + reg_def VSR23_J(SOC, SOC, Op_RegF, 23, VMRegImpl::Bad()); + reg_def VSR23_K(SOC, SOC, Op_RegF, 23, VMRegImpl::Bad()); + + reg_def VSR24 (SOC, SOC, Op_RegF, 24, VMRegImpl::Bad()); + reg_def VSR24_H(SOC, SOC, Op_RegF, 24, VMRegImpl::Bad()); + reg_def VSR24_J(SOC, SOC, Op_RegF, 24, VMRegImpl::Bad()); + reg_def VSR24_K(SOC, SOC, Op_RegF, 24, VMRegImpl::Bad()); + + reg_def VSR25 (SOC, SOC, Op_RegF, 25, VMRegImpl::Bad()); + reg_def VSR25_H(SOC, SOC, Op_RegF, 25, VMRegImpl::Bad()); + reg_def VSR25_J(SOC, SOC, Op_RegF, 25, VMRegImpl::Bad()); + reg_def VSR25_K(SOC, SOC, Op_RegF, 25, VMRegImpl::Bad()); + + reg_def VSR26 (SOC, SOC, Op_RegF, 26, VMRegImpl::Bad()); + reg_def VSR26_H(SOC, SOC, Op_RegF, 26, VMRegImpl::Bad()); + reg_def VSR26_J(SOC, SOC, Op_RegF, 26, VMRegImpl::Bad()); + reg_def VSR26_K(SOC, SOC, Op_RegF, 26, VMRegImpl::Bad()); + + reg_def VSR27 (SOC, SOC, Op_RegF, 27, VMRegImpl::Bad()); + reg_def VSR27_H(SOC, SOC, Op_RegF, 27, VMRegImpl::Bad()); + reg_def VSR27_J(SOC, SOC, Op_RegF, 27, VMRegImpl::Bad()); + reg_def VSR27_K(SOC, SOC, Op_RegF, 27, VMRegImpl::Bad()); + + reg_def VSR28 (SOC, SOC, Op_RegF, 28, VMRegImpl::Bad()); + reg_def VSR28_H(SOC, SOC, Op_RegF, 28, VMRegImpl::Bad()); + reg_def VSR28_J(SOC, SOC, Op_RegF, 28, VMRegImpl::Bad()); + reg_def VSR28_K(SOC, SOC, Op_RegF, 28, VMRegImpl::Bad()); + + reg_def VSR29 (SOC, SOC, Op_RegF, 29, VMRegImpl::Bad()); + reg_def VSR29_H(SOC, SOC, Op_RegF, 29, VMRegImpl::Bad()); + reg_def VSR29_J(SOC, SOC, Op_RegF, 29, VMRegImpl::Bad()); + reg_def VSR29_K(SOC, SOC, Op_RegF, 29, VMRegImpl::Bad()); + + reg_def VSR30 (SOC, SOC, Op_RegF, 30, VMRegImpl::Bad()); + reg_def VSR30_H(SOC, SOC, Op_RegF, 30, VMRegImpl::Bad()); + reg_def VSR30_J(SOC, SOC, Op_RegF, 30, VMRegImpl::Bad()); + reg_def VSR30_K(SOC, SOC, Op_RegF, 30, VMRegImpl::Bad()); + + reg_def VSR31 (SOC, SOC, Op_RegF, 31, VMRegImpl::Bad()); + reg_def VSR31_H(SOC, SOC, Op_RegF, 31, VMRegImpl::Bad()); + reg_def VSR31_J(SOC, SOC, Op_RegF, 31, VMRegImpl::Bad()); + reg_def VSR31_K(SOC, SOC, Op_RegF, 31, VMRegImpl::Bad()); + // 2nd 32 VSRs are aliases for the VRs which are only defined here. 
- reg_def VSR32 ( SOC, SOC, Op_VecX, 32, VSR32->as_VMReg()); - reg_def VSR33 ( SOC, SOC, Op_VecX, 33, VSR33->as_VMReg()); - reg_def VSR34 ( SOC, SOC, Op_VecX, 34, VSR34->as_VMReg()); - reg_def VSR35 ( SOC, SOC, Op_VecX, 35, VSR35->as_VMReg()); - reg_def VSR36 ( SOC, SOC, Op_VecX, 36, VSR36->as_VMReg()); - reg_def VSR37 ( SOC, SOC, Op_VecX, 37, VSR37->as_VMReg()); - reg_def VSR38 ( SOC, SOC, Op_VecX, 38, VSR38->as_VMReg()); - reg_def VSR39 ( SOC, SOC, Op_VecX, 39, VSR39->as_VMReg()); - reg_def VSR40 ( SOC, SOC, Op_VecX, 40, VSR40->as_VMReg()); - reg_def VSR41 ( SOC, SOC, Op_VecX, 41, VSR41->as_VMReg()); - reg_def VSR42 ( SOC, SOC, Op_VecX, 42, VSR42->as_VMReg()); - reg_def VSR43 ( SOC, SOC, Op_VecX, 43, VSR43->as_VMReg()); - reg_def VSR44 ( SOC, SOC, Op_VecX, 44, VSR44->as_VMReg()); - reg_def VSR45 ( SOC, SOC, Op_VecX, 45, VSR45->as_VMReg()); - reg_def VSR46 ( SOC, SOC, Op_VecX, 46, VSR46->as_VMReg()); - reg_def VSR47 ( SOC, SOC, Op_VecX, 47, VSR47->as_VMReg()); - reg_def VSR48 ( SOC, SOC, Op_VecX, 48, VSR48->as_VMReg()); - reg_def VSR49 ( SOC, SOC, Op_VecX, 49, VSR49->as_VMReg()); - reg_def VSR50 ( SOC, SOC, Op_VecX, 50, VSR50->as_VMReg()); - reg_def VSR51 ( SOC, SOC, Op_VecX, 51, VSR51->as_VMReg()); - reg_def VSR52 ( SOC, SOE, Op_VecX, 52, VSR52->as_VMReg()); - reg_def VSR53 ( SOC, SOE, Op_VecX, 53, VSR53->as_VMReg()); - reg_def VSR54 ( SOC, SOE, Op_VecX, 54, VSR54->as_VMReg()); - reg_def VSR55 ( SOC, SOE, Op_VecX, 55, VSR55->as_VMReg()); - reg_def VSR56 ( SOC, SOE, Op_VecX, 56, VSR56->as_VMReg()); - reg_def VSR57 ( SOC, SOE, Op_VecX, 57, VSR57->as_VMReg()); - reg_def VSR58 ( SOC, SOE, Op_VecX, 58, VSR58->as_VMReg()); - reg_def VSR59 ( SOC, SOE, Op_VecX, 59, VSR59->as_VMReg()); - reg_def VSR60 ( SOC, SOE, Op_VecX, 60, VSR60->as_VMReg()); - reg_def VSR61 ( SOC, SOE, Op_VecX, 61, VSR61->as_VMReg()); - reg_def VSR62 ( SOC, SOE, Op_VecX, 62, VSR62->as_VMReg()); - reg_def VSR63 ( SOC, SOE, Op_VecX, 63, VSR63->as_VMReg()); + reg_def VSR32 (SOC, SOC, Op_RegF, 32, VSR32->as_VMReg() ); + reg_def VSR32_H(SOC, SOC, Op_RegF, 32, VSR32->as_VMReg()->next() ); + reg_def VSR32_J(SOC, SOC, Op_RegF, 32, VSR32->as_VMReg()->next(2)); + reg_def VSR32_K(SOC, SOC, Op_RegF, 32, VSR32->as_VMReg()->next(3)); + + reg_def VSR33 (SOC, SOC, Op_RegF, 33, VSR33->as_VMReg() ); + reg_def VSR33_H(SOC, SOC, Op_RegF, 33, VSR33->as_VMReg()->next() ); + reg_def VSR33_J(SOC, SOC, Op_RegF, 33, VSR33->as_VMReg()->next(2)); + reg_def VSR33_K(SOC, SOC, Op_RegF, 33, VSR33->as_VMReg()->next(3)); + + reg_def VSR34 (SOC, SOC, Op_RegF, 34, VSR34->as_VMReg() ); + reg_def VSR34_H(SOC, SOC, Op_RegF, 34, VSR34->as_VMReg()->next() ); + reg_def VSR34_J(SOC, SOC, Op_RegF, 34, VSR34->as_VMReg()->next(2)); + reg_def VSR34_K(SOC, SOC, Op_RegF, 34, VSR34->as_VMReg()->next(3)); + + reg_def VSR35 (SOC, SOC, Op_RegF, 35, VSR35->as_VMReg() ); + reg_def VSR35_H(SOC, SOC, Op_RegF, 35, VSR35->as_VMReg()->next() ); + reg_def VSR35_J(SOC, SOC, Op_RegF, 35, VSR35->as_VMReg()->next(2)); + reg_def VSR35_K(SOC, SOC, Op_RegF, 35, VSR35->as_VMReg()->next(3)); + + reg_def VSR36 (SOC, SOC, Op_RegF, 36, VSR36->as_VMReg() ); + reg_def VSR36_H(SOC, SOC, Op_RegF, 36, VSR36->as_VMReg()->next() ); + reg_def VSR36_J(SOC, SOC, Op_RegF, 36, VSR36->as_VMReg()->next(2)); + reg_def VSR36_K(SOC, SOC, Op_RegF, 36, VSR36->as_VMReg()->next(3)); + + reg_def VSR37 (SOC, SOC, Op_RegF, 37, VSR37->as_VMReg() ); + reg_def VSR37_H(SOC, SOC, Op_RegF, 37, VSR37->as_VMReg()->next() ); + reg_def VSR37_J(SOC, SOC, Op_RegF, 37, VSR37->as_VMReg()->next(2)); + reg_def VSR37_K(SOC, SOC, Op_RegF, 
37, VSR37->as_VMReg()->next(3)); + + reg_def VSR38 (SOC, SOC, Op_RegF, 38, VSR38->as_VMReg() ); + reg_def VSR38_H(SOC, SOC, Op_RegF, 38, VSR38->as_VMReg()->next() ); + reg_def VSR38_J(SOC, SOC, Op_RegF, 38, VSR38->as_VMReg()->next(2)); + reg_def VSR38_K(SOC, SOC, Op_RegF, 38, VSR38->as_VMReg()->next(3)); + + reg_def VSR39 (SOC, SOC, Op_RegF, 39, VSR39->as_VMReg() ); + reg_def VSR39_H(SOC, SOC, Op_RegF, 39, VSR39->as_VMReg()->next() ); + reg_def VSR39_J(SOC, SOC, Op_RegF, 39, VSR39->as_VMReg()->next(2)); + reg_def VSR39_K(SOC, SOC, Op_RegF, 39, VSR39->as_VMReg()->next(3)); + + reg_def VSR40 (SOC, SOC, Op_RegF, 40, VSR40->as_VMReg() ); + reg_def VSR40_H(SOC, SOC, Op_RegF, 40, VSR40->as_VMReg()->next() ); + reg_def VSR40_J(SOC, SOC, Op_RegF, 40, VSR40->as_VMReg()->next(2)); + reg_def VSR40_K(SOC, SOC, Op_RegF, 40, VSR40->as_VMReg()->next(3)); + + reg_def VSR41 (SOC, SOC, Op_RegF, 41, VSR41->as_VMReg() ); + reg_def VSR41_H(SOC, SOC, Op_RegF, 41, VSR41->as_VMReg()->next() ); + reg_def VSR41_J(SOC, SOC, Op_RegF, 41, VSR41->as_VMReg()->next(2)); + reg_def VSR41_K(SOC, SOC, Op_RegF, 41, VSR41->as_VMReg()->next(3)); + + reg_def VSR42 (SOC, SOC, Op_RegF, 42, VSR42->as_VMReg() ); + reg_def VSR42_H(SOC, SOC, Op_RegF, 42, VSR42->as_VMReg()->next() ); + reg_def VSR42_J(SOC, SOC, Op_RegF, 42, VSR42->as_VMReg()->next(2)); + reg_def VSR42_K(SOC, SOC, Op_RegF, 42, VSR42->as_VMReg()->next(3)); + + reg_def VSR43 (SOC, SOC, Op_RegF, 43, VSR43->as_VMReg() ); + reg_def VSR43_H(SOC, SOC, Op_RegF, 43, VSR43->as_VMReg()->next() ); + reg_def VSR43_J(SOC, SOC, Op_RegF, 43, VSR43->as_VMReg()->next(2)); + reg_def VSR43_K(SOC, SOC, Op_RegF, 43, VSR43->as_VMReg()->next(3)); + + reg_def VSR44 (SOC, SOC, Op_RegF, 44, VSR44->as_VMReg() ); + reg_def VSR44_H(SOC, SOC, Op_RegF, 44, VSR44->as_VMReg()->next() ); + reg_def VSR44_J(SOC, SOC, Op_RegF, 44, VSR44->as_VMReg()->next(2)); + reg_def VSR44_K(SOC, SOC, Op_RegF, 44, VSR44->as_VMReg()->next(3)); + + reg_def VSR45 (SOC, SOC, Op_RegF, 45, VSR45->as_VMReg() ); + reg_def VSR45_H(SOC, SOC, Op_RegF, 45, VSR45->as_VMReg()->next() ); + reg_def VSR45_J(SOC, SOC, Op_RegF, 45, VSR45->as_VMReg()->next(2)); + reg_def VSR45_K(SOC, SOC, Op_RegF, 45, VSR45->as_VMReg()->next(3)); + + reg_def VSR46 (SOC, SOC, Op_RegF, 46, VSR46->as_VMReg() ); + reg_def VSR46_H(SOC, SOC, Op_RegF, 46, VSR46->as_VMReg()->next() ); + reg_def VSR46_J(SOC, SOC, Op_RegF, 46, VSR46->as_VMReg()->next(2)); + reg_def VSR46_K(SOC, SOC, Op_RegF, 46, VSR46->as_VMReg()->next(3)); + + reg_def VSR47 (SOC, SOC, Op_RegF, 47, VSR47->as_VMReg() ); + reg_def VSR47_H(SOC, SOC, Op_RegF, 47, VSR47->as_VMReg()->next() ); + reg_def VSR47_J(SOC, SOC, Op_RegF, 47, VSR47->as_VMReg()->next(2)); + reg_def VSR47_K(SOC, SOC, Op_RegF, 47, VSR47->as_VMReg()->next(3)); + + reg_def VSR48 (SOC, SOC, Op_RegF, 48, VSR48->as_VMReg() ); + reg_def VSR48_H(SOC, SOC, Op_RegF, 48, VSR48->as_VMReg()->next() ); + reg_def VSR48_J(SOC, SOC, Op_RegF, 48, VSR48->as_VMReg()->next(2)); + reg_def VSR48_K(SOC, SOC, Op_RegF, 48, VSR48->as_VMReg()->next(3)); + + reg_def VSR49 (SOC, SOC, Op_RegF, 49, VSR49->as_VMReg() ); + reg_def VSR49_H(SOC, SOC, Op_RegF, 49, VSR49->as_VMReg()->next() ); + reg_def VSR49_J(SOC, SOC, Op_RegF, 49, VSR49->as_VMReg()->next(2)); + reg_def VSR49_K(SOC, SOC, Op_RegF, 49, VSR49->as_VMReg()->next(3)); + + reg_def VSR50 (SOC, SOC, Op_RegF, 50, VSR50->as_VMReg() ); + reg_def VSR50_H(SOC, SOC, Op_RegF, 50, VSR50->as_VMReg()->next() ); + reg_def VSR50_J(SOC, SOC, Op_RegF, 50, VSR50->as_VMReg()->next(2)); + reg_def VSR50_K(SOC, SOC, Op_RegF, 50, 
VSR50->as_VMReg()->next(3)); + + reg_def VSR51 (SOC, SOC, Op_RegF, 51, VSR51->as_VMReg() ); + reg_def VSR51_H(SOC, SOC, Op_RegF, 51, VSR51->as_VMReg()->next() ); + reg_def VSR51_J(SOC, SOC, Op_RegF, 51, VSR51->as_VMReg()->next(2)); + reg_def VSR51_K(SOC, SOC, Op_RegF, 51, VSR51->as_VMReg()->next(3)); + + reg_def VSR52 (SOC, SOE, Op_RegF, 52, VSR52->as_VMReg() ); + reg_def VSR52_H(SOC, SOE, Op_RegF, 52, VSR52->as_VMReg()->next() ); + reg_def VSR52_J(SOC, SOE, Op_RegF, 52, VSR52->as_VMReg()->next(2)); + reg_def VSR52_K(SOC, SOE, Op_RegF, 52, VSR52->as_VMReg()->next(3)); + + reg_def VSR53 (SOC, SOE, Op_RegF, 53, VSR53->as_VMReg() ); + reg_def VSR53_H(SOC, SOE, Op_RegF, 53, VSR53->as_VMReg()->next() ); + reg_def VSR53_J(SOC, SOE, Op_RegF, 53, VSR53->as_VMReg()->next(2)); + reg_def VSR53_K(SOC, SOE, Op_RegF, 53, VSR53->as_VMReg()->next(3)); + + reg_def VSR54 (SOC, SOE, Op_RegF, 54, VSR54->as_VMReg() ); + reg_def VSR54_H(SOC, SOE, Op_RegF, 54, VSR54->as_VMReg()->next() ); + reg_def VSR54_J(SOC, SOE, Op_RegF, 54, VSR54->as_VMReg()->next(2)); + reg_def VSR54_K(SOC, SOE, Op_RegF, 54, VSR54->as_VMReg()->next(3)); + + reg_def VSR55 (SOC, SOE, Op_RegF, 55, VSR55->as_VMReg() ); + reg_def VSR55_H(SOC, SOE, Op_RegF, 55, VSR55->as_VMReg()->next() ); + reg_def VSR55_J(SOC, SOE, Op_RegF, 55, VSR55->as_VMReg()->next(2)); + reg_def VSR55_K(SOC, SOE, Op_RegF, 55, VSR55->as_VMReg()->next(3)); + + reg_def VSR56 (SOC, SOE, Op_RegF, 56, VSR56->as_VMReg() ); + reg_def VSR56_H(SOC, SOE, Op_RegF, 56, VSR56->as_VMReg()->next() ); + reg_def VSR56_J(SOC, SOE, Op_RegF, 56, VSR56->as_VMReg()->next(2)); + reg_def VSR56_K(SOC, SOE, Op_RegF, 56, VSR56->as_VMReg()->next(3)); + + reg_def VSR57 (SOC, SOE, Op_RegF, 57, VSR57->as_VMReg() ); + reg_def VSR57_H(SOC, SOE, Op_RegF, 57, VSR57->as_VMReg()->next() ); + reg_def VSR57_J(SOC, SOE, Op_RegF, 57, VSR57->as_VMReg()->next(2)); + reg_def VSR57_K(SOC, SOE, Op_RegF, 57, VSR57->as_VMReg()->next(3)); + + reg_def VSR58 (SOC, SOE, Op_RegF, 58, VSR58->as_VMReg() ); + reg_def VSR58_H(SOC, SOE, Op_RegF, 58, VSR58->as_VMReg()->next() ); + reg_def VSR58_J(SOC, SOE, Op_RegF, 58, VSR58->as_VMReg()->next(2)); + reg_def VSR58_K(SOC, SOE, Op_RegF, 58, VSR58->as_VMReg()->next(3)); + + reg_def VSR59 (SOC, SOE, Op_RegF, 59, VSR59->as_VMReg() ); + reg_def VSR59_H(SOC, SOE, Op_RegF, 59, VSR59->as_VMReg()->next() ); + reg_def VSR59_J(SOC, SOE, Op_RegF, 59, VSR59->as_VMReg()->next(2)); + reg_def VSR59_K(SOC, SOE, Op_RegF, 59, VSR59->as_VMReg()->next(3)); + + reg_def VSR60 (SOC, SOE, Op_RegF, 60, VSR60->as_VMReg() ); + reg_def VSR60_H(SOC, SOE, Op_RegF, 60, VSR60->as_VMReg()->next() ); + reg_def VSR60_J(SOC, SOE, Op_RegF, 60, VSR60->as_VMReg()->next(2)); + reg_def VSR60_K(SOC, SOE, Op_RegF, 60, VSR60->as_VMReg()->next(3)); + + reg_def VSR61 (SOC, SOE, Op_RegF, 61, VSR61->as_VMReg() ); + reg_def VSR61_H(SOC, SOE, Op_RegF, 61, VSR61->as_VMReg()->next() ); + reg_def VSR61_J(SOC, SOE, Op_RegF, 61, VSR61->as_VMReg()->next(2)); + reg_def VSR61_K(SOC, SOE, Op_RegF, 61, VSR61->as_VMReg()->next(3)); + + reg_def VSR62 (SOC, SOE, Op_RegF, 62, VSR62->as_VMReg() ); + reg_def VSR62_H(SOC, SOE, Op_RegF, 62, VSR62->as_VMReg()->next() ); + reg_def VSR62_J(SOC, SOE, Op_RegF, 62, VSR62->as_VMReg()->next(2)); + reg_def VSR62_K(SOC, SOE, Op_RegF, 62, VSR62->as_VMReg()->next(3)); + + reg_def VSR63 (SOC, SOE, Op_RegF, 63, VSR63->as_VMReg() ); + reg_def VSR63_H(SOC, SOE, Op_RegF, 63, VSR63->as_VMReg()->next() ); + reg_def VSR63_J(SOC, SOE, Op_RegF, 63, VSR63->as_VMReg()->next(2)); + reg_def VSR63_K(SOC, SOE, Op_RegF, 63, 
VSR63->as_VMReg()->next(3)); // ---------------------------- // Specify priority of register selection within phases of register @@ -441,8 +696,74 @@ alloc_class chunk1 ( ); alloc_class chunk2 ( - // Chunk2 contains *all* 8 condition code registers. + VSR0 , VSR0_H , VSR0_J , VSR0_K , + VSR1 , VSR1_H , VSR1_J , VSR1_K , + VSR2 , VSR2_H , VSR2_J , VSR2_K , + VSR3 , VSR3_H , VSR3_J , VSR3_K , + VSR4 , VSR4_H , VSR4_J , VSR4_K , + VSR5 , VSR5_H , VSR5_J , VSR5_K , + VSR6 , VSR6_H , VSR6_J , VSR6_K , + VSR7 , VSR7_H , VSR7_J , VSR7_K , + VSR8 , VSR8_H , VSR8_J , VSR8_K , + VSR9 , VSR9_H , VSR9_J , VSR9_K , + VSR10, VSR10_H, VSR10_J, VSR10_K, + VSR11, VSR11_H, VSR11_J, VSR11_K, + VSR12, VSR12_H, VSR12_J, VSR12_K, + VSR13, VSR13_H, VSR13_J, VSR13_K, + VSR14, VSR14_H, VSR14_J, VSR14_K, + VSR15, VSR15_H, VSR15_J, VSR15_K, + VSR16, VSR16_H, VSR16_J, VSR16_K, + VSR17, VSR17_H, VSR17_J, VSR17_K, + VSR18, VSR18_H, VSR18_J, VSR18_K, + VSR19, VSR19_H, VSR19_J, VSR19_K, + VSR20, VSR20_H, VSR20_J, VSR20_K, + VSR21, VSR21_H, VSR21_J, VSR21_K, + VSR22, VSR22_H, VSR22_J, VSR22_K, + VSR23, VSR23_H, VSR23_J, VSR23_K, + VSR24, VSR24_H, VSR24_J, VSR24_K, + VSR25, VSR25_H, VSR25_J, VSR25_K, + VSR26, VSR26_H, VSR26_J, VSR26_K, + VSR27, VSR27_H, VSR27_J, VSR27_K, + VSR28, VSR28_H, VSR28_J, VSR28_K, + VSR29, VSR29_H, VSR29_J, VSR29_K, + VSR30, VSR30_H, VSR30_J, VSR30_K, + VSR31, VSR31_H, VSR31_J, VSR31_K, + VSR32, VSR32_H, VSR32_J, VSR32_K, + VSR33, VSR33_H, VSR33_J, VSR33_K, + VSR34, VSR34_H, VSR34_J, VSR34_K, + VSR35, VSR35_H, VSR35_J, VSR35_K, + VSR36, VSR36_H, VSR36_J, VSR36_K, + VSR37, VSR37_H, VSR37_J, VSR37_K, + VSR38, VSR38_H, VSR38_J, VSR38_K, + VSR39, VSR39_H, VSR39_J, VSR39_K, + VSR40, VSR40_H, VSR40_J, VSR40_K, + VSR41, VSR41_H, VSR41_J, VSR41_K, + VSR42, VSR42_H, VSR42_J, VSR42_K, + VSR43, VSR43_H, VSR43_J, VSR43_K, + VSR44, VSR44_H, VSR44_J, VSR44_K, + VSR45, VSR45_H, VSR45_J, VSR45_K, + VSR46, VSR46_H, VSR46_J, VSR46_K, + VSR47, VSR47_H, VSR47_J, VSR47_K, + VSR48, VSR48_H, VSR48_J, VSR48_K, + VSR49, VSR49_H, VSR49_J, VSR49_K, + VSR50, VSR50_H, VSR50_J, VSR50_K, + VSR51, VSR51_H, VSR51_J, VSR51_K, + VSR52, VSR52_H, VSR52_J, VSR52_K, + VSR53, VSR53_H, VSR53_J, VSR53_K, + VSR54, VSR54_H, VSR54_J, VSR54_K, + VSR55, VSR55_H, VSR55_J, VSR55_K, + VSR56, VSR56_H, VSR56_J, VSR56_K, + VSR57, VSR57_H, VSR57_J, VSR57_K, + VSR58, VSR58_H, VSR58_J, VSR58_K, + VSR59, VSR59_H, VSR59_J, VSR59_K, + VSR60, VSR60_H, VSR60_J, VSR60_K, + VSR61, VSR61_H, VSR61_J, VSR61_K, + VSR62, VSR62_H, VSR62_J, VSR62_K, + VSR63, VSR63_H, VSR63_J, VSR63_K +); +alloc_class chunk3 ( + // Chunk2 contains *all* 8 condition code registers. CR0, CR1, CR2, @@ -453,73 +774,6 @@ alloc_class chunk2 ( CR7 ); -alloc_class chunk3 ( - VSR0, - VSR1, - VSR2, - VSR3, - VSR4, - VSR5, - VSR6, - VSR7, - VSR8, - VSR9, - VSR10, - VSR11, - VSR12, - VSR13, - VSR14, - VSR15, - VSR16, - VSR17, - VSR18, - VSR19, - VSR20, - VSR21, - VSR22, - VSR23, - VSR24, - VSR25, - VSR26, - VSR27, - VSR28, - VSR29, - VSR30, - VSR31, - VSR32, - VSR33, - VSR34, - VSR35, - VSR36, - VSR37, - VSR38, - VSR39, - VSR40, - VSR41, - VSR42, - VSR43, - VSR44, - VSR45, - VSR46, - VSR47, - VSR48, - VSR49, - VSR50, - VSR51, - VSR52, - VSR53, - VSR54, - VSR55, - VSR56, - VSR57, - VSR58, - VSR59, - VSR60, - VSR61, - VSR62, - VSR63 -); - alloc_class chunk4 ( // special registers // These registers are not allocated, but used for nodes generated by postalloc expand. 
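Illustrative aside, not part of the patch: the register definitions above model each 128-bit VSR as four 32-bit slots (VSRn, VSRn_H, VSRn_J, VSRn_K), so a VecX value is visible to the register allocator as four consecutive slots of one physical register. A minimal standalone C++ sketch of that slot arithmetic, using hypothetical helper names (kSlotsPerVsr, vsr_of, sub_slot_of) that do not exist in HotSpot:

#include <cstdio>

// Mirror of the 4-slots-per-VSR encoding used in the definitions above:
// slot 4*n is VSRn, followed by VSRn_H, VSRn_J and VSRn_K.
static const int kSlotsPerVsr = 4;

static int vsr_of(int slot)      { return slot / kSlotsPerVsr; }  // owning VSR number
static int sub_slot_of(int slot) { return slot % kSlotsPerVsr; }  // 0=base, 1=_H, 2=_J, 3=_K

int main() {
  const int base = 32 * kSlotsPerVsr;  // first slot of VSR32
  // A 16-byte VecX value placed in VSR32 covers four consecutive slots.
  for (int s = base; s < base + kSlotsPerVsr; s++) {
    std::printf("slot %d -> VSR%d, sub-slot %d\n", s, vsr_of(s), sub_slot_of(s));
  }
  return 0;
}

The vs_reg class and the vector spill code in the hunks below list and move these four slots as one unit.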
@@ -910,28 +1164,38 @@ reg_class dbl_reg( // ---------------------------- reg_class vs_reg( - // Attention: Only these ones are saved & restored at safepoint by RegisterSaver. - VSR32, - VSR33, - VSR34, - VSR35, - VSR36, - VSR37, - VSR38, - VSR39, - VSR40, - VSR41, - VSR42, - VSR43, - VSR44, - VSR45, - VSR46, - VSR47, - VSR48, - VSR49, - VSR50, - VSR51 - // VSR52-VSR63 // nv! + VSR32, VSR32_H, VSR32_J, VSR32_K, + VSR33, VSR33_H, VSR33_J, VSR33_K, + VSR34, VSR34_H, VSR34_J, VSR34_K, + VSR35, VSR35_H, VSR35_J, VSR35_K, + VSR36, VSR36_H, VSR36_J, VSR36_K, + VSR37, VSR37_H, VSR37_J, VSR37_K, + VSR38, VSR38_H, VSR38_J, VSR38_K, + VSR39, VSR39_H, VSR39_J, VSR39_K, + VSR40, VSR40_H, VSR40_J, VSR40_K, + VSR41, VSR41_H, VSR41_J, VSR41_K, + VSR42, VSR42_H, VSR42_J, VSR42_K, + VSR43, VSR43_H, VSR43_J, VSR43_K, + VSR44, VSR44_H, VSR44_J, VSR44_K, + VSR45, VSR45_H, VSR45_J, VSR45_K, + VSR46, VSR46_H, VSR46_J, VSR46_K, + VSR47, VSR47_H, VSR47_J, VSR47_K, + VSR48, VSR48_H, VSR48_J, VSR48_K, + VSR49, VSR49_H, VSR49_J, VSR49_K, + VSR50, VSR50_H, VSR50_J, VSR50_K, + VSR51, VSR51_H, VSR51_J, VSR51_K, + VSR52, VSR52_H, VSR52_J, VSR52_K, // non-volatile + VSR53, VSR53_H, VSR53_J, VSR53_K, // non-volatile + VSR54, VSR54_H, VSR54_J, VSR54_K, // non-volatile + VSR55, VSR55_H, VSR55_J, VSR55_K, // non-volatile + VSR56, VSR56_H, VSR56_J, VSR56_K, // non-volatile + VSR57, VSR57_H, VSR57_J, VSR57_K, // non-volatile + VSR58, VSR58_H, VSR58_J, VSR58_K, // non-volatile + VSR59, VSR59_H, VSR59_J, VSR59_K, // non-volatile + VSR60, VSR60_H, VSR60_J, VSR60_K, // non-volatile + VSR61, VSR61_H, VSR61_J, VSR61_K, // non-volatile + VSR62, VSR62_H, VSR62_J, VSR62_K, // non-volatile + VSR63, VSR63_H, VSR63_J, VSR63_K // non-volatile ); %} @@ -1656,17 +1920,19 @@ static enum RC rc_class(OptoReg::Name reg) { if (reg == OptoReg::Bad) return rc_bad; // We have 64 integer register halves, starting at index 0. - if (reg < 64) return rc_int; + STATIC_ASSERT((int)ConcreteRegisterImpl::max_gpr == (int)MachRegisterNumbers::F0_num); + if (reg < ConcreteRegisterImpl::max_gpr) return rc_int; // We have 64 floating-point register halves, starting at index 64. - if (reg < 64+64) return rc_float; + STATIC_ASSERT((int)ConcreteRegisterImpl::max_fpr == (int)MachRegisterNumbers::VSR0_num); + if (reg < ConcreteRegisterImpl::max_fpr) return rc_float; // We have 64 vector-scalar registers, starting at index 128. - if (reg < 64+64+64) return rc_vs; - - // Between float regs & stack are the flags regs. - assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags"); + STATIC_ASSERT((int)ConcreteRegisterImpl::max_vsr == (int)MachRegisterNumbers::CR0_num); + if (reg < ConcreteRegisterImpl::max_vsr) return rc_vs; + // Condition and special purpose registers are not allocated. We only accept stack from here. + assert(OptoReg::is_stack(reg), "what else is it?"); return rc_stack; } @@ -1743,21 +2009,53 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r else if (src_lo_rc == rc_vs && dst_lo_rc == rc_stack) { VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]); int dst_offset = ra_->reg2offset(dst_lo); - if (masm) { - __ addi(R0, R1_SP, dst_offset); - __ stxvd2x(Rsrc, R0); + if (PowerArchitecturePPC64 >= 9) { + if (is_aligned(dst_offset, 16)) { + if (masm) { + __ stxv(Rsrc, dst_offset, R1_SP); // matches storeV16_Power9 + } + size += 4; + } else { + // Other alignment can be used by Vector API (VectorPayload in rearrangeOp, + // observed with VectorRearrangeTest.java on Power9). 
+ if (masm) { + __ addi(R0, R1_SP, dst_offset); + __ stxvx(Rsrc, R0); // matches storeV16_Power9 (regarding element ordering) + } + size += 8; + } + } else { + if (masm) { + __ addi(R0, R1_SP, dst_offset); + __ stxvd2x(Rsrc, R0); // matches storeV16_Power8 + } + size += 8; } - size += 8; } // Memory->VectorSRegister Spill. else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vs) { VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]); int src_offset = ra_->reg2offset(src_lo); - if (masm) { - __ addi(R0, R1_SP, src_offset); - __ lxvd2x(Rdst, R0); + if (PowerArchitecturePPC64 >= 9) { + if (is_aligned(src_offset, 16)) { + if (masm) { + __ lxv(Rdst, src_offset, R1_SP); + } + size += 4; + } else { + if (masm) { + __ addi(R0, R1_SP, src_offset); + __ lxvx(Rdst, R0); + } + size += 8; + } + } else { + if (masm) { + __ addi(R0, R1_SP, src_offset); + __ lxvd2x(Rdst, R0); + } + size += 8; } - size += 8; } // VectorSRegister->VectorSRegister. else if (src_lo_rc == rc_vs && dst_lo_rc == rc_vs) { @@ -2064,10 +2362,6 @@ bool Matcher::match_rule_supported(int opcode) { } switch (opcode) { - case Op_SqrtD: - return VM_Version::has_fsqrt(); - case Op_RoundDoubleMode: - return VM_Version::has_vsx(); case Op_CountLeadingZerosI: case Op_CountLeadingZerosL: return UseCountLeadingZerosInstructionsPPC64; @@ -2076,11 +2370,10 @@ bool Matcher::match_rule_supported(int opcode) { return (UseCountLeadingZerosInstructionsPPC64 || UseCountTrailingZerosInstructionsPPC64); case Op_PopCountI: case Op_PopCountL: - return (UsePopCountInstruction && VM_Version::has_popcntw()); + return UsePopCountInstruction; case Op_ConvF2HF: case Op_ConvHF2F: return VM_Version::supports_float16(); - case Op_AddVB: case Op_AddVS: case Op_AddVI: @@ -2106,6 +2399,18 @@ bool Matcher::match_rule_supported(int opcode) { case Op_SubVL: case Op_MulVI: case Op_RoundDoubleModeV: + case Op_MinV: + case Op_MaxV: + case Op_AndV: + case Op_OrV: + case Op_XorV: + case Op_AddReductionVI: + case Op_MulReductionVI: + case Op_AndReductionV: + case Op_OrReductionV: + case Op_XorReductionV: + case Op_MinReductionV: + case Op_MaxReductionV: return SuperwordUseVSX; case Op_PopCountVI: case Op_PopCountVL: @@ -2147,6 +2452,22 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { return false; } + // Special cases + switch (opcode) { + // Reductions only support INT at the moment. + case Op_AddReductionVI: + case Op_MulReductionVI: + case Op_AndReductionV: + case Op_OrReductionV: + case Op_XorReductionV: + case Op_MinReductionV: + case Op_MaxReductionV: + return bt == T_INT; + // MaxV, MinV need types == INT || LONG. + case Op_MaxV: + case Op_MinV: + return bt == T_INT || bt == T_LONG; + } return true; // Per default match rules are supported. } @@ -2265,52 +2586,11 @@ bool Matcher::is_generic_vector(MachOper* opnd) { return false; } -// Constants for c2c and c calling conventions. 
- -const MachRegisterNumbers iarg_reg[8] = { - R3_num, R4_num, R5_num, R6_num, - R7_num, R8_num, R9_num, R10_num -}; - -const MachRegisterNumbers farg_reg[13] = { - F1_num, F2_num, F3_num, F4_num, - F5_num, F6_num, F7_num, F8_num, - F9_num, F10_num, F11_num, F12_num, - F13_num -}; - -const MachRegisterNumbers vsarg_reg[64] = { - VSR0_num, VSR1_num, VSR2_num, VSR3_num, - VSR4_num, VSR5_num, VSR6_num, VSR7_num, - VSR8_num, VSR9_num, VSR10_num, VSR11_num, - VSR12_num, VSR13_num, VSR14_num, VSR15_num, - VSR16_num, VSR17_num, VSR18_num, VSR19_num, - VSR20_num, VSR21_num, VSR22_num, VSR23_num, - VSR24_num, VSR23_num, VSR24_num, VSR25_num, - VSR28_num, VSR29_num, VSR30_num, VSR31_num, - VSR32_num, VSR33_num, VSR34_num, VSR35_num, - VSR36_num, VSR37_num, VSR38_num, VSR39_num, - VSR40_num, VSR41_num, VSR42_num, VSR43_num, - VSR44_num, VSR45_num, VSR46_num, VSR47_num, - VSR48_num, VSR49_num, VSR50_num, VSR51_num, - VSR52_num, VSR53_num, VSR54_num, VSR55_num, - VSR56_num, VSR57_num, VSR58_num, VSR59_num, - VSR60_num, VSR61_num, VSR62_num, VSR63_num -}; - -const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]); - -const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]); - -const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]); - // Return whether or not this register is ever used as an argument. This // function is used on startup to build the trampoline stubs in generateOptoStub. // Registers not mentioned will be killed by the VM call in the trampoline, and // arguments in those registers not be available to the callee. bool Matcher::can_be_java_arg(int reg) { - // We return true for all registers contained in iarg_reg[] and - // farg_reg[] and their virtual halves. // We must include the virtual halves in order to get STDs and LDs // instead of STWs and LWs in the trampoline stubs. 
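For reference on the spill-copy hunk above (MachSpillCopyNode::implementation): on Power9 a 16-byte-aligned stack displacement permits the single DQ-form stxv/lxv (4 bytes of code), an unaligned displacement falls back to addi plus the indexed stxvx/lxvx (8 bytes), and pre-Power9 keeps the addi plus stxvd2x/lxvd2x path. A minimal C++ sketch of that selection follows; the helper name and the is_power9 flag are illustrative assumptions, while the assembler calls and size accounting mirror the patch.

// Illustrative sketch only, not part of the patch.
static int vector_spill_store_size(C2_MacroAssembler* masm, VectorSRegister src,
                                   int dst_offset, bool is_power9) {
  if (is_power9 && is_aligned(dst_offset, 16)) {
    if (masm != nullptr) {
      masm->stxv(src, dst_offset, R1_SP);  // DQ-form, displacement must be 16-aligned
    }
    return 4;                              // one instruction
  }
  if (masm != nullptr) {
    masm->addi(R0, R1_SP, dst_offset);     // materialize the target address
    if (is_power9) {
      masm->stxvx(src, R0);                // indexed form, works for any alignment
    } else {
      masm->stxvd2x(src, R0);              // Power8 path; LE element order differs
    }
  }
  return 8;                                // addi + store
}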
@@ -2928,85 +3208,42 @@ encode %{ %} enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{ + // use isel instruction with Power 7 + cmpP_reg_imm16Node *n_compare = new cmpP_reg_imm16Node(); + encodeP_subNode *n_sub_base = new encodeP_subNode(); + encodeP_shiftNode *n_shift = new encodeP_shiftNode(); + cond_set_0_oopNode *n_cond_set = new cond_set_0_oopNode(); - if (VM_Version::has_isel()) { - // use isel instruction with Power 7 - cmpP_reg_imm16Node *n_compare = new cmpP_reg_imm16Node(); - encodeP_subNode *n_sub_base = new encodeP_subNode(); - encodeP_shiftNode *n_shift = new encodeP_shiftNode(); - cond_set_0_oopNode *n_cond_set = new cond_set_0_oopNode(); - - n_compare->add_req(n_region, n_src); - n_compare->_opnds[0] = op_crx; - n_compare->_opnds[1] = op_src; - n_compare->_opnds[2] = new immL16Oper(0); - - n_sub_base->add_req(n_region, n_src); - n_sub_base->_opnds[0] = op_dst; - n_sub_base->_opnds[1] = op_src; - n_sub_base->_bottom_type = _bottom_type; - - n_shift->add_req(n_region, n_sub_base); - n_shift->_opnds[0] = op_dst; - n_shift->_opnds[1] = op_dst; - n_shift->_bottom_type = _bottom_type; - - n_cond_set->add_req(n_region, n_compare, n_shift); - n_cond_set->_opnds[0] = op_dst; - n_cond_set->_opnds[1] = op_crx; - n_cond_set->_opnds[2] = op_dst; - n_cond_set->_bottom_type = _bottom_type; - - ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); - ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - - nodes->push(n_compare); - nodes->push(n_sub_base); - nodes->push(n_shift); - nodes->push(n_cond_set); + n_compare->add_req(n_region, n_src); + n_compare->_opnds[0] = op_crx; + n_compare->_opnds[1] = op_src; + n_compare->_opnds[2] = new immL16Oper(0); - } else { - // before Power 7 - moveRegNode *n_move = new moveRegNode(); - cmpP_reg_imm16Node *n_compare = new cmpP_reg_imm16Node(); - encodeP_shiftNode *n_shift = new encodeP_shiftNode(); - cond_sub_baseNode *n_sub_base = new cond_sub_baseNode(); - - n_move->add_req(n_region, n_src); - n_move->_opnds[0] = op_dst; - n_move->_opnds[1] = op_src; - ra_->set_oop(n_move, true); // Until here, 'n_move' still produces an oop. 
- - n_compare->add_req(n_region, n_src); - n_compare->add_prec(n_move); - - n_compare->_opnds[0] = op_crx; - n_compare->_opnds[1] = op_src; - n_compare->_opnds[2] = new immL16Oper(0); - - n_sub_base->add_req(n_region, n_compare, n_src); - n_sub_base->_opnds[0] = op_dst; - n_sub_base->_opnds[1] = op_crx; - n_sub_base->_opnds[2] = op_src; - n_sub_base->_bottom_type = _bottom_type; - - n_shift->add_req(n_region, n_sub_base); - n_shift->_opnds[0] = op_dst; - n_shift->_opnds[1] = op_dst; - n_shift->_bottom_type = _bottom_type; - - ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); - ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - - nodes->push(n_move); - nodes->push(n_compare); - nodes->push(n_sub_base); - nodes->push(n_shift); - } + n_sub_base->add_req(n_region, n_src); + n_sub_base->_opnds[0] = op_dst; + n_sub_base->_opnds[1] = op_src; + n_sub_base->_bottom_type = _bottom_type; + + n_shift->add_req(n_region, n_sub_base); + n_shift->_opnds[0] = op_dst; + n_shift->_opnds[1] = op_dst; + n_shift->_bottom_type = _bottom_type; + + n_cond_set->add_req(n_region, n_compare, n_shift); + n_cond_set->_opnds[0] = op_dst; + n_cond_set->_opnds[1] = op_crx; + n_cond_set->_opnds[2] = op_dst; + n_cond_set->_bottom_type = _bottom_type; + + ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); + ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); + ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); + ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); + + nodes->push(n_compare); + nodes->push(n_sub_base); + nodes->push(n_shift); + nodes->push(n_cond_set); assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed. 
%} @@ -3046,56 +3283,33 @@ encode %{ n_shift->_opnds[1] = op_src; n_shift->_bottom_type = _bottom_type; - if (VM_Version::has_isel()) { - // use isel instruction with Power 7 - - decodeN_addNode *n_add_base = new decodeN_addNode(); - n_add_base->add_req(n_region, n_shift); - n_add_base->_opnds[0] = op_dst; - n_add_base->_opnds[1] = op_dst; - n_add_base->_bottom_type = _bottom_type; - - cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode(); - n_cond_set->add_req(n_region, n_compare, n_add_base); - n_cond_set->_opnds[0] = op_dst; - n_cond_set->_opnds[1] = op_crx; - n_cond_set->_opnds[2] = op_dst; - n_cond_set->_bottom_type = _bottom_type; - - assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!"); - ra_->set_oop(n_cond_set, true); - - ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); - ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - - nodes->push(n_compare); - nodes->push(n_shift); - nodes->push(n_add_base); - nodes->push(n_cond_set); + // use isel instruction with Power 7 + decodeN_addNode *n_add_base = new decodeN_addNode(); + n_add_base->add_req(n_region, n_shift); + n_add_base->_opnds[0] = op_dst; + n_add_base->_opnds[1] = op_dst; + n_add_base->_bottom_type = _bottom_type; - } else { - // before Power 7 - cond_add_baseNode *n_add_base = new cond_add_baseNode(); + cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode(); + n_cond_set->add_req(n_region, n_compare, n_add_base); + n_cond_set->_opnds[0] = op_dst; + n_cond_set->_opnds[1] = op_crx; + n_cond_set->_opnds[2] = op_dst; + n_cond_set->_bottom_type = _bottom_type; - n_add_base->add_req(n_region, n_compare, n_shift); - n_add_base->_opnds[0] = op_dst; - n_add_base->_opnds[1] = op_crx; - n_add_base->_opnds[2] = op_dst; - n_add_base->_bottom_type = _bottom_type; + assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!"); + ra_->set_oop(n_cond_set, true); - assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!"); - ra_->set_oop(n_add_base, true); + ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); + ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); + ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); + ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); - ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); - ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); + nodes->push(n_compare); + nodes->push(n_shift); + nodes->push(n_add_base); + nodes->push(n_cond_set); - nodes->push(n_compare); - nodes->push(n_shift); - nodes->push(n_add_base); - } %} enc_class postalloc_expand_decode_oop_not_null(iRegPdst dst, iRegNsrc src) %{ @@ -3120,25 +3334,6 @@ encode %{ nodes->push(n2); %} - enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{ - int cc = $cmp$$cmpcode; - int flags_reg = $crx$$reg; - Label done; - assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding"); - // Branch if not (cmp crx). 
- __ bc(cc_to_inverse_boint(cc), cc_to_biint(cc, flags_reg), done); - __ mr($dst$$Register, $src$$Register); - __ bind(done); - %} - - enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{ - Label done; - assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding"); - // Branch if not (cmp crx). - __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done); - __ li($dst$$Register, $src$$constant); - __ bind(done); - %} // This enc_class is needed so that scheduler gets proper // input mapping for latency computation. @@ -4125,6 +4320,15 @@ operand immL16Alg4() %{ interface(CONST_INTER); %} +// Long Immediate: 16-bit, 16-aligned +operand immL16Alg16() %{ + predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0xf) == 0)); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // Long Immediate: 32-bit, where lowest 16 bits are 0x0000. operand immL32hi16() %{ predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L)); @@ -4643,6 +4847,20 @@ operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{ %} %} +// Indirect with 16-aligned Offset +operand indOffset16Alg16(iRegPsrc reg, immL16Alg16 offset) %{ + constraint(ALLOC_IN_RC(bits64_reg_ro)); + match(AddP reg offset); + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($offset); + %} +%} + //----------Complex Operands for Compressed OOPs------------------------------- // Compressed OOPs with narrow_oop_shift == 0. @@ -4852,6 +5070,7 @@ operand cmpOp() %{ opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass); // Memory operand where offsets are 4-aligned. Required for ld, std. opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass); +opclass memoryAlg16(indirect, indOffset16Alg16); opclass indirectMemory(indirect, indirectNarrow); // Special opclass for I and ConvL2I. @@ -5392,8 +5611,9 @@ instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{ %} // Load Aligned Packed Byte -instruct loadV16(vecX dst, indirect mem) %{ - predicate(n->as_LoadVector()->memory_size() == 16); +// Note: The Power8 instruction loads the contents in a special order in Little Endian mode. +instruct loadV16_Power8(vecX dst, indirect mem) %{ + predicate(n->as_LoadVector()->memory_size() == 16 && PowerArchitecturePPC64 == 8); match(Set dst (LoadVector mem)); ins_cost(MEMORY_REF_COST); @@ -5405,6 +5625,19 @@ instruct loadV16(vecX dst, indirect mem) %{ ins_pipe(pipe_class_default); %} +instruct loadV16_Power9(vecX dst, memoryAlg16 mem) %{ + predicate(n->as_LoadVector()->memory_size() == 16 && PowerArchitecturePPC64 >= 9); + match(Set dst (LoadVector mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "LXV $dst, $mem \t// load 16-byte Vector" %} + size(4); + ins_encode %{ + __ lxv($dst$$VectorSRegister, $mem$$disp, $mem$$Register); + %} + ins_pipe(pipe_class_default); +%} + // Load Range, range = array length (=jint) instruct loadRange(iRegIdst dst, memory mem) %{ match(Set dst (LoadRange mem)); @@ -6418,8 +6651,9 @@ instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{ %} // Store Packed Byte long register to memory -instruct storeV16(indirect mem, vecX src) %{ - predicate(n->as_StoreVector()->memory_size() == 16); +// Note: The Power8 instruction stores the contents in a special order in Little Endian mode. 
+instruct storeV16_Power8(indirect mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16 && PowerArchitecturePPC64 == 8); match(Set mem (StoreVector mem src)); ins_cost(MEMORY_REF_COST); @@ -6431,6 +6665,19 @@ instruct storeV16(indirect mem, vecX src) %{ ins_pipe(pipe_class_default); %} +instruct storeV16_Power9(memoryAlg16 mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16 && PowerArchitecturePPC64 >= 9); + match(Set mem (StoreVector mem src)); + ins_cost(MEMORY_REF_COST); + + format %{ "STXV $mem, $src \t// store 16-byte Vector" %} + size(4); + ins_encode %{ + __ stxv($src$$VectorSRegister, $mem$$disp, $mem$$Register); + %} + ins_pipe(pipe_class_default); +%} + // Reinterpret: only one vector size used: either L or X instruct reinterpretL(iRegLdst dst) %{ match(Set dst (VectorReinterpret dst)); @@ -6787,7 +7034,7 @@ instruct decodeN_Disjoint_isel_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{ effect(TEMP_DEF dst, TEMP crx); predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull && n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) && - CompressedOops::base_disjoint() && VM_Version::has_isel()); + CompressedOops::base_disjoint()); ins_cost(3 * DEFAULT_COST); format %{ "DecodeN $dst, $src \t// decode with disjoint base using isel" %} @@ -7167,7 +7414,6 @@ instruct membar_CPUOrder() %{ // Cmove using isel. instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{ match(Set dst (CMoveI (Binary cmp crx) (Binary dst src))); - predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} @@ -7182,37 +7428,9 @@ instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) ins_pipe(pipe_class_default); %} -instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{ - match(Set dst (CMoveI (Binary cmp crx) (Binary dst src))); - predicate(!VM_Version::has_isel()); - ins_cost(DEFAULT_COST+BRANCH_COST); - - ins_variable_size_depending_on_alignment(true); - - format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} - // Worst case is branch + move + stop, no stop without scheduler - size(8); - ins_encode( enc_cmove_reg(dst, crx, src, cmp) ); - ins_pipe(pipe_class_default); -%} - -instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{ - match(Set dst (CMoveI (Binary cmp crx) (Binary dst src))); - ins_cost(DEFAULT_COST+BRANCH_COST); - - ins_variable_size_depending_on_alignment(true); - - format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} - // Worst case is branch + move + stop, no stop without scheduler - size(8); - ins_encode( enc_cmove_imm(dst, crx, src, cmp) ); - ins_pipe(pipe_class_default); -%} - // Cmove using isel. instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{ match(Set dst (CMoveL (Binary cmp crx) (Binary dst src))); - predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} @@ -7227,37 +7445,9 @@ instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) ins_pipe(pipe_class_default); %} -instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{ - match(Set dst (CMoveL (Binary cmp crx) (Binary dst src))); - predicate(!VM_Version::has_isel()); - ins_cost(DEFAULT_COST+BRANCH_COST); - - ins_variable_size_depending_on_alignment(true); - - format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} - // Worst case is branch + move + stop, no stop without scheduler. 
- size(8); - ins_encode( enc_cmove_reg(dst, crx, src, cmp) ); - ins_pipe(pipe_class_default); -%} - -instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{ - match(Set dst (CMoveL (Binary cmp crx) (Binary dst src))); - ins_cost(DEFAULT_COST+BRANCH_COST); - - ins_variable_size_depending_on_alignment(true); - - format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} - // Worst case is branch + move + stop, no stop without scheduler. - size(8); - ins_encode( enc_cmove_imm(dst, crx, src, cmp) ); - ins_pipe(pipe_class_default); -%} - // Cmove using isel. instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{ match(Set dst (CMoveN (Binary cmp crx) (Binary dst src))); - predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} @@ -7272,38 +7462,9 @@ instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) ins_pipe(pipe_class_default); %} -// Conditional move for RegN. Only cmov(reg, reg). -instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{ - match(Set dst (CMoveN (Binary cmp crx) (Binary dst src))); - predicate(!VM_Version::has_isel()); - ins_cost(DEFAULT_COST+BRANCH_COST); - - ins_variable_size_depending_on_alignment(true); - - format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} - // Worst case is branch + move + stop, no stop without scheduler. - size(8); - ins_encode( enc_cmove_reg(dst, crx, src, cmp) ); - ins_pipe(pipe_class_default); -%} - -instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{ - match(Set dst (CMoveN (Binary cmp crx) (Binary dst src))); - ins_cost(DEFAULT_COST+BRANCH_COST); - - ins_variable_size_depending_on_alignment(true); - - format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} - // Worst case is branch + move + stop, no stop without scheduler. - size(8); - ins_encode( enc_cmove_imm(dst, crx, src, cmp) ); - ins_pipe(pipe_class_default); -%} - // Cmove using isel. instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{ match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); - predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} @@ -7318,33 +7479,6 @@ instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) ins_pipe(pipe_class_default); %} -instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{ - match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); - predicate(!VM_Version::has_isel()); - ins_cost(DEFAULT_COST+BRANCH_COST); - - ins_variable_size_depending_on_alignment(true); - - format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} - // Worst case is branch + move + stop, no stop without scheduler. - size(8); - ins_encode( enc_cmove_reg(dst, crx, src, cmp) ); - ins_pipe(pipe_class_default); -%} - -instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{ - match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); - ins_cost(DEFAULT_COST+BRANCH_COST); - - ins_variable_size_depending_on_alignment(true); - - format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} - // Worst case is branch + move + stop, no stop without scheduler. 
- size(8); - ins_encode( enc_cmove_imm(dst, crx, src, cmp) ); - ins_pipe(pipe_class_default); -%} - instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{ match(Set dst (CMoveF (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7395,31 +7529,11 @@ instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{ instruct compareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2))); - predicate(VM_Version::has_lqarx()); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, - MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), - $res$$Register, nullptr, true); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - __ sync(); - } - %} - ins_pipe(pipe_class_default); -%} - -instruct compareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ - match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2))); - predicate(!VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump - format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %} - ins_encode %{ - // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, + __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true); if (support_IRIW_for_not_multiple_copy_atomic_cpu) { @@ -7433,31 +7547,11 @@ instruct compareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIs instruct compareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2))); - predicate(VM_Version::has_lqarx()); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, - MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), - $res$$Register, nullptr, true); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - __ sync(); - } - %} - ins_pipe(pipe_class_default); -%} - -instruct compareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ - match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2))); - predicate(!VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump - format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %} - ins_encode %{ - // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. 
- __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, + __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true); if (support_IRIW_for_not_multiple_copy_atomic_cpu) { @@ -7547,26 +7641,12 @@ instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc instruct weakCompareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, - MacroAssembler::MemBarNone, - MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true); - %} - ins_pipe(pipe_class_default); -%} - -instruct weakCompareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ - match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump - format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %} - ins_encode %{ - // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, + __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true); %} @@ -7575,55 +7655,27 @@ instruct weakCompareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iR instruct weakCompareAndSwapB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2))); - predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) ); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, support_IRIW_for_not_multiple_copy_atomic_cpu ? 
MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true); %} ins_pipe(pipe_class_default); %} -instruct weakCompareAndSwapB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ - match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2))); - predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump - format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %} +instruct weakCompareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2))); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); + effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump + format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgb(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, - support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter, - MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true); - %} - ins_pipe(pipe_class_default); -%} - -instruct weakCompareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ - match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); - effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump - format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %} - ins_encode %{ - // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, - MacroAssembler::MemBarNone, - MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true); - %} - ins_pipe(pipe_class_default); -%} - -instruct weakCompareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ - match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump - format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %} - ins_encode %{ - // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. 
- __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, - MacroAssembler::MemBarNone, + __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true); %} ins_pipe(pipe_class_default); @@ -7631,26 +7683,12 @@ instruct weakCompareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iR instruct weakCompareAndSwapS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2))); - predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, - support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter, - MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true); - %} - ins_pipe(pipe_class_default); -%} - -instruct weakCompareAndSwapS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ - match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2))); - predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump - format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %} - ins_encode %{ - // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, + __ cmpxchgh(CR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, nullptr, true, /*weak*/ true); %} @@ -7782,26 +7820,12 @@ instruct weakCompareAndSwapP_acq_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, instruct compareAndExchangeB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); effect(TEMP_DEF res, TEMP cr0); format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %} ins_encode %{ // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. 
- __ cmpxchgb(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, - MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), - noreg, nullptr, true); - %} - ins_pipe(pipe_class_default); -%} - -instruct compareAndExchangeB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{ - match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); - format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %} - ins_encode %{ - // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgb(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0, + __ cmpxchgb(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), noreg, nullptr, true); %} @@ -7810,12 +7834,12 @@ instruct compareAndExchangeB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iR instruct compareAndExchangeB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2))); - predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); effect(TEMP_DEF res, TEMP cr0); format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %} ins_encode %{ // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgb(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + __ cmpxchgb(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), noreg, nullptr, true); if (support_IRIW_for_not_multiple_copy_atomic_cpu) { @@ -7828,48 +7852,15 @@ instruct compareAndExchangeB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, ins_pipe(pipe_class_default); %} -instruct compareAndExchangeB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{ - match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2))); - predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); - format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %} - ins_encode %{ - // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgb(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0, - MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), - noreg, nullptr, true); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
- __ sync(); - } - %} - ins_pipe(pipe_class_default); -%} instruct compareAndExchangeS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); effect(TEMP_DEF res, TEMP cr0); format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %} ins_encode %{ // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgh(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, - MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), - noreg, nullptr, true); - %} - ins_pipe(pipe_class_default); -%} - -instruct compareAndExchangeS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{ - match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); - format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %} - ins_encode %{ - // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgh(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0, + __ cmpxchgh(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), noreg, nullptr, true); %} @@ -7878,32 +7869,12 @@ instruct compareAndExchangeS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iR instruct compareAndExchangeS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2))); - predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); effect(TEMP_DEF res, TEMP cr0); format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %} ins_encode %{ // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgh(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, - MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), - noreg, nullptr, true); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
- __ sync(); - } - %} - ins_pipe(pipe_class_default); -%} - -instruct compareAndExchangeS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{ - match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2))); - predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); - format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %} - ins_encode %{ - // CmpxchgX sets CR0 to cmpX(src1, src2) and Rres to 'true'/'false'. - __ cmpxchgh(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0, + __ cmpxchgh(CR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), noreg, nullptr, true); if (support_IRIW_for_not_multiple_copy_atomic_cpu) { @@ -8058,7 +8029,6 @@ instruct compareAndExchangeP_acq_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, instruct getAndAddB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndAddB mem_ptr src)); - predicate(VM_Version::has_lqarx()); effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndAddB $res, $mem_ptr, $src" %} ins_encode %{ @@ -8073,26 +8043,8 @@ instruct getAndAddB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr ins_pipe(pipe_class_default); %} -instruct getAndAddB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{ - match(Set res (GetAndAddB mem_ptr src)); - predicate(!VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); - format %{ "GetAndAddB $res, $mem_ptr, $src" %} - ins_encode %{ - __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register, - R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - __ sync(); - } - %} - ins_pipe(pipe_class_default); -%} - instruct getAndAddS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndAddS mem_ptr src)); - predicate(VM_Version::has_lqarx()); effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndAddS $res, $mem_ptr, $src" %} ins_encode %{ @@ -8107,22 +8059,6 @@ instruct getAndAddS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr ins_pipe(pipe_class_default); %} -instruct getAndAddS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{ - match(Set res (GetAndAddS mem_ptr src)); - predicate(!VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); - format %{ "GetAndAddS $res, $mem_ptr, $src" %} - ins_encode %{ - __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register, - R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - __ sync(); - } - %} - ins_pipe(pipe_class_default); -%} instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndAddI mem_ptr src)); @@ -8158,7 +8094,6 @@ instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr instruct getAndSetB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetB mem_ptr src)); - 
predicate(VM_Version::has_lqarx()); effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndSetB $res, $mem_ptr, $src" %} ins_encode %{ @@ -8173,26 +8108,8 @@ instruct getAndSetB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr ins_pipe(pipe_class_default); %} -instruct getAndSetB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{ - match(Set res (GetAndSetB mem_ptr src)); - predicate(!VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); - format %{ "GetAndSetB $res, $mem_ptr, $src" %} - ins_encode %{ - __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register, - R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - __ sync(); - } - %} - ins_pipe(pipe_class_default); -%} - instruct getAndSetS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetS mem_ptr src)); - predicate(VM_Version::has_lqarx()); effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndSetS $res, $mem_ptr, $src" %} ins_encode %{ @@ -8207,22 +8124,6 @@ instruct getAndSetS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr ins_pipe(pipe_class_default); %} -instruct getAndSetS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{ - match(Set res (GetAndSetS mem_ptr src)); - predicate(!VM_Version::has_lqarx()); - effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); - format %{ "GetAndSetS $res, $mem_ptr, $src" %} - ins_encode %{ - __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register, - R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - __ sync(); - } - %} - ins_pipe(pipe_class_default); -%} instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetI mem_ptr src)); @@ -9511,7 +9412,6 @@ instruct negD_absD_reg(regD dst, regD src) %{ ins_pipe(pipe_class_default); %} -// VM_Version::has_fsqrt() decides if this node will be used. // Sqrt float double precision instruct sqrtD_reg(regD dst, regD src) %{ match(Set dst (SqrtD src)); @@ -9526,7 +9426,6 @@ instruct sqrtD_reg(regD dst, regD src) %{ // Single-precision sqrt. instruct sqrtF_reg(regF dst, regF src) %{ match(Set dst (SqrtF src)); - predicate(VM_Version::has_fsqrts()); ins_cost(DEFAULT_COST); format %{ "FSQRTS $dst, $src" %} @@ -10028,7 +9927,6 @@ instruct andcL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ instruct moveL2D_reg(regD dst, iRegLsrc src) %{ match(Set dst (MoveL2D src)); - predicate(VM_Version::has_mtfprd()); format %{ "MTFPRD $dst, $src" %} size(4); @@ -10148,18 +10046,6 @@ instruct moveI2F_reg_stack(stackSlotF dst, iRegIsrc src) %{ ins_pipe(pipe_class_memory); %} -//----------Moves between long and float - -instruct moveF2L_reg_stack(stackSlotL dst, regF src) %{ - // no match-rule, false predicate - effect(DEF dst, USE src); - predicate(false); - - format %{ "storeD $src, $dst \t// STACK" %} - size(4); - ins_encode( enc_stfd(src, dst) ); - ins_pipe(pipe_class_default); -%} //----------Moves between long and double @@ -10185,27 +10071,6 @@ instruct moveD2L_reg_stack(stackSlotL dst, regD src) %{ ins_pipe(pipe_class_memory); %} -// Move long value from long stack-location to double register. 
-instruct moveL2D_stack_reg(regD dst, stackSlotL src) %{ - match(Set dst (MoveL2D src)); - ins_cost(MEMORY_REF_COST); - - format %{ "LFD $dst, $src \t// MoveL2D" %} - size(4); - ins_encode( enc_lfd(dst, src) ); - ins_pipe(pipe_class_memory); -%} - -// Move long value from long register to double stack-location. -instruct moveL2D_reg_stack(stackSlotD dst, iRegLsrc src) %{ - match(Set dst (MoveL2D src)); - ins_cost(MEMORY_REF_COST); - - format %{ "STD $src, $dst \t// MoveL2D" %} - size(4); - ins_encode( enc_std(src, dst) ); - ins_pipe(pipe_class_memory); -%} //----------Register Move Instructions----------------------------------------- @@ -10605,59 +10470,6 @@ instruct cmovI_bso_reg(iRegIdst dst, flagsRegSrc crx, regD src) %{ ins_pipe(pipe_class_default); %} -instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{ - // no match-rule, false predicate - effect(DEF dst, USE crx, USE mem); - predicate(false); - - format %{ "CmovI $dst, $crx, $mem \t// postalloc expanded" %} - postalloc_expand %{ - // - // replaces - // - // region dst crx mem - // \ | | / - // dst=cmovI_bso_stackSlotL_conLvalue0 - // - // with - // - // region dst - // \ / - // dst=loadConI16(0) - // | - // ^ region dst crx mem - // | \ | | / - // dst=cmovI_bso_stackSlotL - // - - // Create new nodes. - MachNode *m1 = new loadConI16Node(); - MachNode *m2 = new cmovI_bso_stackSlotLNode(); - - // inputs for new nodes - m1->add_req(n_region); - m2->add_req(n_region, n_crx, n_mem); - - // precedences for new nodes - m2->add_prec(m1); - - // operands for new nodes - m1->_opnds[0] = op_dst; - m1->_opnds[1] = new immI16Oper(0); - - m2->_opnds[0] = op_dst; - m2->_opnds[1] = op_crx; - m2->_opnds[2] = op_mem; - - // registers for new nodes - ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst - ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst - - // Insert new nodes. - nodes->push(m1); - nodes->push(m2); - %} -%} instruct cmovI_bso_reg_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, regD src) %{ // no match-rule, false predicate @@ -10713,27 +10525,10 @@ instruct cmovI_bso_reg_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, regD src) %{ %} %} -// Double to Int conversion, NaN is mapped to 0. -instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{ - match(Set dst (ConvD2I src)); - predicate(!VM_Version::has_mtfprd()); - ins_cost(DEFAULT_COST); - - expand %{ - regD tmpD; - stackSlotL tmpS; - flagsReg crx; - cmpDUnordered_reg_reg(crx, src, src); // Check whether src is NaN. - convD2IRaw_regD(tmpD, src); // Convert float to int (speculated). - moveD2L_reg_stack(tmpS, tmpD); // Store float to stack (speculated). - cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check. - %} -%} // Double to Int conversion, NaN is mapped to 0. Special version for Power8. instruct convD2I_reg_mffprd_ExEx(iRegIdst dst, regD src) %{ match(Set dst (ConvD2I src)); - predicate(VM_Version::has_mtfprd()); ins_cost(DEFAULT_COST); expand %{ @@ -10758,27 +10553,10 @@ instruct convF2IRaw_regF(regF dst, regF src) %{ ins_pipe(pipe_class_default); %} -// Float to Int conversion, NaN is mapped to 0. -instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{ - match(Set dst (ConvF2I src)); - predicate(!VM_Version::has_mtfprd()); - ins_cost(DEFAULT_COST); - - expand %{ - regF tmpF; - stackSlotL tmpS; - flagsReg crx; - cmpFUnordered_reg_reg(crx, src, src); // Check whether src is NaN. - convF2IRaw_regF(tmpF, src); // Convert float to int (speculated). 
- moveF2L_reg_stack(tmpS, tmpF); // Store float to stack (speculated). - cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check. - %} -%} // Float to Int conversion, NaN is mapped to 0. Special version for Power8. instruct convF2I_regF_mffprd_ExEx(iRegIdst dst, regF src) %{ match(Set dst (ConvF2I src)); - predicate(VM_Version::has_mtfprd()); ins_cost(DEFAULT_COST); expand %{ @@ -10869,56 +10647,6 @@ instruct cmovL_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{ ins_pipe(pipe_class_default); %} -instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{ - // no match-rule, false predicate - effect(DEF dst, USE crx, USE mem); - predicate(false); - - format %{ "CmovL $dst, $crx, $mem \t// postalloc expanded" %} - postalloc_expand %{ - // - // replaces - // - // region dst crx mem - // \ | | / - // dst=cmovL_bso_stackSlotL_conLvalue0 - // - // with - // - // region dst - // \ / - // dst=loadConL16(0) - // | - // ^ region dst crx mem - // | \ | | / - // dst=cmovL_bso_stackSlotL - // - - // Create new nodes. - MachNode *m1 = new loadConL16Node(); - MachNode *m2 = new cmovL_bso_stackSlotLNode(); - - // inputs for new nodes - m1->add_req(n_region); - m2->add_req(n_region, n_crx, n_mem); - m2->add_prec(m1); - - // operands for new nodes - m1->_opnds[0] = op_dst; - m1->_opnds[1] = new immL16Oper(0); - m2->_opnds[0] = op_dst; - m2->_opnds[1] = op_crx; - m2->_opnds[2] = op_mem; - - // registers for new nodes - ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst - ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst - - // Insert new nodes. - nodes->push(m1); - nodes->push(m2); - %} -%} instruct cmovL_bso_reg_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, regD src) %{ // no match-rule, false predicate @@ -10971,27 +10699,10 @@ instruct cmovL_bso_reg_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, regD src) %{ %} %} -// Float to Long conversion, NaN is mapped to 0. -instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{ - match(Set dst (ConvF2L src)); - predicate(!VM_Version::has_mtfprd()); - ins_cost(DEFAULT_COST); - - expand %{ - regF tmpF; - stackSlotL tmpS; - flagsReg crx; - cmpFUnordered_reg_reg(crx, src, src); // Check whether src is NaN. - convF2LRaw_regF(tmpF, src); // Convert float to long (speculated). - moveF2L_reg_stack(tmpS, tmpF); // Store float to stack (speculated). - cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check. - %} -%} // Float to Long conversion, NaN is mapped to 0. Special version for Power8. instruct convF2L_reg_mffprd_ExEx(iRegLdst dst, regF src) %{ match(Set dst (ConvF2L src)); - predicate(VM_Version::has_mtfprd()); ins_cost(DEFAULT_COST); expand %{ @@ -11016,27 +10727,10 @@ instruct convD2LRaw_regD(regD dst, regD src) %{ ins_pipe(pipe_class_default); %} -// Double to Long conversion, NaN is mapped to 0. -instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{ - match(Set dst (ConvD2L src)); - predicate(!VM_Version::has_mtfprd()); - ins_cost(DEFAULT_COST); - - expand %{ - regD tmpD; - stackSlotL tmpS; - flagsReg crx; - cmpDUnordered_reg_reg(crx, src, src); // Check whether src is NaN. - convD2LRaw_regD(tmpD, src); // Convert float to long (speculated). - moveD2L_reg_stack(tmpS, tmpD); // Store float to stack (speculated). - cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check. - %} -%} // Double to Long conversion, NaN is mapped to 0. Special version for Power8. 
instruct convD2L_reg_mffprd_ExEx(iRegLdst dst, regD src) %{ match(Set dst (ConvD2L src)); - predicate(VM_Version::has_mtfprd()); ins_cost(DEFAULT_COST); expand %{ @@ -11075,25 +10769,6 @@ instruct convD2F_reg(regF dst, regD src) %{ ins_pipe(pipe_class_default); %} -// Integer to Float conversion. -instruct convI2F_ireg_Ex(regF dst, iRegIsrc src) %{ - match(Set dst (ConvI2F src)); - predicate(!VM_Version::has_fcfids()); - ins_cost(DEFAULT_COST); - - expand %{ - iRegLdst tmpL; - stackSlotL tmpS; - regD tmpD; - regD tmpD2; - convI2L_reg(tmpL, src); // Sign-extension int to long. - regL_to_stkL(tmpS, tmpL); // Store long to stack. - moveL2D_stack_reg(tmpD, tmpS); // Load long into double register. - convL2DRaw_regD(tmpD2, tmpD); // Convert to double. - convD2F_reg(dst, tmpD2); // Convert double to float. - %} -%} - instruct convL2FRaw_regF(regF dst, regD src) %{ // no match-rule, false predicate effect(DEF dst, USE src); @@ -11107,27 +10782,10 @@ instruct convL2FRaw_regF(regF dst, regD src) %{ ins_pipe(pipe_class_default); %} -// Integer to Float conversion. Special version for Power7. -instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{ - match(Set dst (ConvI2F src)); - predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd()); - ins_cost(DEFAULT_COST); - - expand %{ - iRegLdst tmpL; - stackSlotL tmpS; - regD tmpD; - convI2L_reg(tmpL, src); // Sign-extension int to long. - regL_to_stkL(tmpS, tmpL); // Store long to stack. - moveL2D_stack_reg(tmpD, tmpS); // Load long into double register. - convL2FRaw_regF(dst, tmpD); // Convert to float. - %} -%} // Integer to Float conversion. Special version for Power8. instruct convI2F_ireg_mtfprd_Ex(regF dst, iRegIsrc src) %{ match(Set dst (ConvI2F src)); - predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd()); ins_cost(DEFAULT_COST); expand %{ @@ -11137,25 +10795,10 @@ instruct convI2F_ireg_mtfprd_Ex(regF dst, iRegIsrc src) %{ %} %} -// L2F to avoid runtime call. -instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{ - match(Set dst (ConvL2F src)); - predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd()); - ins_cost(DEFAULT_COST); - - expand %{ - stackSlotL tmpS; - regD tmpD; - regL_to_stkL(tmpS, src); // Store long to stack. - moveL2D_stack_reg(tmpD, tmpS); // Load long into double register. - convL2FRaw_regF(dst, tmpD); // Convert to float. - %} -%} // L2F to avoid runtime call. Special version for Power8. instruct convL2F_ireg_mtfprd_Ex(regF dst, iRegLsrc src) %{ match(Set dst (ConvL2F src)); - predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd()); ins_cost(DEFAULT_COST); expand %{ @@ -11170,27 +10813,10 @@ instruct convL2F_ireg_mtfprd_Ex(regF dst, iRegLsrc src) %{ // Convert to Double -// Integer to Double conversion. -instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{ - match(Set dst (ConvI2D src)); - predicate(!VM_Version::has_mtfprd()); - ins_cost(DEFAULT_COST); - - expand %{ - iRegLdst tmpL; - stackSlotL tmpS; - regD tmpD; - convI2L_reg(tmpL, src); // Sign-extension int to long. - regL_to_stkL(tmpS, tmpL); // Store long to stack. - moveL2D_stack_reg(tmpD, tmpS); // Load long into double register. - convL2DRaw_regD(dst, tmpD); // Convert to double. - %} -%} // Integer to Double conversion. Special version for Power8. 
instruct convI2D_reg_mtfprd_Ex(regD dst, iRegIsrc src) %{ match(Set dst (ConvI2D src)); - predicate(VM_Version::has_mtfprd()); ins_cost(DEFAULT_COST); expand %{ @@ -11200,22 +10826,10 @@ instruct convI2D_reg_mtfprd_Ex(regD dst, iRegIsrc src) %{ %} %} -// Long to Double conversion -instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{ - match(Set dst (ConvL2D src)); - ins_cost(DEFAULT_COST + MEMORY_REF_COST); - - expand %{ - regD tmpD; - moveL2D_stack_reg(tmpD, src); - convL2DRaw_regD(dst, tmpD); - %} -%} // Long to Double conversion. Special version for Power8. instruct convL2D_reg_mtfprd_Ex(regD dst, iRegLsrc src) %{ match(Set dst (ConvL2D src)); - predicate(VM_Version::has_mtfprd()); ins_cost(DEFAULT_COST); expand %{ @@ -12810,30 +12424,10 @@ instruct encode_ascii_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst //---------- Min/Max Instructions --------------------------------------------- -instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ - match(Set dst (MinI src1 src2)); - ins_cost(DEFAULT_COST*6); - - expand %{ - iRegLdst src1s; - iRegLdst src2s; - iRegLdst diff; - iRegLdst sm; - iRegLdst doz; // difference or zero - convI2L_reg(src1s, src1); // Ensure proper sign extension. - convI2L_reg(src2s, src2); // Ensure proper sign extension. - subL_reg_reg(diff, src2s, src1s); - // Need to consider >=33 bit result, therefore we need signmaskL. - signmask64L_regL(sm, diff); - andL_reg_reg(doz, diff, sm); // <=0 - addI_regL_regL(dst, doz, src1s); - %} -%} instruct minI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set dst (MinI src1 src2)); effect(KILL cr0); - predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST*2); ins_encode %{ @@ -12843,30 +12437,10 @@ instruct minI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegC ins_pipe(pipe_class_default); %} -instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ - match(Set dst (MaxI src1 src2)); - ins_cost(DEFAULT_COST*6); - - expand %{ - iRegLdst src1s; - iRegLdst src2s; - iRegLdst diff; - iRegLdst sm; - iRegLdst doz; // difference or zero - convI2L_reg(src1s, src1); // Ensure proper sign extension. - convI2L_reg(src2s, src2); // Ensure proper sign extension. - subL_reg_reg(diff, src2s, src1s); - // Need to consider >=33 bit result, therefore we need signmaskL. - signmask64L_regL(sm, diff); - andcL_reg_reg(doz, diff, sm); // >=0 - addI_regL_regL(dst, doz, src1s); - %} -%} instruct maxI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set dst (MaxI src1 src2)); effect(KILL cr0); - predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST*2); ins_encode %{ @@ -12881,7 +12455,7 @@ instruct maxI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegC // Popcnt for Power7. instruct popCountI(iRegIdst dst, iRegIsrc src) %{ match(Set dst (PopCountI src)); - predicate(UsePopCountInstruction && VM_Version::has_popcntw()); + predicate(UsePopCountInstruction); ins_cost(DEFAULT_COST); format %{ "POPCNTW $dst, $src" %} @@ -12894,7 +12468,7 @@ instruct popCountI(iRegIdst dst, iRegIsrc src) %{ // Popcnt for Power7. 
instruct popCountL(iRegIdst dst, iRegLsrc src) %{ - predicate(UsePopCountInstruction && VM_Version::has_popcntw()); + predicate(UsePopCountInstruction); match(Set dst (PopCountL src)); ins_cost(DEFAULT_COST); @@ -13253,7 +12827,7 @@ instruct loadI_reversed_acquire(iRegIdst dst, indirect mem) %{ // Load Long - aligned and reversed instruct loadL_reversed(iRegLdst dst, indirect mem) %{ match(Set dst (ReverseBytesL (LoadL mem))); - predicate(VM_Version::has_ldbrx() && (n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)))); + predicate((n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)))); ins_cost(MEMORY_REF_COST); size(4); @@ -13265,7 +12839,6 @@ instruct loadL_reversed(iRegLdst dst, indirect mem) %{ instruct loadL_reversed_acquire(iRegLdst dst, indirect mem) %{ match(Set dst (ReverseBytesL (LoadL mem))); - predicate(VM_Version::has_ldbrx()); ins_cost(2 * MEMORY_REF_COST); size(12); @@ -13346,7 +12919,6 @@ instruct storeI_reversed(iRegIsrc src, indirect mem) %{ // Store Long reversed byte order instruct storeL_reversed(iRegLsrc src, indirect mem) %{ match(Set mem (StoreL mem (ReverseBytesL src))); - predicate(VM_Version::has_stdbrx()); ins_cost(MEMORY_REF_COST); size(4); @@ -13941,6 +13513,113 @@ instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ ins_pipe(pipe_class_default); %} +// Vector Min / Max Instructions + +instruct vmin_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (MinV src1 src2)); + format %{ "VMIN $dst,$src1,$src2\t// vector min" %} + size(4); + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + switch (bt) { + case T_INT: + __ vminsw($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr()); + break; + case T_LONG: + __ vminsd($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr()); + break; + default: + ShouldNotReachHere(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vmax_reg(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (MaxV src1 src2)); + format %{ "VMAX $dst,$src1,$src2\t// vector max" %} + size(4); + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + switch (bt) { + case T_INT: + __ vmaxsw($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr()); + break; + case T_LONG: + __ vmaxsd($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr()); + break; + default: + ShouldNotReachHere(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vand(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (AndV src1 src2)); + size(4); + format %{ "VAND $dst,$src1,$src2\t// and vectors" %} + ins_encode %{ + __ vand($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr()); + %} + ins_pipe(pipe_class_default); +%} + +instruct vor(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (OrV src1 src2)); + size(4); + format %{ "VOR $dst,$src1,$src2\t// or vectors" %} + ins_encode %{ + __ vor($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr()); + %} + ins_pipe(pipe_class_default); +%} + +instruct vxor(vecX dst, vecX src1, vecX src2) %{ + match(Set dst (XorV src1 src2)); + size(4); + format %{ "VXOR $dst,$src1,$src2\t// xor vectors" %} + ins_encode %{ + __ vxor($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr()); + %} + ins_pipe(pipe_class_default); +%} + +instruct 
reductionI_arith_logic(iRegIdst dst, iRegIsrc srcInt, vecX srcVec, vecX tmp1, vecX tmp2) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); + match(Set dst (AddReductionVI srcInt srcVec)); + match(Set dst (MulReductionVI srcInt srcVec)); + match(Set dst (AndReductionV srcInt srcVec)); + match(Set dst ( OrReductionV srcInt srcVec)); + match(Set dst (XorReductionV srcInt srcVec)); + effect(TEMP tmp1, TEMP tmp2); + ins_cost(DEFAULT_COST * 6); + format %{ "REDUCEI_ARITH_LOGIC // $dst,$srcInt,$srcVec,$tmp1,$tmp2\t// reduce vector int add/mul/and/or/xor" %} + size(24); + ins_encode %{ + int opcode = this->ideal_Opcode(); + __ reduceI(opcode, $dst$$Register, $srcInt$$Register, $srcVec$$VectorSRegister->to_vr(), + $tmp1$$VectorSRegister->to_vr(), $tmp2$$VectorSRegister->to_vr()); + %} + ins_pipe(pipe_class_default); +%} + +instruct reductionI_min_max(iRegIdst dst, iRegIsrc srcInt, vecX srcVec, vecX tmp1, vecX tmp2, flagsRegCR0 cr0) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); + match(Set dst (MinReductionV srcInt srcVec)); + match(Set dst (MaxReductionV srcInt srcVec)); + effect(TEMP tmp1, TEMP tmp2, KILL cr0); + ins_cost(DEFAULT_COST * 7); + format %{ "REDUCEI_MINMAX // $dst,$srcInt,$srcVec,$tmp1,$tmp2,cr0\t// reduce vector int min/max" %} + size(28); + ins_encode %{ + int opcode = this->ideal_Opcode(); + __ reduceI(opcode, $dst$$Register, $srcInt$$Register, $srcVec$$VectorSRegister->to_vr(), + $tmp1$$VectorSRegister->to_vr(), $tmp2$$VectorSRegister->to_vr()); + %} + ins_pipe(pipe_class_default); +%} + // Vector Absolute Instructions instruct vabs4F_reg(vecX dst, vecX src) %{ diff --git a/src/hotspot/cpu/ppc/register_ppc.hpp b/src/hotspot/cpu/ppc/register_ppc.hpp index 565542ad7c0..b7949750dcc 100644 --- a/src/hotspot/cpu/ppc/register_ppc.hpp +++ b/src/hotspot/cpu/ppc/register_ppc.hpp @@ -99,8 +99,8 @@ class Register { // testers constexpr bool is_valid() const { return ( 0 <= _encoding && _encoding < number_of_registers); } - constexpr bool is_volatile() const { return ( 0 <= _encoding && _encoding <= 13 ); } - constexpr bool is_nonvolatile() const { return (14 <= _encoding && _encoding <= 31 ); } + constexpr bool is_volatile() const { return ( 0 <= _encoding && _encoding <= 13); } + constexpr bool is_nonvolatile() const { return (14 <= _encoding && _encoding <= 31); } const char* name() const; }; @@ -169,7 +169,7 @@ class ConditionRegister { // testers constexpr bool is_valid() const { return (0 <= _encoding && _encoding < number_of_registers); } - constexpr bool is_nonvolatile() const { return (2 <= _encoding && _encoding <= 4 ); } + constexpr bool is_nonvolatile() const { return (2 <= _encoding && _encoding <= 4); } const char* name() const; }; @@ -214,6 +214,7 @@ class FloatRegister { // testers constexpr bool is_valid() const { return (0 <= _encoding && _encoding < number_of_registers); } + constexpr bool is_nonvolatile() const { return (14 <= _encoding && _encoding <= 31); } const char* name() const; @@ -323,6 +324,7 @@ class VectorRegister { // testers constexpr bool is_valid() const { return (0 <= _encoding && _encoding < number_of_registers); } + constexpr bool is_nonvolatile() const { return (20 <= _encoding && _encoding <= 31); } const char* name() const; @@ -372,6 +374,7 @@ constexpr VectorRegister VR31 = as_VectorRegister(31); // The implementation of Vector-Scalar (VSX) registers on POWER architecture. +// VSR0-31 are aliases for F0-31 and VSR32-63 are aliases for VR0-31. 
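// For orientation only (not part of this change; the helper names below are ours): with this
// aliasing, VSR encodings 0..31 denote the same physical registers as F0..F31, and encodings
// 32..63 the same as VR0..VR31. For example, VSR52..VSR63, added to RegisterSaver_LiveVSRegs in
// sharedRuntime_ppc.cpp later in this patch, are the non-volatile vector registers VR20..VR31.
constexpr int vsr_of_fpr(int fpr_encoding) { return fpr_encoding; }      // F0..F31   -> VSR0..VSR31
constexpr int vsr_of_vr(int vr_encoding)   { return 32 + vr_encoding; }  // VR0..VR31 -> VSR32..VSR63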
class VectorSRegister { int _encoding; public: @@ -390,6 +393,7 @@ class VectorSRegister { // accessors constexpr int encoding() const { assert(is_valid(), "invalid register"); return _encoding; } inline VMReg as_VMReg() const; + VectorSRegister successor() const { return VectorSRegister(encoding() + 1); } // testers constexpr bool is_valid() const { return (0 <= _encoding && _encoding < number_of_registers); } @@ -480,7 +484,7 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { enum { max_gpr = Register::number_of_registers * 2, max_fpr = max_gpr + FloatRegister::number_of_registers * 2, - max_vsr = max_fpr + VectorSRegister::number_of_registers, + max_vsr = max_fpr + VectorSRegister::number_of_registers * 4, max_cnd = max_vsr + ConditionRegister::number_of_registers, max_spr = max_cnd + SpecialRegister::number_of_registers, // This number must be large enough to cover REG_COUNT (defined by c2) registers. @@ -519,7 +523,7 @@ constexpr FloatRegister F11_ARG11 = F11; // volatile constexpr FloatRegister F12_ARG12 = F12; // volatile constexpr FloatRegister F13_ARG13 = F13; // volatile -// Register declarations to be used in frame manager assembly code. +// Register declarations to be used in template interpreter assembly code. // Use only non-volatile registers in order to keep values across C-calls. constexpr Register R14_bcp = R14; constexpr Register R15_esp = R15; // slot below top of expression stack for ld/st with update @@ -529,7 +533,7 @@ constexpr Register R17_tos = R17; // The interpreter's top of (expres constexpr Register R18_locals = R18; // address of first param slot (receiver). constexpr Register R19_method = R19; // address of current method -// Temporary registers to be used within frame manager. We can use +// Temporary registers to be used within template interpreter. We can use // the non-volatiles because the call stub has saved them. // Use only non-volatile registers in order to keep values across C-calls. constexpr Register R21_tmp1 = R21; diff --git a/src/hotspot/cpu/ppc/runtime_ppc.cpp b/src/hotspot/cpu/ppc/runtime_ppc.cpp index 94e8c08ebf5..6d9a1dfcb1e 100644 --- a/src/hotspot/cpu/ppc/runtime_ppc.cpp +++ b/src/hotspot/cpu/ppc/runtime_ppc.cpp @@ -73,6 +73,9 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() { // Setup code generation tools. 
const char* name = OptoRuntime::stub_name(OptoStubId::exception_id); CodeBuffer buffer(name, 2048, 1024); + if (buffer.blob() == nullptr) { + return nullptr; + } InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); address start = __ pc(); diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp index 5a94d469434..4ec2483b267 100644 --- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp +++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp @@ -243,7 +243,19 @@ static const RegisterSaver::LiveRegType RegisterSaver_LiveVSRegs[] = { RegisterSaver_LiveVSReg( VSR48 ), RegisterSaver_LiveVSReg( VSR49 ), RegisterSaver_LiveVSReg( VSR50 ), - RegisterSaver_LiveVSReg( VSR51 ) + RegisterSaver_LiveVSReg( VSR51 ), + RegisterSaver_LiveVSReg( VSR52 ), + RegisterSaver_LiveVSReg( VSR53 ), + RegisterSaver_LiveVSReg( VSR54 ), + RegisterSaver_LiveVSReg( VSR55 ), + RegisterSaver_LiveVSReg( VSR56 ), + RegisterSaver_LiveVSReg( VSR57 ), + RegisterSaver_LiveVSReg( VSR58 ), + RegisterSaver_LiveVSReg( VSR59 ), + RegisterSaver_LiveVSReg( VSR60 ), + RegisterSaver_LiveVSReg( VSR61 ), + RegisterSaver_LiveVSReg( VSR62 ), + RegisterSaver_LiveVSReg( VSR63 ) }; @@ -336,26 +348,50 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble } if (generate_oop_map) { - map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), + map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), RegisterSaver_LiveRegs[i].vmreg); - map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), - RegisterSaver_LiveRegs[i].vmreg->next()); } offset += reg_size; } - for (int i = 0; i < vsregstosave_num; i++) { - int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; - int reg_type = RegisterSaver_LiveVSRegs[i].reg_type; - - __ li(R30, offset); - __ stxvd2x(as_VectorSRegister(reg_num), R30, R1_SP); + // Note that generate_oop_map in the following loop is only used for the + // polling_page_vectors_safepoint_handler_blob. + // The order in which the vector contents are stored depends on Endianess and + // the utilized instructions (PowerArchitecturePPC64). + assert(is_aligned(offset, StackAlignmentInBytes), "should be"); + if (PowerArchitecturePPC64 >= 10) { + assert(is_even(vsregstosave_num), "expectation"); + for (int i = 0; i < vsregstosave_num; i += 2) { + int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; + assert(RegisterSaver_LiveVSRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!"); + + __ stxvp(as_VectorSRegister(reg_num), offset, R1_SP); + // Note: The contents were read in the same order (see loadV16_Power9 node in ppc.ad). + if (generate_oop_map) { + map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), + RegisterSaver_LiveVSRegs[i LITTLE_ENDIAN_ONLY(+1) ].vmreg); + map->set_callee_saved(VMRegImpl::stack2reg((offset + vs_reg_size) >> 2), + RegisterSaver_LiveVSRegs[i BIG_ENDIAN_ONLY(+1) ].vmreg); + } + offset += (2 * vs_reg_size); + } + } else { + for (int i = 0; i < vsregstosave_num; i++) { + int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; - if (generate_oop_map) { - map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), - RegisterSaver_LiveVSRegs[i].vmreg); + if (PowerArchitecturePPC64 >= 9) { + __ stxv(as_VectorSRegister(reg_num), offset, R1_SP); + } else { + __ li(R31, offset); + __ stxvd2x(as_VectorSRegister(reg_num), R31, R1_SP); + } + // Note: The contents were read in the same order (see loadV16_Power8 / loadV16_Power9 node in ppc.ad). 
+ if (generate_oop_map) { + VMReg vsr = RegisterSaver_LiveVSRegs[i].vmreg; + map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), vsr); + } + offset += vs_reg_size; } - offset += vs_reg_size; } assert(offset == frame_size_in_bytes, "consistency check"); @@ -418,14 +454,29 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm, offset += reg_size; } - for (int i = 0; i < vsregstosave_num; i++) { - int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; - int reg_type = RegisterSaver_LiveVSRegs[i].reg_type; + assert(is_aligned(offset, StackAlignmentInBytes), "should be"); + if (PowerArchitecturePPC64 >= 10) { + for (int i = 0; i < vsregstosave_num; i += 2) { + int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; + assert(RegisterSaver_LiveVSRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!"); - __ li(R31, offset); - __ lxvd2x(as_VectorSRegister(reg_num), R31, R1_SP); + __ lxvp(as_VectorSRegister(reg_num), offset, R1_SP); - offset += vs_reg_size; + offset += (2 * vs_reg_size); + } + } else { + for (int i = 0; i < vsregstosave_num; i++) { + int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; + + if (PowerArchitecturePPC64 >= 9) { + __ lxv(as_VectorSRegister(reg_num), offset, R1_SP); + } else { + __ li(R31, offset); + __ lxvd2x(as_VectorSRegister(reg_num), R31, R1_SP); + } + + offset += vs_reg_size; + } } assert(offset == frame_size_in_bytes, "consistency check"); @@ -1143,12 +1194,12 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, __ bctr(); } -AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterHandlerEntry* handler) { address i2c_entry; address c2i_unverified_entry; address c2i_entry; @@ -1223,8 +1274,8 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry); - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, - c2i_no_clinit_check_entry); + handler->set_entry_points(i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + return; } // An oop arg. Must pass a handle not the oop itself. @@ -2689,6 +2740,21 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, __ li(r_temp_2, 0); __ stw(r_temp_2, in_bytes(JNIHandleBlock::top_offset()), r_temp_1); + // Prepare for return + // -------------------------------------------------------------------------- + __ pop_frame(); + __ restore_LR(R11); + +#if INCLUDE_JFR + // We need to do a poll test after unwind in case the sampler + // managed to sample the native frame after returning to Java. + Label L_stub; + int safepoint_offset = __ offset(); + if (!UseSIGTRAP) { + __ relocate(relocInfo::poll_return_type); + } + __ safepoint_poll(L_stub, r_temp_2, true /* at_return */, true /* in_nmethod: frame already popped */); +#endif // INCLUDE_JFR // Check for pending exceptions. 
// -------------------------------------------------------------------------- @@ -2696,13 +2762,16 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, __ cmpdi(CR0, r_temp_2, 0); __ bne(CR0, handle_pending_exception); - // Return - // -------------------------------------------------------------------------- - - __ pop_frame(); - __ restore_LR(R11); + // Return. __ blr(); + // Handler for return safepoint (out-of-line). +#if INCLUDE_JFR + if (!UseSIGTRAP) { + __ bind(L_stub); + __ jump_to_polling_page_return_handler_blob(safepoint_offset); + } +#endif // INCLUDE_JFR // Handler for pending exceptions (out-of-line). // -------------------------------------------------------------------------- @@ -2710,9 +2779,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // is the empty function. We just pop this frame and then jump to // forward_exception_entry. __ bind(handle_pending_exception); - - __ pop_frame(); - __ restore_LR(R11); __ b64_patchable((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); @@ -2869,7 +2935,7 @@ static void push_skeleton_frames(MacroAssembler* masm, bool deopt, __ cmpdi(CR0, number_of_frames_reg, 0); __ bne(CR0, loop); - // Get the return address pointing into the frame manager. + // Get the return address pointing into the template interpreter. __ ld(R0, 0, pcs_reg); // Store it in the top interpreter frame. __ std(R0, _abi0(lr), R1_SP); @@ -3106,6 +3172,9 @@ UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() { // Setup code generation tools. const char* name = OptoRuntime::stub_name(OptoStubId::uncommon_trap_id); CodeBuffer buffer(name, 2048, 1024); + if (buffer.blob() == nullptr) { + return nullptr; + } InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); address start = __ pc(); diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp index 939c3d3094a..2624131033c 100644 --- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp @@ -86,7 +86,7 @@ class StubGenerator: public StubCodeGenerator { // R10 - thread : Thread* // address generate_call_stub(address& return_address) { - // Setup a new c frame, copy java arguments, call frame manager or + // Setup a new c frame, copy java arguments, call template interpreter or // native_entry, and process result. 
StubGenStubId stub_id = StubGenStubId::call_stub_id; @@ -94,10 +94,13 @@ class StubGenerator: public StubCodeGenerator { address start = __ function_entry(); + int save_nonvolatile_registers_size = __ save_nonvolatile_registers_size(true, SuperwordUseVSX); + // some sanity checks + STATIC_ASSERT(StackAlignmentInBytes == 16); assert((sizeof(frame::native_abi_minframe) % 16) == 0, "unaligned"); assert((sizeof(frame::native_abi_reg_args) % 16) == 0, "unaligned"); - assert((sizeof(frame::spill_nonvolatiles) % 16) == 0, "unaligned"); + assert((save_nonvolatile_registers_size % 16) == 0, "unaligned"); assert((sizeof(frame::parent_ijava_frame_abi) % 16) == 0, "unaligned"); assert((sizeof(frame::entry_frame_locals) % 16) == 0, "unaligned"); @@ -106,93 +109,72 @@ class StubGenerator: public StubCodeGenerator { Register r_arg_result_type = R5; Register r_arg_method = R6; Register r_arg_entry = R7; + Register r_arg_argument_addr = R8; + Register r_arg_argument_count = R9; Register r_arg_thread = R10; - Register r_temp = R24; - Register r_top_of_arguments_addr = R25; - Register r_entryframe_fp = R26; + Register r_entryframe_fp = R2; // volatile + Register r_argument_size = R11_scratch1; // volatile + Register r_top_of_arguments_addr = R21_tmp1; { // Stack on entry to call_stub: // // F1 [C_FRAME] // ... - - Register r_arg_argument_addr = R8; - Register r_arg_argument_count = R9; - Register r_frame_alignment_in_bytes = R27; - Register r_argument_addr = R28; - Register r_argumentcopy_addr = R29; - Register r_argument_size_in_bytes = R30; - Register r_frame_size = R23; - + Register r_frame_size = R12_scratch2; // volatile Label arguments_copied; // Save LR/CR to caller's C_FRAME. __ save_LR_CR(R0); - // Zero extend arg_argument_count. - __ clrldi(r_arg_argument_count, r_arg_argument_count, 32); - - // Save non-volatiles GPRs to ENTRY_FRAME (not yet pushed, but it's safe). - __ save_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); - // Keep copy of our frame pointer (caller's SP). __ mr(r_entryframe_fp, R1_SP); + // calculate frame size + STATIC_ASSERT(Interpreter::logStackElementSize == 3); + + // space for arguments aligned up: ((arg_count + 1) * 8) &~ 15 + __ addi(r_frame_size, r_arg_argument_count, 1); + __ rldicr(r_frame_size, r_frame_size, 3, 63 - 4); + + // this is the pure space for arguments (excluding alignment padding) + __ sldi(r_argument_size, r_arg_argument_count, 3); + + __ addi(r_frame_size, r_frame_size, + save_nonvolatile_registers_size + frame::entry_frame_locals_size + frame::top_ijava_frame_abi_size); + + // push ENTRY_FRAME + __ push_frame(r_frame_size, R0); + + // Save non-volatiles registers to ENTRY_FRAME. + __ save_nonvolatile_registers(r_entryframe_fp, -(frame::entry_frame_locals_size + save_nonvolatile_registers_size), + true, SuperwordUseVSX); + BLOCK_COMMENT("Push ENTRY_FRAME including arguments"); // Push ENTRY_FRAME including arguments: // // F0 [TOP_IJAVA_FRAME_ABI] // alignment (optional) // [outgoing Java arguments] + // [non-volatiles] // [ENTRY_FRAME_LOCALS] // F1 [C_FRAME] // ... 
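// For orientation only (not part of this change; the helper name is ours): the addi/rldicr pair
// above computes the 16-byte aligned Java argument area, ((arg_count + 1) * 8) & ~15, so e.g.
// 3 or 4 arguments reserve 32 bytes and 5 or 6 arguments reserve 48 bytes. A plain C++ sketch:
//   static inline int64_t aligned_java_arg_bytes(int64_t arg_count) {
//     return ((arg_count + 1) * 8) & ~int64_t(15);   // assumes <cstdint>
//   }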
- // calculate frame size - - // unaligned size of arguments - __ sldi(r_argument_size_in_bytes, - r_arg_argument_count, Interpreter::logStackElementSize); - // arguments alignment (max 1 slot) - // FIXME: use round_to() here - __ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1); - __ sldi(r_frame_alignment_in_bytes, - r_frame_alignment_in_bytes, Interpreter::logStackElementSize); - - // size = unaligned size of arguments + top abi's size - __ addi(r_frame_size, r_argument_size_in_bytes, - frame::top_ijava_frame_abi_size); - // size += arguments alignment - __ add(r_frame_size, - r_frame_size, r_frame_alignment_in_bytes); - // size += size of call_stub locals - __ addi(r_frame_size, - r_frame_size, frame::entry_frame_locals_size); - - // push ENTRY_FRAME - __ push_frame(r_frame_size, r_temp); - // initialize call_stub locals (step 1) - __ std(r_arg_call_wrapper_addr, - _entry_frame_locals_neg(call_wrapper_address), r_entryframe_fp); - __ std(r_arg_result_addr, - _entry_frame_locals_neg(result_address), r_entryframe_fp); - __ std(r_arg_result_type, - _entry_frame_locals_neg(result_type), r_entryframe_fp); + __ std(r_arg_call_wrapper_addr, _entry_frame_locals_neg(call_wrapper_address), r_entryframe_fp); + __ std(r_arg_result_addr, _entry_frame_locals_neg(result_address), r_entryframe_fp); + __ std(r_arg_result_type, _entry_frame_locals_neg(result_type), r_entryframe_fp); // we will save arguments_tos_address later - BLOCK_COMMENT("Copy Java arguments"); // copy Java arguments // Calculate top_of_arguments_addr which will be R17_tos (not prepushed) later. - // FIXME: why not simply use SP+frame::top_ijava_frame_size? - __ addi(r_top_of_arguments_addr, - R1_SP, frame::top_ijava_frame_abi_size); - __ add(r_top_of_arguments_addr, - r_top_of_arguments_addr, r_frame_alignment_in_bytes); + __ addi(r_top_of_arguments_addr, r_entryframe_fp, + -(save_nonvolatile_registers_size + frame::entry_frame_locals_size)); + __ sub(r_top_of_arguments_addr, r_top_of_arguments_addr, r_argument_size); // any arguments to copy? __ cmpdi(CR0, r_arg_argument_count, 0); @@ -200,6 +182,8 @@ class StubGenerator: public StubCodeGenerator { // prepare loop and copy arguments in reverse order { + Register r_argument_addr = R22_tmp2; + Register r_argumentcopy_addr = R23_tmp3; // init CTR with arg_argument_count __ mtctr(r_arg_argument_count); @@ -207,8 +191,7 @@ class StubGenerator: public StubCodeGenerator { __ mr(r_argumentcopy_addr, r_top_of_arguments_addr); // let r_argument_addr point to last incoming java argument - __ add(r_argument_addr, - r_arg_argument_addr, r_argument_size_in_bytes); + __ add(r_argument_addr, r_arg_argument_addr, r_argument_size); __ addi(r_argument_addr, r_argument_addr, -BytesPerWord); // now loop while CTR > 0 and copy arguments @@ -216,10 +199,10 @@ class StubGenerator: public StubCodeGenerator { Label next_argument; __ bind(next_argument); - __ ld(r_temp, 0, r_argument_addr); + __ ld(R0, 0, r_argument_addr); // argument_addr--; __ addi(r_argument_addr, r_argument_addr, -BytesPerWord); - __ std(r_temp, 0, r_argumentcopy_addr); + __ std(R0, 0, r_argumentcopy_addr); // argumentcopy_addr++; __ addi(r_argumentcopy_addr, r_argumentcopy_addr, BytesPerWord); @@ -232,15 +215,10 @@ class StubGenerator: public StubCodeGenerator { } { - BLOCK_COMMENT("Call frame manager or native entry."); - // Call frame manager or native entry. 
- Register r_new_arg_entry = R14; - assert_different_registers(r_new_arg_entry, r_top_of_arguments_addr, - r_arg_method, r_arg_thread); - - __ mr(r_new_arg_entry, r_arg_entry); + BLOCK_COMMENT("Call template interpreter or native entry."); + assert_different_registers(r_arg_entry, r_top_of_arguments_addr, r_arg_method, r_arg_thread); - // Register state on entry to frame manager / native entry: + // Register state on entry to template interpreter / native entry: // // tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8 // R19_method - Method @@ -262,42 +240,43 @@ class StubGenerator: public StubCodeGenerator { assert(tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread"); // Set R15_prev_state to 0 for simplifying checks in callee. - __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1); - // Stack on entry to frame manager / native entry: + __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R0); + // Stack on entry to template interpreter / native entry: // // F0 [TOP_IJAVA_FRAME_ABI] // alignment (optional) // [outgoing Java arguments] + // [non-volatiles] // [ENTRY_FRAME_LOCALS] // F1 [C_FRAME] // ... // // global toc register - __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R11_scratch1); + __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R0); // Remember the senderSP so we interpreter can pop c2i arguments off of the stack // when called via a c2i. // Pass initial_caller_sp to framemanager. __ mr(R21_sender_SP, R1_SP); - // Do a light-weight C-call here, r_new_arg_entry holds the address - // of the interpreter entry point (frame manager or native entry) + // Do a light-weight C-call here, r_arg_entry holds the address + // of the interpreter entry point (template interpreter or native entry) // and save runtime-value of LR in return_address. - assert(r_new_arg_entry != tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread, - "trashed r_new_arg_entry"); - return_address = __ call_stub(r_new_arg_entry); + assert(r_arg_entry != tos && r_arg_entry != R19_method && r_arg_entry != R16_thread, + "trashed r_arg_entry"); + return_address = __ call_stub(r_arg_entry); } { - BLOCK_COMMENT("Returned from frame manager or native entry."); - // Returned from frame manager or native entry. + BLOCK_COMMENT("Returned from template interpreter or native entry."); // Now pop frame, process result, and return to caller. - // Stack on exit from frame manager / native entry: + // Stack on exit from template interpreter / native entry: // // F0 [ABI] // ... + // [non-volatiles] // [ENTRY_FRAME_LOCALS] // F1 [C_FRAME] // ... @@ -310,39 +289,38 @@ class StubGenerator: public StubCodeGenerator { Label ret_is_float; Label ret_is_double; - Register r_entryframe_fp = R30; - Register r_lr = R7_ARG5; - Register r_cr = R8_ARG6; + Register r_lr = R11_scratch1; + Register r_cr = R12_scratch2; // Reload some volatile registers which we've spilled before the call - // to frame manager / native entry. + // to template interpreter / native entry. // Access all locals via frame pointer, because we know nothing about // the topmost frame's size. 
- __ ld(r_entryframe_fp, _abi0(callers_sp), R1_SP); + __ ld(r_entryframe_fp, _abi0(callers_sp), R1_SP); // restore after call assert_different_registers(r_entryframe_fp, R3_RET, r_arg_result_addr, r_arg_result_type, r_cr, r_lr); - __ ld(r_arg_result_addr, - _entry_frame_locals_neg(result_address), r_entryframe_fp); - __ ld(r_arg_result_type, - _entry_frame_locals_neg(result_type), r_entryframe_fp); + __ ld(r_arg_result_addr, _entry_frame_locals_neg(result_address), r_entryframe_fp); + __ ld(r_arg_result_type, _entry_frame_locals_neg(result_type), r_entryframe_fp); __ ld(r_cr, _abi0(cr), r_entryframe_fp); __ ld(r_lr, _abi0(lr), r_entryframe_fp); - - // pop frame and restore non-volatiles, LR and CR - __ mr(R1_SP, r_entryframe_fp); - __ pop_cont_fastpath(); - __ mtcr(r_cr); - __ mtlr(r_lr); + __ mtcr(r_cr); // restore CR + __ mtlr(r_lr); // restore LR // Store result depending on type. Everything that is not // T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT. - __ cmpwi(CR0, r_arg_result_type, T_OBJECT); - __ cmpwi(CR1, r_arg_result_type, T_LONG); - __ cmpwi(CR5, r_arg_result_type, T_FLOAT); - __ cmpwi(CR6, r_arg_result_type, T_DOUBLE); + // Using volatile CRs. + __ cmpwi(CR1, r_arg_result_type, T_OBJECT); + __ cmpwi(CR5, r_arg_result_type, T_LONG); + __ cmpwi(CR6, r_arg_result_type, T_FLOAT); + __ cmpwi(CR7, r_arg_result_type, T_DOUBLE); + + __ pop_cont_fastpath(); // kills CR0, uses R16_thread // restore non-volatile registers - __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); + __ restore_nonvolatile_registers(r_entryframe_fp, -(frame::entry_frame_locals_size + save_nonvolatile_registers_size), + true, SuperwordUseVSX); + // pop frame + __ mr(R1_SP, r_entryframe_fp); // Stack on exit from call_stub: // @@ -351,24 +329,18 @@ class StubGenerator: public StubCodeGenerator { // // no call_stub frames left. - // All non-volatiles have been restored at this point!! - assert(R3_RET == R3, "R3_RET should be R3"); - - __ beq(CR0, ret_is_object); - __ beq(CR1, ret_is_long); - __ beq(CR5, ret_is_float); - __ beq(CR6, ret_is_double); + __ beq(CR1, ret_is_object); + __ beq(CR5, ret_is_long); + __ beq(CR6, ret_is_float); + __ beq(CR7, ret_is_double); // default: __ stw(R3_RET, 0, r_arg_result_addr); __ blr(); // return to caller // case T_OBJECT: - __ bind(ret_is_object); - __ std(R3_RET, 0, r_arg_result_addr); - __ blr(); // return to caller - // case T_LONG: + __ bind(ret_is_object); __ bind(ret_is_long); __ std(R3_RET, 0, r_arg_result_addr); __ blr(); // return to caller @@ -546,6 +518,177 @@ class StubGenerator: public StubCodeGenerator { return start; } + // Computes the Galois/Counter Mode (GCM) product and reduction. + // + // This function performs polynomial multiplication of the subkey H with + // the current GHASH state using vectorized polynomial multiplication (`vpmsumd`). + // The subkey H is divided into lower, middle, and higher halves. + // The multiplication results are reduced using `vConstC2` to stay within GF(2^128). + // The final computed value is stored back into `vState`. 
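// For orientation only (not part of this change; the names below are ours and form a scalar
// model, not the VSX implementation): each vpmsumd in computeGCMProduct forms 64x64-bit
// carry-less products, and the three results are the low, middle and high partial products of
// the 128-bit operands x = (state ^ data block) and h = subkey H before reduction in GF(2^128):
//   lo  = clmul(x_lo, h_lo)                        // "L"
//   mid = clmul(x_lo, h_hi) ^ clmul(x_hi, h_lo)    // "M", one vpmsumd on the doubleword-swapped H
//   hi  = clmul(x_hi, h_hi)                        // "H"
//   256-bit product = (hi << 128) ^ (mid << 64) ^ lo
// A minimal scalar carry-less multiply for reference (assumes <cstdint>):
static void clmul64_sketch(uint64_t a, uint64_t b, uint64_t& hi, uint64_t& lo) {
  hi = lo = 0;
  for (int i = 0; i < 64; i++) {
    if ((b >> i) & 1) {
      lo ^= a << i;                   // low 64 bits of (a << i)
      if (i != 0) hi ^= a >> (64 - i); // bits shifted into the upper half
    }
  }
}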
+ static void computeGCMProduct(MacroAssembler* _masm, + VectorRegister vLowerH, VectorRegister vH, VectorRegister vHigherH, + VectorRegister vConstC2, VectorRegister vZero, VectorRegister vState, + VectorRegister vLowProduct, VectorRegister vMidProduct, VectorRegister vHighProduct, + VectorRegister vReducedLow, VectorRegister vTmp8, VectorRegister vTmp9, + VectorRegister vCombinedResult, VectorRegister vSwappedH) { + __ vxor(vH, vH, vState); + __ vpmsumd(vLowProduct, vLowerH, vH); // L : Lower Half of subkey H + __ vpmsumd(vMidProduct, vSwappedH, vH); // M : Combined halves of subkey H + __ vpmsumd(vHighProduct, vHigherH, vH); // H : Higher Half of subkey H + __ vpmsumd(vReducedLow, vLowProduct, vConstC2); // Reduction + __ vsldoi(vTmp8, vMidProduct, vZero, 8); // mL : Extract the lower 64 bits of M + __ vsldoi(vTmp9, vZero, vMidProduct, 8); // mH : Extract the higher 64 bits of M + __ vxor(vLowProduct, vLowProduct, vTmp8); // LL + mL : Partial result for lower half + __ vxor(vHighProduct, vHighProduct, vTmp9); // HH + mH : Partial result for upper half + __ vsldoi(vLowProduct, vLowProduct, vLowProduct, 8); // Swap + __ vxor(vLowProduct, vLowProduct, vReducedLow); + __ vsldoi(vCombinedResult, vLowProduct, vLowProduct, 8); // Swap + __ vpmsumd(vLowProduct, vLowProduct, vConstC2); // Reduction using constant + __ vxor(vCombinedResult, vCombinedResult, vHighProduct); // Combine reduced Low & High products + __ vxor(vState, vLowProduct, vCombinedResult); + } + + // Generate stub for ghash process blocks. + // + // Arguments for generated stub: + // state: R3_ARG1 (long[] state) + // subkeyH: R4_ARG2 (long[] subH) + // data: R5_ARG3 (byte[] data) + // blocks: R6_ARG4 (number of 16-byte blocks to process) + // + // The polynomials are processed in bit-reflected order for efficiency reasons. + // This optimization leverages the structure of the Galois field arithmetic + // to minimize the number of bit manipulations required during multiplication. + // For an explanation of how this works, refer : + // Vinodh Gopal, Erdinc Ozturk, Wajdi Feghali, Jim Guilford, Gil Wolrich, + // Martin Dixon. 
"Optimized Galois-Counter-Mode Implementation on Intel® + // Architecture Processor" + // http://web.archive.org/web/20130609111954/http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/communications-ia-galois-counter-mode-paper.pdf + // + // + address generate_ghash_processBlocks() { + StubCodeMark mark(this, "StubRoutines", "ghash"); + address start = __ function_entry(); + + // Registers for parameters + Register state = R3_ARG1; // long[] state + Register subkeyH = R4_ARG2; // long[] subH + Register data = R5_ARG3; // byte[] data + Register blocks = R6_ARG4; + Register temp1 = R8; + // Vector Registers + VectorRegister vZero = VR0; + VectorRegister vH = VR1; + VectorRegister vLowerH = VR2; + VectorRegister vHigherH = VR3; + VectorRegister vLowProduct = VR4; + VectorRegister vMidProduct = VR5; + VectorRegister vHighProduct = VR6; + VectorRegister vReducedLow = VR7; + VectorRegister vTmp8 = VR8; + VectorRegister vTmp9 = VR9; + VectorRegister vTmp10 = VR10; + VectorRegister vSwappedH = VR11; + VectorRegister vTmp12 = VR12; + VectorRegister loadOrder = VR13; + VectorRegister vHigh = VR14; + VectorRegister vLow = VR15; + VectorRegister vState = VR16; + VectorRegister vPerm = VR17; + VectorRegister vCombinedResult = VR18; + VectorRegister vConstC2 = VR19; + + __ li(temp1, 0xc2); + __ sldi(temp1, temp1, 56); + __ vspltisb(vZero, 0); + __ mtvrd(vConstC2, temp1); + __ lxvd2x(vH->to_vsr(), subkeyH); + __ lxvd2x(vState->to_vsr(), state); + // Operations to obtain lower and higher bytes of subkey H. + __ vspltisb(vReducedLow, 1); + __ vspltisb(vTmp10, 7); + __ vsldoi(vTmp8, vZero, vReducedLow, 1); // 0x1 + __ vor(vTmp8, vConstC2, vTmp8); // 0xC2...1 + __ vsplt(vTmp9, 0, vH); // MSB of H + __ vsl(vH, vH, vReducedLow); // Carry = H<<7 + __ vsrab(vTmp9, vTmp9, vTmp10); + __ vand(vTmp9, vTmp9, vTmp8); // Carry + __ vxor(vTmp10, vH, vTmp9); + __ vsldoi(vConstC2, vZero, vConstC2, 8); + __ vsldoi(vSwappedH, vTmp10, vTmp10, 8); // swap Lower and Higher Halves of subkey H + __ vsldoi(vLowerH, vZero, vSwappedH, 8); // H.L + __ vsldoi(vHigherH, vSwappedH, vZero, 8); // H.H +#ifdef ASSERT + __ cmpwi(CR0, blocks, 0); // Compare 'blocks' (R6_ARG4) with zero + __ asm_assert_ne("blocks should NOT be zero"); +#endif + __ clrldi(blocks, blocks, 32); + __ mtctr(blocks); + __ lvsl(loadOrder, temp1); +#ifdef VM_LITTLE_ENDIAN + __ vspltisb(vTmp12, 0xf); + __ vxor(loadOrder, loadOrder, vTmp12); +#define LE_swap_bytes(x) __ vec_perm(x, x, x, loadOrder) +#else +#define LE_swap_bytes(x) +#endif + + // This code performs Karatsuba multiplication in Galois fields to compute the GHASH operation. + // + // The Karatsuba method breaks the multiplication of two 128-bit numbers into smaller parts, + // performing three 128-bit multiplications and combining the results efficiently. + // + // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1) + // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0 + // + // Inputs: + // - vH: The data vector (state), containing both B0 (lower half) and B1 (higher half). + // - vLowerH: Lower half of the subkey H (A0). + // - vHigherH: Higher half of the subkey H (A1). + // - vConstC2: Constant used for reduction (for final processing). + // + // References: + // Shay Gueron, Michael E. Kounavis. 
+ // "Intel® Carry-Less Multiplication Instruction and its Usage for Computing the GCM Mode" + // https://web.archive.org/web/20110609115824/https://software.intel.com/file/24918 + // + Label L_aligned_loop, L_store, L_unaligned_loop, L_initialize_unaligned_loop; + __ andi(temp1, data, 15); + __ cmpwi(CR0, temp1, 0); + __ bne(CR0, L_initialize_unaligned_loop); + + __ bind(L_aligned_loop); + __ lvx(vH, temp1, data); + LE_swap_bytes(vH); + computeGCMProduct(_masm, vLowerH, vH, vHigherH, vConstC2, vZero, vState, + vLowProduct, vMidProduct, vHighProduct, vReducedLow, vTmp8, vTmp9, vCombinedResult, vSwappedH); + __ addi(data, data, 16); + __ bdnz(L_aligned_loop); + __ b(L_store); + + __ bind(L_initialize_unaligned_loop); + __ li(temp1, 0); + __ lvsl(vPerm, temp1, data); + __ lvx(vHigh, temp1, data); +#ifdef VM_LITTLE_ENDIAN + __ vspltisb(vTmp12, -1); + __ vxor(vPerm, vPerm, vTmp12); +#endif + __ bind(L_unaligned_loop); + __ addi(data, data, 16); + __ lvx(vLow, temp1, data); + __ vec_perm(vH, vHigh, vLow, vPerm); + computeGCMProduct(_masm, vLowerH, vH, vHigherH, vConstC2, vZero, vState, + vLowProduct, vMidProduct, vHighProduct, vReducedLow, vTmp8, vTmp9, vCombinedResult, vSwappedH); + __ vmr(vHigh, vLow); + __ bdnz(L_unaligned_loop); + + __ bind(L_store); + __ stxvd2x(vState->to_vsr(), state); + __ blr(); + + return start; + } // -XX:+OptimizeFill : convert fill/copy loops into intrinsic // // The code is implemented(ported from sparc) as we believe it benefits JVM98, however @@ -809,10 +952,8 @@ class StubGenerator: public StubCodeGenerator { address start_pc = __ pc(); Register tmp1 = R6_ARG4; // probably copy stub would have changed value reset it. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp1, VM_Version::_dscr_val); - __ mtdscr(tmp1); - } + __ load_const_optimized(tmp1, VM_Version::_dscr_val); + __ mtdscr(tmp1); __ li(R3_RET, 0); // return 0 __ blr(); return start_pc; @@ -924,34 +1065,13 @@ class StubGenerator: public StubCodeGenerator { __ andi_(R5_ARG3, R5_ARG3, 31); __ mtctr(tmp1); - if (!VM_Version::has_vsx()) { - - __ bind(l_8); - // Use unrolled version for mass copying (copy 32 elements a time) - // Load feeding store gets zero latency on Power6, however not on Power5. - // Therefore, the following sequence is made for the good of both. - __ ld(tmp1, 0, R3_ARG1); - __ ld(tmp2, 8, R3_ARG1); - __ ld(tmp3, 16, R3_ARG1); - __ ld(tmp4, 24, R3_ARG1); - __ std(tmp1, 0, R4_ARG2); - __ std(tmp2, 8, R4_ARG2); - __ std(tmp3, 16, R4_ARG2); - __ std(tmp4, 24, R4_ARG2); - __ addi(R3_ARG1, R3_ARG1, 32); - __ addi(R4_ARG2, R4_ARG2, 32); - __ bdnz(l_8); - - } else { // Processor supports VSX, so use it to mass copy. // Prefetch the data into the L2 cache. __ dcbt(R3_ARG1, 0); // If supported set DSCR pre-fetch to deepest. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); - __ mtdscr(tmp2); - } + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); __ li(tmp1, 16); @@ -972,12 +1092,9 @@ class StubGenerator: public StubCodeGenerator { __ bdnz(l_10); // Dec CTR and loop if not zero. // Restore DSCR pre-fetch value. 
- if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val); - __ mtdscr(tmp2); - } + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); - } // VSX } // FasterArrayCopy __ bind(l_6); @@ -1220,34 +1337,15 @@ class StubGenerator: public StubCodeGenerator { __ andi_(R5_ARG3, R5_ARG3, 15); __ mtctr(tmp1); - if (!VM_Version::has_vsx()) { - - __ bind(l_8); - // Use unrolled version for mass copying (copy 16 elements a time). - // Load feeding store gets zero latency on Power6, however not on Power5. - // Therefore, the following sequence is made for the good of both. - __ ld(tmp1, 0, R3_ARG1); - __ ld(tmp2, 8, R3_ARG1); - __ ld(tmp3, 16, R3_ARG1); - __ ld(tmp4, 24, R3_ARG1); - __ std(tmp1, 0, R4_ARG2); - __ std(tmp2, 8, R4_ARG2); - __ std(tmp3, 16, R4_ARG2); - __ std(tmp4, 24, R4_ARG2); - __ addi(R3_ARG1, R3_ARG1, 32); - __ addi(R4_ARG2, R4_ARG2, 32); - __ bdnz(l_8); - - } else { // Processor supports VSX, so use it to mass copy. + + // Processor supports VSX, so use it to mass copy. // Prefetch src data into L2 cache. __ dcbt(R3_ARG1, 0); // If supported set DSCR pre-fetch to deepest. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); - __ mtdscr(tmp2); - } + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); __ li(tmp1, 16); // Backbranch target aligned to 32-byte. It's not aligned 16-byte @@ -1267,12 +1365,8 @@ class StubGenerator: public StubCodeGenerator { __ bdnz(l_9); // Dec CTR and loop if not zero. // Restore DSCR pre-fetch value. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val); - __ mtdscr(tmp2); - } - - } + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); } // FasterArrayCopy __ bind(l_6); @@ -1427,60 +1521,38 @@ class StubGenerator: public StubCodeGenerator { __ andi_(R5_ARG3, R5_ARG3, 7); __ mtctr(tmp1); - if (!VM_Version::has_vsx()) { + // Processor supports VSX, so use it to mass copy. - __ bind(l_6); - // Use unrolled version for mass copying (copy 8 elements a time). - // Load feeding store gets zero latency on power6, however not on power 5. - // Therefore, the following sequence is made for the good of both. - __ ld(tmp1, 0, R3_ARG1); - __ ld(tmp2, 8, R3_ARG1); - __ ld(tmp3, 16, R3_ARG1); - __ ld(tmp4, 24, R3_ARG1); - __ std(tmp1, 0, R4_ARG2); - __ std(tmp2, 8, R4_ARG2); - __ std(tmp3, 16, R4_ARG2); - __ std(tmp4, 24, R4_ARG2); - __ addi(R3_ARG1, R3_ARG1, 32); - __ addi(R4_ARG2, R4_ARG2, 32); - __ bdnz(l_6); - - } else { // Processor supports VSX, so use it to mass copy. + // Prefetch the data into the L2 cache. + __ dcbt(R3_ARG1, 0); - // Prefetch the data into the L2 cache. - __ dcbt(R3_ARG1, 0); + // Set DSCR pre-fetch to deepest. + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); - // If supported set DSCR pre-fetch to deepest. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); - __ mtdscr(tmp2); - } + __ li(tmp1, 16); - __ li(tmp1, 16); + // Backbranch target aligned to 32-byte. Not 16-byte align as + // loop contains < 8 instructions that fit inside a single + // i-cache sector. + __ align(32); - // Backbranch target aligned to 32-byte. Not 16-byte align as - // loop contains < 8 instructions that fit inside a single - // i-cache sector. - __ align(32); + __ bind(l_7); + // Use loop with VSX load/store instructions to + // copy 8 elements a time. 
+ __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst + __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src + 16 + __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16 + __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32 + __ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32 + __ bdnz(l_7); // Dec CTR and loop if not zero. - __ bind(l_7); - // Use loop with VSX load/store instructions to - // copy 8 elements a time. - __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src - __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst - __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src + 16 - __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16 - __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32 - __ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32 - __ bdnz(l_7); // Dec CTR and loop if not zero. + // Restore DSCR pre-fetch value. + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); - // Restore DSCR pre-fetch value. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val); - __ mtdscr(tmp2); - } - } // VSX } // FasterArrayCopy // copy 1 element at a time @@ -1595,31 +1667,13 @@ class StubGenerator: public StubCodeGenerator { __ andi(R5_ARG3, R5_ARG3, 7); __ mtctr(tmp1); - if (!VM_Version::has_vsx()) { - __ bind(l_4); - // Use unrolled version for mass copying (copy 4 elements a time). - // Load feeding store gets zero latency on Power6, however not on Power5. - // Therefore, the following sequence is made for the good of both. - __ addi(R3_ARG1, R3_ARG1, -32); - __ addi(R4_ARG2, R4_ARG2, -32); - __ ld(tmp4, 24, R3_ARG1); - __ ld(tmp3, 16, R3_ARG1); - __ ld(tmp2, 8, R3_ARG1); - __ ld(tmp1, 0, R3_ARG1); - __ std(tmp4, 24, R4_ARG2); - __ std(tmp3, 16, R4_ARG2); - __ std(tmp2, 8, R4_ARG2); - __ std(tmp1, 0, R4_ARG2); - __ bdnz(l_4); - } else { // Processor supports VSX, so use it to mass copy. + // Processor supports VSX, so use it to mass copy. // Prefetch the data into the L2 cache. __ dcbt(R3_ARG1, 0); - // If supported set DSCR pre-fetch to deepest. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); - __ mtdscr(tmp2); - } + // Set DSCR pre-fetch to deepest. + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); __ li(tmp1, 16); @@ -1640,11 +1694,8 @@ class StubGenerator: public StubCodeGenerator { __ bdnz(l_4); // Restore DSCR pre-fetch value. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val); - __ mtdscr(tmp2); - } - } + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); __ cmpwi(CR0, R5_ARG3, 0); __ beq(CR0, l_6); @@ -1731,33 +1782,14 @@ class StubGenerator: public StubCodeGenerator { __ andi_(R5_ARG3, R5_ARG3, 3); __ mtctr(tmp1); - if (!VM_Version::has_vsx()) { - __ bind(l_4); - // Use unrolled version for mass copying (copy 4 elements a time). - // Load feeding store gets zero latency on Power6, however not on Power5. - // Therefore, the following sequence is made for the good of both. - __ ld(tmp1, 0, R3_ARG1); - __ ld(tmp2, 8, R3_ARG1); - __ ld(tmp3, 16, R3_ARG1); - __ ld(tmp4, 24, R3_ARG1); - __ std(tmp1, 0, R4_ARG2); - __ std(tmp2, 8, R4_ARG2); - __ std(tmp3, 16, R4_ARG2); - __ std(tmp4, 24, R4_ARG2); - __ addi(R3_ARG1, R3_ARG1, 32); - __ addi(R4_ARG2, R4_ARG2, 32); - __ bdnz(l_4); - - } else { // Processor supports VSX, so use it to mass copy. + // Processor supports VSX, so use it to mass copy. // Prefetch the data into the L2 cache. 
__ dcbt(R3_ARG1, 0); - // If supported set DSCR pre-fetch to deepest. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); - __ mtdscr(tmp2); - } + // Set DSCR pre-fetch to deepest. + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); __ li(tmp1, 16); @@ -1778,12 +1810,9 @@ class StubGenerator: public StubCodeGenerator { __ bdnz(l_5); // Dec CTR and loop if not zero. // Restore DSCR pre-fetch value. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val); - __ mtdscr(tmp2); - } + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); - } // VSX } // FasterArrayCopy // copy 1 element at a time @@ -1876,31 +1905,13 @@ class StubGenerator: public StubCodeGenerator { __ andi(R5_ARG3, R5_ARG3, 3); __ mtctr(tmp1); - if (!VM_Version::has_vsx()) { - __ bind(l_4); - // Use unrolled version for mass copying (copy 4 elements a time). - // Load feeding store gets zero latency on Power6, however not on Power5. - // Therefore, the following sequence is made for the good of both. - __ addi(R3_ARG1, R3_ARG1, -32); - __ addi(R4_ARG2, R4_ARG2, -32); - __ ld(tmp4, 24, R3_ARG1); - __ ld(tmp3, 16, R3_ARG1); - __ ld(tmp2, 8, R3_ARG1); - __ ld(tmp1, 0, R3_ARG1); - __ std(tmp4, 24, R4_ARG2); - __ std(tmp3, 16, R4_ARG2); - __ std(tmp2, 8, R4_ARG2); - __ std(tmp1, 0, R4_ARG2); - __ bdnz(l_4); - } else { // Processor supports VSX, so use it to mass copy. + // Processor supports VSX, so use it to mass copy. // Prefetch the data into the L2 cache. __ dcbt(R3_ARG1, 0); - // If supported set DSCR pre-fetch to deepest. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); - __ mtdscr(tmp2); - } + // Set DSCR pre-fetch to deepest. + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); __ li(tmp1, 16); @@ -1921,11 +1932,8 @@ class StubGenerator: public StubCodeGenerator { __ bdnz(l_4); // Restore DSCR pre-fetch value. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val); - __ mtdscr(tmp2); - } - } + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); __ cmpwi(CR0, R5_ARG3, 0); __ beq(CR0, l_1); @@ -5028,6 +5036,10 @@ void generate_lookup_secondary_supers_table_stub() { StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync(); } + if (UseGHASHIntrinsics) { + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } + if (UseAESIntrinsics) { StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); diff --git a/src/hotspot/cpu/ppc/stubRoutines_ppc_64.cpp b/src/hotspot/cpu/ppc/stubRoutines_ppc_64.cpp index 60cca4efb57..fed3f208f06 100644 --- a/src/hotspot/cpu/ppc/stubRoutines_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/stubRoutines_ppc_64.cpp @@ -77,13 +77,10 @@ static julong compute_inverse_poly(julong long_poly) { // Constants to fold n words as needed by macroAssembler. 
address StubRoutines::ppc::generate_crc_constants(juint reverse_poly) { // Layout of constant table: - // <= Power7 Little Endian: 4 tables for byte folding - // <= Power7 Big Endian: 1 table for single byte folding + 4 tables for multi-byte folding // >= Power8: 1 table for single byte folding + constants for fast vector implementation - const bool use_vector = VM_Version::has_vpmsumb(); const int vector_size = 16 * (CRC32_UNROLL_FACTOR2 + CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2); - const int size = use_vector ? CRC32_TABLE_SIZE + vector_size : (4 BIG_ENDIAN_ONLY(+1)) * CRC32_TABLE_SIZE; + const int size = CRC32_TABLE_SIZE + vector_size; const address consts = (address)os::malloc(size, mtInternal); if (consts == nullptr) { vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, "CRC constants: no enough space"); @@ -91,43 +88,8 @@ address StubRoutines::ppc::generate_crc_constants(juint reverse_poly) { juint* ptr = (juint*)consts; // Simple table used for single byte folding - LITTLE_ENDIAN_ONLY(if (use_vector)) { - for (int i = 0; i < 256; ++i) { - ptr[i] = fold_byte(i, reverse_poly); - } - } - - if (!use_vector) { - BIG_ENDIAN_ONLY(ptr = (juint*)(consts + CRC32_TABLE_SIZE);) - // <= Power7: 4 tables - for (int i = 0; i < 256; ++i) { - juint a = fold_byte(i, reverse_poly), - b = fold_byte(a, reverse_poly), - c = fold_byte(b, reverse_poly), - d = fold_byte(c, reverse_poly); -#ifndef VM_LITTLE_ENDIAN - a = byteswap(a); - b = byteswap(b); - c = byteswap(c); - d = byteswap(d); -#endif - ptr[i ] = a; - ptr[i + 256] = b; - ptr[i + 2* 256] = c; - ptr[i + 3* 256] = d; - } -#if 0 - for (int i = 0; i < 4; ++i) { - tty->print_cr("table %d:", i); - for (int j = 0; j < 32; ++j) { - for (int k = 0; k < 8; ++k) { - tty->print("%08x ", ptr[i*256 + j*8 + k]); - } - tty->cr(); - } - } -#endif - return consts; + for (int i = 0; i < 256; ++i) { + ptr[i] = fold_byte(i, reverse_poly); } // >= Power8: vector constants diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp index a8f5dbda484..ab4f35f4d8c 100644 --- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp @@ -119,12 +119,13 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() { const FloatRegister floatSlot = F0; address entry = __ function_entry(); + int save_nonvolatile_registers_size = __ save_nonvolatile_registers_size(false, false); __ save_LR(R0); - __ save_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); + __ save_nonvolatile_registers(R1_SP, -save_nonvolatile_registers_size, false, false); // We use target_sp for storing arguments in the C frame. 
__ mr(target_sp, R1_SP); - __ push_frame_reg_args_nonvolatiles(0, R11_scratch1); + __ push_frame(frame::native_abi_reg_args_size + save_nonvolatile_registers_size, R11_scratch1); __ mr(arg_java, R3_ARG1); @@ -309,7 +310,7 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() { __ bind(loop_end); __ pop_frame(); - __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); + __ restore_nonvolatile_registers(R1_SP, -save_nonvolatile_registers_size, false, false); __ restore_LR(R0); __ blr(); @@ -1077,7 +1078,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M // PPC64 specific: switch (kind) { - case Interpreter::java_lang_math_sqrt: use_instruction = VM_Version::has_fsqrt(); break; + case Interpreter::java_lang_math_sqrt: use_instruction = true; break; case Interpreter::java_lang_math_abs: use_instruction = true; break; case Interpreter::java_lang_math_fmaF: case Interpreter::java_lang_math_fmaD: use_instruction = UseFMA; break; @@ -1089,8 +1090,9 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M case Interpreter::java_lang_math_cos : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); break; case Interpreter::java_lang_math_tan : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); break; case Interpreter::java_lang_math_tanh : /* run interpreted */ break; + case Interpreter::java_lang_math_cbrt : /* run interpreted */ break; case Interpreter::java_lang_math_abs : /* run interpreted */ break; - case Interpreter::java_lang_math_sqrt : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); break; + case Interpreter::java_lang_math_sqrt : /* run interpreted */ break; case Interpreter::java_lang_math_log : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); break; case Interpreter::java_lang_math_log10: runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); break; case Interpreter::java_lang_math_pow : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); num_args = 2; break; @@ -1583,6 +1585,24 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ st_ptr(R0, JavaThread::pending_jni_exception_check_fn_offset(), R16_thread); } + #if INCLUDE_JFR + __ enter_jfr_critical_section(); + + // This poll test is to uphold the invariant that a JFR sampled frame + // must not return to its caller without a prior safepoint poll check. + // The earlier poll check in this routine is insufficient for this purpose + // because the thread has transitioned back to Java. + + Label slow_path, fast_path; + __ safepoint_poll(slow_path, R11_scratch1, true /* at_return */, false /* in_nmethod */); + __ b(fast_path); + __ bind(slow_path); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), R16_thread); + __ align(32); + __ bind(fast_path); + +#endif // INCLUDE_JFR + __ reset_last_Java_frame(); // Jvmdi/jvmpi support. 
Whether we've got an exception pending or @@ -1624,11 +1644,12 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ lfd(F1_RET, _ijava_state_neg(fresult), R11_scratch1); __ call_stub(result_handler_addr); - __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2); + __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R12_scratch2, R11_scratch1, R0); + JFR_ONLY(__ leave_jfr_critical_section();) // Must use the return pc which was loaded from the caller's frame // as the VM uses return-pc-patching for deoptimization. - __ mtlr(R0); + __ mtlr(R12_scratch2); __ blr(); //----------------------------------------------------------------------------- diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp index 8be6080e3d1..7431f77aeff 100644 --- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp @@ -319,14 +319,7 @@ void TemplateTable::fast_aldc(LdcType type) { __ ld(R31, simm16_rest, R11_scratch1); __ resolve_oop_handle(R31, R11_scratch1, R12_scratch2, MacroAssembler::PRESERVATION_NONE); __ cmpld(CR0, R17_tos, R31); - if (VM_Version::has_isel()) { - __ isel_0(R17_tos, CR0, Assembler::equal); - } else { - Label not_sentinel; - __ bne(CR0, not_sentinel); - __ li(R17_tos, 0); - __ bind(not_sentinel); - } + __ isel_0(R17_tos, CR0, Assembler::equal); __ verify_oop(R17_tos); __ dispatch_epilog(atos, Bytecodes::length_for(bytecode())); @@ -1042,7 +1035,7 @@ void TemplateTable::bastore() { // Need to check whether array is boolean or byte // since both types share the bastore bytecode. - __ load_klass(Rscratch, Rarray); + __ load_klass_check_null_throw(Rscratch, Rarray, Rscratch); __ lwz(Rscratch, in_bytes(Klass::layout_helper_offset()), Rscratch); int diffbit = exact_log2(Klass::layout_helper_boolean_diffbit()); __ testbitdi(CR0, R0, Rscratch, diffbit); @@ -1534,25 +1527,12 @@ void TemplateTable::convert() { case Bytecodes::_i2f: __ extsw(R17_tos, R17_tos); __ move_l_to_d(); - if (VM_Version::has_fcfids()) { // fcfids is >= Power7 only - // Comment: alternatively, load with sign extend could be done by lfiwax. - __ fcfids(F15_ftos, F15_ftos); - } else { - __ fcfid(F15_ftos, F15_ftos); - __ frsp(F15_ftos, F15_ftos); - } + __ fcfids(F15_ftos, F15_ftos); break; case Bytecodes::_l2f: - if (VM_Version::has_fcfids()) { // fcfids is >= Power7 only - __ move_l_to_d(); - __ fcfids(F15_ftos, F15_ftos); - } else { - // Avoid rounding problem when result should be 0x3f800001: need fixup code before fcfid+frsp. - __ mr(R3_ARG1, R17_tos); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::l2f)); - __ fmr(F15_ftos, F1_RET); - } + __ move_l_to_d(); + __ fcfids(F15_ftos, F15_ftos); break; case Bytecodes::_f2d: @@ -1748,16 +1728,18 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { const Register osr_nmethod = R31; __ mr(osr_nmethod, R3_RET); __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R11_scratch1); + JFR_ONLY(__ enter_jfr_critical_section();) __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin), R16_thread); __ reset_last_Java_frame(); // OSR buffer is in ARG1. // Remove the interpreter frame. - __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2); + __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R12_scratch2, R11_scratch1, R0); + JFR_ONLY(__ leave_jfr_critical_section();) // Jump to the osr code. 
__ ld(R11_scratch1, nmethod::osr_entry_point_offset(), osr_nmethod); - __ mtlr(R0); + __ mtlr(R12_scratch2); __ mtctr(R11_scratch1); __ bctr(); diff --git a/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp b/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp index 5c7b0067c3a..ae5410b12df 100644 --- a/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp +++ b/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2023 SAP SE. All rights reserved. + * Copyright (c) 2023, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,86 +35,6 @@ #define __ _masm-> -// for callee saved regs, according to the caller's ABI -static int compute_reg_save_area_size(const ABIDescriptor& abi) { - int size = 0; - for (int i = 0; i < Register::number_of_registers; i++) { - Register reg = as_Register(i); - // R1 saved/restored by prologue/epilogue, R13 (system thread) won't get modified! - if (reg == R1_SP || reg == R13) continue; - if (!abi.is_volatile_reg(reg)) { - size += 8; // bytes - } - } - - for (int i = 0; i < FloatRegister::number_of_registers; i++) { - FloatRegister reg = as_FloatRegister(i); - if (!abi.is_volatile_reg(reg)) { - size += 8; // bytes - } - } - - return size; -} - -static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) { - // 1. iterate all registers in the architecture - // - check if they are volatile or not for the given abi - // - if NOT, we need to save it here - - int offset = reg_save_area_offset; - - __ block_comment("{ preserve_callee_saved_regs "); - for (int i = 0; i < Register::number_of_registers; i++) { - Register reg = as_Register(i); - // R1 saved/restored by prologue/epilogue, R13 (system thread) won't get modified! - if (reg == R1_SP || reg == R13) continue; - if (!abi.is_volatile_reg(reg)) { - __ std(reg, offset, R1_SP); - offset += 8; - } - } - - for (int i = 0; i < FloatRegister::number_of_registers; i++) { - FloatRegister reg = as_FloatRegister(i); - if (!abi.is_volatile_reg(reg)) { - __ stfd(reg, offset, R1_SP); - offset += 8; - } - } - - __ block_comment("} preserve_callee_saved_regs "); -} - -static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) { - // 1. iterate all registers in the architecture - // - check if they are volatile or not for the given abi - // - if NOT, we need to restore it here - - int offset = reg_save_area_offset; - - __ block_comment("{ restore_callee_saved_regs "); - for (int i = 0; i < Register::number_of_registers; i++) { - Register reg = as_Register(i); - // R1 saved/restored by prologue/epilogue, R13 (system thread) won't get modified! - if (reg == R1_SP || reg == R13) continue; - if (!abi.is_volatile_reg(reg)) { - __ ld(reg, offset, R1_SP); - offset += 8; - } - } - - for (int i = 0; i < FloatRegister::number_of_registers; i++) { - FloatRegister reg = as_FloatRegister(i); - if (!abi.is_volatile_reg(reg)) { - __ lfd(reg, offset, R1_SP); - offset += 8; - } - } - - __ block_comment("} restore_callee_saved_regs "); -} - static const int upcall_stub_code_base_size = 1024; static const int upcall_stub_size_per_arg = 16; // arg save & restore + move @@ -140,13 +60,17 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, // The Java call uses the JIT ABI, but we also call C. 
int out_arg_area = MAX2(frame::jit_out_preserve_size + out_arg_bytes, (int)frame::native_abi_reg_args_size); - int reg_save_area_size = compute_reg_save_area_size(abi); + MacroAssembler* _masm = new MacroAssembler(&buffer); + int reg_save_area_size = __ save_nonvolatile_registers_size(true, SuperwordUseVSX); RegSpiller arg_spiller(call_regs._arg_regs); RegSpiller result_spiller(call_regs._ret_regs); int res_save_area_offset = out_arg_area; int arg_save_area_offset = res_save_area_offset + result_spiller.spill_size_bytes(); int reg_save_area_offset = arg_save_area_offset + arg_spiller.spill_size_bytes(); + if (SuperwordUseVSX) { // VectorRegisters want alignment + reg_save_area_offset = align_up(reg_save_area_offset, StackAlignmentInBytes); + } int frame_data_offset = reg_save_area_offset + reg_save_area_size; int frame_bottom_offset = frame_data_offset + sizeof(UpcallStub::FrameData); @@ -201,7 +125,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, ////////////////////////////////////////////////////////////////////////////// - MacroAssembler* _masm = new MacroAssembler(&buffer); address start = __ function_entry(); // called by C __ save_LR_CR(R0); assert((abi._stack_alignment_bytes % 16) == 0, "must be 16 byte aligned"); @@ -212,7 +135,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, // (and maybe attach it). arg_spiller.generate_spill(_masm, arg_save_area_offset); // Java methods won't preserve them, so save them here: - preserve_callee_saved_registers(_masm, abi, reg_save_area_offset); + __ save_nonvolatile_registers(R1_SP, reg_save_area_offset, true, SuperwordUseVSX); // Java code uses TOC (pointer to code cache). __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R0); // reinit @@ -310,7 +233,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, __ call_c(call_target_address); __ block_comment("} on_exit"); - restore_callee_saved_registers(_masm, abi, reg_save_area_offset); + __ restore_nonvolatile_registers(R1_SP, reg_save_area_offset, true, SuperwordUseVSX); result_spiller.generate_fill(_masm, res_save_area_offset); diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.cpp b/src/hotspot/cpu/ppc/vm_version_ppc.cpp index 8ec69bffe15..ad5e915a838 100644 --- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp +++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp @@ -64,16 +64,8 @@ void VM_Version::initialize() { FLAG_SET_ERGO(PowerArchitecturePPC64, 10); } else if (VM_Version::has_darn()) { FLAG_SET_ERGO(PowerArchitecturePPC64, 9); - } else if (VM_Version::has_lqarx()) { - FLAG_SET_ERGO(PowerArchitecturePPC64, 8); - } else if (VM_Version::has_popcntw()) { - FLAG_SET_ERGO(PowerArchitecturePPC64, 7); - } else if (VM_Version::has_cmpb()) { - FLAG_SET_ERGO(PowerArchitecturePPC64, 6); - } else if (VM_Version::has_popcntb()) { - FLAG_SET_ERGO(PowerArchitecturePPC64, 5); } else { - FLAG_SET_ERGO(PowerArchitecturePPC64, 0); + FLAG_SET_ERGO(PowerArchitecturePPC64, 8); } } @@ -81,20 +73,14 @@ void VM_Version::initialize() { switch (PowerArchitecturePPC64) { case 10: if (!VM_Version::has_brw() ) break; case 9: if (!VM_Version::has_darn() ) break; - case 8: if (!VM_Version::has_lqarx() ) break; - case 7: if (!VM_Version::has_popcntw()) break; - case 6: if (!VM_Version::has_cmpb() ) break; - case 5: if (!VM_Version::has_popcntb()) break; - case 0: PowerArchitecturePPC64_ok = true; break; + case 8: PowerArchitecturePPC64_ok = true; break; default: break; } guarantee(PowerArchitecturePPC64_ok, "PowerArchitecturePPC64 cannot 
be set to " "%zu on this machine", PowerArchitecturePPC64); // Power 8: Configure Data Stream Control Register. - if (PowerArchitecturePPC64 >= 8 && has_mfdscr()) { - config_dscr(); - } + config_dscr(); if (!UseSIGTRAP) { MSG(TrapBasedICMissChecks); @@ -109,27 +95,13 @@ void VM_Version::initialize() { FLAG_SET_ERGO(TrapBasedRangeChecks, false); } - // Power7 and later. - if (PowerArchitecturePPC64 > 6) { - if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { - FLAG_SET_ERGO(UsePopCountInstruction, true); - } - } - - if (!VM_Version::has_isel() && FLAG_IS_DEFAULT(ConditionalMoveLimit)) { - FLAG_SET_ERGO(ConditionalMoveLimit, 0); - } - - if (PowerArchitecturePPC64 >= 8) { + if (PowerArchitecturePPC64 >= 9) { + // Performance is good since Power9. if (FLAG_IS_DEFAULT(SuperwordUseVSX)) { FLAG_SET_ERGO(SuperwordUseVSX, true); } - } else { - if (SuperwordUseVSX) { - warning("SuperwordUseVSX specified, but needs at least Power8."); - FLAG_SET_DEFAULT(SuperwordUseVSX, false); - } } + MaxVectorSize = SuperwordUseVSX ? 16 : 8; if (FLAG_IS_DEFAULT(AlignVector)) { FLAG_SET_ERGO(AlignVector, false); @@ -198,28 +170,12 @@ void VM_Version::initialize() { // Create and print feature-string. char buf[(num_features+1) * 16]; // Max 16 chars per feature. jio_snprintf(buf, sizeof(buf), - "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - (has_fsqrt() ? " fsqrt" : ""), - (has_isel() ? " isel" : ""), - (has_lxarxeh() ? " lxarxeh" : ""), - (has_cmpb() ? " cmpb" : ""), - (has_popcntb() ? " popcntb" : ""), - (has_popcntw() ? " popcntw" : ""), - (has_fcfids() ? " fcfids" : ""), - (has_vand() ? " vand" : ""), - (has_lqarx() ? " lqarx" : ""), - (has_vcipher() ? " aes" : ""), - (has_vpmsumb() ? " vpmsumb" : ""), - (has_mfdscr() ? " mfdscr" : ""), - (has_vsx() ? " vsx" : ""), - (has_ldbrx() ? " ldbrx" : ""), - (has_stdbrx() ? " stdbrx" : ""), - (has_vshasig() ? " sha" : ""), + "ppc64 sha aes%s%s", (has_darn() ? " darn" : ""), (has_brw() ? " brw" : "") // Make sure number of %s matches num_features! ); - _features_string = os::strdup(buf); + _cpu_info_string = os::strdup(buf); if (Verbose) { print_features(); } @@ -283,24 +239,12 @@ void VM_Version::initialize() { } // The AES intrinsic stubs require AES instruction support. 
- if (has_vcipher()) { - if (FLAG_IS_DEFAULT(UseAES)) { - UseAES = true; - } - } else if (UseAES) { - if (!FLAG_IS_DEFAULT(UseAES)) - warning("AES instructions are not available on this CPU"); - FLAG_SET_DEFAULT(UseAES, false); + if (FLAG_IS_DEFAULT(UseAES)) { + UseAES = true; } - if (UseAES && has_vcipher()) { - if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { - UseAESIntrinsics = true; - } - } else if (UseAESIntrinsics) { - if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) - warning("AES intrinsics are not available on this CPU"); - FLAG_SET_DEFAULT(UseAESIntrinsics, false); + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + UseAESIntrinsics = true; } if (UseAESCTRIntrinsics) { @@ -308,9 +252,8 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); } - if (UseGHASHIntrinsics) { - warning("GHASH intrinsics are not available on this CPU"); - FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { + UseGHASHIntrinsics = true; } if (FLAG_IS_DEFAULT(UseFMA)) { @@ -322,14 +265,8 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseMD5Intrinsics, false); } - if (has_vshasig()) { - if (FLAG_IS_DEFAULT(UseSHA)) { - UseSHA = true; - } - } else if (UseSHA) { - if (!FLAG_IS_DEFAULT(UseSHA)) - warning("SHA instructions are not available on this CPU"); - FLAG_SET_DEFAULT(UseSHA, false); + if (FLAG_IS_DEFAULT(UseSHA)) { + UseSHA = true; } if (UseSHA1Intrinsics) { @@ -337,7 +274,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); } - if (UseSHA && has_vshasig()) { + if (UseSHA) { if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); } @@ -346,7 +283,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); } - if (UseSHA && has_vshasig()) { + if (UseSHA) { if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); } @@ -364,12 +301,6 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA, false); } - if (UseSecondarySupersTable && PowerArchitecturePPC64 < 7) { - if (!FLAG_IS_DEFAULT(UseSecondarySupersTable)) { - warning("UseSecondarySupersTable requires Power7 or later."); - } - FLAG_SET_DEFAULT(UseSecondarySupersTable, false); - } #ifdef COMPILER2 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { @@ -519,7 +450,7 @@ void VM_Version::print_platform_virtualization_info(outputStream* st) { } void VM_Version::print_features() { - tty->print_cr("Version: %s L1_data_cache_line_size=%d", features_string(), L1_data_cache_line_size()); + tty->print_cr("Version: %s L1_data_cache_line_size=%d", cpu_info_string(), L1_data_cache_line_size()); if (Verbose) { if (ContendedPaddingWidth > 0) { @@ -555,29 +486,10 @@ void VM_Version::determine_features() { // Emit code. void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->function_entry(); uint32_t *code = (uint32_t *)a->pc(); - // Don't use R0 in ldarx. // Keep R3_ARG1 unmodified, it contains &field (see below). // Keep R4_ARG2 unmodified, it contains offset = 0 (see below). 
- a->fsqrt(F3, F4); // code[0] -> fsqrt_m - a->fsqrts(F3, F4); // code[1] -> fsqrts_m - a->isel(R7, R5, R6, 0); // code[2] -> isel_m - a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m - a->cmpb(R7, R5, R6); // code[4] -> cmpb - a->popcntb(R7, R5); // code[5] -> popcntb - a->popcntw(R7, R5); // code[6] -> popcntw - a->fcfids(F3, F4); // code[7] -> fcfids - a->vand(VR0, VR0, VR0); // code[8] -> vand - // arg0 of lqarx must be an even register, (arg1 + arg2) must be a multiple of 16 - a->lqarx_unchecked(R6, R3_ARG1, R4_ARG2, 1); // code[9] -> lqarx_m - a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher - a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb - a->mfdscr(R0); // code[12] -> mfdscr - a->lxvd2x(VSR0, R3_ARG1); // code[13] -> vsx - a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[14] -> ldbrx - a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> stdbrx - a->vshasigmaw(VR0, VR1, 1, 0xF); // code[16] -> vshasig - a->darn(R7); // code[17] -> darn - a->brw(R5, R6); // code[18] -> brw + a->darn(R7); + a->brw(R5, R6); a->blr(); // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it. @@ -612,23 +524,6 @@ void VM_Version::determine_features() { // determine which instructions are legal. int feature_cntr = 0; - if (code[feature_cntr++]) features |= fsqrt_m; - if (code[feature_cntr++]) features |= fsqrts_m; - if (code[feature_cntr++]) features |= isel_m; - if (code[feature_cntr++]) features |= lxarxeh_m; - if (code[feature_cntr++]) features |= cmpb_m; - if (code[feature_cntr++]) features |= popcntb_m; - if (code[feature_cntr++]) features |= popcntw_m; - if (code[feature_cntr++]) features |= fcfids_m; - if (code[feature_cntr++]) features |= vand_m; - if (code[feature_cntr++]) features |= lqarx_m; - if (code[feature_cntr++]) features |= vcipher_m; - if (code[feature_cntr++]) features |= vpmsumb_m; - if (code[feature_cntr++]) features |= mfdscr_m; - if (code[feature_cntr++]) features |= vsx_m; - if (code[feature_cntr++]) features |= ldbrx_m; - if (code[feature_cntr++]) features |= stdbrx_m; - if (code[feature_cntr++]) features |= vshasig_m; if (code[feature_cntr++]) features |= darn_m; if (code[feature_cntr++]) features |= brw_m; @@ -726,6 +621,6 @@ void VM_Version::initialize_cpu_information(void) { _no_of_threads = _no_of_cores; _no_of_sockets = _no_of_cores; snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE, "PowerPC POWER%lu", PowerArchitecturePPC64); - snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "PPC %s", features_string()); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "PPC %s", cpu_info_string()); _initialized = true; } diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.hpp b/src/hotspot/cpu/ppc/vm_version_ppc.hpp index 6096f8e4fd1..18dfd843c19 100644 --- a/src/hotspot/cpu/ppc/vm_version_ppc.hpp +++ b/src/hotspot/cpu/ppc/vm_version_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2024 SAP SE. All rights reserved. + * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -32,46 +32,12 @@ class VM_Version: public Abstract_VM_Version { protected: enum Feature_Flag { - fsqrt, - fsqrts, - isel, - lxarxeh, - cmpb, - popcntb, - popcntw, - fcfids, - vand, - lqarx, - vcipher, - vpmsumb, - mfdscr, - vsx, - ldbrx, - stdbrx, - vshasig, darn, brw, num_features // last entry to count features }; enum Feature_Flag_Set { unknown_m = 0, - fsqrt_m = (1 << fsqrt ), - fsqrts_m = (1 << fsqrts ), - isel_m = (1 << isel ), - lxarxeh_m = (1 << lxarxeh), - cmpb_m = (1 << cmpb ), - popcntb_m = (1 << popcntb), - popcntw_m = (1 << popcntw), - fcfids_m = (1 << fcfids ), - vand_m = (1 << vand ), - lqarx_m = (1 << lqarx ), - vcipher_m = (1 << vcipher), - vpmsumb_m = (1 << vpmsumb), - mfdscr_m = (1 << mfdscr ), - vsx_m = (1 << vsx ), - ldbrx_m = (1 << ldbrx ), - stdbrx_m = (1 << stdbrx ), - vshasig_m = (1 << vshasig), darn_m = (1 << darn ), brw_m = (1 << brw ), all_features_m = (unsigned long)-1 @@ -101,28 +67,9 @@ class VM_Version: public Abstract_VM_Version { static bool is_determine_features_test_running() { return _is_determine_features_test_running; } // CPU instruction support - static bool has_fsqrt() { return (_features & fsqrt_m) != 0; } - static bool has_fsqrts() { return (_features & fsqrts_m) != 0; } - static bool has_isel() { return (_features & isel_m) != 0; } - static bool has_lxarxeh() { return (_features & lxarxeh_m) !=0; } - static bool has_cmpb() { return (_features & cmpb_m) != 0; } - static bool has_popcntb() { return (_features & popcntb_m) != 0; } - static bool has_popcntw() { return (_features & popcntw_m) != 0; } - static bool has_fcfids() { return (_features & fcfids_m) != 0; } - static bool has_vand() { return (_features & vand_m) != 0; } - static bool has_lqarx() { return (_features & lqarx_m) != 0; } - static bool has_vcipher() { return (_features & vcipher_m) != 0; } - static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; } - static bool has_mfdscr() { return (_features & mfdscr_m) != 0; } - static bool has_vsx() { return (_features & vsx_m) != 0; } - static bool has_ldbrx() { return (_features & ldbrx_m) != 0; } - static bool has_stdbrx() { return (_features & stdbrx_m) != 0; } - static bool has_vshasig() { return (_features & vshasig_m) != 0; } static bool has_darn() { return (_features & darn_m) != 0; } static bool has_brw() { return (_features & brw_m) != 0; } - static bool has_mtfprd() { return has_vpmsumb(); } // alias for P8 - // Assembler testing static void allow_all(); static void revert(); diff --git a/src/hotspot/cpu/ppc/vmreg_ppc.cpp b/src/hotspot/cpu/ppc/vmreg_ppc.cpp index d8a5c35cac0..2ed68578a80 100644 --- a/src/hotspot/cpu/ppc/vmreg_ppc.cpp +++ b/src/hotspot/cpu/ppc/vmreg_ppc.cpp @@ -32,21 +32,29 @@ void VMRegImpl::set_regName() { for (i = 0; i < ConcreteRegisterImpl::max_gpr; ) { regName[i++] = reg->name(); regName[i++] = reg->name(); - if (reg->encoding() < Register::number_of_registers-1) + if (reg->encoding() < Register::number_of_registers - 1) { reg = reg->successor(); + } } FloatRegister freg = ::as_FloatRegister(0); for ( ; i < ConcreteRegisterImpl::max_fpr; ) { regName[i++] = freg->name(); regName[i++] = freg->name(); - if (reg->encoding() < FloatRegister::number_of_registers-1) + if (reg->encoding() < FloatRegister::number_of_registers - 1) { freg = freg->successor(); + } } VectorSRegister vsreg = ::as_VectorSRegister(0); for ( ; i < ConcreteRegisterImpl::max_vsr; ) { regName[i++] = vsreg->name(); + regName[i++] = vsreg->name(); + regName[i++] = 
vsreg->name(); + regName[i++] = vsreg->name(); + if (reg->encoding() < VectorSRegister::number_of_registers - 1) { + vsreg = vsreg->successor(); + } } for ( ; i < ConcreteRegisterImpl::number_of_registers; ) { diff --git a/src/hotspot/cpu/ppc/vmreg_ppc.hpp b/src/hotspot/cpu/ppc/vmreg_ppc.hpp index b2d97a6d385..4e25c8b3cea 100644 --- a/src/hotspot/cpu/ppc/vmreg_ppc.hpp +++ b/src/hotspot/cpu/ppc/vmreg_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2001, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2022 SAP SE. All rights reserved. + * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -62,12 +62,17 @@ inline FloatRegister as_FloatRegister() { inline VectorSRegister as_VectorSRegister() { assert(is_VectorSRegister(), "must be"); - return ::as_VectorSRegister(value() - ConcreteRegisterImpl::max_fpr); + return ::as_VectorSRegister((value() - ConcreteRegisterImpl::max_fpr) >> 2); } inline bool is_concrete() { assert(is_reg(), "must be"); - return is_even(value()); + if (is_Register() || is_FloatRegister()) return is_even(value()); + if (is_VectorSRegister()) { + int base = value() - ConcreteRegisterImpl::max_fpr; + return (base & 3) == 0; + } + return true; } #endif // CPU_PPC_VMREG_PPC_HPP diff --git a/src/hotspot/cpu/ppc/vmreg_ppc.inline.hpp b/src/hotspot/cpu/ppc/vmreg_ppc.inline.hpp index afaefe50c97..2424df8da01 100644 --- a/src/hotspot/cpu/ppc/vmreg_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/vmreg_ppc.inline.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2022 SAP SE. All rights reserved. + * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,7 +41,8 @@ inline VMReg FloatRegister::as_VMReg() const { } inline VMReg VectorSRegister::as_VMReg() const { - return VMRegImpl::as_VMReg((encoding()) + ConcreteRegisterImpl::max_fpr); + // Four halves, multiply by 4. 
+ return VMRegImpl::as_VMReg((encoding() << 2) + ConcreteRegisterImpl::max_fpr); } inline VMReg ConditionRegister::as_VMReg() const { diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index e036cb6b1ec..3317ccc3b53 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -671,29 +671,86 @@ class Assembler : public AbstractAssembler { #undef INSN -// Load/store register (all modes) -#define INSN(NAME, op, funct3) \ - void NAME(Register Rd, Register Rs, const int32_t offset) { \ - guarantee(is_simm12(offset), "offset is invalid."); \ - unsigned insn = 0; \ - int32_t val = offset & 0xfff; \ - patch((address)&insn, 6, 0, op); \ - patch((address)&insn, 14, 12, funct3); \ - patch_reg((address)&insn, 15, Rs); \ - patch_reg((address)&insn, 7, Rd); \ - patch((address)&insn, 31, 20, val); \ - emit(insn); \ - } - - INSN(lb, 0b0000011, 0b000); - INSN(_lbu, 0b0000011, 0b100); - INSN(_lh, 0b0000011, 0b001); - INSN(_lhu, 0b0000011, 0b101); - INSN(_lw, 0b0000011, 0b010); - INSN(lwu, 0b0000011, 0b110); - INSN(_ld, 0b0000011, 0b011); + private: + // Load + enum LoadWidthFunct3 : uint8_t { + LOAD_WIDTH_BYTE = 0b000, + LOAD_WIDTH_HALFWORD = 0b001, + LOAD_WIDTH_WORD = 0b010, + LOAD_WIDTH_DOUBLEWORD = 0b011, + LOAD_WIDTH_BYTE_UNSIGNED = 0b100, + LOAD_WIDTH_HALFWORD_UNSIGNED = 0b101, + LOAD_WIDTH_WORD_UNSIGNED = 0b110, + // 0b111 is reserved + }; -#undef INSN + static constexpr uint8_t OP_LOAD_MAJOR = 0b0000011; + static constexpr uint8_t OP_FP_LOAD_MAJOR = 0b0000111; + + template + void load_base(uint8_t Rd, Register Rs, const int32_t offset) { + guarantee(is_simm12(offset), "offset is invalid."); + unsigned insn = 0; + int32_t val = offset & 0xfff; + patch((address)&insn, 6, 0, op_major); + patch((address)&insn, 11, 7, Rd); + patch((address)&insn, 14, 12, width); + patch_reg((address)&insn, 15, Rs); + patch((address)&insn, 31, 20, val); + emit(insn); + } + + template + void load_base(Register Rd, Register Rs, const int32_t offset) { + load_base (Rd->raw_encoding(), Rs, offset); + } + + template + void load_base(FloatRegister Rd, Register Rs, const int32_t offset) { + load_base (Rd->raw_encoding(), Rs, offset); + } + + public: + + void lb(Register Rd, Register Rs, const int32_t offset) { + load_base (Rd, Rs, offset); + } + + void _lbu(Register Rd, Register Rs, const int32_t offset) { + load_base (Rd, Rs, offset); + } + + void _lh(Register Rd, Register Rs, const int32_t offset) { + load_base (Rd, Rs, offset); + } + + void _lhu(Register Rd, Register Rs, const int32_t offset) { + load_base (Rd, Rs, offset); + } + + void _lw(Register Rd, Register Rs, const int32_t offset) { + load_base (Rd, Rs, offset); + } + + void lwu(Register Rd, Register Rs, const int32_t offset) { + load_base (Rd, Rs, offset); + } + + void _ld(Register Rd, Register Rs, const int32_t offset) { + load_base (Rd, Rs, offset); + } + + void flh(FloatRegister Rd, Register Rs, const int32_t offset) { + load_base (Rd, Rs, offset); + } + + void flw(FloatRegister Rd, Register Rs, const int32_t offset) { + load_base (Rd, Rs, offset); + } + + void _fld(FloatRegister Rd, Register Rs, const int32_t offset) { + load_base (Rd, Rs, offset); + } #define INSN(NAME, op, funct3) \ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ @@ -724,30 +781,70 @@ class Assembler : public AbstractAssembler { #undef INSN -#define INSN(NAME, REGISTER, op, funct3) \ - void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ - guarantee(is_simm12(offset), "offset 
is invalid."); \ - unsigned insn = 0; \ - uint32_t val = offset & 0xfff; \ - uint32_t low = val & 0x1f; \ - uint32_t high = (val >> 5) & 0x7f; \ - patch((address)&insn, 6, 0, op); \ - patch((address)&insn, 14, 12, funct3); \ - patch_reg((address)&insn, 15, Rs2); \ - patch_reg((address)&insn, 20, Rs1); \ - patch((address)&insn, 11, 7, low); \ - patch((address)&insn, 31, 25, high); \ - emit(insn); \ - } \ + private: - INSN(_sb, Register, 0b0100011, 0b000); - INSN(_sh, Register, 0b0100011, 0b001); - INSN(_sw, Register, 0b0100011, 0b010); - INSN(_sd, Register, 0b0100011, 0b011); - INSN(fsw, FloatRegister, 0b0100111, 0b010); - INSN(_fsd, FloatRegister, 0b0100111, 0b011); + enum StoreWidthFunct3 : uint8_t { + STORE_WIDTH_BYTE = 0b000, + STORE_WIDTH_HALFWORD = 0b001, + STORE_WIDTH_WORD = 0b010, + STORE_WIDTH_DOUBLEWORD = 0b011, + // 0b100 to 0b111 are reserved for this opcode + }; -#undef INSN + static constexpr uint8_t OP_STORE_MAJOR = 0b0100011; + static constexpr uint8_t OP_FP_STORE_MAJOR = 0b0100111; + + template + void store_base(uint8_t Rs2, Register Rs1, const int32_t offset) { + guarantee(is_simm12(offset), "offset is invalid."); + unsigned insn = 0; + uint32_t val = offset & 0xfff; + uint32_t low = val & 0x1f; + uint32_t high = (val >> 5) & 0x7f; + patch((address)&insn, 6, 0, op_code); + patch((address)&insn, 11, 7, low); + patch((address)&insn, 14, 12, width); + patch_reg((address)&insn, 15, Rs1); + patch((address)&insn, 24, 20, Rs2); + patch((address)&insn, 31, 25, high); + emit(insn); + } + + template + void store_base(Register Rs2, Register Rs1, const int32_t offset) { + store_base (Rs2->raw_encoding(), Rs1, offset); + } + + template + void store_base(FloatRegister Rs2, Register Rs1, const int32_t offset) { + store_base (Rs2->raw_encoding(), Rs1, offset); + } + + public: + + void _sb(Register Rs2, Register Rs1, const int32_t offset) { + store_base (Rs2, Rs1, offset); + } + + void _sh(Register Rs2, Register Rs1, const int32_t offset) { + store_base (Rs2, Rs1, offset); + } + + void _sw(Register Rs2, Register Rs1, const int32_t offset) { + store_base (Rs2, Rs1, offset); + } + + void _sd(Register Rs2, Register Rs1, const int32_t offset) { + store_base (Rs2, Rs1, offset); + } + + void fsw(FloatRegister Rs2, Register Rs1, const int32_t offset) { + store_base (Rs2, Rs1, offset); + } + + void _fsd(FloatRegister Rs2, Register Rs1, const int32_t offset) { + store_base (Rs2, Rs1, offset); + } #define INSN(NAME, op, funct3) \ void NAME(Register Rd, const uint32_t csr, Register Rs1) { \ @@ -864,81 +961,239 @@ class Assembler : public AbstractAssembler { #undef INSN -enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; - -#define INSN(NAME, op, funct3, funct7) \ - void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { \ - unsigned insn = 0; \ - patch((address)&insn, 6, 0, op); \ - patch((address)&insn, 14, 12, funct3); \ - patch_reg((address)&insn, 7, Rd); \ - patch_reg((address)&insn, 15, Rs1); \ - patch_reg((address)&insn, 20, Rs2); \ - patch((address)&insn, 31, 27, funct7); \ - patch((address)&insn, 26, 25, memory_order); \ - emit(insn); \ - } - - INSN(amoswap_w, 0b0101111, 0b010, 0b00001); - INSN(amoadd_w, 0b0101111, 0b010, 0b00000); - INSN(amoxor_w, 0b0101111, 0b010, 0b00100); - INSN(amoand_w, 0b0101111, 0b010, 0b01100); - INSN(amoor_w, 0b0101111, 0b010, 0b01000); - INSN(amomin_w, 0b0101111, 0b010, 0b10000); - INSN(amomax_w, 0b0101111, 0b010, 0b10100); - INSN(amominu_w, 0b0101111, 0b010, 0b11000); - INSN(amomaxu_w, 0b0101111, 0b010, 0b11100); - 
INSN(amoswap_d, 0b0101111, 0b011, 0b00001); - INSN(amoadd_d, 0b0101111, 0b011, 0b00000); - INSN(amoxor_d, 0b0101111, 0b011, 0b00100); - INSN(amoand_d, 0b0101111, 0b011, 0b01100); - INSN(amoor_d, 0b0101111, 0b011, 0b01000); - INSN(amomin_d, 0b0101111, 0b011, 0b10000); - INSN(amomax_d , 0b0101111, 0b011, 0b10100); - INSN(amominu_d, 0b0101111, 0b011, 0b11000); - INSN(amomaxu_d, 0b0101111, 0b011, 0b11100); - INSN(amocas_w, 0b0101111, 0b010, 0b00101); - INSN(amocas_d, 0b0101111, 0b011, 0b00101); -#undef INSN - -enum operand_size { int8, int16, int32, uint32, int64 }; - -#define INSN(NAME, op, funct3, funct7) \ - void NAME(Register Rd, Register Rs1, Aqrl memory_order = relaxed) { \ - unsigned insn = 0; \ - uint32_t val = memory_order & 0x3; \ - patch((address)&insn, 6, 0, op); \ - patch((address)&insn, 14, 12, funct3); \ - patch_reg((address)&insn, 7, Rd); \ - patch_reg((address)&insn, 15, Rs1); \ - patch((address)&insn, 25, 20, 0b00000); \ - patch((address)&insn, 31, 27, funct7); \ - patch((address)&insn, 26, 25, val); \ - emit(insn); \ - } - - INSN(lr_w, 0b0101111, 0b010, 0b00010); - INSN(lr_d, 0b0101111, 0b011, 0b00010); - -#undef INSN - -#define INSN(NAME, op, funct3, funct7) \ - void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = relaxed) { \ - unsigned insn = 0; \ - uint32_t val = memory_order & 0x3; \ - patch((address)&insn, 6, 0, op); \ - patch((address)&insn, 14, 12, funct3); \ - patch_reg((address)&insn, 7, Rd); \ - patch_reg((address)&insn, 15, Rs2); \ - patch_reg((address)&insn, 20, Rs1); \ - patch((address)&insn, 31, 27, funct7); \ - patch((address)&insn, 26, 25, val); \ - emit(insn); \ + enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; + + private: + + enum AmoWidthFunct3 : uint8_t { + AMO_WIDTH_BYTE = 0b000, // Zabha extension + AMO_WIDTH_HALFWORD = 0b001, // Zabha extension + AMO_WIDTH_WORD = 0b010, + AMO_WIDTH_DOUBLEWORD = 0b011, + AMO_WIDTH_QUADWORD = 0b100, + // 0b101 to 0b111 are reserved + }; + + enum AmoOperationFunct5 : uint8_t { + AMO_ADD = 0b00000, + AMO_SWAP = 0b00001, + AMO_LR = 0b00010, + AMO_SC = 0b00011, + AMO_XOR = 0b00100, + AMO_OR = 0b01000, + AMO_AND = 0b01100, + AMO_MIN = 0b10000, + AMO_MAX = 0b10100, + AMO_MINU = 0b11000, + AMO_MAXU = 0b11100, + AMO_CAS = 0b00101 // Zacas + }; + + static constexpr uint32_t OP_AMO_MAJOR = 0b0101111; + + template + void amo_base(Register Rd, Register Rs1, uint8_t Rs2, Aqrl memory_order = aqrl) { + assert(width > AMO_WIDTH_HALFWORD || UseZabha, "Must be"); + assert(funct5 != AMO_CAS || UseZacas, "Must be"); + unsigned insn = 0; + patch((address)&insn, 6, 0, OP_AMO_MAJOR); + patch_reg((address)&insn, 7, Rd); + patch((address)&insn, 14, 12, width); + patch_reg((address)&insn, 15, Rs1); + patch((address)&insn, 24, 20, Rs2); + patch((address)&insn, 26, 25, memory_order); + patch((address)&insn, 31, 27, funct5); + emit(insn); } - INSN(sc_w, 0b0101111, 0b010, 0b00011); - INSN(sc_d, 0b0101111, 0b011, 0b00011); -#undef INSN + template + void amo_base(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2->raw_encoding(), memory_order); + } + + public: + + void amoadd_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoadd_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoadd_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoadd_d(Register 
Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoswap_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoswap_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoswap_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoswap_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoxor_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoxor_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoxor_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoxor_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoor_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoor_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoor_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoor_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoand_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoand_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoand_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amoand_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomin_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomin_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomin_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomin_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amominu_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amominu_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amominu_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amominu_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomax_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomax_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void 
amomax_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomax_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomaxu_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomaxu_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomaxu_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amomaxu_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + protected: + + void lr_w(Register Rd, Register Rs1, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, 0, memory_order); + } + + void lr_d(Register Rd, Register Rs1, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, 0, memory_order); + } + + void sc_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void sc_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amocas_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amocas_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amocas_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + void amocas_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { + amo_base (Rd, Rs1, Rs2, memory_order); + } + + public: + + enum operand_size { int8, int16, int32, uint32, int64 }; // Immediate Instruction #define INSN(NAME, op, funct3) \ @@ -1345,29 +1600,6 @@ enum operand_size { int8, int16, int32, uint32, int64 }; fp_base (Rd, Rs1, 0b00000, 0b000); } - private: - static constexpr unsigned int OP_LOAD_FP = 0b0000111; - - template - void fp_load(FloatRegister Rd, Register Rs, const int32_t offset) { - guarantee(is_uimm3(FpWidth), "Rounding mode is out of validity"); - guarantee(is_simm12(offset), "offset is invalid."); - unsigned insn = 0; - uint32_t val = offset & 0xfff; - patch((address)&insn, 6, 0, OP_LOAD_FP); - patch_reg((address)&insn, 7, Rd); - patch((address)&insn, 14, 12, FpWidth); - patch_reg((address)&insn, 15, Rs); - patch((address)&insn, 31, 20, val); - emit(insn); - } - - public: - - void flh(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b001>(Rd, Rs, offset); } - void flw(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b010>(Rd, Rs, offset); } - void _fld(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b011>(Rd, Rs, offset); } - private: template void fp_fm(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm) { @@ -1904,8 +2136,14 @@ enum VectorMask { INSN(vand_vv, 0b1010111, 0b000, 0b001001); // Vector Single-Width Integer Add and Subtract - INSN(vsub_vv, 0b1010111, 0b000, 0b000010); INSN(vadd_vv, 0b1010111, 0b000, 0b000000); + INSN(vsub_vv, 0b1010111, 0b000, 0b000010); + + // Vector Saturating Integer Add and Subtract + INSN(vsadd_vv, 0b1010111, 0b000, 0b100001); + INSN(vsaddu_vv, 0b1010111, 0b000, 0b100000); + INSN(vssub_vv, 0b1010111, 0b000, 0b100011); + INSN(vssubu_vv, 0b1010111, 0b000, 0b100010); // Vector 
Register Gather Instructions INSN(vrgather_vv, 0b1010111, 0b000, 0b001100); @@ -2323,6 +2561,7 @@ enum Nf { } // Vector Bit-manipulation used in Cryptography (Zvbb) Extension + INSN(vandn_vx, 0b1010111, 0b100, 0b000001); INSN(vrol_vx, 0b1010111, 0b100, 0b010101); INSN(vror_vx, 0b1010111, 0b100, 0b010100); @@ -2509,17 +2748,17 @@ enum Nf { // wrappers such as 'add' which do the compressing work through 'c_add' depending on the // the operands of the instruction and availability of the RVC hardware extension. // -// 2. 'CompressibleRegion' and 'IncompressibleRegion' are introduced to mark assembler scopes +// 2. 'CompressibleScope' and 'IncompressibleScope' are introduced to mark assembler scopes // within which instructions are qualified or unqualified to be compressed into their 16-bit // versions. An example: // -// CompressibleRegion cr(_masm); +// CompressibleScope scope(_masm); // __ add(...); // this instruction will be compressed into 'c.add' when possible // { -// IncompressibleRegion ir(_masm); +// IncompressibleScope scope(_masm); // __ add(...); // this instruction will not be compressed // { -// CompressibleRegion cr(_masm); +// CompressibleScope scope(_masm); // __ add(...); // this instruction will be compressed into 'c.add' when possible // } // } @@ -2528,40 +2767,40 @@ enum Nf { // distinguish compressed 16-bit instructions from normal 32-bit ones. private: - bool _in_compressible_region; + bool _in_compressible_scope; public: - bool in_compressible_region() const { return _in_compressible_region; } - void set_in_compressible_region(bool b) { _in_compressible_region = b; } + bool in_compressible_scope() const { return _in_compressible_scope; } + void set_in_compressible_scope(bool b) { _in_compressible_scope = b; } public: - // an abstract compressible region - class AbstractCompressibleRegion : public StackObj { + // An abstract compressible scope + class AbstractCompressibleScope : public StackObj { protected: Assembler *_masm; - bool _saved_in_compressible_region; + bool _saved_in_compressible_scope; protected: - AbstractCompressibleRegion(Assembler *_masm) + AbstractCompressibleScope(Assembler *_masm) : _masm(_masm) - , _saved_in_compressible_region(_masm->in_compressible_region()) {} + , _saved_in_compressible_scope(_masm->in_compressible_scope()) {} }; - // a compressible region - class CompressibleRegion : public AbstractCompressibleRegion { + // A compressible scope + class CompressibleScope : public AbstractCompressibleScope { public: - CompressibleRegion(Assembler *_masm) : AbstractCompressibleRegion(_masm) { - _masm->set_in_compressible_region(true); + CompressibleScope(Assembler *_masm) : AbstractCompressibleScope(_masm) { + _masm->set_in_compressible_scope(true); } - ~CompressibleRegion() { - _masm->set_in_compressible_region(_saved_in_compressible_region); + ~CompressibleScope() { + _masm->set_in_compressible_scope(_saved_in_compressible_scope); } }; - // an incompressible region - class IncompressibleRegion : public AbstractCompressibleRegion { + // An incompressible scope + class IncompressibleScope : public AbstractCompressibleScope { public: - IncompressibleRegion(Assembler *_masm) : AbstractCompressibleRegion(_masm) { - _masm->set_in_compressible_region(false); + IncompressibleScope(Assembler *_masm) : AbstractCompressibleScope(_masm) { + _masm->set_in_compressible_scope(false); } - ~IncompressibleRegion() { - _masm->set_in_compressible_region(_saved_in_compressible_region); + ~IncompressibleScope() { + 
_masm->set_in_compressible_scope(_saved_in_compressible_scope); } }; @@ -2576,13 +2815,13 @@ enum Nf { template void relocate(RelocationHolder const& rspec, Callback emit_insts, int format = 0) { AbstractAssembler::relocate(rspec, format); - IncompressibleRegion ir(this); // relocations + IncompressibleScope scope(this); // relocations emit_insts(); } template void relocate(relocInfo::relocType rtype, Callback emit_insts, int format = 0) { AbstractAssembler::relocate(rtype, format); - IncompressibleRegion ir(this); // relocations + IncompressibleScope scope(this); // relocations emit_insts(); } @@ -3163,7 +3402,7 @@ enum Nf { public: bool do_compress() const { - return UseRVC && in_compressible_region(); + return UseRVC && in_compressible_scope(); } bool do_compress_zcb(Register reg1 = noreg, Register reg2 = noreg) const { @@ -3178,125 +3417,95 @@ enum Nf { // -------------------------- // Load/store register // -------------------------- -#define INSN(NAME) \ - void NAME(Register Rd, Register Rs, const int32_t offset) { \ - /* lw -> c.lwsp/c.lw */ \ - if (do_compress()) { \ - if (is_c_lwswsp(Rs, Rd, offset, true)) { \ - c_lwsp(Rd, offset); \ - return; \ - } else if (is_c_lwsw(Rs, Rd, offset)) { \ - c_lw(Rd, Rs, offset); \ - return; \ - } \ - } \ - _lw(Rd, Rs, offset); \ + void lw(Register Rd, Register Rs, const int32_t offset) { + /* lw -> c.lwsp/c.lw */ + if (do_compress()) { + if (is_c_lwswsp(Rs, Rd, offset, true)) { + c_lwsp(Rd, offset); + return; + } else if (is_c_lwsw(Rs, Rd, offset)) { + c_lw(Rd, Rs, offset); + return; + } + } + _lw(Rd, Rs, offset); } - INSN(lw); - -#undef INSN - // -------------------------- -#define INSN(NAME) \ - void NAME(Register Rd, Register Rs, const int32_t offset) { \ - /* ld -> c.ldsp/c.ld */ \ - if (do_compress()) { \ - if (is_c_ldsdsp(Rs, Rd, offset, true)) { \ - c_ldsp(Rd, offset); \ - return; \ - } else if (is_c_ldsd(Rs, Rd, offset)) { \ - c_ld(Rd, Rs, offset); \ - return; \ - } \ - } \ - _ld(Rd, Rs, offset); \ + void ld(Register Rd, Register Rs, const int32_t offset) { + /* ld -> c.ldsp/c.ld */ + if (do_compress()) { + if (is_c_ldsdsp(Rs, Rd, offset, true)) { + c_ldsp(Rd, offset); + return; + } else if (is_c_ldsd(Rs, Rd, offset)) { + c_ld(Rd, Rs, offset); + return; + } + } + _ld(Rd, Rs, offset); } - INSN(ld); - -#undef INSN - // -------------------------- -#define INSN(NAME) \ - void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ - /* fld -> c.fldsp/c.fld */ \ - if (do_compress()) { \ - if (is_c_fldsdsp(Rs, offset)) { \ - c_fldsp(Rd, offset); \ - return; \ - } else if (is_c_fldsd(Rs, Rd, offset)) { \ - c_fld(Rd, Rs, offset); \ - return; \ - } \ - } \ - _fld(Rd, Rs, offset); \ + void fld(FloatRegister Rd, Register Rs, const int32_t offset) { + /* fld -> c.fldsp/c.fld */ + if (do_compress()) { + if (is_c_fldsdsp(Rs, offset)) { + c_fldsp(Rd, offset); + return; + } else if (is_c_fldsd(Rs, Rd, offset)) { + c_fld(Rd, Rs, offset); + return; + } + } + _fld(Rd, Rs, offset); } - INSN(fld); - -#undef INSN - // -------------------------- -#define INSN(NAME) \ - void NAME(Register Rd, Register Rs, const int32_t offset) { \ - /* sd -> c.sdsp/c.sd */ \ - if (do_compress()) { \ - if (is_c_ldsdsp(Rs, Rd, offset, false)) { \ - c_sdsp(Rd, offset); \ - return; \ - } else if (is_c_ldsd(Rs, Rd, offset)) { \ - c_sd(Rd, Rs, offset); \ - return; \ - } \ - } \ - _sd(Rd, Rs, offset); \ + void sd(Register Rs2, Register Rs1, const int32_t offset) { + /* sd -> c.sdsp/c.sd */ + if (do_compress()) { + if (is_c_ldsdsp(Rs1, Rs2, offset, false)) { + c_sdsp(Rs2, offset); 
+ return; + } else if (is_c_ldsd(Rs1, Rs2, offset)) { + c_sd(Rs2, Rs1, offset); + return; + } + } + _sd(Rs2, Rs1, offset); } - INSN(sd); - -#undef INSN - // -------------------------- -#define INSN(NAME) \ - void NAME(Register Rd, Register Rs, const int32_t offset) { \ - /* sw -> c.swsp/c.sw */ \ - if (do_compress()) { \ - if (is_c_lwswsp(Rs, Rd, offset, false)) { \ - c_swsp(Rd, offset); \ - return; \ - } else if (is_c_lwsw(Rs, Rd, offset)) { \ - c_sw(Rd, Rs, offset); \ - return; \ - } \ - } \ - _sw(Rd, Rs, offset); \ + void sw(Register Rs2, Register Rs1, const int32_t offset) { + /* sw -> c.swsp/c.sw */ + if (do_compress()) { + if (is_c_lwswsp(Rs1, Rs2, offset, false)) { + c_swsp(Rs2, offset); + return; + } else if (is_c_lwsw(Rs1, Rs2, offset)) { + c_sw(Rs2, Rs1, offset); + return; + } + } + _sw(Rs2, Rs1, offset); } - INSN(sw); - -#undef INSN - // -------------------------- -#define INSN(NAME) \ - void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ - /* fsd -> c.fsdsp/c.fsd */ \ - if (do_compress()) { \ - if (is_c_fldsdsp(Rs, offset)) { \ - c_fsdsp(Rd, offset); \ - return; \ - } else if (is_c_fldsd(Rs, Rd, offset)) { \ - c_fsd(Rd, Rs, offset); \ - return; \ - } \ - } \ - _fsd(Rd, Rs, offset); \ + void fsd(FloatRegister Rs2, Register Rs1, const int32_t offset) { + /* fsd -> c.fsdsp/c.fsd */ + if (do_compress()) { + if (is_c_fldsdsp(Rs1, offset)) { + c_fsdsp(Rs2, offset); + return; + } else if (is_c_fldsd(Rs1, Rs2, offset)) { + c_fsd(Rs2, Rs1, offset); + return; + } + } + _fsd(Rs2, Rs1, offset); } - INSN(fsd); - -#undef INSN - // -------------------------- // Unconditional branch instructions // -------------------------- @@ -3817,7 +4026,7 @@ enum Nf { static const unsigned long branch_range = 1 * M; static bool reachable_from_branch_at(address branch, address target) { - return uabs(target - branch) < branch_range; + return g_uabs(target - branch) < branch_range; } // Decode the given instruction, checking if it's a 16-bit compressed @@ -3831,7 +4040,7 @@ enum Nf { } } - Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(true) {} + Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_scope(true) {} }; #endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp index d55521823ec..ea299181ca7 100644 --- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp @@ -70,7 +70,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ far_call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); return; } @@ -92,7 +92,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ rt_call(Runtime1::entry_for(stub_id), ra); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { @@ -105,7 +105,7 @@ void PredicateFailedStub::emit_code(LIR_Assembler* ce) { __ far_call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } void DivByZeroStub::emit_code(LIR_Assembler* ce) { @@ -258,7 +258,7 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { __ far_call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ 
should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { @@ -272,7 +272,7 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { } __ far_call(RuntimeAddress(Runtime1::entry_for(_stub))); ce->add_call_info_here(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } void ArrayCopyStub::emit_code(LIR_Assembler* ce) { diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp index 76089e8dd45..32b99f56909 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -61,16 +61,17 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr null_check_offset = offset(); - if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(hdr, obj); - lbu(hdr, Address(hdr, Klass::misc_flags_offset())); - test_bit(temp, hdr, exact_log2(KlassFlags::_misc_is_value_based_class)); - bnez(temp, slow_case, true /* is_far */); - } - if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(disp_hdr, obj, hdr, temp, t1, slow_case); } else if (LockingMode == LM_LEGACY) { + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(hdr, obj); + lbu(hdr, Address(hdr, Klass::misc_flags_offset())); + test_bit(temp, hdr, exact_log2(KlassFlags::_misc_is_value_based_class)); + bnez(temp, slow_case, /* is_far */ true); + } + Label done; // Load object header ld(hdr, Address(obj, hdr_offset)); @@ -348,7 +349,7 @@ void C1_MacroAssembler::verified_entry(bool breakAtEntry) { // first instruction with a jump. For this action to be legal we // must ensure that this first instruction is a J, JAL or NOP. // Make it a NOP. - IncompressibleRegion ir(this); // keep the nop as 4 bytes for patching. + IncompressibleScope scope(this); // keep the nop as 4 bytes for patching. assert_alignment(pc()); nop(); // 4 bytes } diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index 99cbcedb8ff..77b4e26cc92 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -289,7 +289,7 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Label slow_path; if (UseObjectMonitorTable) { - // Clear cache in case fast locking succeeds. + // Clear cache in case fast locking succeeds or we need to take the slow-path. sd(zr, Address(box, BasicLock::object_monitor_cache_offset_in_bytes())); } @@ -2156,6 +2156,36 @@ void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Regis } } +void C2_MacroAssembler::enc_cmove_cmp_fp(int cmpFlag, FloatRegister op1, FloatRegister op2, Register dst, Register src, bool is_single) { + int op_select = cmpFlag & (~unsigned_branch_mask); + + switch (op_select) { + case BoolTest::eq: + cmov_cmp_fp_eq(op1, op2, dst, src, is_single); + break; + case BoolTest::ne: + cmov_cmp_fp_ne(op1, op2, dst, src, is_single); + break; + case BoolTest::le: + cmov_cmp_fp_le(op1, op2, dst, src, is_single); + break; + case BoolTest::ge: + assert(false, "Should go to BoolTest::le case"); + ShouldNotReachHere(); + break; + case BoolTest::lt: + cmov_cmp_fp_lt(op1, op2, dst, src, is_single); + break; + case BoolTest::gt: + assert(false, "Should go to BoolTest::lt case"); + ShouldNotReachHere(); + break; + default: + assert(false, "unsupported compare condition"); + ShouldNotReachHere(); + } +} + // Set dst to NaN if any NaN input. 
void C2_MacroAssembler::minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2, FLOAT_TYPE ft, bool is_min) { @@ -3080,7 +3110,9 @@ void C2_MacroAssembler::compare_integral_v(VectorRegister vd, VectorRegister src assert(is_integral_type(bt), "unsupported element type"); assert(vm == Assembler::v0_t ? vd != v0 : true, "should be different registers"); vsetvli_helper(bt, vector_length); - vmclr_m(vd); + if (vm == Assembler::v0_t) { + vmclr_m(vd); + } switch (cond) { case BoolTest::eq: vmseq_vv(vd, src1, src2, vm); break; case BoolTest::ne: vmsne_vv(vd, src1, src2, vm); break; @@ -3103,7 +3135,9 @@ void C2_MacroAssembler::compare_fp_v(VectorRegister vd, VectorRegister src1, Vec assert(is_floating_point_type(bt), "unsupported element type"); assert(vm == Assembler::v0_t ? vd != v0 : true, "should be different registers"); vsetvli_helper(bt, vector_length); - vmclr_m(vd); + if (vm == Assembler::v0_t) { + vmclr_m(vd); + } switch (cond) { case BoolTest::eq: vmfeq_vv(vd, src1, src2, vm); break; case BoolTest::ne: vmfne_vv(vd, src1, src2, vm); break; diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp index a650174d90f..73fceea3805 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp @@ -129,6 +129,10 @@ Register op1, Register op2, Register dst, Register src); + void enc_cmove_cmp_fp(int cmpFlag, + FloatRegister op1, FloatRegister op2, + Register dst, Register src, bool is_single); + void spill(Register r, bool is64, int offset) { is64 ? sd(r, Address(sp, offset)) : sw(r, Address(sp, offset)); diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp index de3c1b17c8e..79bdc4917c9 100644 --- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp @@ -43,7 +43,7 @@ define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT define_pd_global(intx, CompileThreshold, 10000); define_pd_global(intx, OnStackReplacePercentage, 140); -define_pd_global(intx, ConditionalMoveLimit, 0); +define_pd_global(intx, ConditionalMoveLimit, 3); define_pd_global(intx, FreqInlineSize, 325); define_pd_global(intx, MinJumpTableSize, 10); define_pd_global(intx, InteriorEntryAlignment, 16); diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp index 8ee6d11dcaf..e77375434c2 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.cpp +++ b/src/hotspot/cpu/riscv/frame_riscv.cpp @@ -670,7 +670,6 @@ void JavaFrameAnchor::make_walkable() { // already walkable? 
if (walkable()) { return; } vmassert(last_Java_sp() != nullptr, "not called from Java code?"); - vmassert(last_Java_pc() == nullptr, "already walkable"); _last_Java_pc = (address)_last_Java_sp[-1]; vmassert(walkable(), "something went wrong"); } diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp index 2e79c89e7b0..fb31760e20b 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp @@ -35,6 +35,53 @@ // Inline functions for RISCV frames: +#if INCLUDE_JFR + +// Static helper routines + +inline address frame::interpreter_bcp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<address>(fp[frame::interpreter_frame_bcp_offset]); +} + +inline address frame::interpreter_return_address(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<address>(fp[frame::return_addr_offset]); +} + +inline intptr_t* frame::interpreter_sender_sp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(fp[frame::interpreter_frame_sender_sp_offset]); +} + +inline bool frame::is_interpreter_frame_setup_at(const intptr_t* fp, const void* sp) { + assert(fp != nullptr, "invariant"); + assert(sp != nullptr, "invariant"); + return sp <= fp + frame::interpreter_frame_initial_sp_offset; +} + +inline intptr_t* frame::sender_sp(intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return fp + frame::sender_sp_offset; +} + +inline intptr_t* frame::link(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(fp[frame::link_offset]); +} + +inline address frame::return_address(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast<address>(sp[-1]); +} + +inline intptr_t* frame::fp(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(sp[-2]); +} + +#endif // INCLUDE_JFR + // Constructors: inline frame::frame() { diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp index 5b3c926cfa9..7e9bea381a5 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp @@ -226,7 +226,7 @@ void BarrierSetAssembler::clear_patching_epoch() { void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) { BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); - Assembler::IncompressibleRegion ir(masm); // Fixed length: see entry_barrier_offset() + Assembler::IncompressibleScope scope(masm); // Fixed length: see entry_barrier_offset() Label local_guard; NMethodPatchingType patching_type = nmethod_patching_type(); diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp index 39da77181c6..f24e4f789bc 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp @@ -31,8 +31,8 @@ #include "memory/resourceArea.hpp" #include "runtime/frame.inline.hpp" #include "runtime/javaThread.hpp" -#include "runtime/sharedRuntime.hpp" #include "runtime/registerMap.hpp" +#include "runtime/sharedRuntime.hpp" #include "utilities/align.hpp" #include "utilities/debug.hpp" #if INCLUDE_JVMCI diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp
b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp index 2a96bd32cf8..11c4e5dc81b 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp @@ -26,9 +26,9 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/shared/gc_globals.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" #define __ masm->masm()-> diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp index 3021351cca8..4c1056e75a5 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp @@ -23,6 +23,8 @@ * */ +#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" +#include "gc/shenandoah/mode/shenandoahMode.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" #include "gc/shenandoah/shenandoahForwarding.hpp" @@ -30,10 +32,8 @@ #include "gc/shenandoah/shenandoahHeapRegion.hpp" #include "gc/shenandoah/shenandoahRuntime.hpp" #include "gc/shenandoah/shenandoahThreadLocalData.hpp" -#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" -#include "gc/shenandoah/mode/shenandoahMode.hpp" -#include "interpreter/interpreter.hpp" #include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" #include "runtime/javaThread.hpp" #include "runtime/sharedRuntime.hpp" #ifdef COMPILER1 diff --git a/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp index 1f2f0146f04..9df0a431c45 100644 --- a/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp @@ -22,8 +22,8 @@ * questions. 
*/ -#include "gc/shared/gcLogPrecious.hpp" #include "gc/shared/gc_globals.hpp" +#include "gc/shared/gcLogPrecious.hpp" #include "gc/z/zAddress.hpp" #include "gc/z/zBarrierSetAssembler.hpp" #include "gc/z/zGlobals.hpp" @@ -36,9 +36,11 @@ #include #endif // LINUX -// Default value if probe is not implemented for a certain platform: 128TB -static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; -// Minimum value returned, if probing fails: 64GB +// Default value if probing is not implemented for a certain platform +// Max address bit is restricted by implicit assumptions in the code, for instance +// the bit layout of ZForwardingEntry or Partial array entry (see ZMarkStackEntry) in mark stack +static const size_t DEFAULT_MAX_ADDRESS_BIT = 46; +// Minimum value returned, if probing fails static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; static size_t probe_valid_max_address_bit() { diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp index 3ef084d30fc..d67e05bbb6d 100644 --- a/src/hotspot/cpu/riscv/globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp @@ -107,6 +107,7 @@ define_pd_global(intx, InlineSmallCode, 1000); product(bool, UseZfh, false, DIAGNOSTIC, "Use Zfh instructions") \ product(bool, UseZfhmin, false, DIAGNOSTIC, "Use Zfhmin instructions") \ product(bool, UseZacas, false, EXPERIMENTAL, "Use Zacas instructions") \ + product(bool, UseZabha, false, EXPERIMENTAL, "Use Zabha instructions") \ product(bool, UseZcb, false, EXPERIMENTAL, "Use Zcb instructions") \ product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \ product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \ diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index f1f9414d98a..fae34a9c770 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -497,9 +497,10 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { // remove activation // -// Apply stack watermark barrier. // Unlock the receiver if this is a synchronized method. // Unlock any Java monitors from synchronized blocks. +// Apply stack watermark barrier. +// Notify JVMTI. // Remove the activation from the stack. // // If there are locked Java monitors @@ -509,32 +510,14 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { // installs IllegalMonitorStateException // Else // no error processing -void InterpreterMacroAssembler::remove_activation( - TosState state, - bool throw_monitor_exception, - bool install_monitor_exception, - bool notify_jvmdi) { +void InterpreterMacroAssembler::remove_activation(TosState state, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { // Note: Registers x13 may be in use for the // result check if synchronized method Label unlocked, unlock, no_unlock; - // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, - // that would normally not be safe to use. Such bad returns into unsafe territory of
- Label slow_path; - Label fast_path; - safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); - j(fast_path); - - bind(slow_path); - push(state); - set_last_Java_frame(esp, fp, (address)pc(), t0); - super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); - reset_last_Java_frame(true); - pop(state); - - bind(fast_path); - // get the value of _do_not_unlock_if_synchronized into x13 const Address do_not_unlock_if_synchronized(xthread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); @@ -655,10 +638,27 @@ void InterpreterMacroAssembler::remove_activation( bind(no_unlock); - // jvmti support - if (notify_jvmdi) { - notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + JFR_ONLY(enter_jfr_critical_section();) + + // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, + // that would normally not be safe to use. Such bad returns into unsafe territory of + // the stack, will call InterpreterRuntime::at_unwind. + Label slow_path; + Label fast_path; + safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); + j(fast_path); + + bind(slow_path); + push(state); + set_last_Java_frame(esp, fp, pc(), t0); + super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); + reset_last_Java_frame(true); + pop(state); + bind(fast_path); + // JVMTI support. Make sure the safepoint poll test is issued prior. + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA } else { notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA } @@ -677,9 +677,13 @@ void InterpreterMacroAssembler::remove_activation( subw(t0, t0, StackOverflow::stack_guard_enabled); beqz(t0, no_reserved_zone_enabling); + // look for an overflow into the stack reserved zone, i.e. + // interpreter_frame_sender_sp <= JavaThread::reserved_stack_activation ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ble(t1, t0, no_reserved_zone_enabling); + JFR_ONLY(leave_jfr_critical_section();) + call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); call_VM(noreg, CAST_FROM_FN_PTR(address, @@ -689,11 +693,14 @@ void InterpreterMacroAssembler::remove_activation( bind(no_reserved_zone_enabling); } + // remove frame anchor + leave(); + + JFR_ONLY(leave_jfr_critical_section();) + // restore sender esp mv(esp, t1); - // remove frame anchor - leave(); // If we're returning to interpreted code we will shortly be // adjusting SP to allow some space for ESP. 
If we're returning to // compiled code the saved sender SP was saved in sender_sp, so this @@ -701,6 +708,19 @@ void InterpreterMacroAssembler::remove_activation( andi(sp, esp, -16); } +#if INCLUDE_JFR +void InterpreterMacroAssembler::enter_jfr_critical_section() { + const Address sampling_critical_section(xthread, in_bytes(SAMPLING_CRITICAL_SECTION_OFFSET_JFR)); + mv(t0, true); + sb(t0, sampling_critical_section); +} + +void InterpreterMacroAssembler::leave_jfr_critical_section() { + const Address sampling_critical_section(xthread, in_bytes(SAMPLING_CRITICAL_SECTION_OFFSET_JFR)); + sb(zr, sampling_critical_section); +} +#endif // INCLUDE_JFR + // Lock object // // Args: @@ -736,17 +756,18 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) // Load object pointer into obj_reg c_rarg3 ld(obj_reg, Address(lock_reg, obj_offset)); - if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(tmp, obj_reg); - lbu(tmp, Address(tmp, Klass::misc_flags_offset())); - test_bit(tmp, tmp, exact_log2(KlassFlags::_misc_is_value_based_class)); - bnez(tmp, slow_case); - } - if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(lock_reg, obj_reg, tmp, tmp2, tmp3, slow_case); j(done); } else if (LockingMode == LM_LEGACY) { + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp, obj_reg); + lbu(tmp, Address(tmp, Klass::misc_flags_offset())); + test_bit(tmp, tmp, exact_log2(KlassFlags::_misc_is_value_based_class)); + bnez(tmp, slow_case); + } + // Load (object->mark() | 1) into swap_reg ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ori(swap_reg, t0, 1); @@ -934,47 +955,29 @@ void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, - int constant, - bool decrement) { - increment_mdp_data_at(mdp_in, noreg, constant, decrement); + int constant) { + increment_mdp_data_at(mdp_in, noreg, constant); } void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, - Register reg, - int constant, - bool decrement) { + Register index, + int constant) { assert(ProfileInterpreter, "must be profiling interpreter"); - // %%% this does 64bit counters at best it is wasting space - // at worst it is a rare bug when counters overflow - assert_different_registers(t1, t0, mdp_in, reg); + assert_different_registers(t1, t0, mdp_in, index); Address addr1(mdp_in, constant); Address addr2(t1, 0); Address &addr = addr1; - if (reg != noreg) { + if (index != noreg) { la(t1, addr1); - add(t1, t1, reg); + add(t1, t1, index); addr = addr2; } - if (decrement) { - ld(t0, addr); - subi(t0, t0, DataLayout::counter_increment); - Label L; - bltz(t0, L); // skip store if counter underflow - sd(t0, addr); - bind(L); - } else { - assert(DataLayout::counter_increment == 1, - "flow-free idiom only works with 1"); - ld(t0, addr); - addi(t0, t0, DataLayout::counter_increment); - Label L; - blez(t0, L); // skip store if counter overflow - sd(t0, addr); - bind(L); - } + ld(t0, addr); + addi(t0, t0, DataLayout::counter_increment); + sd(t0, addr); } void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, @@ -1514,7 +1517,7 @@ void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { // interpreter specific // - // Note: No need to save/restore rbcp & rlocals pointer since these + // Note: No need to save/restore xbcp & xlocals pointer since these // are callee saved registers and no blocking/ GC can happen // in leaf calls. 
#ifdef ASSERT diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp index b94140ea990..891db16b243 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp @@ -233,11 +233,8 @@ class InterpreterMacroAssembler: public MacroAssembler { void verify_method_data_pointer(); void set_mdp_data_at(Register mdp_in, int constant, Register value); - void increment_mdp_data_at(Address data, bool decrement = false); - void increment_mdp_data_at(Register mdp_in, int constant, - bool decrement = false); - void increment_mdp_data_at(Register mdp_in, Register reg, int constant, - bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant); + void increment_mdp_data_at(Register mdp_in, Register index, int constant); void increment_mask_and_jump(Address counter_addr, int increment, Address mask, Register tmp1, Register tmp2, @@ -290,6 +287,9 @@ class InterpreterMacroAssembler: public MacroAssembler { void notify_method_entry(); void notify_method_exit(TosState state, NotifyMethodExitMode mode); + JFR_ONLY(void enter_jfr_critical_section();) + JFR_ONLY(void leave_jfr_critical_section();) + virtual void _call_Unimplemented(address call_site) { save_bcp(); set_last_Java_frame(esp, fp, (address) pc(), t0); diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index b5c311c341d..c755d9ae23d 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -256,7 +256,7 @@ void MacroAssembler::dec_held_monitor_count(Register tmp) { } int MacroAssembler::align(int modulus, int extra_offset) { - CompressibleRegion cr(this); + CompressibleScope scope(this); intptr_t before = offset(); while ((offset() + extra_offset) % modulus != 0) { nop(); } return (int)(offset() - before); @@ -417,7 +417,7 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); } else { L.add_patch_at(code(), locator()); - IncompressibleRegion ir(this); // the label address will be patched back. + IncompressibleScope scope(this); // the label address will be patched back. set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); } } @@ -564,7 +564,7 @@ void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, // The length of the instruction sequence emitted should not depend // on the address of the char buffer so that the size of mach nodes for // scratch emit and normal emit matches. - IncompressibleRegion ir(this); // Fixed length + IncompressibleScope scope(this); // Fixed length movptr(t0, (address) b); } @@ -604,7 +604,7 @@ void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* f // The length of the instruction sequence emitted should not depend // on the address of the char buffer so that the size of mach nodes for // scratch emit and normal emit matches. - IncompressibleRegion ir(this); // Fixed length + IncompressibleScope scope(this); // Fixed length movptr(t0, (address) b); } @@ -775,7 +775,7 @@ void MacroAssembler::unimplemented(const char* what) { } void MacroAssembler::emit_static_call_stub() { - IncompressibleRegion ir(this); // Fixed length: see CompiledDirectCall::to_interp_stub_size(). + IncompressibleScope scope(this); // Fixed length: see CompiledDirectCall::to_interp_stub_size(). // CompiledDirectCall::set_to_interpreted knows the // exact layout of this stub. 
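A short C++ sketch of what the JFR critical-section markers added earlier in interp_masm_riscv.cpp/.hpp amount to at runtime; the struct and field names below are illustrative placeholders for the JavaThread field behind SAMPLING_CRITICAL_SECTION_OFFSET_JFR, not JDK identifiers:

#include <cstdint>

// Sketch, not JDK code: models the byte stores emitted around interpreter frame teardown.
struct JavaThreadModel {
  uint8_t jfr_sampling_critical_section;  // placeholder for the real JavaThread field
};

// enter_jfr_critical_section emits: mv t0, true; sb t0, [xthread + SAMPLING_CRITICAL_SECTION_OFFSET_JFR]
static void enter_jfr_critical_section_model(JavaThreadModel* thread) {
  thread->jfr_sampling_critical_section = 1;
}

// leave_jfr_critical_section emits: sb zr, [xthread + SAMPLING_CRITICAL_SECTION_OFFSET_JFR]
static void leave_jfr_critical_section_model(JavaThreadModel* thread) {
  thread->jfr_sampling_critical_section = 0;
}

In remove_activation the flag is set just before the stack watermark safepoint poll and cleared either after leave() on the normal path or before the reserved-stack-zone runtime call, presumably so a JFR sample taken in that window can tell the frame is being torn down.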
@@ -907,7 +907,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) { } void MacroAssembler::la(Register Rd, Label &label) { - IncompressibleRegion ir(this); // the label address may be patched back. + IncompressibleScope scope(this); // the label address may be patched back. wrap_label(Rd, label, &MacroAssembler::la); } @@ -971,7 +971,7 @@ void MacroAssembler::j(const address dest, Register temp) { int64_t distance = dest - pc(); // We can't patch C, i.e. if Label wasn't bound we need to patch this jump. - IncompressibleRegion ir(this); + IncompressibleScope scope(this); if (is_simm21(distance) && ((distance % 2) == 0)) { Assembler::jal(x0, distance); } else { @@ -1267,6 +1267,130 @@ void MacroAssembler::cmov_gtu(Register cmp1, Register cmp2, Register dst, Regist bind(no_set); } +// ----------- cmove, compare float ----------- + +// Move src to dst only if cmp1 == cmp2, +// otherwise leave dst unchanged, including the case where one of them is NaN. +// Clarification: +// java code : cmp1 != cmp2 ? dst : src +// transformed to : CMove dst, (cmp1 eq cmp2), dst, src +void MacroAssembler::cmov_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single) { + if (UseZicond) { + if (is_single) { + feq_s(t0, cmp1, cmp2); + } else { + feq_d(t0, cmp1, cmp2); + } + czero_nez(dst, dst, t0); + czero_eqz(t0 , src, t0); + orr(dst, dst, t0); + return; + } + Label no_set; + if (is_single) { + // jump if cmp1 != cmp2, including the case of NaN + // not jump (i.e. move src to dst) if cmp1 == cmp2 + float_bne(cmp1, cmp2, no_set); + } else { + double_bne(cmp1, cmp2, no_set); + } + mv(dst, src); + bind(no_set); +} + +// Keep dst unchanged only if cmp1 == cmp2, +// otherwise move src to dst, including the case where one of them is NaN. +// Clarification: +// java code : cmp1 == cmp2 ? dst : src +// transformed to : CMove dst, (cmp1 ne cmp2), dst, src +void MacroAssembler::cmov_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single) { + if (UseZicond) { + if (is_single) { + feq_s(t0, cmp1, cmp2); + } else { + feq_d(t0, cmp1, cmp2); + } + czero_eqz(dst, dst, t0); + czero_nez(t0 , src, t0); + orr(dst, dst, t0); + return; + } + Label no_set; + if (is_single) { + // jump if cmp1 == cmp2 + // not jump (i.e. move src to dst) if cmp1 != cmp2, including the case of NaN + float_beq(cmp1, cmp2, no_set); + } else { + double_beq(cmp1, cmp2, no_set); + } + mv(dst, src); + bind(no_set); +} + +// When cmp1 <= cmp2 or any of them is NaN then dst = src, otherwise, dst = dst +// Clarification +// scenario 1: +// java code : cmp2 < cmp1 ? dst : src +// transformed to : CMove dst, (cmp1 le cmp2), dst, src +// scenario 2: +// java code : cmp1 > cmp2 ? dst : src +// transformed to : CMove dst, (cmp1 le cmp2), dst, src +void MacroAssembler::cmov_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single) { + if (UseZicond) { + if (is_single) { + flt_s(t0, cmp2, cmp1); + } else { + flt_d(t0, cmp2, cmp1); + } + czero_eqz(dst, dst, t0); + czero_nez(t0 , src, t0); + orr(dst, dst, t0); + return; + } + Label no_set; + if (is_single) { + // jump if cmp1 > cmp2 + // not jump (i.e. move src to dst) if cmp1 <= cmp2 or either is NaN + float_bgt(cmp1, cmp2, no_set); + } else { + double_bgt(cmp1, cmp2, no_set); + } + mv(dst, src); + bind(no_set); +} + +// When cmp1 < cmp2 or any of them is NaN then dst = src, otherwise, dst = dst +// Clarification +// scenario 1: +// java code : cmp2 <= cmp1 ? 
dst : src +// transformed to : CMove dst, (cmp1 lt cmp2), dst, src +// scenario 2: +// java code : cmp1 >= cmp2 ? dst : src +// transformed to : CMove dst, (cmp1 lt cmp2), dst, src +void MacroAssembler::cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single) { + if (UseZicond) { + if (is_single) { + fle_s(t0, cmp2, cmp1); + } else { + fle_d(t0, cmp2, cmp1); + } + czero_eqz(dst, dst, t0); + czero_nez(t0 , src, t0); + orr(dst, dst, t0); + return; + } + Label no_set; + if (is_single) { + // jump if cmp1 >= cmp2 + // not jump (i.e. move src to dst) if cmp1 < cmp2 or either is NaN + float_bge(cmp1, cmp2, no_set); + } else { + double_bge(cmp1, cmp2, no_set); + } + mv(dst, src); + bind(no_set); +} + // Float compare branch instructions #define INSN(NAME, FLOATCMP, BRANCH) \ @@ -1682,7 +1806,7 @@ void MacroAssembler::vector_update_crc32(Register crc, Register buf, Register le for (int i = 0; i < N; i++) { vmv_x_s(tmp2, vcrc); // in vmv_x_s, the value is sign-extended to SEW bits, but we need zero-extended here. - zext_w(tmp2, tmp2); + zext(tmp2, tmp2, 32); vslidedown_vi(vcrc, vcrc, 1); xorr(crc, crc, tmp2); for (int j = 0; j < W; j++) { @@ -3615,16 +3739,16 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, bind(L_failure); } -void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { - ld(t0, Address(xthread, JavaThread::polling_word_offset())); +void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod, Register tmp_reg) { + ld(tmp_reg, Address(xthread, JavaThread::polling_word_offset())); if (acquire) { membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); } if (at_return) { - bgtu(in_nmethod ? sp : fp, t0, slow_path, /* is_far */ true); + bgtu(in_nmethod ? sp : fp, tmp_reg, slow_path, /* is_far */ true); } else { - test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit())); - bnez(t0, slow_path, true /* is_far */); + test_bit(tmp_reg, tmp_reg, exact_log2(SafepointMechanism::poll_bit())); + bnez(tmp_reg, slow_path, /* is_far */ true); } } @@ -3674,7 +3798,7 @@ void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register o void MacroAssembler::load_reserved(Register dst, Register addr, - enum operand_size size, + Assembler::operand_size size, Assembler::Aqrl acquire) { switch (size) { case int64: @@ -3695,15 +3819,15 @@ void MacroAssembler::load_reserved(Register dst, void MacroAssembler::store_conditional(Register dst, Register new_val, Register addr, - enum operand_size size, + Assembler::operand_size size, Assembler::Aqrl release) { switch (size) { case int64: - sc_d(dst, new_val, addr, release); + sc_d(dst, addr, new_val, release); break; case int32: case uint32: - sc_w(dst, new_val, addr, release); + sc_w(dst, addr, new_val, release); break; default: ShouldNotReachHere(); @@ -3712,7 +3836,7 @@ void MacroAssembler::store_conditional(Register dst, void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val, - enum operand_size size, + Assembler::operand_size size, Register shift, Register mask, Register aligned_addr) { assert(size == int8 || size == int16, "unsupported operand size"); @@ -3742,10 +3866,11 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte // which are forced to work with 4-byte aligned address. 
void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, Register new_val, - enum operand_size size, + Assembler::operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release, Register result, bool result_as_bool, Register tmp1, Register tmp2, Register tmp3) { + assert(!(UseZacas && UseZabha), "Use amocas"); assert_different_registers(addr, expected, new_val, result, tmp1, tmp2, tmp3, t0, t1); Register scratch0 = t0, aligned_addr = t1; @@ -3778,13 +3903,13 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, notr(scratch1, mask); bind(retry); - lr_w(result, aligned_addr, acquire); + load_reserved(result, aligned_addr, operand_size::int32, acquire); andr(scratch0, result, mask); bne(scratch0, expected, fail); andr(scratch0, result, scratch1); // scratch1 is ~mask orr(scratch0, scratch0, new_val); - sc_w(scratch0, scratch0, aligned_addr, release); + store_conditional(scratch0, scratch0, aligned_addr, operand_size::int32, release); bnez(scratch0, retry); } @@ -3816,10 +3941,11 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, // failed. void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, Register new_val, - enum operand_size size, + Assembler::operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release, Register result, Register tmp1, Register tmp2, Register tmp3) { + assert(!(UseZacas && UseZabha), "Use amocas"); assert_different_registers(addr, expected, new_val, result, tmp1, tmp2, tmp3, t0, t1); Register scratch0 = t0, aligned_addr = t1; @@ -3850,13 +3976,13 @@ void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, } else { notr(scratch1, mask); - lr_w(result, aligned_addr, acquire); + load_reserved(result, aligned_addr, operand_size::int32, acquire); andr(scratch0, result, mask); bne(scratch0, expected, fail); andr(scratch0, result, scratch1); // scratch1 is ~mask orr(scratch0, scratch0, new_val); - sc_w(scratch0, scratch0, aligned_addr, release); + store_conditional(scratch0, scratch0, aligned_addr, operand_size::int32, release); bnez(scratch0, fail); } @@ -3873,10 +3999,10 @@ void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, void MacroAssembler::cmpxchg(Register addr, Register expected, Register new_val, - enum operand_size size, + Assembler::operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release, Register result, bool result_as_bool) { - assert(size != int8 && size != int16, "unsupported operand size"); + assert((UseZacas && UseZabha) || (size != int8 && size != int16), "unsupported operand size"); assert_different_registers(addr, t0); assert_different_registers(expected, t0); assert_different_registers(new_val, t0); @@ -3934,10 +4060,10 @@ void MacroAssembler::cmpxchg(Register addr, Register expected, void MacroAssembler::weak_cmpxchg(Register addr, Register expected, Register new_val, - enum operand_size size, + Assembler::operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release, Register result) { - + assert((UseZacas && UseZabha) || (size != int8 && size != int16), "unsupported operand size"); assert_different_registers(addr, t0); assert_different_registers(expected, t0); assert_different_registers(new_val, t0); @@ -4010,7 +4136,7 @@ ATOMIC_XCHGU(xchgalwu, xchgalw) #undef ATOMIC_XCHGU void MacroAssembler::atomic_cas(Register prev, Register newv, Register addr, - enum operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) { + Assembler::operand_size size, Assembler::Aqrl 
acquire, Assembler::Aqrl release) { switch (size) { case int64: amocas_d(prev, addr, newv, (Assembler::Aqrl)(acquire | release)); @@ -4022,6 +4148,12 @@ void MacroAssembler::atomic_cas(Register prev, Register newv, Register addr, amocas_w(prev, addr, newv, (Assembler::Aqrl)(acquire | release)); zext(prev, prev, 32); break; + case int16: + amocas_h(prev, addr, newv, (Assembler::Aqrl)(acquire | release)); + break; + case int8: + amocas_b(prev, addr, newv, (Assembler::Aqrl)(acquire | release)); + break; default: ShouldNotReachHere(); } @@ -4853,7 +4985,7 @@ address MacroAssembler::reloc_call(Address entry, Register tmp) { address MacroAssembler::ic_call(address entry, jint method_index) { RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); - IncompressibleRegion ir(this); // relocations + IncompressibleScope scope(this); // relocations movptr(t0, (address)Universe::non_oop_word(), t1); assert_cond(entry != nullptr); return reloc_call(Address(entry, rh)); @@ -4866,7 +4998,7 @@ int MacroAssembler::ic_check_size() { } int MacroAssembler::ic_check(int end_alignment) { - IncompressibleRegion ir(this); + IncompressibleScope scope(this); Register receiver = j_rarg0; Register data = t0; @@ -5779,17 +5911,19 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value) { andi(t0, cnt, unroll - 1); sub(cnt, cnt, t0); - // align 8, so first sd n % 8 = mod, next loop sd 8 * n. shadd(base, t0, base, t1, 3); la(t1, entry); - slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) + slli(t0, t0, 2); sub(t1, t1, t0); jr(t1); bind(loop); addi(base, base, unroll * wordSize); - for (int i = -unroll; i < 0; i++) { - sd(value, Address(base, i * 8)); + { + IncompressibleScope scope(this); // Fixed length + for (int i = -unroll; i < 0; i++) { + sd(value, Address(base, i * 8)); + } } bind(entry); subi(cnt, cnt, unroll); @@ -5996,10 +6130,14 @@ void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { slli(t0, t0, 2); sub(t1, t1, t0); jr(t1); + bind(loop); sub(len, len, unroll); - for (int i = -unroll; i < 0; i++) { - sd(zr, Address(tmp, i * wordSize)); + { + IncompressibleScope scope(this); // Fixed length + for (int i = -unroll; i < 0; i++) { + sd(zr, Address(tmp, i * wordSize)); + } } bind(entry); add(tmp, tmp, unroll * wordSize); @@ -6362,10 +6500,17 @@ void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Registe ld(mark, Address(obj, oopDesc::mark_offset_in_bytes())); if (UseObjectMonitorTable) { - // Clear cache in case fast locking succeeds. + // Clear cache in case fast locking succeeds or we need to take the slow-path. sd(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes())))); } + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp1, obj); + lbu(tmp1, Address(tmp1, Klass::misc_flags_offset())); + test_bit(tmp1, tmp1, exact_log2(KlassFlags::_misc_is_value_based_class)); + bnez(tmp1, slow, /* is_far */ true); + } + // Check if the lock-stack is full. 
lwu(top, Address(xthread, JavaThread::lock_stack_top_offset())); mv(t, (unsigned)LockStack::end_offset()); diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index b390fb236c2..7fa7f931044 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -44,7 +44,7 @@ class MacroAssembler: public Assembler { MacroAssembler(CodeBuffer* code) : Assembler(code) {} - void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); + void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod, Register tmp_reg = t0); // Alignment int align(int modulus, int extra_offset = 0); @@ -657,11 +657,16 @@ class MacroAssembler: public Assembler { void cmov_gt(Register cmp1, Register cmp2, Register dst, Register src); void cmov_gtu(Register cmp1, Register cmp2, Register dst, Register src); + void cmov_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single); + void cmov_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single); + void cmov_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single); + void cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single); + public: // We try to follow risc-v asm menomics. // But as we don't layout a reachable GOT, // we often need to resort to movptr, li <48imm>. - // https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md + // https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/src/asm-manual.adoc // Hotspot only use the standard calling convention using x1/ra. // The alternative calling convection using x5/t0 is not used. 
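A minimal C++ sketch of the dst/src selection performed by the new cmov_cmp_fp_* helpers declared above, written from their comments in macroAssembler_riscv.cpp; the model function names and scalar types are illustrative only, not JDK code:

#include <cstdint>

// Sketch, not JDK code: dst keeps its value unless the condition (including the
// documented NaN cases) selects src, mirroring the float_b* / Zicond sequences.
static int64_t cmov_cmp_fp_eq_model(double cmp1, double cmp2, int64_t dst, int64_t src) {
  return (cmp1 == cmp2) ? src : dst;  // NaN compares unequal, so dst is kept
}
static int64_t cmov_cmp_fp_ne_model(double cmp1, double cmp2, int64_t dst, int64_t src) {
  return (cmp1 == cmp2) ? dst : src;  // NaN selects src
}
static int64_t cmov_cmp_fp_le_model(double cmp1, double cmp2, int64_t dst, int64_t src) {
  return (cmp1 > cmp2) ? dst : src;   // cmp1 <= cmp2 or NaN selects src
}
static int64_t cmov_cmp_fp_lt_model(double cmp1, double cmp2, int64_t dst, int64_t src) {
  return (cmp1 >= cmp2) ? dst : src;  // cmp1 < cmp2 or NaN selects src
}

Only the eq/ne/le/lt shapes are needed because enc_cmove_cmp_fp in c2_MacroAssembler_riscv.cpp asserts that BoolTest::ge and BoolTest::gt have already been rewritten to the le/lt cases.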
@@ -744,7 +749,7 @@ class MacroAssembler: public Assembler { guarantee(rtype == relocInfo::internal_word_type, \ "only internal_word_type relocs make sense here"); \ relocate(InternalAddress(dest).rspec()); \ - IncompressibleRegion ir(this); /* relocations */ + IncompressibleScope scope(this); /* relocations */ #define INSN(NAME) \ void NAME(Register Rs1, Register Rs2, const address dest) { \ @@ -965,7 +970,7 @@ class MacroAssembler: public Assembler { guarantee(rtype == relocInfo::internal_word_type, \ "only internal_word_type relocs make sense here"); \ relocate(InternalAddress(dest).rspec()); \ - IncompressibleRegion ir(this); /* relocations */ + IncompressibleScope scope(this); /* relocations */ #define INSN(NAME) \ void NAME(Register Rd, address dest) { \ @@ -1182,26 +1187,26 @@ class MacroAssembler: public Assembler { void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); void cmpxchg(Register addr, Register expected, Register new_val, - enum operand_size size, + Assembler::operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release, Register result, bool result_as_bool = false); void weak_cmpxchg(Register addr, Register expected, Register new_val, - enum operand_size size, + Assembler::operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release, Register result); void cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val, - enum operand_size size, + Assembler::operand_size size, Register shift, Register mask, Register aligned_addr); void cmpxchg_narrow_value(Register addr, Register expected, Register new_val, - enum operand_size size, + Assembler::operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release, Register result, bool result_as_bool, Register tmp1, Register tmp2, Register tmp3); void weak_cmpxchg_narrow_value(Register addr, Register expected, Register new_val, - enum operand_size size, + Assembler::operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release, Register result, Register tmp1, Register tmp2, Register tmp3); @@ -1218,7 +1223,7 @@ class MacroAssembler: public Assembler { void atomic_xchgwu(Register prev, Register newv, Register addr); void atomic_xchgalwu(Register prev, Register newv, Register addr); - void atomic_cas(Register prev, Register newv, Register addr, enum operand_size size, + void atomic_cas(Register prev, Register newv, Register addr, Assembler::operand_size size, Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed); // Emit a far call/jump. 
Only invalidates the tmp register which @@ -1631,8 +1636,8 @@ class MacroAssembler: public Assembler { int bitset_to_regs(unsigned int bitset, unsigned char* regs); Address add_memory_helper(const Address dst, Register tmp); - void load_reserved(Register dst, Register addr, enum operand_size size, Assembler::Aqrl acquire); - void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release); + void load_reserved(Register dst, Register addr, Assembler::operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register dst, Register new_val, Register addr, Assembler::operand_size size, Assembler::Aqrl release); public: void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow); diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp index 6f20d54b222..31947b520d0 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp @@ -479,7 +479,7 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { CodeBuffer cb(code_pos, instruction_size); MacroAssembler a(&cb); - Assembler::IncompressibleRegion ir(&a); // Fixed length: see NativeGeneralJump::get_instruction_size() + Assembler::IncompressibleScope scope(&a); // Fixed length: see NativeGeneralJump::get_instruction_size() int32_t offset = 0; a.movptr(t1, entry, offset, t0); // lui, lui, slli, add diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp index 295e92bbc1b..d8f5fa57816 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp @@ -300,7 +300,7 @@ class NativeGeneralJump: public NativeJump { inline NativeGeneralJump* nativeGeneralJump_at(address addr) { assert_cond(addr != nullptr); NativeGeneralJump* jump = (NativeGeneralJump*)(addr); - debug_only(jump->verify();) + DEBUG_ONLY(jump->verify();) return jump; } diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index aca2f4dd488..e838ee184fb 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -1295,7 +1295,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { #endif void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { - Assembler::CompressibleRegion cr(masm); // nops shall be 2-byte under RVC for alignment purposes. + Assembler::CompressibleScope scope(masm); // nops shall be 2-byte under RVC for alignment purposes. for (int i = 0; i < _count; i++) { __ nop(); } @@ -1371,7 +1371,7 @@ void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { // insert a nop at the start of the prolog so we can patch in a // branch if we need to invalidate the method later { - Assembler::IncompressibleRegion ir(masm); // keep the nop as 4 bytes for patching. + Assembler::IncompressibleScope scope(masm); // keep the nop as 4 bytes for patching. 
MacroAssembler::assert_alignment(__ pc()); __ nop(); // 4 bytes } @@ -1596,7 +1596,8 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { // vpr to vpr - __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); + __ vsetvli_helper(T_BYTE, MaxVectorSize); + __ vmv_v_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); } else { ShouldNotReachHere(); } @@ -1614,7 +1615,8 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r __ unspill_vmask(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { // vmask to vmask - __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); + __ vsetvli_helper(T_BYTE, MaxVectorSize >> 3); + __ vmv_v_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); } else { ShouldNotReachHere(); } @@ -1752,7 +1754,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { #endif void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - Assembler::IncompressibleRegion ir(masm); // Fixed length: see BoxLockNode::size() + Assembler::IncompressibleScope scope(masm); // Fixed length: see BoxLockNode::size() assert_cond(ra_ != nullptr); int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); @@ -1880,18 +1882,6 @@ bool Matcher::match_rule_supported(int opcode) { case Op_EncodeISOArray: return UseRVV; - // Current test shows that, it brings performance gain when MaxVectorSize >= 32, but brings - // regression when MaxVectorSize == 16. So only enable the intrinsic when MaxVectorSize >= 32. - case Op_RoundVF: - return UseRVV && MaxVectorSize >= 32; - - // For double, current test shows that even with MaxVectorSize == 32, there is still some regression. - // Although there is no hardware to verify it for now, from the trend of performance data on hardwares - // (with vlenb == 16 and 32 respectively), it's promising to bring better performance rather than - // regression for double when MaxVectorSize == 64+. So only enable the intrinsic when MaxVectorSize >= 64. - case Op_RoundVD: - return UseRVV && MaxVectorSize >= 64; - case Op_PopCountI: case Op_PopCountL: return UsePopCountInstruction; @@ -1914,8 +1904,6 @@ bool Matcher::match_rule_supported(int opcode) { case Op_FmaF: case Op_FmaD: - case Op_FmaVF: - case Op_FmaVD: return UseFMA; case Op_ConvHF2F: @@ -1930,9 +1918,15 @@ bool Matcher::match_rule_supported(int opcode) { case Op_MaxHF: case Op_MinHF: case Op_MulHF: - case Op_SubHF: case Op_SqrtHF: + case Op_SubHF: return UseZfh; + + case Op_CMoveF: + case Op_CMoveD: + case Op_CMoveP: + case Op_CMoveN: + return false; } return true; // Per default match rules are supported. @@ -1944,11 +1938,11 @@ const RegMask* Matcher::predicate_reg_mask(void) { // Vector calling convention not yet implemented. 
bool Matcher::supports_vector_calling_convention(void) { - return EnableVectorSupport && UseVectorStubs; + return EnableVectorSupport; } OptoRegPair Matcher::vector_return_value(uint ideal_reg) { - assert(EnableVectorSupport && UseVectorStubs, "sanity"); + assert(EnableVectorSupport, "sanity"); assert(ideal_reg == Op_VecA, "sanity"); // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc int lo = V8_num; @@ -2310,42 +2304,6 @@ encode %{ } %} - enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - // compare and branch instruction encodings enc_class riscv_enc_j(label lbl) %{ @@ -2395,7 +2353,7 @@ encode %{ %} enc_class riscv_enc_java_static_call(method meth) %{ - Assembler::IncompressibleRegion ir(masm); // Fixed length: see ret_addr_offset + Assembler::IncompressibleScope scope(masm); // Fixed length: see ret_addr_offset address addr = (address)$meth$$method; address call = nullptr; @@ -2442,7 +2400,7 @@ encode %{ %} enc_class riscv_enc_java_dynamic_call(method meth) %{ - Assembler::IncompressibleRegion ir(masm); // Fixed length: see ret_addr_offset + Assembler::IncompressibleScope scope(masm); // Fixed length: see ret_addr_offset int method_index = resolved_method_index(masm); address call = __ ic_call((address)$meth$$method, method_index); if (call == nullptr) { @@ -2461,7 +2419,7 @@ encode %{ %} enc_class riscv_enc_java_to_runtime(method meth) %{ - Assembler::IncompressibleRegion ir(masm); // Fixed length: see ret_addr_offset + Assembler::IncompressibleScope scope(masm); // Fixed length: see ret_addr_offset // Some calls to generated routines (arraycopy code) are scheduled by C2 // as runtime calls. 
if so we can call them using a far call (they will be @@ -5256,18 +5214,20 @@ instruct prefetchalloc( memory mem ) %{ // standard CompareAndSwapX when we are using barriers // these have higher priority than the rules selected by a predicate -instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct compareAndSwapB_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) %{ + predicate(!UseZabha || !UseZacas); + match(Set res (CompareAndSwapB mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); format %{ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" - "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB_narrow" %} ins_encode %{ @@ -5279,18 +5239,42 @@ instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R1 ins_pipe(pipe_slow); %} -instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ + predicate(UseZabha && UseZacas); + + match(Set res (CompareAndSwapB mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result as bool */); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapS_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(!UseZabha || !UseZacas); + match(Set res (CompareAndSwapS mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); format %{ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" - "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS_narrow" %} ins_encode %{ @@ -5302,18 +5286,44 @@ instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R1 ins_pipe(pipe_slow); %} +instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate(UseZabha && UseZacas); + + match(Set res (CompareAndSwapS mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapS" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result as bool */); + %} + + ins_pipe(pipe_slow); +%} + instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ match(Set res (CompareAndSwapI mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" %} - ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} ins_pipe(pipe_slow); %} @@ -5322,14 +5332,18 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval %{ match(Set res (CompareAndSwapL mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" %} - ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} ins_pipe(pipe_slow); %} @@ -5340,14 +5354,18 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval match(Set res (CompareAndSwapP mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" %} - ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} ins_pipe(pipe_slow); %} @@ -5355,35 +5373,40 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapN" %} - ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} ins_pipe(pipe_slow); %} // alternative CompareAndSwapX when we are eliding barriers -instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct compareAndSwapBAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapB mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" - "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq_narrow" %} ins_encode %{ @@ -5395,20 +5418,42 @@ instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI ins_pipe(pipe_slow); %} -instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapB mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result as bool */); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapSAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapS mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" - "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapSAcq_narrow" %} ins_encode %{ @@ -5420,20 +5465,46 @@ instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI ins_pipe(pipe_slow); %} +instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapS mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result as bool */); + %} + + ins_pipe(pipe_slow); +%} + instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapI mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" %} - ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} ins_pipe(pipe_slow); %} @@ -5444,14 +5515,18 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL new match(Set res (CompareAndSwapL mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" %} - ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} ins_pipe(pipe_slow); %} @@ -5462,14 +5537,18 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new match(Set res (CompareAndSwapP mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapPAcq" %} - ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} ins_pipe(pipe_slow); %} @@ -5480,14 +5559,18 @@ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN new match(Set res (CompareAndSwapN mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" %} - ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} ins_pipe(pipe_slow); %} @@ -5498,17 +5581,19 @@ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN new // no trailing StoreLoad barrier emitted by C2. Unfortunately we // can't check the type of memory ordering here, so we always emit a // sc_d(w) with rl bit set. -instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct compareAndExchangeB_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) %{ + predicate(!UseZabha || !UseZacas); + match(Set res (CompareAndExchangeB mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ - "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" + "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB_narrow" %} ins_encode %{ @@ -5520,17 +5605,39 @@ instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg ins_pipe(pipe_slow); %} -instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ + predicate(UseZabha && UseZacas); + + match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeS_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(!UseZabha || !UseZacas); + match(Set res (CompareAndExchangeS mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + ins_cost(2 * VOLATILE_REF_COST); 
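  // Editor's note (hedged aside, not part of the original change): the *_narrow
  // variants cover the case where Zabha and Zacas are not both available. RV64
  // has no sub-word LR/SC, so the byte/short CAS is presumably emulated with
  // lr.w/sc.w on the enclosing aligned word plus mask-and-merge of the narrow
  // field, which is why oldval/newval are pinned to R12/R13 and tmp1..tmp3 and
  // KILL cr are needed. The Zabha+Zacas forms nearby need none of that and
  // reduce to a single byte/halfword CAS instruction (amocas.b/amocas.h) via
  // the same cmpxchg helper used in the other encodings in this file.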
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ - "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" + "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS_narrow" %} ins_encode %{ @@ -5542,13 +5649,31 @@ instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg ins_pipe(pipe_slow); %} +instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate(UseZabha && UseZacas); + + match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); - - effect(TEMP_DEF res); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" @@ -5566,9 +5691,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne %{ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); - - effect(TEMP_DEF res); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" @@ -5585,11 +5708,10 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) %{ predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set res (CompareAndExchangeN mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); - effect(TEMP_DEF res); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" @@ -5606,11 +5728,10 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set res (CompareAndExchangeP mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); - effect(TEMP_DEF res); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" @@ -5624,19 +5745,19 @@ instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP ne ins_pipe(pipe_slow); %} -instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct 
compareAndExchangeBAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeB mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ - "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq_narrow" %} ins_encode %{ @@ -5648,19 +5769,39 @@ instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i ins_pipe(pipe_slow); %} -instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeSAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeS mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ - "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq_narrow" %} ins_encode %{ @@ -5672,15 +5813,33 @@ instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i ins_pipe(pipe_slow); %} +instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ predicate(needs_acquiring_load_reserved(n)); match(Set res 
(CompareAndExchangeI mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); - - effect(TEMP_DEF res); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" @@ -5700,9 +5859,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL match(Set res (CompareAndExchangeL mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); - - effect(TEMP_DEF res); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" @@ -5722,9 +5879,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN match(Set res (CompareAndExchangeN mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); - - effect(TEMP_DEF res); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" @@ -5744,9 +5899,7 @@ instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP match(Set res (CompareAndExchangeP mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); - - effect(TEMP_DEF res); + ins_cost(2 * VOLATILE_REF_COST); format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" @@ -5760,18 +5913,20 @@ instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct weakCompareAndSwapB_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) %{ + predicate(!UseZabha || !UseZacas); + match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "weak_cmpxchg $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" - "# $res == 1 when success, #@weakCompareAndSwapB" + "# $res == 1 when success, #@weakCompareAndSwapB_narrow" %} ins_encode %{ @@ -5783,18 +5938,41 @@ instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ + predicate(UseZabha && UseZacas); + + match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "weak_cmpxchg $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapB" + %} + + ins_encode %{ + __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct 
weakCompareAndSwapS_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(!UseZabha || !UseZacas); + match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "weak_cmpxchg $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" - "# $res == 1 when success, #@weakCompareAndSwapS" + "# $res == 1 when success, #@weakCompareAndSwapS_narrow" %} ins_encode %{ @@ -5806,11 +5984,32 @@ instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg ins_pipe(pipe_slow); %} +instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate(UseZabha && UseZacas); + + match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "weak_cmpxchg $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapS" + %} + + ins_encode %{ + __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + ins_cost(2 * VOLATILE_REF_COST); format %{ "weak_cmpxchg $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" @@ -5829,7 +6028,7 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne %{ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + ins_cost(2 * VOLATILE_REF_COST); format %{ "weak_cmpxchg $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" @@ -5847,9 +6046,10 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); format %{ "weak_cmpxchg $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" @@ -5867,9 +6067,10 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) %{ predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + ins_cost(2 * VOLATILE_REF_COST); format %{ "weak_cmpxchg $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" @@ -5884,20 +6085,20 @@ instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, 
rFlagsReg cr) +instruct weakCompareAndSwapBAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" - "# $res == 1 when success, #@weakCompareAndSwapBAcq" + "# $res == 1 when success, #@weakCompareAndSwapBAcq_narrow" %} ins_encode %{ @@ -5909,20 +6110,41 @@ instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, - iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "weak_cmpxchg_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapBAcq" + %} + + ins_encode %{ + __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapSAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); + ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" - "# $res == 1 when success, #@weakCompareAndSwapSAcq" + "# $res == 1 when success, #@weakCompareAndSwapSAcq_narrow" %} ins_encode %{ @@ -5934,13 +6156,34 @@ instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i ins_pipe(pipe_slow); %} +instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "weak_cmpxchg_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapSAcq" + %} + + ins_encode %{ + __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ predicate(needs_acquiring_load_reserved(n)); match(Set 
res (WeakCompareAndSwapI mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + ins_cost(2 * VOLATILE_REF_COST); format %{ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" @@ -5961,7 +6204,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + ins_cost(2 * VOLATILE_REF_COST); format %{ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" @@ -5982,7 +6225,7 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); + ins_cost(2 * VOLATILE_REF_COST); format %{ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" @@ -6003,7 +6246,7 @@ instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + ins_cost(2 * VOLATILE_REF_COST); format %{ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" @@ -6445,7 +6688,6 @@ instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} ins_encode %{ - int32_t con = (int32_t)$src2$$constant; __ addiw(as_Register($dst$$reg), as_Register($src1$$reg), $src2$$constant); @@ -6507,7 +6749,6 @@ instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} ins_encode %{ - // src2 is imm, so actually call the addi __ addi(as_Register($dst$$reg), as_Register($src1$$reg), $src2$$constant); @@ -6829,7 +7070,7 @@ instruct UmodL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ // Integer Shifts // Shift Left Register -// In RV64I, only the low 5 bits of src2 are considered for the shift amount +// Only the low 5 bits of src2 are considered for the shift amount, all other bits are ignored. instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (LShiftI src1 src2)); ins_cost(ALU_COST); @@ -6862,7 +7103,7 @@ instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ %} // Shift Right Logical Register -// In RV64I, only the low 5 bits of src2 are considered for the shift amount +// Only the low 5 bits of src2 are considered for the shift amount, all other bits are ignored. instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (URShiftI src1 src2)); ins_cost(ALU_COST); @@ -6895,7 +7136,7 @@ instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ %} // Shift Right Arithmetic Register -// In RV64I, only the low 5 bits of src2 are considered for the shift amount +// Only the low 5 bits of src2 are considered for the shift amount, all other bits are ignored. 
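// (Editor's aside, not part of the original change: a minimal C++ model of the
// shift-count rule noted in the comments above. Java defines int shifts to use
// only the low 5 bits of the count and long shifts the low 6 bits, which is
// exactly what the RV64 sllw/srlw/sraw and sll/srl/sra instructions do with
// rs2, so the matcher never has to mask src2 itself. Function names below are
// illustrative only.)

#include <cstdint>

static inline int32_t java_shl_int(int32_t x, int32_t count) {
  // JLS 15.19: only the five lowest-order bits of the count are used.
  return (int32_t)((uint32_t)x << (count & 0x1f));
}

static inline int64_t java_shl_long(int64_t x, int32_t count) {
  // For long shifts, only the six lowest-order bits of the count are used.
  return (int64_t)((uint64_t)x << (count & 0x3f));
}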
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (RShiftI src1 src2)); ins_cost(ALU_COST); @@ -6930,7 +7171,7 @@ instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ // Long Shifts // Shift Left Register -// In RV64I, only the low 6 bits of src2 are considered for the shift amount +// Only the low 6 bits of src2 are considered for the shift amount, all other bits are ignored. instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ match(Set dst (LShiftL src1 src2)); @@ -6965,7 +7206,7 @@ instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ %} // Shift Right Logical Register -// In RV64I, only the low 6 bits of src2 are considered for the shift amount +// Only the low 6 bits of src2 are considered for the shift amount, all other bits are ignored. instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ match(Set dst (URShiftL src1 src2)); @@ -7018,7 +7259,7 @@ instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ %} // Shift Right Arithmetic Register -// In RV64I, only the low 6 bits of src2 are considered for the shift amount +// Only the low 6 bits of src2 are considered for the shift amount, all other bits are ignored. instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ match(Set dst (RShiftL src1 src2)); @@ -9940,12 +10181,15 @@ instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, la // ============================================================================ // Conditional Move Instructions + +// --------- CMoveI --------- + instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpI\n\t" + "CMoveI $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpI\n\t" %} ins_encode %{ @@ -9962,7 +10206,7 @@ instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpU\n\t" + "CMoveI $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpU\n\t" %} ins_encode %{ @@ -9979,7 +10223,7 @@ instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) % ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpL\n\t" + "CMoveI $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpL\n\t" %} ins_encode %{ @@ -9996,7 +10240,7 @@ instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpUL\n\t" + "CMoveI $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpUL\n\t" %} ins_encode %{ @@ -10008,12 +10252,46 @@ instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) ins_pipe(pipe_class_compare); %} +instruct cmovI_cmpF(iRegINoSp dst, iRegI src, fRegF op1, fRegF op2, cmpOp cop) %{ + match(Set dst (CMoveI (Binary cop (CmpF op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); + + format %{ + "CMoveI $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpF\n\t" + %} + + ins_encode %{ + __ enc_cmove_cmp_fp($cop$$cmpcode, + as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg), true /* is_single */); + %} + + ins_pipe(pipe_class_compare); +%} + +instruct cmovI_cmpD(iRegINoSp dst, iRegI src, fRegD op1, fRegD op2, 
cmpOp cop) %{ + match(Set dst (CMoveI (Binary cop (CmpD op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); + + format %{ + "CMoveI $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpD\n\t" + %} + + ins_encode %{ + __ enc_cmove_cmp_fp($cop$$cmpcode | C2_MacroAssembler::double_branch_mask, + as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg), false /* is_single */); + %} + + ins_pipe(pipe_class_compare); +%} + instruct cmovI_cmpN(iRegINoSp dst, iRegI src, iRegN op1, iRegN op2, cmpOpU cop) %{ match(Set dst (CMoveI (Binary cop (CmpN op1 op2)) (Binary dst src))); ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpN\n\t" + "CMoveI $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpN\n\t" %} ins_encode %{ @@ -10030,7 +10308,7 @@ instruct cmovI_cmpP(iRegINoSp dst, iRegI src, iRegP op1, iRegP op2, cmpOpU cop) ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpP\n\t" + "CMoveI $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpP\n\t" %} ins_encode %{ @@ -10042,12 +10320,14 @@ instruct cmovI_cmpP(iRegINoSp dst, iRegI src, iRegP op1, iRegP op2, cmpOpU cop) ins_pipe(pipe_class_compare); %} +// --------- CMoveL --------- + instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpL\n\t" + "CMoveL $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpL\n\t" %} ins_encode %{ @@ -10064,7 +10344,7 @@ instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpUL\n\t" + "CMoveL $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpUL\n\t" %} ins_encode %{ @@ -10081,7 +10361,7 @@ instruct cmovL_cmpI(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOp cop) % ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpI\n\t" + "CMoveL $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpI\n\t" %} ins_encode %{ @@ -10098,7 +10378,7 @@ instruct cmovL_cmpU(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOpU cop) ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpU\n\t" + "CMoveL $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpU\n\t" %} ins_encode %{ @@ -10110,12 +10390,46 @@ instruct cmovL_cmpU(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOpU cop) ins_pipe(pipe_class_compare); %} +instruct cmovL_cmpF(iRegLNoSp dst, iRegL src, fRegF op1, fRegF op2, cmpOp cop) %{ + match(Set dst (CMoveL (Binary cop (CmpF op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); + + format %{ + "CMoveL $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpF\n\t" + %} + + ins_encode %{ + __ enc_cmove_cmp_fp($cop$$cmpcode, + as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg), true /* is_single */); + %} + + ins_pipe(pipe_class_compare); +%} + +instruct cmovL_cmpD(iRegLNoSp dst, iRegL src, fRegD op1, fRegD op2, cmpOp cop) %{ + match(Set dst (CMoveL (Binary cop (CmpD op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); + + format %{ + "CMoveL $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpD\n\t" + %} + + ins_encode %{ + __ enc_cmove_cmp_fp($cop$$cmpcode | C2_MacroAssembler::double_branch_mask, + as_FloatRegister($op1$$reg), 
as_FloatRegister($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg), false /* is_single */); + %} + + ins_pipe(pipe_class_compare); +%} + instruct cmovL_cmpN(iRegLNoSp dst, iRegL src, iRegN op1, iRegN op2, cmpOpU cop) %{ match(Set dst (CMoveL (Binary cop (CmpN op1 op2)) (Binary dst src))); ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpN\n\t" + "CMoveL $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpN\n\t" %} ins_encode %{ @@ -10132,7 +10446,7 @@ instruct cmovL_cmpP(iRegLNoSp dst, iRegL src, iRegP op1, iRegP op2, cmpOpU cop) ins_cost(ALU_COST + BRANCH_COST); format %{ - "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpP\n\t" + "CMoveL $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpP\n\t" %} ins_encode %{ diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad index ed9fca13a1b..beac10ec03d 100644 --- a/src/hotspot/cpu/riscv/riscv_b.ad +++ b/src/hotspot/cpu/riscv/riscv_b.ad @@ -25,7 +25,8 @@ // RISCV Bit-Manipulation Extension Architecture Description File -instruct rorI_imm_b(iRegINoSp dst, iRegI src, immI shift) %{ +// Rotate Right Word Immediate +instruct rorI_imm_b(iRegINoSp dst, iRegIorL2I src, immI shift) %{ predicate(UseZbb); match(Set dst (RotateRight src shift)); @@ -39,6 +40,7 @@ instruct rorI_imm_b(iRegINoSp dst, iRegI src, immI shift) %{ ins_pipe(ialu_reg_shift); %} +// Rotate Right Immediate instruct rorL_imm_b(iRegLNoSp dst, iRegL src, immI shift) %{ predicate(UseZbb); match(Set dst (RotateRight src shift)); @@ -53,7 +55,9 @@ instruct rorL_imm_b(iRegLNoSp dst, iRegL src, immI shift) %{ ins_pipe(ialu_reg_shift); %} -instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ +// Rotate Right Word Register +// Only the low 5 bits of shift value are used, all other bits are ignored. +instruct rorI_reg_b(iRegINoSp dst, iRegIorL2I src, iRegIorL2I shift) %{ predicate(UseZbb); match(Set dst (RotateRight src shift)); @@ -65,7 +69,9 @@ instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ ins_pipe(ialu_reg_reg); %} -instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ +// Rotate Right Register +// Only the low 6 bits of shift value are used, all other bits are ignored. +instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegIorL2I shift) %{ predicate(UseZbb); match(Set dst (RotateRight src shift)); @@ -77,7 +83,9 @@ instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ ins_pipe(ialu_reg_reg); %} -instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ +// Rotate Left Word Register +// Only the low 5 bits of shift value are used, all other bits are ignored. +instruct rolI_reg_b(iRegINoSp dst, iRegIorL2I src, iRegIorL2I shift) %{ predicate(UseZbb); match(Set dst (RotateLeft src shift)); @@ -89,7 +97,9 @@ instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ ins_pipe(ialu_reg_reg); %} -instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ +// Rotate Left Register +// Only the low 6 bits of shift value are used, all other bits are ignored. 
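// (Editor's aside, not part of the original change: with Zbb these RotateRight
// and RotateLeft nodes map onto ror/rorw/rol/rolw, which likewise consume only
// the low 5 bits (word forms) or 6 bits of the shift register. Below is a
// branch-free C++ reference for the 32-bit rotate-right semantics, written so
// it stays well defined when the count is a multiple of 32; the name is
// illustrative only.)

#include <cstdint>

static inline uint32_t rotr32_ref(uint32_t x, uint32_t count) {
  count &= 0x1f;                                  // only the low 5 bits matter
  return (x >> count) | (x << ((32u - count) & 0x1f));
}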
+instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegIorL2I shift) %{ predicate(UseZbb); match(Set dst (RotateLeft src shift)); diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad index 9b135215b3d..8b5759ce11c 100644 --- a/src/hotspot/cpu/riscv/riscv_v.ad +++ b/src/hotspot/cpu/riscv/riscv_v.ad @@ -89,13 +89,12 @@ source %{ return UseZvbb; case Op_LoadVectorGather: case Op_LoadVectorGatherMasked: + case Op_StoreVectorScatter: + case Op_StoreVectorScatterMasked: if (is_subword_type(bt)) { return false; } break; - case Op_VectorCastHF2F: - case Op_VectorCastF2HF: - return UseZvfh; case Op_VectorLoadShuffle: case Op_VectorRearrange: // vlen >= 4 is required, because min vector size for byte is 4 on riscv, @@ -111,6 +110,32 @@ source %{ if (vlen < 4) { return false; } + case Op_VectorCastHF2F: + case Op_VectorCastF2HF: + case Op_AddVHF: + case Op_DivVHF: + case Op_MaxVHF: + case Op_MinVHF: + case Op_MulVHF: + case Op_SqrtVHF: + case Op_SubVHF: + return UseZvfh; + case Op_FmaVHF: + return UseZvfh && UseFMA; + case Op_FmaVF: + case Op_FmaVD: + return UseFMA; + + // For float, current test shows that, it brings performance gain when vlen >= 8, but brings + // regression when vlen == 4. So only enable this intrinsic when vlen >= 8. + // For double, current test shows that even with vlen == 4, there is still some regression. + // Although there is no hardware to verify it, from the trend of performance data on hardwares + // (with vlen == 2 and 4 respectively), it's promising to bring better performance rather than + // regression for double when vlen == 8. So only enable this intrinsic when vlen >= 8. + case Op_RoundVF: + case Op_RoundVD: + return vlen >= 8; + default: break; } @@ -140,16 +165,11 @@ source %{ } %} -definitions %{ - int_def VEC_COST (200, 200); -%} - // All VEC instructions // vector load/store instruct loadV(vReg dst, vmemA mem) %{ match(Set dst (LoadVector mem)); - ins_cost(VEC_COST); format %{ "loadV $dst, $mem\t# vector (rvv)" %} ins_encode %{ VectorRegister dst_reg = as_VectorRegister($dst$$reg); @@ -161,7 +181,6 @@ instruct loadV(vReg dst, vmemA mem) %{ instruct storeV(vReg src, vmemA mem) %{ match(Set mem (StoreVector mem src)); - ins_cost(VEC_COST); format %{ "storeV $mem, $src\t# vector (rvv)" %} ins_encode %{ VectorRegister src_reg = as_VectorRegister($src$$reg); @@ -249,7 +268,6 @@ instruct vmaskcmp_fp(vRegMask dst, vReg src1, vReg src2, immI cond) %{ predicate(Matcher::vector_element_basic_type(n) == T_FLOAT || Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); - effect(TEMP_DEF dst); format %{ "vmaskcmp_fp $dst, $src1, $src2, $cond" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -284,7 +302,6 @@ instruct vabs(vReg dst, vReg src, vReg tmp) %{ match(Set dst (AbsVS src)); match(Set dst (AbsVI src)); match(Set dst (AbsVL src)); - ins_cost(VEC_COST); effect(TEMP tmp); format %{ "vabs $dst, $src\t# KILL $tmp" %} ins_encode %{ @@ -299,7 +316,6 @@ instruct vabs(vReg dst, vReg src, vReg tmp) %{ instruct vabs_fp(vReg dst, vReg src) %{ match(Set dst (AbsVF src)); match(Set dst (AbsVD src)); - ins_cost(VEC_COST); format %{ "vabs_fp $dst, $src" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -316,7 +332,6 @@ instruct vabs_masked(vReg dst_src, vRegMask_V0 v0, vReg tmp) %{ match(Set dst_src (AbsVS dst_src v0)); match(Set dst_src (AbsVI dst_src v0)); match(Set dst_src (AbsVL dst_src v0)); - ins_cost(VEC_COST); effect(TEMP tmp); format %{ 
"vabs_masked $dst_src, $dst_src, $v0\t# KILL $tmp" %} ins_encode %{ @@ -333,7 +348,6 @@ instruct vabs_masked(vReg dst_src, vRegMask_V0 v0, vReg tmp) %{ instruct vabs_fp_masked(vReg dst_src, vRegMask_V0 v0) %{ match(Set dst_src (AbsVF dst_src v0)); match(Set dst_src (AbsVD dst_src v0)); - ins_cost(VEC_COST); format %{ "vabs_fp_masked $dst_src, $dst_src, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -350,7 +364,6 @@ instruct vadd(vReg dst, vReg src1, vReg src2) %{ match(Set dst (AddVS src1 src2)); match(Set dst (AddVI src1 src2)); match(Set dst (AddVL src1 src2)); - ins_cost(VEC_COST); format %{ "vadd $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -362,10 +375,23 @@ instruct vadd(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +instruct vadd_hfp(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (AddVHF src1 src2)); + format %{ "vadd_hfp $dst, $src1, $src2" %} + ins_encode %{ + assert(UseZvfh, "must"); + assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must"); + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vfadd_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + instruct vadd_fp(vReg dst, vReg src1, vReg src2) %{ match(Set dst (AddVF src1 src2)); match(Set dst (AddVD src1 src2)); - ins_cost(VEC_COST); format %{ "vadd_fp $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -384,7 +410,6 @@ instruct vadd_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ match(Set dst_src1 (AddVS (Binary dst_src1 src2) v0)); match(Set dst_src1 (AddVI (Binary dst_src1 src2) v0)); match(Set dst_src1 (AddVL (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vadd_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -399,7 +424,6 @@ instruct vadd_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ instruct vadd_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ match(Set dst_src1 (AddVF (Binary dst_src1 src2) v0)); match(Set dst_src1 (AddVD (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vadd_fp_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -413,11 +437,11 @@ instruct vadd_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ // vector-immediate add (unpredicated) -instruct vadd_immI(vReg dst, vReg src1, immI5 con) %{ +instruct vadd_vi(vReg dst, vReg src1, immI5 con) %{ match(Set dst (AddVB src1 (Replicate con))); match(Set dst (AddVS src1 (Replicate con))); match(Set dst (AddVI src1 (Replicate con))); - format %{ "vadd_immI $dst, $src1, $con" %} + format %{ "vadd_vi $dst, $src1, $con" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -428,9 +452,9 @@ instruct vadd_immI(vReg dst, vReg src1, immI5 con) %{ ins_pipe(pipe_slow); %} -instruct vadd_immL(vReg dst, vReg src1, immL5 con) %{ +instruct vaddL_vi(vReg dst, vReg src1, immL5 con) %{ match(Set dst (AddVL src1 (Replicate con))); - format %{ "vadd_immL $dst, $src1, $con" %} + format %{ "vaddL_vi $dst, $src1, $con" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vadd_vi(as_VectorRegister($dst$$reg), @@ -442,11 +466,11 @@ instruct vadd_immL(vReg dst, vReg src1, immL5 con) %{ // vector-scalar add (unpredicated) -instruct vadd_regI(vReg dst, vReg 
src1, iRegIorL2I src2) %{ +instruct vadd_vx(vReg dst, vReg src1, iRegIorL2I src2) %{ match(Set dst (AddVB src1 (Replicate src2))); match(Set dst (AddVS src1 (Replicate src2))); match(Set dst (AddVI src1 (Replicate src2))); - format %{ "vadd_regI $dst, $src1, $src2" %} + format %{ "vadd_vx $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -457,9 +481,9 @@ instruct vadd_regI(vReg dst, vReg src1, iRegIorL2I src2) %{ ins_pipe(pipe_slow); %} -instruct vadd_regL(vReg dst, vReg src1, iRegL src2) %{ +instruct vaddL_vx(vReg dst, vReg src1, iRegL src2) %{ match(Set dst (AddVL src1 (Replicate src2))); - format %{ "vadd_regL $dst, $src1, $src2" %} + format %{ "vaddL_vx $dst, $src1, $src2" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vadd_vx(as_VectorRegister($dst$$reg), @@ -471,11 +495,11 @@ instruct vadd_regL(vReg dst, vReg src1, iRegL src2) %{ // vector-immediate add (predicated) -instruct vadd_immI_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ +instruct vadd_vi_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ match(Set dst_src (AddVB (Binary dst_src (Replicate con)) v0)); match(Set dst_src (AddVS (Binary dst_src (Replicate con)) v0)); match(Set dst_src (AddVI (Binary dst_src (Replicate con)) v0)); - format %{ "vadd_immI_masked $dst_src, $dst_src, $con" %} + format %{ "vadd_vi_masked $dst_src, $dst_src, $con, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -486,9 +510,9 @@ instruct vadd_immI_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vadd_immL_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ +instruct vaddL_vi_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ match(Set dst_src (AddVL (Binary dst_src (Replicate con)) v0)); - format %{ "vadd_immL_masked $dst_src, $dst_src, $con" %} + format %{ "vaddL_vi_masked $dst_src, $dst_src, $con, $v0" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vadd_vi(as_VectorRegister($dst_src$$reg), @@ -500,11 +524,11 @@ instruct vadd_immL_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ // vector-scalar add (predicated) -instruct vadd_regI_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{ +instruct vadd_vx_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{ match(Set dst_src (AddVB (Binary dst_src (Replicate src2)) v0)); match(Set dst_src (AddVS (Binary dst_src (Replicate src2)) v0)); match(Set dst_src (AddVI (Binary dst_src (Replicate src2)) v0)); - format %{ "vadd_regI_masked $dst_src, $dst_src, $src2" %} + format %{ "vadd_vx_masked $dst_src, $dst_src, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -515,9 +539,9 @@ instruct vadd_regI_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vadd_regL_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{ +instruct vaddL_vx_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{ match(Set dst_src (AddVL (Binary dst_src (Replicate src2)) v0)); - format %{ "vadd_regL_masked $dst_src, $dst_src, $src2" %} + format %{ "vaddL_vx_masked $dst_src, $dst_src, $src2, $v0" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vadd_vx(as_VectorRegister($dst_src$$reg), @@ -534,7 +558,6 @@ instruct vsub(vReg dst, vReg src1, vReg src2) %{ match(Set dst (SubVS src1 src2)); match(Set dst 
(SubVI src1 src2)); match(Set dst (SubVL src1 src2)); - ins_cost(VEC_COST); format %{ "vsub $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -545,10 +568,22 @@ instruct vsub(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +instruct vsub_hfp(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (SubVHF src1 src2)); + format %{ "vsub_hfp $dst, $src1, $src2" %} + ins_encode %{ + assert(UseZvfh, "must"); + assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must"); + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + instruct vsub_fp(vReg dst, vReg src1, vReg src2) %{ match(Set dst (SubVF src1 src2)); match(Set dst (SubVD src1 src2)); - ins_cost(VEC_COST); format %{ "vsub_fp $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -566,7 +601,6 @@ instruct vsub_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ match(Set dst_src1 (SubVS (Binary dst_src1 src2) v0)); match(Set dst_src1 (SubVI (Binary dst_src1 src2) v0)); match(Set dst_src1 (SubVL (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vsub_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -580,7 +614,6 @@ instruct vsub_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ instruct vsub_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ match(Set dst_src1 (SubVF (Binary dst_src1 src2) v0)); match(Set dst_src1 (SubVD (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vsub_fp_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -593,11 +626,11 @@ instruct vsub_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ // vector-scalar sub (unpredicated) -instruct vsub_regI(vReg dst, vReg src1, iRegIorL2I src2) %{ +instruct vsub_vx(vReg dst, vReg src1, iRegIorL2I src2) %{ match(Set dst (SubVB src1 (Replicate src2))); match(Set dst (SubVS src1 (Replicate src2))); match(Set dst (SubVI src1 (Replicate src2))); - format %{ "vsub_regI $dst, $src1, $src2" %} + format %{ "vsub_vx $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -608,9 +641,9 @@ instruct vsub_regI(vReg dst, vReg src1, iRegIorL2I src2) %{ ins_pipe(pipe_slow); %} -instruct vsub_regL(vReg dst, vReg src1, iRegL src2) %{ +instruct vsubL_vx(vReg dst, vReg src1, iRegL src2) %{ match(Set dst (SubVL src1 (Replicate src2))); - format %{ "vsub_regL $dst, $src1, $src2" %} + format %{ "vsubL_vx $dst, $src1, $src2" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vsub_vx(as_VectorRegister($dst$$reg), @@ -622,11 +655,11 @@ instruct vsub_regL(vReg dst, vReg src1, iRegL src2) %{ // vector-scalar sub (predicated) -instruct vsub_regI_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{ +instruct vsub_vx_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{ match(Set dst_src (SubVB (Binary dst_src (Replicate src2)) v0)); match(Set dst_src (SubVS (Binary dst_src (Replicate src2)) v0)); match(Set dst_src (SubVI (Binary dst_src (Replicate src2)) v0)); - format %{ "vsub_regI_masked $dst_src, $dst_src, $src2" %} + format %{ "vsub_vx_masked $dst_src, $dst_src, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, 
Matcher::vector_length(this)); @@ -637,9 +670,9 @@ instruct vsub_regI_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vsub_regL_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{ +instruct vsubL_vx_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{ match(Set dst_src (SubVL (Binary dst_src (Replicate src2)) v0)); - format %{ "vsub_regL_masked $dst_src, $dst_src, $src2" %} + format %{ "vsubL_vx_masked $dst_src, $dst_src, $src2, $v0" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vsub_vx(as_VectorRegister($dst_src$$reg), @@ -649,11 +682,140 @@ instruct vsub_regL_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} +// -------- vector saturating integer operations + +// vector saturating signed integer addition + +instruct vsadd(vReg dst, vReg src1, vReg src2) %{ + predicate(n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); + match(Set dst (SaturatingAddV src1 src2)); + format %{ "vsadd $dst, $src1, $src2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vsadd_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector saturating unsigned integer addition + +instruct vsaddu(vReg dst, vReg src1, vReg src2) %{ + predicate(n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); + match(Set dst (SaturatingAddV src1 src2)); + format %{ "vsaddu $dst, $src1, $src2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vsaddu_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector saturating signed integer addition (predicated) + +instruct vsadd_masked(vReg dst_src, vReg src1, vRegMask_V0 v0) %{ + predicate(n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); + match(Set dst_src (SaturatingAddV (Binary dst_src src1) v0)); + format %{ "vsadd_masked $dst_src, $dst_src, $src1, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vsadd_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($src1$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// vector saturating unsigned integer addition (predicated) + +instruct vsaddu_masked(vReg dst_src, vReg src1, vRegMask_V0 v0) %{ + predicate(n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); + match(Set dst_src (SaturatingAddV (Binary dst_src src1) v0)); + format %{ "vsaddu_masked $dst_src, $dst_src, $src1, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vsaddu_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($src1$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// vector saturating signed integer subtraction + +instruct vssub(vReg dst, vReg src1, vReg src2) %{ + predicate(n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); + match(Set dst (SaturatingSubV 
src1 src2)); + format %{ "vssub $dst, $src1, $src2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vssub_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector saturating unsigned integer subtraction + +instruct vssubu(vReg dst, vReg src1, vReg src2) %{ + predicate(n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); + match(Set dst (SaturatingSubV src1 src2)); + format %{ "vssubu $dst, $src1, $src2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vssubu_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector saturating signed integer subtraction (predicated) + +instruct vssub_masked(vReg dst_src, vReg src1, vRegMask_V0 v0) %{ + predicate(n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); + match(Set dst_src (SaturatingSubV (Binary dst_src src1) v0)); + format %{ "vssub_masked $dst_src, $dst_src, $src1, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vssub_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($src1$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// vector saturating unsigned integer subtraction (predicated) + +instruct vssubu_masked(vReg dst_src, vReg src1, vRegMask_V0 v0) %{ + predicate(n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); + match(Set dst_src (SaturatingSubV (Binary dst_src src1) v0)); + format %{ "vssubu_masked $dst_src, $dst_src, $src1, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vssubu_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($src1$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector and instruct vand(vReg dst, vReg src1, vReg src2) %{ match(Set dst (AndV src1 src2)); - ins_cost(VEC_COST); format %{ "vand $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -669,7 +831,6 @@ instruct vand(vReg dst, vReg src1, vReg src2) %{ instruct vand_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ match(Set dst_src1 (AndV (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vand_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -683,30 +844,30 @@ instruct vand_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ // vector-immediate and (unpredicated) -instruct vand_immI(vReg dst_src, immI5 con) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); - match(Set dst_src (AndV dst_src (Replicate con))); - format %{ "vand_immI $dst_src, $dst_src, $con" %} +instruct vand_vi(vReg dst, vReg src1, immI5 con) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + 
Matcher::vector_element_basic_type(n) == T_INT); + match(Set dst (AndV src1 (Replicate con))); + format %{ "vand_vi $dst, $src1, $con" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); - __ vand_vi(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), + __ vand_vi(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), $con$$constant); %} ins_pipe(pipe_slow); %} -instruct vand_immL(vReg dst_src, immL5 con) %{ +instruct vandL_vi(vReg dst, vReg src1, immL5 con) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); - match(Set dst_src (AndV dst_src (Replicate con))); - format %{ "vand_immL $dst_src, $dst_src, $con" %} + match(Set dst (AndV src1 (Replicate con))); + format %{ "vandL_vi $dst, $src1, $con" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); - __ vand_vi(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), + __ vand_vi(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), $con$$constant); %} ins_pipe(pipe_slow); @@ -714,43 +875,43 @@ instruct vand_immL(vReg dst_src, immL5 con) %{ // vector-scalar and (unpredicated) -instruct vand_regI(vReg dst_src, iRegIorL2I src) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); - match(Set dst_src (AndV dst_src (Replicate src))); - format %{ "vand_regI $dst_src, $dst_src, $src" %} +instruct vand_vx(vReg dst, vReg src1, iRegIorL2I src2) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); + match(Set dst (AndV src1 (Replicate src2))); + format %{ "vand_vx $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); - __ vand_vx(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), - as_Register($src$$reg)); + __ vand_vx(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_Register($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vand_regL(vReg dst_src, iRegL src) %{ +instruct vandL_vx(vReg dst, vReg src1, iRegL src2) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); - match(Set dst_src (AndV dst_src (Replicate src))); - format %{ "vand_regL $dst_src, $dst_src, $src" %} + match(Set dst (AndV src1 (Replicate src2))); + format %{ "vandL_vx $dst, $src1, $src2" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); - __ vand_vx(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), - as_Register($src$$reg)); + __ vand_vx(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_Register($src2$$reg)); %} ins_pipe(pipe_slow); %} // vector-immediate and (predicated) -instruct vand_immI_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); +instruct vand_vi_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); match(Set dst_src (AndV (Binary dst_src (Replicate con)) v0)); - format %{ "vand_immI_masked $dst_src, $dst_src, $con" %} + format %{ "vand_vi_masked $dst_src, 
$dst_src, $con, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -761,10 +922,10 @@ instruct vand_immI_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vand_immL_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ +instruct vandL_vi_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst_src (AndV (Binary dst_src (Replicate con)) v0)); - format %{ "vand_immL_masked $dst_src, $dst_src, $con" %} + format %{ "vandL_vi_masked $dst_src, $dst_src, $con, $v0" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vand_vi(as_VectorRegister($dst_src$$reg), @@ -776,12 +937,12 @@ instruct vand_immL_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ // vector-scalar and (predicated) -instruct vand_regI_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); +instruct vand_vx_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); match(Set dst_src (AndV (Binary dst_src (Replicate src)) v0)); - format %{ "vand_regI_masked $dst_src, $dst_src, $src" %} + format %{ "vand_vx_masked $dst_src, $dst_src, $src, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -792,10 +953,10 @@ instruct vand_regI_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vand_regL_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{ +instruct vandL_vx_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst_src (AndV (Binary dst_src (Replicate src)) v0)); - format %{ "vand_regL_masked $dst_src, $dst_src, $src" %} + format %{ "vandL_vx_masked $dst_src, $dst_src, $src, $v0" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vand_vx(as_VectorRegister($dst_src$$reg), @@ -809,7 +970,6 @@ instruct vand_regL_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{ instruct vor(vReg dst, vReg src1, vReg src2) %{ match(Set dst (OrV src1 src2)); - ins_cost(VEC_COST); format %{ "vor $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -825,7 +985,6 @@ instruct vor(vReg dst, vReg src1, vReg src2) %{ instruct vor_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ match(Set dst_src1 (OrV (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vor_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -839,30 +998,30 @@ instruct vor_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ // vector-immediate or (unpredicated) -instruct vor_immI(vReg dst_src, immI5 con) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); - match(Set dst_src (OrV dst_src (Replicate con))); - format %{ "vor_immI $dst_src, $dst_src, $con" %} +instruct vor_vi(vReg dst, vReg src1, immI5 con) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == 
T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); + match(Set dst (OrV src1 (Replicate con))); + format %{ "vor_vi $dst, $src1, $con" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); - __ vor_vi(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), + __ vor_vi(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), $con$$constant); %} ins_pipe(pipe_slow); %} -instruct vor_immL(vReg dst_src, immL5 con) %{ +instruct vorL_vi(vReg dst, vReg src1, immL5 con) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); - match(Set dst_src (OrV dst_src (Replicate con))); - format %{ "vor_immL $dst_src, $dst_src, $con" %} + match(Set dst (OrV src1 (Replicate con))); + format %{ "vorL_vi $dst, $src1, $con" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); - __ vor_vi(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), + __ vor_vi(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), $con$$constant); %} ins_pipe(pipe_slow); @@ -870,43 +1029,43 @@ instruct vor_immL(vReg dst_src, immL5 con) %{ // vector-scalar or (unpredicated) -instruct vor_regI(vReg dst_src, iRegIorL2I src) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); - match(Set dst_src (OrV dst_src (Replicate src))); - format %{ "vor_regI $dst_src, $dst_src, $src" %} +instruct vor_vx(vReg dst, vReg src1, iRegIorL2I src2) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); + match(Set dst (OrV src1 (Replicate src2))); + format %{ "vor_vx $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); - __ vor_vx(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), - as_Register($src$$reg)); + __ vor_vx(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_Register($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vor_regL(vReg dst_src, iRegL src) %{ +instruct vorL_vx(vReg dst, vReg src1, iRegL src2) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); - match(Set dst_src (OrV dst_src (Replicate src))); - format %{ "vor_regL $dst_src, $dst_src, $src" %} + match(Set dst (OrV src1 (Replicate src2))); + format %{ "vorL_vx $dst, $src1, $src2" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); - __ vor_vx(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), - as_Register($src$$reg)); + __ vor_vx(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_Register($src2$$reg)); %} ins_pipe(pipe_slow); %} // vector-immediate or (predicated) -instruct vor_immI_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); +instruct vor_vi_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); match(Set dst_src (OrV (Binary dst_src (Replicate con)) v0)); - format %{ "vor_immI_masked $dst_src, $dst_src, $con" %} + format %{ "vor_vi_masked $dst_src, $dst_src, $con, $v0" %} 
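For context on the _vi rules above: the immI5/immL5 operand types restrict the replicated constant to what the RVV vector-immediate encoding can hold, which is why larger constants fall through to the _vx (scalar register) patterns. A minimal sketch of that range check (illustrative C++; the helper name is made up for this note, not code from this patch):

// Illustrative only: RVV vector-immediate instructions such as vand.vi,
// vor.vi and vxor.vi encode a signed 5-bit immediate, so only constants in
// [-16, 15] can use the _vi form.
static bool fits_in_rvv_simm5(long con) {
  return con >= -16 && con <= 15;
}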
ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -917,10 +1076,10 @@ instruct vor_immI_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vor_immL_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ +instruct vorL_vi_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst_src (OrV (Binary dst_src (Replicate con)) v0)); - format %{ "vor_immL_masked $dst_src, $dst_src, $con" %} + format %{ "vorL_vi_masked $dst_src, $dst_src, $con, $v0" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vor_vi(as_VectorRegister($dst_src$$reg), @@ -932,12 +1091,12 @@ instruct vor_immL_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ // vector-scalar or (predicated) -instruct vor_regI_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); +instruct vor_vx_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); match(Set dst_src (OrV (Binary dst_src (Replicate src)) v0)); - format %{ "vor_regI_masked $dst_src, $dst_src, $src" %} + format %{ "vor_vx_masked $dst_src, $dst_src, $src, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -948,10 +1107,10 @@ instruct vor_regI_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vor_regL_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{ +instruct vorL_vx_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst_src (OrV (Binary dst_src (Replicate src)) v0)); - format %{ "vor_regL_masked $dst_src, $dst_src, $src" %} + format %{ "vorL_vx_masked $dst_src, $dst_src, $src, $v0" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vor_vx(as_VectorRegister($dst_src$$reg), @@ -965,7 +1124,6 @@ instruct vor_regL_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{ instruct vxor(vReg dst, vReg src1, vReg src2) %{ match(Set dst (XorV src1 src2)); - ins_cost(VEC_COST); format %{ "vxor $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -981,7 +1139,6 @@ instruct vxor(vReg dst, vReg src1, vReg src2) %{ instruct vxor_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ match(Set dst_src1 (XorV (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vxor_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -995,30 +1152,30 @@ instruct vxor_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ // vector-immediate xor (unpredicated) -instruct vxor_immI(vReg dst_src, immI5 con) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); - match(Set dst_src (XorV dst_src (Replicate con))); - format %{ "vxor_immI $dst_src, $dst_src, $con" %} +instruct vxor_vi(vReg dst, vReg src1, immI5 con) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + 
Matcher::vector_element_basic_type(n) == T_INT); + match(Set dst (XorV src1 (Replicate con))); + format %{ "vxor_vi $dst, $src1, $con" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); - __ vxor_vi(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), + __ vxor_vi(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), $con$$constant); %} ins_pipe(pipe_slow); %} -instruct vxor_immL(vReg dst_src, immL5 con) %{ +instruct vxorL_vi(vReg dst, vReg src1, immL5 con) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); - match(Set dst_src (XorV dst_src (Replicate con))); - format %{ "vxor_immL $dst_src, $dst_src, $con" %} + match(Set dst (XorV src1 (Replicate con))); + format %{ "vxorL_vi $dst, $src1, $con" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); - __ vxor_vi(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), + __ vxor_vi(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), $con$$constant); %} ins_pipe(pipe_slow); @@ -1026,43 +1183,43 @@ instruct vxor_immL(vReg dst_src, immL5 con) %{ // vector-scalar xor (unpredicated) -instruct vxor_regI(vReg dst_src, iRegIorL2I src) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); - match(Set dst_src (XorV dst_src (Replicate src))); - format %{ "vxor_regI $dst_src, $dst_src, $src" %} +instruct vxor_vx(vReg dst, vReg src1, iRegIorL2I src2) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); + match(Set dst (XorV src1 (Replicate src2))); + format %{ "vxor_vx $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); - __ vxor_vx(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), - as_Register($src$$reg)); + __ vxor_vx(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_Register($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vxor_regL(vReg dst_src, iRegL src) %{ +instruct vxorL_vx(vReg dst, vReg src1, iRegL src2) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); - match(Set dst_src (XorV dst_src (Replicate src))); - format %{ "vxor_regL $dst_src, $dst_src, $src" %} + match(Set dst (XorV src1 (Replicate src2))); + format %{ "vxorL_vx $dst, $src1, $src2" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); - __ vxor_vx(as_VectorRegister($dst_src$$reg), - as_VectorRegister($dst_src$$reg), - as_Register($src$$reg)); + __ vxor_vx(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_Register($src2$$reg)); %} ins_pipe(pipe_slow); %} // vector-immediate xor (predicated) -instruct vxor_immI_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); +instruct vxor_vi_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); match(Set dst_src (XorV (Binary dst_src (Replicate con)) v0)); - format %{ "vxor_immI_masked $dst_src, $dst_src, $con" %} + format %{ "vxor_vi_masked $dst_src, 
$dst_src, $con, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -1073,10 +1230,10 @@ instruct vxor_immI_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vxor_immL_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ +instruct vxorL_vi_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst_src (XorV (Binary dst_src (Replicate con)) v0)); - format %{ "vxor_immL_masked $dst_src, $dst_src, $con" %} + format %{ "vxorL_vi_masked $dst_src, $dst_src, $con, $v0" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vxor_vi(as_VectorRegister($dst_src$$reg), @@ -1088,12 +1245,12 @@ instruct vxor_immL_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{ // vector-scalar xor (predicated) -instruct vxor_regI_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); +instruct vxor_vx_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); match(Set dst_src (XorV (Binary dst_src (Replicate src)) v0)); - format %{ "vxor_regI_masked $dst_src, $dst_src, $src" %} + format %{ "vxor_vx_masked $dst_src, $dst_src, $src, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -1104,10 +1261,10 @@ instruct vxor_regI_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vxor_regL_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{ +instruct vxorL_vx_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst_src (XorV (Binary dst_src (Replicate src)) v0)); - format %{ "vxor_regL_masked $dst_src, $dst_src, $src" %} + format %{ "vxorL_vx_masked $dst_src, $dst_src, $src, $v0" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vxor_vx(as_VectorRegister($dst_src$$reg), @@ -1121,16 +1278,38 @@ instruct vxor_regL_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{ // vector and not +instruct vand_notB(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_BYTE); + match(Set dst (AndV src1 (XorV src2 (Replicate m1)))); + format %{ "vand_notB $dst, $src1, $src2" %} + ins_encode %{ + __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); + __ vandn_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vand_notS(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_SHORT); + match(Set dst (AndV src1 (XorV src2 (Replicate m1)))); + format %{ "vand_notS $dst, $src1, $src2" %} + ins_encode %{ + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vandn_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{ - predicate(UseZvbb); - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE 
|| - Matcher::vector_element_basic_type(n) == T_SHORT); + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_INT); match(Set dst (AndV src1 (XorV src2 (Replicate m1)))); format %{ "vand_notI $dst, $src1, $src2" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this); - __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vsetvli_helper(T_INT, Matcher::vector_length(this)); __ vandn_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); @@ -1139,8 +1318,7 @@ instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{ %} instruct vand_notL(vReg dst, vReg src1, vReg src2, immL_M1 m1) %{ - predicate(UseZvbb); - predicate(Matcher::vector_element_basic_type(n) == T_LONG); + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst (AndV src1 (XorV src2 (Replicate m1)))); format %{ "vand_notL $dst, $src1, $src2" %} ins_encode %{ @@ -1152,16 +1330,40 @@ instruct vand_notL(vReg dst, vReg src1, vReg src2, immL_M1 m1) %{ ins_pipe(pipe_slow); %} +instruct vand_notB_masked(vReg dst_src1, vReg src2, immI_M1 m1, vRegMask_V0 v0) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_BYTE); + match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (Replicate m1))) v0)); + format %{ "vand_notB_masked $dst_src1, $dst_src1, $src2, $v0" %} + ins_encode %{ + __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); + __ vandn_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vand_notS_masked(vReg dst_src1, vReg src2, immI_M1 m1, vRegMask_V0 v0) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_SHORT); + match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (Replicate m1))) v0)); + format %{ "vand_notS_masked $dst_src1, $dst_src1, $src2, $v0" %} + ins_encode %{ + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vandn_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + instruct vand_notI_masked(vReg dst_src1, vReg src2, immI_M1 m1, vRegMask_V0 v0) %{ - predicate(UseZvbb); - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_INT); match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (Replicate m1))) v0)); format %{ "vand_notI_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this); - __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vsetvli_helper(T_INT, Matcher::vector_length(this)); __ vandn_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), @@ -1171,8 +1373,7 @@ instruct vand_notI_masked(vReg dst_src1, vReg src2, immI_M1 m1, vRegMask_V0 v0) %} instruct vand_notL_masked(vReg dst_src1, vReg src2, immL_M1 m1, vRegMask_V0 v0) %{ - predicate(UseZvbb); - predicate(Matcher::vector_element_basic_type(n) == T_LONG); + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (Replicate m1))) v0)); format %{ "vand_notL_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ @@ -1185,16 +1386,124 @@ instruct vand_notL_masked(vReg dst_src1, vReg src2, immL_M1 m1, vRegMask_V0 v0) 
ins_pipe(pipe_slow); %} +instruct vand_notB_vx(vReg dst, vReg src1, iRegIorL2I src2, immI_M1 m1) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_BYTE); + match(Set dst (AndV src1 (Replicate (XorI src2 m1)))); + format %{ "vand_notB_vx $dst, $src1, $src2" %} + ins_encode %{ + __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); + __ vandn_vx(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_Register($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vand_notS_vx(vReg dst, vReg src1, iRegIorL2I src2, immI_M1 m1) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_SHORT); + match(Set dst (AndV src1 (Replicate (XorI src2 m1)))); + format %{ "vand_notS_vx $dst, $src1, $src2" %} + ins_encode %{ + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vandn_vx(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_Register($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vand_notI_vx(vReg dst, vReg src1, iRegIorL2I src2, immI_M1 m1) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_INT); + match(Set dst (AndV src1 (Replicate (XorI src2 m1)))); + format %{ "vand_notI_vx $dst, $src1, $src2" %} + ins_encode %{ + __ vsetvli_helper(T_INT, Matcher::vector_length(this)); + __ vandn_vx(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_Register($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vand_notL_vx(vReg dst, vReg src1, iRegL src2, immL_M1 m1) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_LONG); + match(Set dst (AndV src1 (Replicate (XorL src2 m1)))); + format %{ "vand_notL_vx $dst, $src1, $src2" %} + ins_encode %{ + __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); + __ vandn_vx(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_Register($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vand_notB_vx_masked(vReg dst_src1, iRegIorL2I src2, immI_M1 m1, vRegMask_V0 v0) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_BYTE); + match(Set dst_src1 (AndV (Binary dst_src1 (Replicate (XorI src2 m1))) v0)); + format %{ "vand_notB_vx_masked $dst_src1, $dst_src1, $src2, $v0" %} + ins_encode %{ + __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); + __ vandn_vx(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_Register($src2$$reg), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vand_notS_vx_masked(vReg dst_src1, iRegIorL2I src2, immI_M1 m1, vRegMask_V0 v0) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_SHORT); + match(Set dst_src1 (AndV (Binary dst_src1 (Replicate (XorI src2 m1))) v0)); + format %{ "vand_notS_vx_masked $dst_src1, $dst_src1, $src2, $v0" %} + ins_encode %{ + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vandn_vx(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_Register($src2$$reg), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vand_notI_vx_masked(vReg dst_src1, iRegIorL2I src2, immI_M1 m1, vRegMask_V0 v0) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_INT); + match(Set dst_src1 (AndV (Binary dst_src1 (Replicate (XorI src2 m1))) v0)); + format %{ "vand_notI_vx_masked $dst_src1, $dst_src1, $src2, $v0" %} + ins_encode %{ + __ vsetvli_helper(T_INT, Matcher::vector_length(this)); + __ vandn_vx(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_Register($src2$$reg), + Assembler::v0_t); + %} + 
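For context on the vand_not*_vx patterns in this hunk: the matched shape AndV src1 (Replicate (XorI src2 -1)) is a broadcast and-not, which Zvbb's vandn performs in a single instruction. A per-lane reference of the intended semantics (illustrative C++; the names and the int element type are placeholders, not from this patch):

// Illustrative per-lane semantics of the and-not-with-scalar patterns:
// each lane becomes src1[i] & ~x, i.e. AND with the complement of the
// broadcast scalar.
static void vandn_vx_reference(int* dst, const int* src1, int x, int n) {
  for (int i = 0; i < n; i++) {
    dst[i] = src1[i] & ~x;
  }
}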
ins_pipe(pipe_slow); +%} + +instruct vand_notL_vx_masked(vReg dst_src1, iRegL src2, immL_M1 m1, vRegMask_V0 v0) %{ + predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_LONG); + match(Set dst_src1 (AndV (Binary dst_src1 (Replicate (XorL src2 m1))) v0)); + format %{ "vand_notL_vx_masked $dst_src1, $dst_src1, $src2, $v0" %} + ins_encode %{ + __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); + __ vandn_vx(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_Register($src2$$reg), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // ------------------------------ Vector not ----------------------------------- // vector not -instruct vnotI(vReg dst, vReg src, immI_M1 m1) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); +instruct vnot(vReg dst, vReg src, immI_M1 m1) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); match(Set dst (XorV src (Replicate m1))); - format %{ "vnotI $dst, $src" %} + format %{ "vnot $dst, $src" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -1220,12 +1529,12 @@ instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{ // vector not - predicated -instruct vnotI_masked(vReg dst_src, immI_M1 m1, vRegMask_V0 v0) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); +instruct vnot_masked(vReg dst_src, immI_M1 m1, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); match(Set dst_src (XorV (Binary dst_src (Replicate m1)) v0)); - format %{ "vnotI_masked $dst_src, $dst_src, $v0" %} + format %{ "vnot_masked $dst_src, $dst_src, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -1251,10 +1560,23 @@ instruct vnotL_masked(vReg dst_src, immI_M1 m1, vRegMask_V0 v0) %{ // vector float div +instruct vdiv_hfp(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (DivVHF src1 src2)); + format %{ "vdiv_hfp $dst, $src1, $src2" %} + ins_encode %{ + assert(UseZvfh, "must"); + assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must"); + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vfdiv_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + instruct vdiv_fp(vReg dst, vReg src1, vReg src2) %{ match(Set dst (DivVF src1 src2)); match(Set dst (DivVD src1 src2)); - ins_cost(VEC_COST); format %{ "vdiv_fp $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1271,7 +1593,6 @@ instruct vdiv_fp(vReg dst, vReg src1, vReg src2) %{ instruct vdiv_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ match(Set dst_src1 (DivVF (Binary dst_src1 src2) v0)); match(Set dst_src1 (DivVD (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vdiv_fp_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1289,7 +1610,6 @@ instruct vmax(vReg dst, vReg src1, vReg src2) %{ 
predicate(Matcher::vector_element_basic_type(n) != T_FLOAT && Matcher::vector_element_basic_type(n) != T_DOUBLE); match(Set dst (MaxV src1 src2)); - ins_cost(VEC_COST); format %{ "vmax $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1304,7 +1624,6 @@ instruct vmin(vReg dst, vReg src1, vReg src2) %{ predicate(Matcher::vector_element_basic_type(n) != T_FLOAT && Matcher::vector_element_basic_type(n) != T_DOUBLE); match(Set dst (MinV src1 src2)); - ins_cost(VEC_COST); format %{ "vmin $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1321,7 +1640,6 @@ instruct vmax_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) != T_FLOAT && Matcher::vector_element_basic_type(n) != T_DOUBLE); match(Set dst_src1 (MaxV (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vmax_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1336,7 +1654,6 @@ instruct vmin_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) != T_FLOAT && Matcher::vector_element_basic_type(n) != T_DOUBLE); match(Set dst_src1 (MinV (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vmin_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1347,6 +1664,92 @@ instruct vmin_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} +// vector unsigned integer max/min + +instruct vmaxu(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (UMaxV src1 src2)); + format %{ "vmaxu $dst, $src1, $src2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vmaxu_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vminu(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (UMinV src1 src2)); + format %{ "vminu $dst, $src1, $src2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vminu_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector unsigned integer max/min - predicated + +instruct vmaxu_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (UMaxV (Binary dst_src1 src2) v0)); + format %{ "vmaxu_masked $dst_src1, $dst_src1, $src2, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vmaxu_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vminu_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (UMinV (Binary dst_src1 src2) v0)); + format %{ "vminu_masked $dst_src1, $dst_src1, $src2, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + assert(is_integral_type(bt), "unsupported type"); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vminu_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + 
as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// vector float-point max/min (half precision) + +instruct vmax_hfp(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst (MaxVHF src1 src2)); + effect(TEMP_DEF dst, TEMP v0); + format %{ "vmax_hfp $dst, $src1, $src2" %} + ins_encode %{ + assert(UseZvfh, "must"); + assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must"); + __ minmax_fp_v(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + T_SHORT, false /* is_min */, Matcher::vector_length(this)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmin_hfp(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst (MinVHF src1 src2)); + effect(TEMP_DEF dst, TEMP v0); + format %{ "vmin_hfp $dst, $src1, $src2" %} + ins_encode %{ + assert(UseZvfh, "must"); + assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must"); + __ minmax_fp_v(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + T_SHORT, true /* is_min */, Matcher::vector_length(this)); + %} + ins_pipe(pipe_slow); +%} + // vector float-point max/min instruct vmax_fp(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ @@ -1354,7 +1757,6 @@ instruct vmax_fp(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst (MaxV src1 src2)); effect(TEMP_DEF dst, TEMP v0); - ins_cost(VEC_COST); format %{ "vmax_fp $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1370,7 +1772,6 @@ instruct vmin_fp(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst (MinV src1 src2)); effect(TEMP_DEF dst, TEMP v0); - ins_cost(VEC_COST); format %{ "vmin_fp $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1388,7 +1789,6 @@ instruct vmax_fp_masked(vReg dst_src1, vReg src2, vRegMask vmask, vReg tmp1, vRe Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst_src1 (MaxV (Binary dst_src1 src2) vmask)); effect(TEMP_DEF dst_src1, TEMP tmp1, TEMP tmp2, TEMP v0); - ins_cost(VEC_COST); format %{ "vmax_fp_masked $dst_src1, $dst_src1, $src2, $vmask\t# KILL $tmp1, $tmp2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1405,7 +1805,6 @@ instruct vmin_fp_masked(vReg dst_src1, vReg src2, vRegMask vmask, vReg tmp1, vRe Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst_src1 (MinV (Binary dst_src1 src2) vmask)); effect(TEMP_DEF dst_src1, TEMP tmp1, TEMP tmp2, TEMP v0); - ins_cost(VEC_COST); format %{ "vmin_fp_masked $dst_src1, $dst_src1, $src2, $vmask\t# KILL $tmp1, $tmp2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1419,11 +1818,25 @@ instruct vmin_fp_masked(vReg dst_src1, vReg src2, vRegMask vmask, vReg tmp1, vRe // vector fmla +// dst_src1 = src2 * src3 + dst_src1 (half precision) +instruct vhfmla(vReg dst_src1, vReg src2, vReg src3) %{ + match(Set dst_src1 (FmaVHF dst_src1 (Binary src2 src3))); + format %{ "vhfmla $dst_src1, $dst_src1, $src2, $src3" %} + ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); + assert(UseZvfh, "must"); + assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must"); + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); 
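For reference on the half-precision FMA shape above: every lane computes src2 * src3 + dst as a fused multiply-add on 16-bit floats (the element type is tracked as T_SHORT because half floats are 16 bits wide, and Zvfh must be available). A scalar sketch of the lane-wise computation, with float standing in for the half-precision type (illustrative only, not code from this patch):

#include <cmath>
// Illustrative lane-wise shape of the FmaVHF pattern lowered to vfmacc.vv:
// dst_src1[i] = src2[i] * src3[i] + dst_src1[i], computed with a single rounding.
static void fma_hf_reference(float* dst_src1, const float* src2,
                             const float* src3, int n) {
  for (int i = 0; i < n; i++) {
    dst_src1[i] = std::fma(src2[i], src3[i], dst_src1[i]);
  }
}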
+%} + // dst_src1 = src2 * src3 + dst_src1 instruct vfmla(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); - ins_cost(VEC_COST); format %{ "vfmla $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ assert(UseFMA, "Needs FMA instructions support."); @@ -1458,7 +1871,6 @@ instruct vfmadd_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ // "(-src2) * src3 + dst_src1" has been idealized to "src3 * (-src2) + dst_src1" instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); - ins_cost(VEC_COST); format %{ "vfmlsF $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ assert(UseFMA, "Needs FMA instructions support."); @@ -1473,7 +1885,6 @@ instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ // "(-src2) * src3 + dst_src1" has been idealized to "src3 * (-src2) + dst_src1" instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); - ins_cost(VEC_COST); format %{ "vfmlsD $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ assert(UseFMA, "Needs FMA instructions support."); @@ -1507,7 +1918,6 @@ instruct vfnmsub_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ // "(-src2) * src3 - dst_src1" has been idealized to "src3 * (-src2) - dst_src1" instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); - ins_cost(VEC_COST); format %{ "vfnmlaF $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ assert(UseFMA, "Needs FMA instructions support."); @@ -1522,7 +1932,6 @@ instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ // "(-src2) * src3 - dst_src1" has been idealized to "src3 * (-src2) - dst_src1" instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); - ins_cost(VEC_COST); format %{ "vfnmlaD $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ assert(UseFMA, "Needs FMA instructions support."); @@ -1555,7 +1964,6 @@ instruct vfnmadd_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ // dst_src1 = src2 * src3 - dst_src1 instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); - ins_cost(VEC_COST); format %{ "vfnmlsF $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ assert(UseFMA, "Needs FMA instructions support."); @@ -1569,7 +1977,6 @@ instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = -dst_src1 + src2 * src3 instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); - ins_cost(VEC_COST); format %{ "vfnmlsD $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ assert(UseFMA, "Needs FMA instructions support."); @@ -1605,7 +2012,6 @@ instruct vmla(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); - ins_cost(VEC_COST); format %{ "vmla $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1641,7 +2047,6 @@ instruct vmls(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); - 
ins_cost(VEC_COST); format %{ "vmls $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1676,7 +2081,6 @@ instruct vmul(vReg dst, vReg src1, vReg src2) %{ match(Set dst (MulVS src1 src2)); match(Set dst (MulVI src1 src2)); match(Set dst (MulVL src1 src2)); - ins_cost(VEC_COST); format %{ "vmul $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1687,10 +2091,22 @@ instruct vmul(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +instruct vmul_hfp(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (MulVHF src1 src2)); + format %{ "vmul_hfp $dst, $src1, $src2" %} + ins_encode %{ + assert(UseZvfh, "must"); + assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must"); + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + instruct vmul_fp(vReg dst, vReg src1, vReg src2) %{ match(Set dst (MulVF src1 src2)); match(Set dst (MulVD src1 src2)); - ins_cost(VEC_COST); format %{ "vmul_fp $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1708,7 +2124,6 @@ instruct vmul_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ match(Set dst_src1 (MulVS (Binary dst_src1 src2) v0)); match(Set dst_src1 (MulVI (Binary dst_src1 src2) v0)); match(Set dst_src1 (MulVL (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vmul_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1722,7 +2137,6 @@ instruct vmul_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ instruct vmul_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ match(Set dst_src1 (MulVF (Binary dst_src1 src2) v0)); match(Set dst_src1 (MulVD (Binary dst_src1 src2) v0)); - ins_cost(VEC_COST); format %{ "vmul_fp_masked $dst_src1, $dst_src1, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1735,11 +2149,11 @@ instruct vmul_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ // vector-scalar mul (unpredicated) -instruct vmul_regI(vReg dst, vReg src1, iRegIorL2I src2) %{ +instruct vmul_vx(vReg dst, vReg src1, iRegIorL2I src2) %{ match(Set dst (MulVB src1 (Replicate src2))); match(Set dst (MulVS src1 (Replicate src2))); match(Set dst (MulVI src1 (Replicate src2))); - format %{ "vmul_regI $dst, $src1, $src2" %} + format %{ "vmul_vx $dst, $src1, $src2" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -1750,9 +2164,9 @@ instruct vmul_regI(vReg dst, vReg src1, iRegIorL2I src2) %{ ins_pipe(pipe_slow); %} -instruct vmul_regL(vReg dst, vReg src1, iRegL src2) %{ +instruct vmulL_vx(vReg dst, vReg src1, iRegL src2) %{ match(Set dst (MulVL src1 (Replicate src2))); - format %{ "vmul_regL $dst, $src1, $src2" %} + format %{ "vmulL_vx $dst, $src1, $src2" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vmul_vx(as_VectorRegister($dst$$reg), @@ -1764,11 +2178,11 @@ instruct vmul_regL(vReg dst, vReg src1, iRegL src2) %{ // vector-scalar mul (predicated) -instruct vmul_regI_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{ +instruct vmul_vx_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{ match(Set dst_src (MulVB (Binary dst_src (Replicate src2)) v0)); match(Set dst_src (MulVS (Binary dst_src (Replicate src2)) v0)); 
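A note on the *_masked vector-scalar shapes such as this one: matching (Binary dst_src (Replicate src2)) v0 makes the destination double as the first source, and Assembler::v0_t asks the hardware to apply the operation only to lanes whose v0 mask bit is set, so the pattern expects inactive lanes to keep their existing destination values. A rough per-lane sketch (illustrative C++; names and the int element type are placeholders):

// Illustrative semantics of a masked multiply-by-broadcast-scalar:
// compute active lanes, leave inactive lanes untouched.
static void mul_vx_masked_reference(int* dst_src, int x, const bool* mask, int n) {
  for (int i = 0; i < n; i++) {
    if (mask[i]) {
      dst_src[i] = dst_src[i] * x;
    }
  }
}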
match(Set dst_src (MulVI (Binary dst_src (Replicate src2)) v0)); - format %{ "vmul_regI_masked $dst_src, $dst_src, $src2" %} + format %{ "vmul_vx_masked $dst_src, $dst_src, $src2, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -1779,9 +2193,9 @@ instruct vmul_regI_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vmul_regL_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{ +instruct vmulL_vx_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{ match(Set dst_src (MulVL (Binary dst_src (Replicate src2)) v0)); - format %{ "vmul_regL_masked $dst_src, $dst_src, $src2" %} + format %{ "vmulL_vx_masked $dst_src, $dst_src, $src2, $v0" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); __ vmul_vx(as_VectorRegister($dst_src$$reg), @@ -1796,7 +2210,6 @@ instruct vmul_regL_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{ instruct vneg(vReg dst, vReg src) %{ match(Set dst (NegVI src)); match(Set dst (NegVL src)); - ins_cost(VEC_COST); format %{ "vneg $dst, $src" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1811,7 +2224,6 @@ instruct vneg(vReg dst, vReg src) %{ instruct vneg_masked(vReg dst_src, vRegMask_V0 v0) %{ match(Set dst_src (NegVI dst_src v0)); match(Set dst_src (NegVL dst_src v0)); - ins_cost(VEC_COST); format %{ "vneg_masked $dst_src, $dst_src, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1827,7 +2239,6 @@ instruct vneg_masked(vReg dst_src, vRegMask_V0 v0) %{ instruct vfneg(vReg dst, vReg src) %{ match(Set dst (NegVF src)); match(Set dst (NegVD src)); - ins_cost(VEC_COST); format %{ "vfneg $dst, $src" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1842,7 +2253,6 @@ instruct vfneg(vReg dst, vReg src) %{ instruct vfneg_masked(vReg dst_src, vRegMask_V0 v0) %{ match(Set dst_src (NegVF dst_src v0)); match(Set dst_src (NegVD dst_src v0)); - ins_cost(VEC_COST); format %{ "vfneg_masked $dst_src, $dst_src, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -1855,14 +2265,13 @@ instruct vfneg_masked(vReg dst_src, vRegMask_V0 v0) %{ // vector and reduction -instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +instruct reduce_and(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); - format %{ "reduce_andI $dst, $src1, $src2\t# KILL $tmp" %} + format %{ "reduce_and $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -1876,7 +2285,6 @@ instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_andL $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); @@ -1889,14 +2297,13 @@ instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ // vector and reduction - predicated -instruct reduce_andI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ 
+instruct reduce_and_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (AndReductionV (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); - format %{ "reduce_andI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + format %{ "reduce_and_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -1911,7 +2318,6 @@ instruct reduce_andL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (AndReductionV (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_andL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); @@ -1925,14 +2331,13 @@ instruct reduce_andL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0 // vector or reduction -instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +instruct reduce_or(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); - format %{ "reduce_orI $dst, $src1, $src2\t# KILL $tmp" %} + format %{ "reduce_or $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -1946,7 +2351,6 @@ instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_orL $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); @@ -1959,14 +2363,13 @@ instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ // vector or reduction - predicated -instruct reduce_orI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ +instruct reduce_or_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (OrReductionV (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); - format %{ "reduce_orI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + format %{ "reduce_or_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -1981,7 +2384,6 @@ instruct reduce_orL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (OrReductionV (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_orL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); @@ -1995,14 +2397,13 
@@ instruct reduce_orL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, // vector xor reduction -instruct reduce_xorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +instruct reduce_xor(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); - format %{ "reduce_xorI $dst, $src1, $src2\t# KILL $tmp" %} + format %{ "reduce_xor $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -2016,7 +2417,6 @@ instruct reduce_xorL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_xorL $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); @@ -2029,14 +2429,13 @@ instruct reduce_xorL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ // vector xor reduction - predicated -instruct reduce_xorI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ +instruct reduce_xor_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (XorReductionV (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); - format %{ "reduce_xorI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + format %{ "reduce_xor_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -2051,7 +2450,6 @@ instruct reduce_xorL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (XorReductionV (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_xorL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); @@ -2065,14 +2463,13 @@ instruct reduce_xorL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0 // vector add reduction -instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +instruct reduce_add(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); - format %{ "reduce_addI $dst, $src1, $src2\t# KILL $tmp" %} + format %{ "reduce_add $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -2086,7 +2483,6 @@ instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_addL $dst, $src1, $src2\t# 
KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); @@ -2109,7 +2505,6 @@ instruct reduce_addF_ordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{ predicate(n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVF src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_addF_ordered $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2)); @@ -2125,7 +2520,6 @@ instruct reduce_addF_unordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{ predicate(!n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVF src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_addF_unordered $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2)); @@ -2141,7 +2535,6 @@ instruct reduce_addD_ordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{ predicate(n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVD src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_addD_ordered $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2)); @@ -2157,7 +2550,6 @@ instruct reduce_addD_unordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{ predicate(!n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVD src1 src2)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_addD_unordered $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2)); @@ -2171,14 +2563,13 @@ instruct reduce_addD_unordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{ // vector add reduction - predicated -instruct reduce_addI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ +instruct reduce_add_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (AddReductionVI (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); - format %{ "reduce_addI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + format %{ "reduce_add_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -2193,7 +2584,6 @@ instruct reduce_addL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (AddReductionVL (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_addL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); @@ -2208,7 +2598,6 @@ instruct reduce_addL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0 instruct reduce_addF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ match(Set dst (AddReductionVF (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_addF_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2)); @@ -2223,7 +2612,6 @@ instruct reduce_addF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, vR instruct reduce_addD_masked(fRegD dst, fRegD src1, vReg src2, 
vRegMask_V0 v0, vReg tmp) %{ match(Set dst (AddReductionVD (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "reduce_addD_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2)); @@ -2237,14 +2625,13 @@ instruct reduce_addD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, vR // vector integer max reduction -instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +instruct vreduce_max(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (MaxReductionV src1 src2)); - ins_cost(VEC_COST); effect(TEMP tmp); - format %{ "vreduce_maxI $dst, $src1, $src2\t# KILL $tmp" %} + format %{ "vreduce_max $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -2257,7 +2644,6 @@ instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (MaxReductionV src1 src2)); - ins_cost(VEC_COST); effect(TEMP tmp); format %{ "vreduce_maxL $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ @@ -2271,14 +2657,13 @@ instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ // vector integer max reduction - predicated -instruct vreduce_maxI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ +instruct vreduce_max_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (MaxReductionV (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); - format %{ "vreduce_maxI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + format %{ "vreduce_max_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -2293,7 +2678,6 @@ instruct vreduce_maxL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (MaxReductionV (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "vreduce_maxL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); @@ -2307,14 +2691,13 @@ instruct vreduce_maxL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v // vector integer min reduction -instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +instruct vreduce_min(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (MinReductionV src1 src2)); - ins_cost(VEC_COST); effect(TEMP tmp); - format %{ "vreduce_minI $dst, $src1, $src2\t# KILL $tmp" %} + format %{ "vreduce_min $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, 
$src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -2327,7 +2710,6 @@ instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (MinReductionV src1 src2)); - ins_cost(VEC_COST); effect(TEMP tmp); format %{ "vreduce_minL $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ @@ -2341,14 +2723,13 @@ instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ // vector integer min reduction - predicated -instruct vreduce_minI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ +instruct vreduce_min_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || Matcher::vector_element_basic_type(n->in(2)) == T_INT); match(Set dst (MinReductionV (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); - format %{ "vreduce_minI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + format %{ "vreduce_min_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ reduce_integral_v($dst$$Register, $src1$$Register, @@ -2363,7 +2744,6 @@ instruct vreduce_minL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); match(Set dst (MinReductionV (Binary src1 src2) v0)); effect(TEMP tmp); - ins_cost(VEC_COST); format %{ "vreduce_minL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); @@ -2380,7 +2760,6 @@ instruct vreduce_minL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT); match(Set dst (MaxReductionV src1 src2)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "vreduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} ins_encode %{ @@ -2395,7 +2774,6 @@ instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE); match(Set dst (MaxReductionV src1 src2)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "vreduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} ins_encode %{ @@ -2412,7 +2790,6 @@ instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ instruct vreduce_maxF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT); match(Set dst (MaxReductionV (Binary src1 src2) v0)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "vreduce_maxF_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %} ins_encode %{ @@ -2428,7 +2805,6 @@ instruct vreduce_maxF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, v instruct vreduce_maxD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE); match(Set dst (MaxReductionV (Binary src1 src2) v0)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ 
"vreduce_maxD_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %} ins_encode %{ @@ -2446,7 +2822,6 @@ instruct vreduce_maxD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, v instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT); match(Set dst (MinReductionV src1 src2)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "vreduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} ins_encode %{ @@ -2461,7 +2836,6 @@ instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE); match(Set dst (MinReductionV src1 src2)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "vreduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} ins_encode %{ @@ -2478,7 +2852,6 @@ instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ instruct vreduce_minF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT); match(Set dst (MinReductionV (Binary src1 src2) v0)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "vreduce_minF_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %} ins_encode %{ @@ -2494,7 +2867,6 @@ instruct vreduce_minF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, v instruct vreduce_minD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE); match(Set dst (MinReductionV (Binary src1 src2) v0)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "vreduce_minD_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %} ins_encode %{ @@ -2573,7 +2945,6 @@ instruct reduce_mulL_masked(iRegLNoSp dst, iRegL isrc, vReg vsrc, instruct replicate(vReg dst, iRegIorL2I src) %{ predicate(Matcher::is_non_long_integral_vector(n)); match(Set dst (Replicate src)); - ins_cost(VEC_COST); format %{ "replicate $dst, $src" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -2586,7 +2957,6 @@ instruct replicate(vReg dst, iRegIorL2I src) %{ instruct replicateL(vReg dst, iRegL src) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst (Replicate src)); - ins_cost(VEC_COST); format %{ "replicateL $dst, $src" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); @@ -2598,7 +2968,6 @@ instruct replicateL(vReg dst, iRegL src) %{ instruct replicate_imm5(vReg dst, immI5 con) %{ predicate(Matcher::is_non_long_integral_vector(n)); match(Set dst (Replicate con)); - ins_cost(VEC_COST); format %{ "replicate_imm5 $dst, $con" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -2611,7 +2980,6 @@ instruct replicate_imm5(vReg dst, immI5 con) %{ instruct replicateL_imm5(vReg dst, immL5 con) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst (Replicate con)); - ins_cost(VEC_COST); format %{ "replicateL_imm5 $dst, $con" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); @@ -2620,10 +2988,21 @@ instruct replicateL_imm5(vReg dst, immL5 con) %{ ins_pipe(pipe_slow); %} +instruct replicateHF(vReg dst, fRegF src) %{ + predicate(Matcher::vector_element_basic_type(n) == T_SHORT); + match(Set dst (Replicate src)); + format %{ 
"replicateHF $dst, $src" %} + ins_encode %{ + assert(UseZvfh, "must"); + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + instruct replicateF(vReg dst, fRegF src) %{ predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); match(Set dst (Replicate src)); - ins_cost(VEC_COST); format %{ "replicateF $dst, $src" %} ins_encode %{ __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this)); @@ -2635,7 +3014,6 @@ instruct replicateF(vReg dst, fRegF src) %{ instruct replicateD(vReg dst, fRegD src) %{ predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst (Replicate src)); - ins_cost(VEC_COST); format %{ "replicateD $dst, $src" %} ins_encode %{ __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this)); @@ -2674,7 +3052,6 @@ instruct replicateD(vReg dst, fRegD src) %{ instruct vasrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ match(Set dst (RShiftVB src shift)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP v0); format %{ "vasrB $dst, $src, $shift" %} ins_encode %{ @@ -2693,7 +3070,6 @@ instruct vasrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ instruct vasrS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ match(Set dst (RShiftVS src shift)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP v0); format %{ "vasrS $dst, $src, $shift" %} ins_encode %{ @@ -2712,7 +3088,6 @@ instruct vasrS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ instruct vasrI(vReg dst, vReg src, vReg shift) %{ match(Set dst (RShiftVI src shift)); - ins_cost(VEC_COST); format %{ "vasrI $dst, $src, $shift" %} ins_encode %{ __ vsetvli_helper(T_INT, Matcher::vector_length(this)); @@ -2724,7 +3099,6 @@ instruct vasrI(vReg dst, vReg src, vReg shift) %{ instruct vasrL(vReg dst, vReg src, vReg shift) %{ match(Set dst (RShiftVL src shift)); - ins_cost(VEC_COST); format %{ "vasrL $dst, $src, $shift" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); @@ -2736,7 +3110,6 @@ instruct vasrL(vReg dst, vReg src, vReg shift) %{ instruct vasrB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ match(Set dst_src (RShiftVB (Binary dst_src shift) vmask)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src, TEMP v0); format %{ "vasrB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} ins_encode %{ @@ -2754,7 +3127,6 @@ instruct vasrB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) instruct vasrS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ match(Set dst_src (RShiftVS (Binary dst_src shift) vmask)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src, TEMP v0); format %{ "vasrS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} ins_encode %{ @@ -2772,7 +3144,6 @@ instruct vasrS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) instruct vasrI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ match(Set dst_src (RShiftVI (Binary dst_src shift) v0)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src); format %{ "vasrI_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ @@ -2785,7 +3156,6 @@ instruct vasrI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ instruct vasrL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ match(Set dst_src (RShiftVL (Binary dst_src shift) v0)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src); format %{ "vasrL_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ @@ -2798,7 +3168,6 @@ instruct vasrL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ instruct vlslB(vReg 
dst, vReg src, vReg shift, vRegMask_V0 v0) %{ match(Set dst (LShiftVB src shift)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP v0); format %{ "vlslB $dst, $src, $shift" %} ins_encode %{ @@ -2817,7 +3186,6 @@ instruct vlslB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ instruct vlslS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ match(Set dst (LShiftVS src shift)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP v0); format %{ "vlslS $dst, $src, $shift" %} ins_encode %{ @@ -2836,7 +3204,6 @@ instruct vlslS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ instruct vlslI(vReg dst, vReg src, vReg shift) %{ match(Set dst (LShiftVI src shift)); - ins_cost(VEC_COST); format %{ "vlslI $dst, $src, $shift" %} ins_encode %{ __ vsetvli_helper(T_INT, Matcher::vector_length(this)); @@ -2848,7 +3215,6 @@ instruct vlslI(vReg dst, vReg src, vReg shift) %{ instruct vlslL(vReg dst, vReg src, vReg shift) %{ match(Set dst (LShiftVL src shift)); - ins_cost(VEC_COST); format %{ "vlslL $dst, $src, $shift" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); @@ -2860,7 +3226,6 @@ instruct vlslL(vReg dst, vReg src, vReg shift) %{ instruct vlslB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ match(Set dst_src (LShiftVB (Binary dst_src shift) vmask)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src, TEMP v0); format %{ "vlslB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} ins_encode %{ @@ -2881,7 +3246,6 @@ instruct vlslB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) instruct vlslS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ match(Set dst_src (LShiftVS (Binary dst_src shift) vmask)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src, TEMP v0); format %{ "vlslS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} ins_encode %{ @@ -2902,7 +3266,6 @@ instruct vlslS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) instruct vlslI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ match(Set dst_src (LShiftVI (Binary dst_src shift) v0)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src); format %{ "vlslI_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ @@ -2915,7 +3278,6 @@ instruct vlslI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ instruct vlslL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ match(Set dst_src (LShiftVL (Binary dst_src shift) v0)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src); format %{ "vlslL_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ @@ -2928,7 +3290,6 @@ instruct vlslL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ instruct vlsrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ match(Set dst (URShiftVB src shift)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP v0); format %{ "vlsrB $dst, $src, $shift" %} ins_encode %{ @@ -2947,7 +3308,6 @@ instruct vlsrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ instruct vlsrS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ match(Set dst (URShiftVS src shift)); - ins_cost(VEC_COST); effect(TEMP_DEF dst, TEMP v0); format %{ "vlsrS $dst, $src, $shift" %} ins_encode %{ @@ -2966,7 +3326,6 @@ instruct vlsrS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ instruct vlsrI(vReg dst, vReg src, vReg shift) %{ match(Set dst (URShiftVI src shift)); - ins_cost(VEC_COST); format %{ "vlsrI $dst, $src, $shift" %} ins_encode %{ __ vsetvli_helper(T_INT, Matcher::vector_length(this)); @@ -2978,7 +3337,6 @@ instruct vlsrI(vReg dst, vReg src, vReg shift) %{ instruct vlsrL(vReg dst, vReg src, vReg shift) %{ 
match(Set dst (URShiftVL src shift)); - ins_cost(VEC_COST); format %{ "vlsrL $dst, $src, $shift" %} ins_encode %{ __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); @@ -2990,7 +3348,6 @@ instruct vlsrL(vReg dst, vReg src, vReg shift) %{ instruct vlsrB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ match(Set dst_src (URShiftVB (Binary dst_src shift) vmask)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src, TEMP v0); format %{ "vlsrB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} ins_encode %{ @@ -3011,7 +3368,6 @@ instruct vlsrB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) instruct vlsrS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ match(Set dst_src (URShiftVS (Binary dst_src shift) vmask)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src, TEMP v0); format %{ "vlsrS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} ins_encode %{ @@ -3032,7 +3388,6 @@ instruct vlsrS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) instruct vlsrI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ match(Set dst_src (URShiftVI (Binary dst_src shift) v0)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src); format %{ "vlsrI_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ @@ -3045,7 +3400,6 @@ instruct vlsrI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ instruct vlsrL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ match(Set dst_src (URShiftVL (Binary dst_src shift) v0)); - ins_cost(VEC_COST); effect(TEMP_DEF dst_src); format %{ "vlsrL_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ @@ -3056,10 +3410,9 @@ instruct vlsrL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ +instruct vasrB_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (RShiftVB src (RShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vasrB_imm $dst, $src, $shift" %} + format %{ "vasrB_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); @@ -3074,10 +3427,9 @@ instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ +instruct vasrS_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (RShiftVS src (RShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vasrS_imm $dst, $src, $shift" %} + format %{ "vasrS_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); @@ -3092,10 +3444,9 @@ instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ +instruct vasrI_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (RShiftVI src (RShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vasrI_imm $dst, $src, $shift" %} + format %{ "vasrI_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_INT, Matcher::vector_length(this)); @@ -3109,11 +3460,10 @@ instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ +instruct vasrL_vi(vReg dst, vReg src, immI shift) %{ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); match(Set dst (RShiftVL src (RShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vasrL_imm $dst, $src, $shift" %} + format %{ "vasrL_vi $dst, $src, $shift" %} ins_encode %{ uint32_t 
con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); @@ -3127,10 +3477,9 @@ instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vasrB_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vasrB_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (RShiftVB (Binary dst_src (RShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vasrB_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vasrB_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; if (con == 0) { @@ -3144,10 +3493,9 @@ instruct vasrB_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vasrS_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vasrS_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (RShiftVS (Binary dst_src (RShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vasrS_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vasrS_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; if (con == 0) { @@ -3161,10 +3509,9 @@ instruct vasrS_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vasrI_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vasrI_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (RShiftVI (Binary dst_src (RShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vasrI_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vasrI_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; if (con == 0) { @@ -3177,11 +3524,10 @@ instruct vasrI_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vasrL_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vasrL_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ predicate((n->in(1)->in(2)->in(1)->get_int() & 0x3f) < 32); match(Set dst_src (RShiftVL (Binary dst_src (RShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vasrL_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vasrL_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; if (con == 0) { @@ -3194,10 +3540,9 @@ instruct vasrL_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ +instruct vlsrB_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (URShiftVB src (RShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vlsrB_imm $dst, $src, $shift" %} + format %{ "vlsrB_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); @@ -3216,10 +3561,9 @@ instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ +instruct vlsrS_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (URShiftVS src (RShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vlsrS_imm $dst, $src, $shift" %} + format %{ "vlsrS_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); @@ -3238,10 +3582,9 @@ instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vlsrI_imm(vReg dst, vReg src, immI 
shift) %{ +instruct vlsrI_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (URShiftVI src (RShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vlsrI_imm $dst, $src, $shift" %} + format %{ "vlsrI_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_INT, Matcher::vector_length(this)); @@ -3255,11 +3598,10 @@ instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ +instruct vlsrL_vi(vReg dst, vReg src, immI shift) %{ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); match(Set dst (URShiftVL src (RShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vlsrL_imm $dst, $src, $shift" %} + format %{ "vlsrL_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); @@ -3273,10 +3615,9 @@ instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vlsrB_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vlsrB_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (URShiftVB (Binary dst_src (RShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vlsrB_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vlsrB_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; if (con == 0) { @@ -3294,10 +3635,9 @@ instruct vlsrB_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vlsrS_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vlsrS_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (URShiftVS (Binary dst_src (RShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vlsrS_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vlsrS_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; if (con == 0) { @@ -3315,10 +3655,9 @@ instruct vlsrS_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vlsrI_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vlsrI_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (URShiftVI (Binary dst_src (RShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vlsrI_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vlsrI_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; if (con == 0) { @@ -3331,11 +3670,10 @@ instruct vlsrI_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vlsrL_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vlsrL_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ predicate((n->in(1)->in(2)->in(1)->get_int() & 0x3f) < 32); match(Set dst_src (URShiftVL (Binary dst_src (RShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vlsrL_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vlsrL_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; if (con == 0) { @@ -3348,10 +3686,9 @@ instruct vlsrL_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ +instruct vlslB_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (LShiftVB src (LShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vlslB_imm $dst, $src, $shift" %} + format 
%{ "vlslB_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); @@ -3365,10 +3702,9 @@ instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ +instruct vlslS_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (LShiftVS src (LShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vlslS_imm $dst, $src, $shift" %} + format %{ "vlslS_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); @@ -3382,10 +3718,9 @@ instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ +instruct vlslI_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (LShiftVI src (LShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vlslI_imm $dst, $src, $shift" %} + format %{ "vlslI_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_INT, Matcher::vector_length(this)); @@ -3394,11 +3729,10 @@ instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ +instruct vlslL_vi(vReg dst, vReg src, immI shift) %{ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); match(Set dst (LShiftVL src (LShiftCntV shift))); - ins_cost(VEC_COST); - format %{ "vlslL_imm $dst, $src, $shift" %} + format %{ "vlslL_vi $dst, $src, $shift" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); @@ -3407,10 +3741,9 @@ instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} -instruct vlslB_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vlslB_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (LShiftVB (Binary dst_src (LShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vlslB_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vlslB_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); @@ -3425,10 +3758,9 @@ instruct vlslB_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vlslS_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vlslS_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (LShiftVS (Binary dst_src (LShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vlslS_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vlslS_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); @@ -3443,10 +3775,9 @@ instruct vlslS_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vlslI_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vlslI_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (LShiftVI (Binary dst_src (LShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vlslI_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vlslI_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_INT, Matcher::vector_length(this)); @@ -3456,11 +3787,10 @@ instruct vlslI_imm_masked(vReg dst_src, 
immI shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vlslL_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vlslL_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ predicate((n->in(1)->in(2)->in(1)->get_int() & 0x3f) < 32); match(Set dst_src (LShiftVL (Binary dst_src (LShiftCntV shift)) v0)); - ins_cost(VEC_COST); - format %{ "vlslL_imm_masked $dst_src, $dst_src, $shift, $v0" %} + format %{ "vlslL_vi_masked $dst_src, $dst_src, $shift, $v0" %} ins_encode %{ uint32_t con = (unsigned)$shift$$constant & 0x1f; __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); @@ -3499,9 +3829,10 @@ instruct vrotate_right(vReg dst, vReg src, vReg shift) %{ ins_pipe(pipe_slow); %} -instruct vrotate_right_reg(vReg dst, vReg src, iRegIorL2I shift) %{ +// Only the low log2(SEW) bits of shift value are used, all other bits are ignored. +instruct vrotate_right_vx(vReg dst, vReg src, iRegIorL2I shift) %{ match(Set dst (RotateRightV src (Replicate shift))); - format %{ "vrotate_right_reg $dst, $src, $shift\t" %} + format %{ "vrotate_right_vx $dst, $src, $shift\t" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -3511,9 +3842,9 @@ instruct vrotate_right_reg(vReg dst, vReg src, iRegIorL2I shift) %{ ins_pipe(pipe_slow); %} -instruct vrotate_right_imm(vReg dst, vReg src, immI shift) %{ +instruct vrotate_right_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (RotateRightV src shift)); - format %{ "vrotate_right_imm $dst, $src, $shift\t" %} + format %{ "vrotate_right_vi $dst, $src, $shift\t" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); uint32_t bits = type2aelembytes(bt) * 8; @@ -3531,7 +3862,7 @@ instruct vrotate_right_imm(vReg dst, vReg src, immI shift) %{ instruct vrotate_right_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ match(Set dst_src (RotateRightV (Binary dst_src shift) v0)); - format %{ "vrotate_right_masked $dst_src, $dst_src, $shift, v0.t\t" %} + format %{ "vrotate_right_masked $dst_src, $dst_src, $shift, $v0\t" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -3541,9 +3872,10 @@ instruct vrotate_right_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vrotate_right_reg_masked(vReg dst_src, iRegIorL2I shift, vRegMask_V0 v0) %{ +// Only the low log2(SEW) bits of shift value are used, all other bits are ignored. 
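+// (e.g. with SEW = 32 only the low 5 bits are used, so a scalar shift amount of 35 rotates by 35 & 31 = 3)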
+instruct vrotate_right_vx_masked(vReg dst_src, iRegIorL2I shift, vRegMask_V0 v0) %{ match(Set dst_src (RotateRightV (Binary dst_src (Replicate shift)) v0)); - format %{ "vrotate_right_reg_masked $dst_src, $dst_src, $shift, v0.t\t" %} + format %{ "vrotate_right_vx_masked $dst_src, $dst_src, $shift, $v0\t" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -3553,9 +3885,9 @@ instruct vrotate_right_reg_masked(vReg dst_src, iRegIorL2I shift, vRegMask_V0 v0 ins_pipe(pipe_slow); %} -instruct vrotate_right_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vrotate_right_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (RotateRightV (Binary dst_src shift) v0)); - format %{ "vrotate_right_imm_masked $dst_src, $dst_src, $shift, v0.t\t" %} + format %{ "vrotate_right_vi_masked $dst_src, $dst_src, $shift, $v0\t" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); uint32_t bits = type2aelembytes(bt) * 8; @@ -3584,9 +3916,10 @@ instruct vrotate_left(vReg dst, vReg src, vReg shift) %{ ins_pipe(pipe_slow); %} -instruct vrotate_left_reg(vReg dst, vReg src, iRegIorL2I shift) %{ +// Only the low log2(SEW) bits of shift value are used, all other bits are ignored. +instruct vrotate_left_vx(vReg dst, vReg src, iRegIorL2I shift) %{ match(Set dst (RotateLeftV src (Replicate shift))); - format %{ "vrotate_left_reg $dst, $src, $shift\t" %} + format %{ "vrotate_left_vx $dst, $src, $shift\t" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -3596,9 +3929,9 @@ instruct vrotate_left_reg(vReg dst, vReg src, iRegIorL2I shift) %{ ins_pipe(pipe_slow); %} -instruct vrotate_left_imm(vReg dst, vReg src, immI shift) %{ +instruct vrotate_left_vi(vReg dst, vReg src, immI shift) %{ match(Set dst (RotateLeftV src shift)); - format %{ "vrotate_left_imm $dst, $src, $shift\t" %} + format %{ "vrotate_left_vi $dst, $src, $shift\t" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); uint32_t bits = type2aelembytes(bt) * 8; @@ -3617,7 +3950,7 @@ instruct vrotate_left_imm(vReg dst, vReg src, immI shift) %{ instruct vrotate_left_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ match(Set dst_src (RotateLeftV (Binary dst_src shift) v0)); - format %{ "vrotate_left_masked $dst_src, $dst_src, $shift, v0.t\t" %} + format %{ "vrotate_left_masked $dst_src, $dst_src, $shift, $v0\t" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -3627,9 +3960,10 @@ instruct vrotate_left_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ ins_pipe(pipe_slow); %} -instruct vrotate_left_reg_masked(vReg dst_src, iRegIorL2I shift, vRegMask_V0 v0) %{ +// Only the low log2(SEW) bits of shift value are used, all other bits are ignored. 
+instruct vrotate_left_vx_masked(vReg dst_src, iRegIorL2I shift, vRegMask_V0 v0) %{ match(Set dst_src (RotateLeftV (Binary dst_src (Replicate shift)) v0)); - format %{ "vrotate_left_reg_masked $dst_src, $dst_src, $shift, v0.t\t" %} + format %{ "vrotate_left_vx_masked $dst_src, $dst_src, $shift, $v0\t" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -3639,9 +3973,9 @@ instruct vrotate_left_reg_masked(vReg dst_src, iRegIorL2I shift, vRegMask_V0 v0) ins_pipe(pipe_slow); %} -instruct vrotate_left_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ +instruct vrotate_left_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ match(Set dst_src (RotateLeftV (Binary dst_src shift) v0)); - format %{ "vrotate_left_imm_masked $dst_src, $dst_src, $shift, v0.t\t" %} + format %{ "vrotate_left_vi_masked $dst_src, $dst_src, $shift, $v0\t" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); uint32_t bits = type2aelembytes(bt) * 8; @@ -3659,10 +3993,21 @@ instruct vrotate_left_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ // vector sqrt +instruct vsqrt_hfp(vReg dst, vReg src) %{ + match(Set dst (SqrtVHF src)); + format %{ "vsqrt_hfp $dst, $src" %} + ins_encode %{ + assert(UseZvfh, "must"); + assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must"); + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + instruct vsqrt_fp(vReg dst, vReg src) %{ match(Set dst (SqrtVF src)); match(Set dst (SqrtVD src)); - ins_cost(VEC_COST); format %{ "vsqrt_fp $dst, $src" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -3677,7 +4022,6 @@ instruct vsqrt_fp(vReg dst, vReg src) %{ instruct vsqrt_fp_masked(vReg dst_src, vRegMask_V0 v0) %{ match(Set dst_src (SqrtVF dst_src v0)); match(Set dst_src (SqrtVD dst_src v0)); - ins_cost(VEC_COST); format %{ "vsqrt_fp_masked $dst_src, $dst_src, $v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -3976,7 +4320,6 @@ instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, // Vector Load Const instruct vloadcon(vReg dst, immI0 src) %{ match(Set dst (VectorLoadConst src)); - ins_cost(VEC_COST); format %{ "vloadcon $dst\t# generate iota indices" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -4237,8 +4580,8 @@ instruct vcvtStoB(vReg dst, vReg src) %{ %} instruct vcvtStoX(vReg dst, vReg src) %{ - predicate((Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_LONG)); + predicate(Matcher::vector_element_basic_type(n) == T_INT || + Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst (VectorCastS2X src)); effect(TEMP_DEF dst); format %{ "vcvtStoX $dst, $src" %} @@ -4251,8 +4594,8 @@ instruct vcvtStoX(vReg dst, vReg src) %{ %} instruct vcvtStoX_fp(vReg dst, vReg src) %{ - predicate((Matcher::vector_element_basic_type(n) == T_FLOAT || - Matcher::vector_element_basic_type(n) == T_DOUBLE)); + predicate(Matcher::vector_element_basic_type(n) == T_FLOAT || + Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst (VectorCastS2X src)); effect(TEMP_DEF dst); format %{ "vcvtStoX_fp $dst, $src" %} @@ -4349,9 +4692,9 @@ instruct vcvtItoD(vReg dst, vReg src) %{ // VectorCastL2X instruct vcvtLtoI(vReg dst, vReg src) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT || - 
Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT); match(Set dst (VectorCastL2X src)); format %{ "vcvtLtoI $dst, $src" %} ins_encode %{ @@ -4845,7 +5188,6 @@ instruct vconvF2HF(vReg dst, vReg src, vReg vtmp, vRegMask_V0 v0, iRegINoSp tmp) instruct vpopcount_masked(vReg dst_src, vRegMask_V0 v0) %{ match(Set dst_src (PopCountVI dst_src v0)); match(Set dst_src (PopCountVL dst_src v0)); - ins_cost(VEC_COST); format %{ "vcpop_v $dst_src, $dst_src, $v0\t# vcpop_v with mask" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -4859,7 +5201,6 @@ instruct vpopcount_masked(vReg dst_src, vRegMask_V0 v0) %{ instruct vpopcount(vReg dst, vReg src) %{ match(Set dst (PopCountVI src)); match(Set dst (PopCountVL src)); - ins_cost(VEC_COST); format %{ "vcpop_v $dst, $src\t# vcpop_v without mask" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -4874,7 +5215,6 @@ instruct vpopcount(vReg dst, vReg src) %{ instruct vcountLeadingZeros_masked(vReg dst_src, vRegMask_V0 v0) %{ match(Set dst_src (CountLeadingZerosV dst_src v0)); - ins_cost(VEC_COST); format %{ "vcount_leading_zeros_masked $dst_src, $dst_src, v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -4887,7 +5227,6 @@ instruct vcountLeadingZeros_masked(vReg dst_src, vRegMask_V0 v0) %{ instruct vcountLeadingZeros(vReg dst, vReg src) %{ match(Set dst (CountLeadingZerosV src)); - ins_cost(VEC_COST); format %{ "vcount_leading_zeros $dst, $src" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -4902,7 +5241,6 @@ instruct vcountLeadingZeros(vReg dst, vReg src) %{ instruct vcountTrailingZeros_masked(vReg dst_src, vRegMask_V0 v0) %{ match(Set dst_src (CountTrailingZerosV dst_src v0)); - ins_cost(VEC_COST); format %{ "vcount_trailing_zeros_masked $dst_src, $dst_src, v0" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -4915,7 +5253,6 @@ instruct vcountTrailingZeros_masked(vReg dst_src, vRegMask_V0 v0) %{ instruct vcountTrailingZeros(vReg dst, vReg src) %{ match(Set dst (CountTrailingZerosV src)); - ins_cost(VEC_COST); format %{ "vcount_trailing_zeros $dst, $src" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); @@ -5087,14 +5424,14 @@ instruct populateindex(vReg dst, iRegIorL2I src1, iRegIorL2I src2, vReg tmp) %{ // BYTE, SHORT, INT -instruct insertI_index_lt32(vReg dst, vReg src, iRegIorL2I val, immI idx, vRegMask_V0 v0) %{ +instruct insert_index_lt32(vReg dst, vReg src, iRegIorL2I val, immI idx, vRegMask_V0 v0) %{ predicate(n->in(2)->get_int() < 32 && (Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT || Matcher::vector_element_basic_type(n) == T_INT)); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP v0); - format %{ "insertI_index_lt32 $dst, $src, $val, $idx" %} + format %{ "insert_index_lt32 $dst, $src, $val, $idx" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); @@ -5106,14 +5443,14 @@ instruct insertI_index_lt32(vReg dst, vReg src, iRegIorL2I val, immI idx, vRegMa ins_pipe(pipe_slow); %} -instruct insertI_index(vReg dst, vReg src, iRegIorL2I val, iRegIorL2I idx, vReg tmp, vRegMask_V0 v0) %{ +instruct insert_index(vReg dst, vReg src, iRegIorL2I 
val, iRegIorL2I idx, vReg tmp, vRegMask_V0 v0) %{ predicate(n->in(2)->get_int() >= 32 && (Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT || Matcher::vector_element_basic_type(n) == T_INT)); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP tmp, TEMP v0); - format %{ "insertI_index $dst, $src, $val, $idx\t# KILL $tmp" %} + format %{ "insert_index $dst, $src, $val, $idx\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); diff --git a/src/hotspot/cpu/riscv/runtime_riscv.cpp b/src/hotspot/cpu/riscv/runtime_riscv.cpp index 44a8e35e285..7c8ca853bc4 100644 --- a/src/hotspot/cpu/riscv/runtime_riscv.cpp +++ b/src/hotspot/cpu/riscv/runtime_riscv.cpp @@ -63,6 +63,9 @@ UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() { // Setup code generation tools const char* name = OptoRuntime::stub_name(OptoStubId::uncommon_trap_id); CodeBuffer buffer(name, 2048, 1024); + if (buffer.blob() == nullptr) { + return nullptr; + } MacroAssembler* masm = new MacroAssembler(&buffer); assert_cond(masm != nullptr); @@ -282,6 +285,9 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() { // Setup code generation tools const char* name = OptoRuntime::stub_name(OptoStubId::exception_id); CodeBuffer buffer(name, 2048, 1024); + if (buffer.blob() == nullptr) { + return nullptr; + } MacroAssembler* masm = new MacroAssembler(&buffer); assert_cond(masm != nullptr); diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index 10790841490..391be81c1ae 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -596,12 +596,13 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } // --------------------------------------------------------------- -AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { + +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterHandlerEntry* handler) { address i2c_entry = __ pc(); gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); @@ -658,7 +659,8 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + handler->set_entry_points(i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + return; } int SharedRuntime::vector_calling_convention(VMRegPair *regs, @@ -1323,7 +1325,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // First instruction must be a nop as it may need to be patched on deoptimisation { - Assembler::IncompressibleRegion ir(masm); // keep the nop as 4 bytes for patching. + Assembler::IncompressibleScope scope(masm); // keep the nop as 4 bytes for patching. MacroAssembler::assert_alignment(__ pc()); __ nop(); // 4 bytes } @@ -1466,7 +1468,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // If we have to make this method not-entrant we'll overwrite its // first instruction with a jump. 
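// The incompressible scope below keeps RVC compression off, so the nop stays a full 4-byte instruction that the jump can later be patched over.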
{ - Assembler::IncompressibleRegion ir(masm); // keep the nop as 4 bytes for patching. + Assembler::IncompressibleScope scope(masm); // keep the nop as 4 bytes for patching. MacroAssembler::assert_alignment(__ pc()); __ nop(); // 4 bytes } @@ -1892,6 +1894,24 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ leave(); + #if INCLUDE_JFR + // We need to do a poll test after unwind in case the sampler + // managed to sample the native frame after returning to Java. + Label L_return; + __ ld(t0, Address(xthread, JavaThread::polling_word_offset())); + address poll_test_pc = __ pc(); + __ relocate(relocInfo::poll_return_type); + __ test_bit(t0, t0, log2i_exact(SafepointMechanism::poll_bit())); + __ beqz(t0, L_return); + assert(SharedRuntime::polling_page_return_handler_blob() != nullptr, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + __ la(t0, InternalAddress(poll_test_pc)); + __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); + __ far_jump(RuntimeAddress(stub)); + __ bind(L_return); +#endif // INCLUDE_JFR + // Any exception pending? Label exception_pending; __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp index 4527a32926f..c58f6bc338d 100644 --- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2025, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2025, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -886,7 +886,7 @@ class StubGenerator: public StubCodeGenerator { void copy_memory_v(Register s, Register d, Register count, int step) { bool is_backward = step < 0; - int granularity = uabs(step); + int granularity = g_uabs(step); const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; assert_different_registers(s, d, cnt, vl, tmp1, tmp2); @@ -948,7 +948,7 @@ class StubGenerator: public StubCodeGenerator { } bool is_backwards = step < 0; - int granularity = uabs(step); + int granularity = g_uabs(step); const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17, tmp5 = x14, tmp6 = x13; const Register gct1 = x28, gct2 = x29, gct3 = t2; @@ -1633,6 +1633,126 @@ class StubGenerator: public StubCodeGenerator { BLOCK_COMMENT("arraycopy_range_checks done"); } + address generate_unsafecopy_common_error_exit() { + address start = __ pc(); + __ mv(x10, 0); + __ leave(); + __ ret(); + return start; + } + + // + // Generate 'unsafe' set memory stub + // Though just as safe as the other stubs, it takes an unscaled + // size_t (# bytes) argument instead of an element count. 
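+  // Used for the Unsafe.setMemory intrinsic (installed below as StubRoutines::_unsafe_setmemory).
+  // Counts below 8 bytes are filled with byte stores; larger counts splat the byte value
+  // to 64 bits (e.g. 0xAB -> 0xABABABABABABABAB), align the destination to 8 bytes, store
+  // whole words, and finish any remaining tail with byte stores.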
+ // + // Input: + // c_rarg0 - destination array address + // c_rarg1 - byte count (size_t) + // c_rarg2 - byte value + // + address generate_unsafe_setmemory() { + __ align(CodeEntryAlignment); + StubGenStubId stub_id = StubGenStubId::unsafe_setmemory_id; + StubCodeMark mark(this, stub_id); + address start = __ pc(); + + // bump this on entry, not on exit: + // inc_counter_np(SharedRuntime::_unsafe_set_memory_ctr); + + Label L_fill_elements; + + const Register dest = c_rarg0; + const Register count = c_rarg1; + const Register value = c_rarg2; + const Register cnt_words = x28; // temp register + const Register tmp_reg = x29; // temp register + + // Mark remaining code as such which performs Unsafe accesses. + UnsafeMemoryAccessMark umam(this, true, false); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + // if count < 8, jump to L_fill_elements + __ mv(tmp_reg, 8); // 8 bytes fill by element + __ bltu(count, tmp_reg, L_fill_elements); + + // Propagate byte to 64-bit width + // 8 bit -> 16 bit + __ zext(value, value, 8); + __ slli(tmp_reg, value, 8); + __ orr(value, value, tmp_reg); + // 16 bit -> 32 bit + __ slli(tmp_reg, value, 16); + __ orr(value, value, tmp_reg); + // 32 bit -> 64 bit + __ slli(tmp_reg, value, 32); + __ orr(value, value, tmp_reg); + + // Align source address at 8 bytes address boundary. + Label L_skip_align1, L_skip_align2, L_skip_align4; + // One byte misalignment happens. + __ test_bit(tmp_reg, dest, 0); + __ beqz(tmp_reg, L_skip_align1); + __ sb(value, Address(dest, 0)); + __ addi(dest, dest, 1); + __ subi(count, count, 1); + + __ bind(L_skip_align1); + // Two bytes misalignment happens. + __ test_bit(tmp_reg, dest, 1); + __ beqz(tmp_reg, L_skip_align2); + __ sh(value, Address(dest, 0)); + __ addi(dest, dest, 2); + __ subi(count, count, 2); + + __ bind(L_skip_align2); + // Four bytes misalignment happens. 
+ __ test_bit(tmp_reg, dest, 2); + __ beqz(tmp_reg, L_skip_align4); + __ sw(value, Address(dest, 0)); + __ addi(dest, dest, 4); + __ subi(count, count, 4); + __ bind(L_skip_align4); + + // Fill large chunks + __ srli(cnt_words, count, 3); // number of words + __ slli(tmp_reg, cnt_words, 3); + __ sub(count, count, tmp_reg); + { + __ fill_words(dest, cnt_words, value); + } + + // Handle copies less than 8 bytes + __ bind(L_fill_elements); + Label L_fill_2, L_fill_1, L_exit; + __ test_bit(tmp_reg, count, 2); + __ beqz(tmp_reg, L_fill_2); + __ sb(value, Address(dest, 0)); + __ sb(value, Address(dest, 1)); + __ sb(value, Address(dest, 2)); + __ sb(value, Address(dest, 3)); + __ addi(dest, dest, 4); + + __ bind(L_fill_2); + __ test_bit(tmp_reg, count, 1); + __ beqz(tmp_reg, L_fill_1); + __ sb(value, Address(dest, 0)); + __ sb(value, Address(dest, 1)); + __ addi(dest, dest, 2); + + __ bind(L_fill_1); + __ test_bit(tmp_reg, count, 0); + __ beqz(tmp_reg, L_exit); + __ sb(value, Address(dest, 0)); + + __ bind(L_exit); + __ leave(); + __ ret(); + + return start; + } + // // Generate 'unsafe' array copy stub // Though just as safe as the other stubs, it takes an unscaled @@ -2029,44 +2149,40 @@ class StubGenerator: public StubCodeGenerator { __ enter(); - Label L_fill_elements, L_exit1; + Label L_fill_elements; int shift = -1; switch (t) { case T_BYTE: shift = 0; + // Short arrays (< 8 bytes) fill by element + __ mv(tmp_reg, 8 >> shift); + __ bltu(count, tmp_reg, L_fill_elements); // Zero extend value // 8 bit -> 16 bit __ zext(value, value, 8); - __ mv(tmp_reg, value); - __ slli(tmp_reg, tmp_reg, 8); + __ slli(tmp_reg, value, 8); __ orr(value, value, tmp_reg); // 16 bit -> 32 bit - __ mv(tmp_reg, value); - __ slli(tmp_reg, tmp_reg, 16); + __ slli(tmp_reg, value, 16); __ orr(value, value, tmp_reg); - - __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element - __ bltu(count, tmp_reg, L_fill_elements); break; case T_SHORT: shift = 1; + // Short arrays (< 8 bytes) fill by element + __ mv(tmp_reg, 8 >> shift); + __ bltu(count, tmp_reg, L_fill_elements); + // Zero extend value // 16 bit -> 32 bit __ zext(value, value, 16); - __ mv(tmp_reg, value); - __ slli(tmp_reg, tmp_reg, 16); + __ slli(tmp_reg, value, 16); __ orr(value, value, tmp_reg); - - // Short arrays (< 8 bytes) fill by element - __ mv(tmp_reg, 8 >> shift); - __ bltu(count, tmp_reg, L_fill_elements); break; case T_INT: shift = 2; - // Short arrays (< 8 bytes) fill by element __ mv(tmp_reg, 8 >> shift); __ bltu(count, tmp_reg, L_fill_elements); @@ -2080,8 +2196,8 @@ class StubGenerator: public StubCodeGenerator { switch (t) { case T_BYTE: // One byte misalignment happens only for byte arrays. - __ test_bit(t0, to, 0); - __ beqz(t0, L_skip_align1); + __ test_bit(tmp_reg, to, 0); + __ beqz(tmp_reg, L_skip_align1); __ sb(value, Address(to, 0)); __ addi(to, to, 1); __ subiw(count, count, 1); @@ -2089,8 +2205,8 @@ class StubGenerator: public StubCodeGenerator { // Fallthrough case T_SHORT: // Two bytes misalignment happens only for byte and short (char) arrays. - __ test_bit(t0, to, 1); - __ beqz(t0, L_skip_align2); + __ test_bit(tmp_reg, to, 1); + __ beqz(tmp_reg, L_skip_align2); __ sh(value, Address(to, 0)); __ addi(to, to, 2); __ subiw(count, count, 2 >> shift); @@ -2098,8 +2214,8 @@ class StubGenerator: public StubCodeGenerator { // Fallthrough case T_INT: // Align to 8 bytes, we know we are 4 byte aligned to start. 
- __ test_bit(t0, to, 2); - __ beqz(t0, L_skip_align4); + __ test_bit(tmp_reg, to, 2); + __ beqz(tmp_reg, L_skip_align4); __ sw(value, Address(to, 0)); __ addi(to, to, 4); __ subiw(count, count, 4 >> shift); @@ -2125,55 +2241,54 @@ class StubGenerator: public StubCodeGenerator { __ fill_words(to, cnt_words, value); } - // Remaining count is less than 8 bytes. Fill it by a single store. - // Note that the total length is no less than 8 bytes. - if (!AvoidUnalignedAccesses && (t == T_BYTE || t == T_SHORT)) { - __ beqz(count, L_exit1); - __ shadd(to, count, to, tmp_reg, shift); // points to the end - __ sd(value, Address(to, -8)); // overwrite some elements - __ bind(L_exit1); - __ leave(); - __ ret(); - } - // Handle copies less than 8 bytes. - Label L_fill_2, L_fill_4, L_exit2; + // Address may not be heapword aligned. + Label L_fill_1, L_fill_2, L_exit; __ bind(L_fill_elements); switch (t) { case T_BYTE: - __ test_bit(t0, count, 0); - __ beqz(t0, L_fill_2); + __ test_bit(tmp_reg, count, 2); + __ beqz(tmp_reg, L_fill_2); __ sb(value, Address(to, 0)); - __ addi(to, to, 1); + __ sb(value, Address(to, 1)); + __ sb(value, Address(to, 2)); + __ sb(value, Address(to, 3)); + __ addi(to, to, 4); + __ bind(L_fill_2); - __ test_bit(t0, count, 1); - __ beqz(t0, L_fill_4); - __ sh(value, Address(to, 0)); + __ test_bit(tmp_reg, count, 1); + __ beqz(tmp_reg, L_fill_1); + __ sb(value, Address(to, 0)); + __ sb(value, Address(to, 1)); __ addi(to, to, 2); - __ bind(L_fill_4); - __ test_bit(t0, count, 2); - __ beqz(t0, L_exit2); - __ sw(value, Address(to, 0)); + + __ bind(L_fill_1); + __ test_bit(tmp_reg, count, 0); + __ beqz(tmp_reg, L_exit); + __ sb(value, Address(to, 0)); break; case T_SHORT: - __ test_bit(t0, count, 0); - __ beqz(t0, L_fill_4); + __ test_bit(tmp_reg, count, 1); + __ beqz(tmp_reg, L_fill_2); + __ sh(value, Address(to, 0)); + __ sh(value, Address(to, 2)); + __ addi(to, to, 4); + + __ bind(L_fill_2); + __ test_bit(tmp_reg, count, 0); + __ beqz(tmp_reg, L_exit); __ sh(value, Address(to, 0)); - __ addi(to, to, 2); - __ bind(L_fill_4); - __ test_bit(t0, count, 1); - __ beqz(t0, L_exit2); - __ sw(value, Address(to, 0)); break; case T_INT: - __ beqz(count, L_exit2); + __ beqz(count, L_exit); __ sw(value, Address(to, 0)); break; default: ShouldNotReachHere(); } - __ bind(L_exit2); + __ bind(L_exit); __ leave(); __ ret(); + return start; } @@ -2189,6 +2304,9 @@ class StubGenerator: public StubCodeGenerator { generate_copy_longs(StubGenStubId::copy_byte_f_id, copy_f, c_rarg0, c_rarg1, t1); generate_copy_longs(StubGenStubId::copy_byte_b_id, copy_b, c_rarg0, c_rarg1, t1); + address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeMemoryAccess::set_common_exit_stub_pc(ucm_common_error_exit); + StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); //*** jbyte @@ -2259,6 +2377,8 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_arrayof_jbyte_fill = generate_fill(StubGenStubId::arrayof_jbyte_fill_id); StubRoutines::_arrayof_jshort_fill = generate_fill(StubGenStubId::arrayof_jshort_fill_id); StubRoutines::_arrayof_jint_fill = generate_fill(StubGenStubId::arrayof_jint_fill_id); + + StubRoutines::_unsafe_setmemory = generate_unsafe_setmemory(); } void generate_aes_loadkeys(const Register &key, VectorRegister *working_vregs, int rounds) { @@ -6458,58 +6578,6 @@ static const int64_t right_3_bits = right_n_bits(3); return start; } - void generate_vector_math_stubs() { - if (!UseRVV) { - log_info(library)("vector is not supported, skip loading vector math (sleef) 
library!"); - return; - } - - // Get native vector math stub routine addresses - void* libsleef = nullptr; - char ebuf[1024]; - char dll_name[JVM_MAXPATHLEN]; - if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) { - libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf); - } - if (libsleef == nullptr) { - log_info(library)("Failed to load native vector math (sleef) library, %s!", ebuf); - return; - } - - // Method naming convention - // All the methods are named as _ - // - // Where: - // is the operation name, e.g. sin, cos - // is to indicate float/double - // "fx/dx" for vector float/double operation - // is the precision level - // "u10/u05" represents 1.0/0.5 ULP error bounds - // We use "u10" for all operations by default - // But for those functions do not have u10 support, we use "u05" instead - // rvv, indicates riscv vector extension - // - // e.g. sinfx_u10rvv is the method for computing vector float sin using rvv instructions - // - log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef)); - - for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { - int vop = VectorSupport::VECTOR_OP_MATH_START + op; - if (vop == VectorSupport::VECTOR_OP_TANH) { // skip tanh because of performance regression - continue; - } - - // The native library does not support u10 level of "hypot". - const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10"; - - snprintf(ebuf, sizeof(ebuf), "%sfx_%srvv", VectorSupport::mathname[op], ulf); - StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf); - - snprintf(ebuf, sizeof(ebuf), "%sdx_%srvv", VectorSupport::mathname[op], ulf); - StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf); - } - } - #endif // COMPILER2 /** @@ -6741,8 +6809,6 @@ static const int64_t right_3_bits = right_n_bits(3); generate_string_indexof_stubs(); - generate_vector_math_stubs(); - #endif // COMPILER2 } diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp index 72e1180164b..b8de3547c83 100644 --- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp @@ -765,6 +765,10 @@ void TemplateInterpreterGenerator::lock_method() { // xcpool: cp cache // stack_pointer: previous sp void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + // Save ConstMethod* in x15_const_method for later use to avoid loading multiple times + Register x15_const_method = x15; + __ ld(x15_const_method, Address(xmethod, Method::const_offset())); + // initialize fixed part of activation frame if (native_call) { __ subi(esp, sp, 14 * wordSize); @@ -775,8 +779,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ sd(zr, Address(sp, 12 * wordSize)); } else { __ subi(esp, sp, 12 * wordSize); - __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod - __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase + __ add(xbcp, x15_const_method, in_bytes(ConstMethod::codes_offset())); // get codebase __ subi(sp, sp, 12 * wordSize); } __ sd(xbcp, Address(sp, wordSize)); @@ -798,9 +801,10 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ sd(fp, Address(sp, 10 * wordSize)); __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp - __ 
ld(xcpool, Address(xmethod, Method::const_offset())); - __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); - __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset())); + // Save ConstantPool* in x28_constants for later use to avoid loading multiple times + Register x28_constants = x28; + __ ld(x28_constants, Address(x15_const_method, ConstMethod::constants_offset())); + __ ld(xcpool, Address(x28_constants, ConstantPool::cache_offset())); __ sd(xcpool, Address(sp, 3 * wordSize)); __ sub(t0, xlocals, fp); __ srai(t0, t0, Interpreter::logStackElementSize); // t0 = xlocals - fp(); @@ -812,13 +816,15 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ sd(x19_sender_sp, Address(sp, 9 * wordSize)); __ sd(zr, Address(sp, 8 * wordSize)); - // Get mirror and store it in the frame as GC root for this Method* - __ load_mirror(t2, xmethod, x15, t1); + // Get mirror, Resolve ConstantPool* -> InstanceKlass* -> Java mirror + // and store it in the frame as GC root for this Method* + __ ld(t2, Address(x28_constants, ConstantPool::pool_holder_offset())); + __ ld(t2, Address(t2, in_bytes(Klass::java_mirror_offset()))); + __ resolve_oop_handle(t2, t0, t1); __ sd(t2, Address(sp, 4 * wordSize)); if (!native_call) { - __ ld(t0, Address(xmethod, Method::const_offset())); - __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); + __ lhu(t0, Address(x15_const_method, ConstMethod::max_stack_offset())); __ add(t0, t0, MAX2(3, Method::extra_stack_entries())); __ slli(t0, t0, 3); __ sub(t0, sp, t0); @@ -1372,6 +1378,31 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ bind(L); } + #if INCLUDE_JFR + __ enter_jfr_critical_section(); + + // This poll test is to uphold the invariant that a JFR sampled frame + // must not return to its caller without a prior safepoint poll check. + // The earlier poll check in this routine is insufficient for this purpose + // because the thread has transitioned back to Java. + + Label slow_path; + Label fast_path; + __ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); + __ j(fast_path); + + __ bind(slow_path); + __ push(dtos); + __ push(ltos); + __ set_last_Java_frame(esp, fp, __ pc(), t0); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); + __ reset_last_Java_frame(true); + __ pop(ltos); + __ pop(dtos); + __ bind(fast_path); + +#endif // INCLUDE_JFR + // jvmti support // Note: This must happen _after_ handling/throwing any exceptions since // the exception handler code notifies the runtime of method exits @@ -1385,10 +1416,13 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ jalr(result_handler); // remove activation - __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp + // get sender sp + __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // remove frame anchor __ leave(); + JFR_ONLY(__ leave_jfr_critical_section();) + // restore sender sp __ mv(sp, esp); @@ -1612,6 +1646,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { Interpreter::_remove_activation_preserving_args_entry = __ pc(); __ empty_expression_stack(); + __ restore_bcp(); // We could have returned from deoptimizing this frame, so restore rbcp. 
// Set the popframe_processing bit in pending_popframe_condition // indicating that we are currently handling popframe, so that // call_VMs that may happen later do not trigger new popframe diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp index a035326be01..f6bf1e79f92 100644 --- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp @@ -125,24 +125,6 @@ static inline Address at_tos_p5() { return Address(esp, Interpreter::expr_offset_in_bytes(5)); } -// Miscellaneous helper routines -// Store an oop (or null) at the Address described by obj. -// If val == noreg this means store a null -static void do_oop_store(InterpreterMacroAssembler* _masm, - Address dst, - Register val, - DecoratorSet decorators) { - assert(val == noreg || val == x10, "parameter is just for looks"); - __ store_heap_oop(dst, val, x28, x29, x13, decorators); -} - -static void do_oop_load(InterpreterMacroAssembler* _masm, - Address src, - Register dst, - DecoratorSet decorators) { - __ load_heap_oop(dst, src, x28, x29, decorators); -} - Address TemplateTable::at_bcp(int offset) { assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); return Address(xbcp, offset); @@ -787,7 +769,7 @@ void TemplateTable::aaload() { index_check(x10, x11); // leaves index in x11 __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); - do_oop_load(_masm, Address(x10), x10, IS_ARRAY); + __ load_heap_oop(x10, Address(x10), x28, x29, IS_ARRAY); } void TemplateTable::baload() { @@ -1099,7 +1081,7 @@ void TemplateTable::aastore() { // Get the value we will store __ ld(x10, at_tos()); // Now store using the appropriate barrier - do_oop_store(_masm, element_address, x10, IS_ARRAY); + __ store_heap_oop(element_address, x10, x28, x29, x13, IS_ARRAY); __ j(done); // Have a null in x10, x13=array, x12=index. Store null at ary[idx] @@ -1107,7 +1089,7 @@ void TemplateTable::aastore() { __ profile_null_seen(x12); // Store a null - do_oop_store(_masm, element_address, noreg, IS_ARRAY); + __ store_heap_oop(element_address, noreg, x28, x29, x13, IS_ARRAY); // Pop stack arguments __ bind(done); @@ -1757,6 +1739,8 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { __ mv(x9, x10); // save the nmethod + JFR_ONLY(__ enter_jfr_critical_section();) + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); // x10 is OSR buffer, move it to expected parameter location @@ -1765,9 +1749,12 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { // remove activation // get sender esp __ ld(esp, - Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); + Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // remove frame anchor __ leave(); + + JFR_ONLY(__ leave_jfr_critical_section();) + // Ensure compiled code always sees stack at proper alignment __ andi(sp, esp, -16); @@ -2560,7 +2547,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ subi(t0, tos_state, (u1)atos); __ bnez(t0, notObj); // atos - do_oop_load(_masm, field, x10, IN_HEAP); + __ load_heap_oop(x10, field, x28, x29, IN_HEAP); __ push(atos); if (rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); @@ -2804,7 +2791,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
const Address field(off, 0); // Store into the field - do_oop_store(_masm, field, x10, IN_HEAP); + __ store_heap_oop(field, x10, x28, x29, x13, IN_HEAP); if (rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); } @@ -3046,10 +3033,10 @@ void TemplateTable::fast_storefield(TosState state) { __ add(x11, x12, x11); const Address field(x11, 0); - // access field + // access field, must not clobber x13 - flags switch (bytecode()) { case Bytecodes::_fast_aputfield: - do_oop_store(_masm, field, x10, IN_HEAP); + __ store_heap_oop(field, x10, x28, x29, x15, IN_HEAP); break; case Bytecodes::_fast_lputfield: __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); @@ -3128,7 +3115,7 @@ void TemplateTable::fast_accessfield(TosState state) { // access field switch (bytecode()) { case Bytecodes::_fast_agetfield: - do_oop_load(_masm, field, x10, IN_HEAP); + __ load_heap_oop(x10, field, x28, x29, IN_HEAP); __ verify_oop(x10); break; case Bytecodes::_fast_lgetfield: @@ -3186,7 +3173,7 @@ void TemplateTable::fast_xaccess(TosState state) { break; case atos: __ add(x10, x10, x11); - do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP); + __ load_heap_oop(x10, Address(x10, 0), x28, x29, IN_HEAP); __ verify_oop(x10); break; case ftos: diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp index a0de9d767bf..eca1bb83ab6 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp @@ -240,22 +240,20 @@ void VM_Version::common_initialize() { } // UseZvfh (depends on RVV) - if (UseZvfh && !UseRVV) { - warning("Cannot enable UseZvfh on cpu without RVV support."); - FLAG_SET_DEFAULT(UseZvfh, false); + if (UseZvfh) { + if (!UseRVV) { + warning("Cannot enable UseZvfh on cpu without RVV support."); + FLAG_SET_DEFAULT(UseZvfh, false); + } + if (!UseZfh) { + warning("Cannot enable UseZvfh on cpu without Zfh support."); + FLAG_SET_DEFAULT(UseZvfh, false); + } } } #ifdef COMPILER2 void VM_Version::c2_initialize() { - if (UseCMoveUnconditionally) { - FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); - } - - if (ConditionalMoveLimit > 0) { - FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); - } - if (!UseRVV) { FLAG_SET_DEFAULT(MaxVectorSize, 0); } else { @@ -476,7 +474,7 @@ void VM_Version::initialize_cpu_information(void) { _no_of_threads = _no_of_cores; _no_of_sockets = _no_of_cores; snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); - snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", features_string()); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", cpu_info_string()); _initialized = true; } diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp index 4214d6c53dc..a0a42fb5463 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp @@ -221,13 +221,13 @@ class VM_Version : public Abstract_VM_Version { FLAG_SET_DEFAULT(UseExtension, true); \ } \ - // https://github.com/riscv/riscv-profiles/blob/main/profiles.adoc#rva20-profiles + // https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#rva20-profiles #define RV_USE_RVA20U64 \ RV_ENABLE_EXTENSION(UseRVC) \ static void useRVA20U64Profile(); - // https://github.com/riscv/riscv-profiles/blob/main/profiles.adoc#rva22-profiles + // https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#rva22-profiles #define RV_USE_RVA22U64 \ RV_ENABLE_EXTENSION(UseRVC) \ RV_ENABLE_EXTENSION(UseZba) \ @@ -241,7 
+241,7 @@ class VM_Version : public Abstract_VM_Version { static void useRVA22U64Profile(); - // https://github.com/riscv/riscv-profiles/blob/main/rva23-profile.adoc#rva23u64-profile + // https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc#rva23u64-profile #define RV_USE_RVA23U64 \ RV_ENABLE_EXTENSION(UseRVC) \ RV_ENABLE_EXTENSION(UseRVV) \ diff --git a/src/hotspot/cpu/s390/abstractInterpreter_s390.cpp b/src/hotspot/cpu/s390/abstractInterpreter_s390.cpp index e815542a51e..96990f0ce94 100644 --- a/src/hotspot/cpu/s390/abstractInterpreter_s390.cpp +++ b/src/hotspot/cpu/s390/abstractInterpreter_s390.cpp @@ -176,7 +176,7 @@ void AbstractInterpreter::layout_activation(Method* method, intptr_t* monitor_base = (intptr_t*)((address)interpreter_frame->fp() - frame::z_ijava_state_size); intptr_t* monitor = monitor_base - (moncount * frame::interpreter_frame_monitor_size()); intptr_t* operand_stack_base = monitor; - intptr_t* tos = operand_stack_base - tempcount - popframe_extra_args; + intptr_t* esp = operand_stack_base - tempcount - popframe_extra_args - 1; intptr_t* top_frame_sp = operand_stack_base - method->max_stack() - frame::z_top_ijava_frame_abi_size / Interpreter::stackElementSize; intptr_t* sender_sp; @@ -206,7 +206,7 @@ void AbstractInterpreter::layout_activation(Method* method, interpreter_frame->interpreter_frame_set_locals(locals_base); interpreter_frame->interpreter_frame_set_monitor_end((BasicObjectLock *)monitor); *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); - interpreter_frame->interpreter_frame_set_tos_address(tos); + interpreter_frame->interpreter_frame_set_esp(esp); if (!is_bottom_frame) { interpreter_frame->interpreter_frame_set_sender_sp(sender_sp); } diff --git a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp index c858a4b8cb1..430928a66ed 100644 --- a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp +++ b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp @@ -52,7 +52,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { CHECK_BAILOUT(); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); return; } @@ -74,7 +74,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { CHECK_BAILOUT(); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { @@ -88,7 +88,7 @@ void PredicateFailedStub::emit_code(LIR_Assembler* ce) { CHECK_BAILOUT(); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } void CounterOverflowStub::emit_code(LIR_Assembler* ce) { @@ -116,7 +116,7 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) { ce->emit_call_c(Runtime1::entry_for (C1StubId::throw_div0_exception_id)); CHECK_BAILOUT(); ce->add_call_info_here(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { @@ -134,7 +134,7 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { CHECK_BAILOUT(); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } // Note: pass object in Z_R1_scratch @@ -147,7 +147,7 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { ce->emit_call_c(a); CHECK_BAILOUT(); 
ce->add_call_info_here(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, C1StubId stub_id) { diff --git a/src/hotspot/cpu/s390/c1_FrameMap_s390.cpp b/src/hotspot/cpu/s390/c1_FrameMap_s390.cpp index 9fa6da8341f..ddba445154a 100644 --- a/src/hotspot/cpu/s390/c1_FrameMap_s390.cpp +++ b/src/hotspot/cpu/s390/c1_FrameMap_s390.cpp @@ -144,13 +144,13 @@ LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; // c1 rnr -> FloatRegister FloatRegister FrameMap::nr2floatreg (int rnr) { assert(_init_done, "tables not initialized"); - debug_only(fpu_range_check(rnr);) + DEBUG_ONLY(fpu_range_check(rnr);) return _fpu_rnr2reg[rnr]; } void FrameMap::map_float_register(int rnr, FloatRegister reg) { - debug_only(fpu_range_check(rnr);) - debug_only(fpu_range_check(reg->encoding());) + DEBUG_ONLY(fpu_range_check(rnr);) + DEBUG_ONLY(fpu_range_check(reg->encoding());) _fpu_rnr2reg[rnr] = reg; // mapping c1 regnr. -> FloatRegister _fpu_reg2rnr[reg->encoding()] = rnr; // mapping assembler encoding -> c1 regnr. } diff --git a/src/hotspot/cpu/s390/c1_FrameMap_s390.hpp b/src/hotspot/cpu/s390/c1_FrameMap_s390.hpp index 66ccc8de876..721995f41fe 100644 --- a/src/hotspot/cpu/s390/c1_FrameMap_s390.hpp +++ b/src/hotspot/cpu/s390/c1_FrameMap_s390.hpp @@ -107,7 +107,7 @@ static int fpu_reg2rnr (FloatRegister reg) { assert(_init_done, "tables not initialized"); int c1rnr = _fpu_reg2rnr[reg->encoding()]; - debug_only(fpu_range_check(c1rnr);) + DEBUG_ONLY(fpu_range_check(c1rnr);) return c1rnr; } diff --git a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp index 5691a2055b3..0e873250dca 100644 --- a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp @@ -69,17 +69,18 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox // Save object being locked into the BasicObjectLock... z_stg(Roop, Address(Rbox, BasicObjectLock::obj_offset())); - if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(tmp, Roop); - z_tm(Address(tmp, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class); - branch_optimized(Assembler::bcondAllOne, slow_case); - } - assert(LockingMode != LM_MONITOR, "LM_MONITOR is already handled, by emit_lock()"); if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(Rbox, Roop, Rmark, tmp, slow_case); } else if (LockingMode == LM_LEGACY) { + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp, Roop); + z_tm(Address(tmp, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class); + branch_optimized(Assembler::bcondAllOne, slow_case); + } + NearLabel done; // Load object header. 
diff --git a/src/hotspot/cpu/s390/frame_s390.cpp b/src/hotspot/cpu/s390/frame_s390.cpp index 01ed22c7d86..b602d0adce5 100644 --- a/src/hotspot/cpu/s390/frame_s390.cpp +++ b/src/hotspot/cpu/s390/frame_s390.cpp @@ -185,7 +185,8 @@ bool frame::is_interpreted_frame() const { void frame::interpreter_frame_set_locals(intptr_t* locs) { assert(is_interpreted_frame(), "interpreted frame expected"); - ijava_state_unchecked()->locals = (uint64_t)locs; + // set relativized locals + *addr_at(_z_ijava_idx(locals)) = (intptr_t) (locs - fp()); } // sender_sp @@ -340,7 +341,7 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const { if (MetaspaceObj::is_valid(cp) == false) return false; // validate locals - address locals = (address)(ijava_state_unchecked()->locals); + address locals = (address)interpreter_frame_locals(); return thread->is_in_stack_range_incl(locals, (address)fp()); } diff --git a/src/hotspot/cpu/s390/frame_s390.hpp b/src/hotspot/cpu/s390/frame_s390.hpp index 3a6b3f33a55..ad754706367 100644 --- a/src/hotspot/cpu/s390/frame_s390.hpp +++ b/src/hotspot/cpu/s390/frame_s390.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -330,6 +330,10 @@ #define _z_ijava_state_neg(_component) \ (int) (-frame::z_ijava_state_size + offset_of(frame::z_ijava_state, _component)) +// Frame slot index relative to fp +#define _z_ijava_idx(_component) \ + (_z_ijava_state_neg(_component) >> LogBytesPerWord) + // ENTRY_FRAME struct z_entry_frame_locals { @@ -406,7 +410,7 @@ // C2I adapter frames: // - // STACK (interpreted called from compiled, on entry to frame manager): + // STACK (interpreted called from compiled, on entry to template interpreter): // // [TOP_C2I_FRAME] // [JIT_FRAME] @@ -471,6 +475,7 @@ public: // To be used, if sp was not extended to match callee's calling convention. inline frame(intptr_t* sp, address pc, intptr_t* unextended_sp = nullptr, intptr_t* fp = nullptr, CodeBlob* cb = nullptr); + inline frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc, CodeBlob* cb, const ImmutableOopMap* oop_map = nullptr); // Access frame via stack pointer. inline intptr_t* sp_addr_at(int index) const { return &sp()[index]; } @@ -494,14 +499,12 @@ inline z_ijava_state* ijava_state() const; - // Where z_ijava_state.monitors is saved. - inline BasicObjectLock** interpreter_frame_monitors_addr() const; - // Where z_ijava_state.esp is saved. - inline intptr_t** interpreter_frame_esp_addr() const; - public: + + inline intptr_t* interpreter_frame_esp() const; + // Where z_ijava_state.esp is saved. + inline void interpreter_frame_set_esp(intptr_t* esp); inline intptr_t* interpreter_frame_top_frame_sp(); - inline void interpreter_frame_set_tos_address(intptr_t* x); inline void interpreter_frame_set_top_frame_sp(intptr_t* top_frame_sp); inline void interpreter_frame_set_sender_sp(intptr_t* sender_sp); #ifdef ASSERT @@ -513,6 +516,8 @@ // Next two functions read and write z_ijava_state.monitors. private: inline BasicObjectLock* interpreter_frame_monitors() const; + + // Where z_ijava_state.monitors is saved. 
inline void interpreter_frame_set_monitors(BasicObjectLock* monitors); public: diff --git a/src/hotspot/cpu/s390/frame_s390.inline.hpp b/src/hotspot/cpu/s390/frame_s390.inline.hpp index d29106cfc40..dea0e72581f 100644 --- a/src/hotspot/cpu/s390/frame_s390.inline.hpp +++ b/src/hotspot/cpu/s390/frame_s390.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -87,6 +87,11 @@ inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp, intptr_t* inline frame::frame(intptr_t* sp) : frame(sp, nullptr) {} +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc, CodeBlob* cb, const ImmutableOopMap* oop_map) + :_sp(sp), _pc(pc), _cb(cb), _oop_map(oop_map), _on_heap(false), DEBUG_ONLY(_frame_index(-1) COMMA) _unextended_sp(unextended_sp), _fp(fp) { + setup(); +} + // Generic constructor. Used by pns() in debug.cpp only #ifndef PRODUCT inline frame::frame(void* sp, void* pc, void* unextended_sp) @@ -109,16 +114,19 @@ inline frame::z_ijava_state* frame::ijava_state() const { return state; } -inline BasicObjectLock** frame::interpreter_frame_monitors_addr() const { - return (BasicObjectLock**) &(ijava_state()->monitors); -} - // The next two functions read and write z_ijava_state.monitors. inline BasicObjectLock* frame::interpreter_frame_monitors() const { - return *interpreter_frame_monitors_addr(); + BasicObjectLock* result = (BasicObjectLock*) at_relative(_z_ijava_idx(monitors)); + // make sure the pointer points inside the frame + assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); + assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer: result: " INTPTR_FORMAT " fp: " INTPTR_FORMAT, p2i(result), p2i(fp())); + return result; } + inline void frame::interpreter_frame_set_monitors(BasicObjectLock* monitors) { - *interpreter_frame_monitors_addr() = monitors; + assert(is_interpreted_frame(), "interpreted frame expected"); + // set relativized monitors + ijava_state()->monitors = (intptr_t) ((intptr_t*)monitors - fp()); } // Accessors @@ -180,7 +188,8 @@ inline intptr_t* frame::link_or_null() const { } inline intptr_t* frame::interpreter_frame_locals() const { - return (intptr_t*) (ijava_state()->locals); + intptr_t n = *addr_at(_z_ijava_idx(locals)); + return &fp()[n]; // return relativized locals } inline intptr_t* frame::interpreter_frame_bcp_addr() const { @@ -202,11 +211,14 @@ inline intptr_t* frame::interpreter_frame_expression_stack() const { // Also begin is one past last monitor. 
inline intptr_t* frame::interpreter_frame_top_frame_sp() { - return (intptr_t*)ijava_state()->top_frame_sp; + intptr_t n = *addr_at(_z_ijava_idx(top_frame_sp)); + return &fp()[n]; // return relativized top_frame_sp } inline void frame::interpreter_frame_set_top_frame_sp(intptr_t* top_frame_sp) { - ijava_state()->top_frame_sp = (intptr_t) top_frame_sp; + assert(is_interpreted_frame(), "interpreted frame expected"); + // set relativized top_frame_sp + ijava_state()->top_frame_sp = (intptr_t) (top_frame_sp - fp()); } inline void frame::interpreter_frame_set_sender_sp(intptr_t* sender_sp) { @@ -219,18 +231,20 @@ inline void frame::interpreter_frame_set_magic() { } #endif +inline intptr_t* frame::interpreter_frame_esp() const { + return (intptr_t*) at_relative(_z_ijava_idx(esp)); +} + // Where z_ijava_state.esp is saved. -inline intptr_t** frame::interpreter_frame_esp_addr() const { - return (intptr_t**) &(ijava_state()->esp); +inline void frame::interpreter_frame_set_esp(intptr_t* esp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + // set relativized esp + ijava_state()->esp = (intptr_t) (esp - fp()); } // top of expression stack (lowest address) inline intptr_t* frame::interpreter_frame_tos_address() const { - return *interpreter_frame_esp_addr() + 1; -} - -inline void frame::interpreter_frame_set_tos_address(intptr_t* x) { - *interpreter_frame_esp_addr() = x - 1; + return interpreter_frame_esp() + Interpreter::stackElementWords; } // Stack slot needed for native calls and GC. @@ -362,4 +376,42 @@ void frame::update_map_with_saved_link(RegisterMapT* map, intptr_t** link_addr) Unimplemented(); } +#if INCLUDE_JFR + +// Static helper routines +inline intptr_t* frame::sender_sp(intptr_t* fp) { return fp; } + +// Extract common_abi parts. +inline intptr_t* frame::fp(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(((z_common_abi*)sp)->callers_sp); +} + +inline intptr_t* frame::link(const intptr_t* fp) { return frame::fp(fp); } + +inline address frame::return_address(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast<address>(((z_common_abi*)sp)->return_pc); +} + +inline address frame::interpreter_return_address(const intptr_t* fp) { return frame::return_address(fp); } + +inline address frame::interpreter_bcp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<address>(*(fp + _z_ijava_idx(bcp))); +} + +inline intptr_t* frame::interpreter_sender_sp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(*(fp + _z_ijava_idx(sender_sp))); +} + +inline bool frame::is_interpreter_frame_setup_at(const intptr_t* fp, const void* sp) { + assert(fp != nullptr, "invariant"); + assert(sp != nullptr, "invariant"); + return sp <= fp - ((frame::z_ijava_state_size + frame::z_top_ijava_frame_abi_size) >> LogBytesPerWord); +} + +#endif // INCLUDE_JFR + #endif // CPU_S390_FRAME_S390_INLINE_HPP diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp index 2054c3db36c..dea3317270e 100644 --- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp @@ -24,16 +24,16 @@ */ #include "asm/macroAssembler.inline.hpp" -#include "registerSaver_s390.hpp" -#include "gc/g1/g1CardTable.hpp" #include "gc/g1/g1BarrierSet.hpp" #include "gc/g1/g1BarrierSetAssembler.hpp" #include "gc/g1/g1BarrierSetRuntime.hpp" +#include "gc/g1/g1CardTable.hpp" #include
"gc/g1/g1DirtyCardQueue.hpp" #include "gc/g1/g1HeapRegion.hpp" #include "gc/g1/g1SATBMarkQueueSet.hpp" #include "gc/g1/g1ThreadLocalData.hpp" #include "interpreter/interp_masm.hpp" +#include "registerSaver_s390.hpp" #include "runtime/jniHandles.hpp" #include "runtime/sharedRuntime.hpp" #include "utilities/macros.hpp" diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetNMethod_s390.cpp b/src/hotspot/cpu/s390/gc/shared/barrierSetNMethod_s390.cpp index 85dcc0a4e73..88b3199e4e1 100644 --- a/src/hotspot/cpu/s390/gc/shared/barrierSetNMethod_s390.cpp +++ b/src/hotspot/cpu/s390/gc/shared/barrierSetNMethod_s390.cpp @@ -40,7 +40,7 @@ class NativeMethodBarrier: public NativeInstruction { address get_patchable_data_address() const { address inst_addr = get_barrier_start_address() + PATCHABLE_INSTRUCTION_OFFSET; - debug_only(Assembler::is_z_cfi(*((long*)inst_addr))); + DEBUG_ONLY(Assembler::is_z_cfi(*((long*)inst_addr))); return inst_addr + 2; } @@ -91,7 +91,7 @@ static NativeMethodBarrier* get_nmethod_barrier(nmethod* nm) { address barrier_address = nm->code_begin() + nm->frame_complete_offset() - NativeMethodBarrier::BARRIER_TOTAL_LENGTH; auto barrier = reinterpret_cast<NativeMethodBarrier*>(barrier_address); - debug_only(barrier->verify()); + DEBUG_ONLY(barrier->verify()); return barrier; } diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp index 48f4c7293a2..b384b24d49b 100644 --- a/src/hotspot/cpu/s390/interp_masm_s390.cpp +++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp @@ -104,7 +104,15 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, address* table, bo } { Label OK; // check if the locals pointer in Z_locals is correct - z_cg(Z_locals, _z_ijava_state_neg(locals), Z_fp); + + // _z_ijava_state_neg(locals) is fp relativized, so we need to + // extract the pointer. + + z_lg(Z_R1_scratch, Address(Z_fp, _z_ijava_state_neg(locals))); + z_sllg(Z_R1_scratch, Z_R1_scratch, Interpreter::logStackElementSize); + z_agr(Z_R1_scratch, Z_fp); + + z_cgr(Z_locals, Z_R1_scratch); z_bre(OK); reentry = stop_chain_static(reentry, "invalid locals pointer Z_locals: " FILE_AND_LINE); bind(OK); @@ -444,7 +452,7 @@ void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, // Useful if consumed previously by access via stackTop(). void InterpreterMacroAssembler::popx(int len) { add2reg(Z_esp, len*Interpreter::stackElementSize); - debug_only(verify_esp(Z_esp, Z_R1_scratch)); + DEBUG_ONLY(verify_esp(Z_esp, Z_R1_scratch)); } // Get Address object of stack top. No checks. No pop.
@@ -458,38 +466,38 @@ void InterpreterMacroAssembler::pop_i(Register r) { z_l(r, Interpreter::expr_offset_in_bytes(0), Z_esp); add2reg(Z_esp, Interpreter::stackElementSize); assert_different_registers(r, Z_R1_scratch); - debug_only(verify_esp(Z_esp, Z_R1_scratch)); + DEBUG_ONLY(verify_esp(Z_esp, Z_R1_scratch)); } void InterpreterMacroAssembler::pop_ptr(Register r) { z_lg(r, Interpreter::expr_offset_in_bytes(0), Z_esp); add2reg(Z_esp, Interpreter::stackElementSize); assert_different_registers(r, Z_R1_scratch); - debug_only(verify_esp(Z_esp, Z_R1_scratch)); + DEBUG_ONLY(verify_esp(Z_esp, Z_R1_scratch)); } void InterpreterMacroAssembler::pop_l(Register r) { z_lg(r, Interpreter::expr_offset_in_bytes(0), Z_esp); add2reg(Z_esp, 2*Interpreter::stackElementSize); assert_different_registers(r, Z_R1_scratch); - debug_only(verify_esp(Z_esp, Z_R1_scratch)); + DEBUG_ONLY(verify_esp(Z_esp, Z_R1_scratch)); } void InterpreterMacroAssembler::pop_f(FloatRegister f) { mem2freg_opt(f, Address(Z_esp, Interpreter::expr_offset_in_bytes(0)), false); add2reg(Z_esp, Interpreter::stackElementSize); - debug_only(verify_esp(Z_esp, Z_R1_scratch)); + DEBUG_ONLY(verify_esp(Z_esp, Z_R1_scratch)); } void InterpreterMacroAssembler::pop_d(FloatRegister f) { mem2freg_opt(f, Address(Z_esp, Interpreter::expr_offset_in_bytes(0)), true); add2reg(Z_esp, 2*Interpreter::stackElementSize); - debug_only(verify_esp(Z_esp, Z_R1_scratch)); + DEBUG_ONLY(verify_esp(Z_esp, Z_R1_scratch)); } void InterpreterMacroAssembler::push_i(Register r) { assert_different_registers(r, Z_R1_scratch); - debug_only(verify_esp(Z_esp, Z_R1_scratch)); + DEBUG_ONLY(verify_esp(Z_esp, Z_R1_scratch)); z_st(r, Address(Z_esp)); add2reg(Z_esp, -Interpreter::stackElementSize); } @@ -501,7 +509,7 @@ void InterpreterMacroAssembler::push_ptr(Register r) { void InterpreterMacroAssembler::push_l(Register r) { assert_different_registers(r, Z_R1_scratch); - debug_only(verify_esp(Z_esp, Z_R1_scratch)); + DEBUG_ONLY(verify_esp(Z_esp, Z_R1_scratch)); int offset = -Interpreter::stackElementSize; z_stg(r, Address(Z_esp, offset)); clear_mem(Address(Z_esp), Interpreter::stackElementSize); @@ -509,13 +517,13 @@ void InterpreterMacroAssembler::push_l(Register r) { } void InterpreterMacroAssembler::push_f(FloatRegister f) { - debug_only(verify_esp(Z_esp, Z_R1_scratch)); + DEBUG_ONLY(verify_esp(Z_esp, Z_R1_scratch)); freg2mem_opt(f, Address(Z_esp), false); add2reg(Z_esp, -Interpreter::stackElementSize); } void InterpreterMacroAssembler::push_d(FloatRegister d) { - debug_only(verify_esp(Z_esp, Z_R1_scratch)); + DEBUG_ONLY(verify_esp(Z_esp, Z_R1_scratch)); int offset = -Interpreter::stackElementSize; freg2mem_opt(d, Address(Z_esp, offset)); add2reg(Z_esp, 2 * offset); @@ -568,7 +576,10 @@ void InterpreterMacroAssembler::prepare_to_jump_from_interpreted(Register method // Satisfy interpreter calling convention (see generate_normal_entry()). z_lgr(Z_R10, Z_SP); // Set sender sp (aka initial caller sp, aka unextended sp). // Record top_frame_sp, because the callee might modify it, if it's compiled. - z_stg(Z_SP, _z_ijava_state_neg(top_frame_sp), Z_fp); + assert_different_registers(Z_R1, method); + z_sgrk(Z_R1, Z_SP, Z_fp); + z_srag(Z_R1, Z_R1, Interpreter::logStackElementSize); + z_stg(Z_R1, _z_ijava_state_neg(top_frame_sp), Z_fp); save_bcp(); save_esp(); z_lgr(Z_method, method); // Set Z_method (kills Z_fp!). @@ -616,7 +627,7 @@ void InterpreterMacroAssembler::verify_esp(Register Resp, Register Rtemp) { // i.e. IJAVA_STATE.monitors > Resp. 
NearLabel OK; Register Rmonitors = Rtemp; - z_lg(Rmonitors, _z_ijava_state_neg(monitors), Z_fp); + get_monitors(Rmonitors); compareU64_and_branch(Rmonitors, Resp, bcondHigh, OK); reentry = stop_chain_static(reentry, "too many pops: Z_esp points into monitor area"); bind(OK); @@ -654,21 +665,46 @@ void InterpreterMacroAssembler::restore_bcp() { z_lg(Z_bcp, Address(Z_fp, _z_ijava_state_neg(bcp))); } -void InterpreterMacroAssembler::save_esp() { - z_stg(Z_esp, Address(Z_fp, _z_ijava_state_neg(esp))); +void InterpreterMacroAssembler::save_esp(Register fp) { + if (fp == noreg) { + fp = Z_fp; + } + z_sgrk(Z_R0, Z_esp, fp); + z_srag(Z_R0, Z_R0, Interpreter::logStackElementSize); + z_stg(Z_R0, Address(fp, _z_ijava_state_neg(esp))); } void InterpreterMacroAssembler::restore_esp() { asm_assert_ijava_state_magic(Z_esp); z_lg(Z_esp, Address(Z_fp, _z_ijava_state_neg(esp))); + z_slag(Z_esp, Z_esp, Interpreter::logStackElementSize); + z_agr(Z_esp, Z_fp); } void InterpreterMacroAssembler::get_monitors(Register reg) { asm_assert_ijava_state_magic(reg); +#ifdef ASSERT + NearLabel ok; + z_cg(Z_fp, 0, Z_SP); + z_bre(ok); + stop("Z_fp is corrupted"); + bind(ok); +#endif // ASSERT mem2reg_opt(reg, Address(Z_fp, _z_ijava_state_neg(monitors))); + z_slag(reg, reg, Interpreter::logStackElementSize); + z_agr(reg, Z_fp); } void InterpreterMacroAssembler::save_monitors(Register reg) { +#ifdef ASSERT + NearLabel ok; + z_cg(Z_fp, 0, Z_SP); + z_bre(ok); + stop("Z_fp is corrupted"); + bind(ok); +#endif // ASSERT + z_sgr(reg, Z_fp); + z_srag(reg, reg, Interpreter::logStackElementSize); reg2mem_opt(reg, Address(Z_fp, _z_ijava_state_neg(monitors))); } @@ -684,6 +720,8 @@ void InterpreterMacroAssembler::save_mdp(Register mdp) { void InterpreterMacroAssembler::restore_locals() { asm_assert_ijava_state_magic(Z_locals); z_lg(Z_locals, Address(Z_fp, _z_ijava_state_neg(locals))); + z_sllg(Z_locals, Z_locals, Interpreter::logStackElementSize); + z_agr(Z_locals, Z_fp); } void InterpreterMacroAssembler::get_method(Register reg) { @@ -827,12 +865,11 @@ void InterpreterMacroAssembler::unlock_if_synchronized_method(TosState state, // register for unlock_object to pass to VM directly. Register R_current_monitor = Z_ARG2; Register R_monitor_block_bot = Z_ARG1; - const Address monitor_block_top(Z_fp, _z_ijava_state_neg(monitors)); const Address monitor_block_bot(Z_fp, -frame::z_ijava_state_size); bind(restart); // Starting with top-most entry. - z_lg(R_current_monitor, monitor_block_top); + get_monitors(R_current_monitor); // Points to word before bottom of monitor block. load_address(R_monitor_block_bot, monitor_block_bot); z_bru(entry); @@ -1002,16 +1039,16 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { // markWord header = obj->mark().set_unlocked(); - if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(tmp, object); - z_tm(Address(tmp, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class); - z_btrue(slow_case); - } - if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(monitor, object, header, tmp, slow_case); } else if (LockingMode == LM_LEGACY) { + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp, object); + z_tm(Address(tmp, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class); + z_btrue(slow_case); + } + // Load markWord from object into header. 
z_lg(header, hdr_offset, object); diff --git a/src/hotspot/cpu/s390/interp_masm_s390.hpp b/src/hotspot/cpu/s390/interp_masm_s390.hpp index 2473463219c..94ad63b16c6 100644 --- a/src/hotspot/cpu/s390/interp_masm_s390.hpp +++ b/src/hotspot/cpu/s390/interp_masm_s390.hpp @@ -169,7 +169,7 @@ class InterpreterMacroAssembler: public MacroAssembler { void restore_bcp(); - void save_esp(); + void save_esp(Register fp = noreg); void restore_esp(); diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 88aedd1b5c0..0129e604978 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -6363,11 +6363,17 @@ void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Registe z_lg(mark, Address(obj, mark_offset)); if (UseObjectMonitorTable) { - // Clear cache in case fast locking succeeds. + // Clear cache in case fast locking succeeds or we need to take the slow-path. const Address om_cache_addr = Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes()))); z_mvghi(om_cache_addr, 0); } + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(temp1, obj); + z_tm(Address(temp1, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class); + z_brne(slow); + } + // First we need to check if the lock-stack has room for pushing the object reference. z_lgf(top, Address(Z_thread, ls_top_offset)); @@ -6501,7 +6507,7 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe NearLabel slow_path; if (UseObjectMonitorTable) { - // Clear cache in case fast locking succeeds. + // Clear cache in case fast locking succeeds or we need to take the slow-path. z_mvghi(Address(box, BasicLock::object_monitor_cache_offset_in_bytes()), 0); } diff --git a/src/hotspot/cpu/s390/register_s390.hpp b/src/hotspot/cpu/s390/register_s390.hpp index e7fdaa58d1a..a33145db02c 100644 --- a/src/hotspot/cpu/s390/register_s390.hpp +++ b/src/hotspot/cpu/s390/register_s390.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2023 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -414,7 +414,7 @@ constexpr FloatRegister Z_FARG2 = Z_F2; constexpr FloatRegister Z_FARG3 = Z_F4; constexpr FloatRegister Z_FARG4 = Z_F6; -// Register declarations to be used in frame manager assembly code. +// Register declarations to be used in template interpreter assembly code. // Use only non-volatile registers in order to keep values across C-calls. // Register to cache the integer value on top of the operand stack. @@ -439,7 +439,7 @@ constexpr Register Z_bcp = Z_R13; // Bytecode which is dispatched (short lived!). constexpr Register Z_bytecode = Z_R14; -// Temporary registers to be used within frame manager. We can use +// Temporary registers to be used within template interpreter. We can use // the nonvolatile ones because the call stub has saved them. // Use only non-volatile registers in order to keep values across C-calls. 
constexpr Register Z_tmp_1 = Z_R10; diff --git a/src/hotspot/cpu/s390/runtime_s390.cpp b/src/hotspot/cpu/s390/runtime_s390.cpp index 4eedb3877d2..99a33716b8b 100644 --- a/src/hotspot/cpu/s390/runtime_s390.cpp +++ b/src/hotspot/cpu/s390/runtime_s390.cpp @@ -72,6 +72,9 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() { // Setup code generation tools const char* name = OptoRuntime::stub_name(OptoStubId::exception_id); CodeBuffer buffer(name, 2048, 1024); + if (buffer.blob() == nullptr) { + return nullptr; + } MacroAssembler* masm = new MacroAssembler(&buffer); Register handle_exception = Z_ARG5; @@ -115,7 +118,7 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() { __ z_lgr(Z_SP, saved_sp); // [Z_RET] isn't null was possible in hotspot5 but not in sapjvm6. - // C2I adapter extensions are now removed by a resize in the frame manager + // C2I adapter extensions are now removed by a resize in the template interpreter // (unwind_initial_activation_pending_exception). #ifdef ASSERT __ z_ltgr(handle_exception, handle_exception); diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp index f4487ccabec..cb1f12504fd 100644 --- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp +++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp @@ -2139,7 +2139,7 @@ static address gen_c2i_adapter(MacroAssembler *masm, Register value = Z_R12; // Remember the senderSP so we can pop the interpreter arguments off of the stack. - // In addition, frame manager expects initial_caller_sp in Z_R10. + // In addition, template interpreter expects initial_caller_sp in Z_R10. __ z_lgr(sender_SP, Z_SP); // This should always fit in 14 bit immediate. @@ -2352,12 +2352,12 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, __ z_br(Z_R1_scratch); } -AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterHandlerEntry* handler) { __ align(CodeEntryAlignment); address i2c_entry = __ pc(); gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); @@ -2411,7 +2411,8 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + handler->set_entry_points(i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + return; } // This function returns the adjust size (in number of words) to a c2i adapter @@ -2768,6 +2769,9 @@ UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() { // Setup code generation tools const char* name = OptoRuntime::stub_name(OptoStubId::uncommon_trap_id); CodeBuffer buffer(name, 2048, 1024); + if (buffer.blob() == nullptr) { + return nullptr; + } InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); Register unroll_block_reg = Z_tmp_1; diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index b46393f543e..d3f6540a3ea 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -115,7 +115,7 @@ class StubGenerator: public 
StubCodeGenerator { // [SP+176] - thread : Thread* // address generate_call_stub(address& return_address) { - // Set up a new C frame, copy Java arguments, call frame manager + // Set up a new C frame, copy Java arguments, call template interpreter // or native_entry, and process result. StubGenStubId stub_id = StubGenStubId::call_stub_id; @@ -272,10 +272,10 @@ class StubGenerator: public StubCodeGenerator { BLOCK_COMMENT("call {"); { - // Call frame manager or native entry. + // Call template interpreter or native entry. // - // Register state on entry to frame manager / native entry: + // Register state on entry to template interpreter / native entry: // // Z_ARG1 = r_top_of_arguments_addr - intptr_t *sender tos (prepushed) // Lesp = (SP) + copied_arguments_offset - 8 @@ -290,7 +290,7 @@ class StubGenerator: public StubCodeGenerator { __ z_lgr(Z_esp, r_top_of_arguments_addr); // - // Stack on entry to frame manager / native entry: + // Stack on entry to template interpreter / native entry: // // F0 [TOP_IJAVA_FRAME_ABI] // [outgoing Java arguments] @@ -300,7 +300,7 @@ class StubGenerator: public StubCodeGenerator { // // Do a light-weight C-call here, r_new_arg_entry holds the address - // of the interpreter entry point (frame manager or native entry) + // of the interpreter entry point (template interpreter or native entry) // and save runtime-value of return_pc in return_address // (call by reference argument). return_address = __ call_stub(r_new_arg_entry); @@ -309,11 +309,11 @@ class StubGenerator: public StubCodeGenerator { { BLOCK_COMMENT("restore registers {"); - // Returned from frame manager or native entry. + // Returned from template interpreter or native entry. // Now pop frame, process result, and return to caller. // - // Stack on exit from frame manager / native entry: + // Stack on exit from template interpreter / native entry: // // F0 [ABI] // ... @@ -330,7 +330,7 @@ class StubGenerator: public StubCodeGenerator { __ pop_frame(); // Reload some volatile registers which we've spilled before the call - // to frame manager / native entry. + // to template interpreter / native entry. // Access all locals via frame pointer, because we know nothing about // the topmost frame's size. __ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp); @@ -1468,11 +1468,120 @@ class StubGenerator: public StubCodeGenerator { return __ addr_at(start_off); } + // + // Generate 'unsafe' set memory stub + // Though just as safe as the other stubs, it takes an unscaled + // size_t (# bytes) argument instead of an element count. + // + // Input: + // Z_ARG1 - destination array address + // Z_ARG2 - byte count (size_t) + // Z_ARG3 - byte value + // + address generate_unsafe_setmemory(address unsafe_byte_fill) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, StubGenStubId::unsafe_setmemory_id); + unsigned int start_off = __ offset(); + + // bump this on entry, not on exit: + // inc_counter_np(SharedRuntime::_unsafe_set_memory_ctr); + + const Register dest = Z_ARG1; + const Register size = Z_ARG2; + const Register byteVal = Z_ARG3; + NearLabel tail, finished; + // fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char) + + // Mark remaining code as such which performs Unsafe accesses. 
+ UnsafeMemoryAccessMark umam(this, true, false); + + __ z_vlvgb(Z_V0, byteVal, 0); + __ z_vrepb(Z_V0, Z_V0, 0); + + __ z_aghi(size, -32); + __ z_brl(tail); + + { + NearLabel again; + __ bind(again); + __ z_vst(Z_V0, Address(dest, 0)); + __ z_vst(Z_V0, Address(dest, 16)); + __ z_aghi(dest, 32); + __ z_aghi(size, -32); + __ z_brnl(again); + } + + __ bind(tail); + + { + NearLabel dont; + __ testbit(size, 4); + __ z_brz(dont); + __ z_vst(Z_V0, Address(dest, 0)); + __ z_aghi(dest, 16); + __ bind(dont); + } + + { + NearLabel dont; + __ testbit(size, 3); + __ z_brz(dont); + __ z_vsteg(Z_V0, 0, Z_R0, dest, 0); + __ z_aghi(dest, 8); + __ bind(dont); + } + + __ z_tmll(size, 7); + __ z_brc(Assembler::bcondAllZero, finished); + + { + NearLabel dont; + __ testbit(size, 2); + __ z_brz(dont); + __ z_vstef(Z_V0, 0, Z_R0, dest, 0); + __ z_aghi(dest, 4); + __ bind(dont); + } + + { + NearLabel dont; + __ testbit(size, 1); + __ z_brz(dont); + __ z_vsteh(Z_V0, 0, Z_R0, dest, 0); + __ z_aghi(dest, 2); + __ bind(dont); + } + + { + NearLabel dont; + __ testbit(size, 0); + __ z_brz(dont); + __ z_vsteb(Z_V0, 0, Z_R0, dest, 0); + __ bind(dont); + } + + __ bind(finished); + __ z_br(Z_R14); + + return __ addr_at(start_off); + } + + // This is common errorexit stub for UnsafeMemoryAccess. + address generate_unsafecopy_common_error_exit() { + unsigned int start_off = __ offset(); + __ z_lghi(Z_RET, 0); // return 0 + __ z_br(Z_R14); + return __ addr_at(start_off); + } void generate_arraycopy_stubs() { // Note: the disjoint stubs must be generated first, some of // the conjoint stubs use them. + + address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeMemoryAccess::set_common_exit_stub_pc(ucm_common_error_exit); + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_nonoop_copy (StubGenStubId::jbyte_disjoint_arraycopy_id); StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_nonoop_copy(StubGenStubId::jshort_disjoint_arraycopy_id); StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_nonoop_copy (StubGenStubId::jint_disjoint_arraycopy_id); @@ -1500,6 +1609,12 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_nonoop_copy(StubGenStubId::arrayof_jlong_arraycopy_id); StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(StubGenStubId::arrayof_oop_arraycopy_id); StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(StubGenStubId::arrayof_oop_arraycopy_uninit_id); + +#ifdef COMPILER2 + StubRoutines::_unsafe_setmemory = + VM_Version::has_VectorFacility() ? generate_unsafe_setmemory(StubRoutines::_jbyte_fill) : nullptr; + +#endif // COMPILER2 } // Call interface for AES_encryptBlock, AES_decryptBlock stubs. @@ -3184,6 +3299,10 @@ class StubGenerator: public StubCodeGenerator { //---------------------------------------------------------------------- // Entry points that are platform specific. 
+ if (UnsafeMemoryAccess::_table == nullptr) { + UnsafeMemoryAccess::create_table(4); // 4 for setMemory + } + if (UseCRC32Intrinsics) { StubRoutines::_crc_table_adr = (address)StubRoutines::zarch::_crc_table; StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes(); diff --git a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp index a52d20e32a3..e03d891496a 100644 --- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp @@ -637,6 +637,8 @@ address TemplateInterpreterGenerator::generate_return_entry_for (TosState state, Register sp_before_i2c_extension = Z_bcp; __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer. __ z_lg(sp_before_i2c_extension, Address(Z_fp, _z_ijava_state_neg(top_frame_sp))); + __ z_slag(sp_before_i2c_extension, sp_before_i2c_extension, Interpreter::logStackElementSize); + __ z_agr(sp_before_i2c_extension, Z_fp); __ resize_frame_absolute(sp_before_i2c_extension, Z_locals/*tmp*/, true/*load_fp*/); // TODO(ZASM): necessary?? @@ -1134,7 +1136,11 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ z_agr(Z_locals, Z_esp); // z_ijava_state->locals - i*BytesPerWord points to i-th Java local (i starts at 0) // z_ijava_state->locals = Z_esp + parameter_count bytes - __ z_stg(Z_locals, _z_ijava_state_neg(locals), fp); + + __ z_sgrk(Z_R0, Z_locals, fp); // Z_R0 = Z_locals - fp(); + __ z_srlg(Z_R0, Z_R0, Interpreter::logStackElementSize); + // Store relativized Z_locals, see frame::interpreter_frame_locals(). + __ z_stg(Z_R0, _z_ijava_state_neg(locals), fp); // z_ijava_state->oop_temp = nullptr; __ store_const(Address(fp, oop_tmp_offset), 0); @@ -1168,9 +1174,14 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { // z_ijava_state->monitors = fp - frame::z_ijava_state_size - Interpreter::stackElementSize; // z_ijava_state->esp = Z_esp = z_ijava_state->monitors; __ add2reg(Z_esp, -frame::z_ijava_state_size, fp); - __ z_stg(Z_esp, _z_ijava_state_neg(monitors), fp); + + __ z_sgrk(Z_R0, Z_esp, fp); + __ z_srag(Z_R0, Z_R0, Interpreter::logStackElementSize); + __ z_stg(Z_R0, _z_ijava_state_neg(monitors), fp); + __ add2reg(Z_esp, -Interpreter::stackElementSize); - __ z_stg(Z_esp, _z_ijava_state_neg(esp), fp); + + __ save_esp(fp); // z_ijava_state->cpoolCache = Z_R1_scratch (see load above); __ z_stg(Z_R1_scratch, _z_ijava_state_neg(cpoolCache), fp); @@ -1206,7 +1217,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { // Various method entries -// Math function, frame manager must set up an interpreter state, etc. +// Math function, template interpreter must set up an interpreter state, etc. address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { // Decide what to do: Use same platform specific instructions and runtime calls as compilers. 
@@ -1229,6 +1240,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M case Interpreter::java_lang_math_cos : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); break; case Interpreter::java_lang_math_tan : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); break; case Interpreter::java_lang_math_tanh : /* run interpreted */ break; + case Interpreter::java_lang_math_cbrt : /* run interpreted */ break; case Interpreter::java_lang_math_abs : /* run interpreted */ break; case Interpreter::java_lang_math_sqrt : /* runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); not available */ break; case Interpreter::java_lang_math_log : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); break; @@ -1627,7 +1639,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ add2reg(Rfirst_monitor, -(frame::z_ijava_state_size + (int)sizeof(BasicObjectLock)), Z_fp); #ifdef ASSERT NearLabel ok; - __ z_lg(Z_R1, _z_ijava_state_neg(monitors), Z_fp); + __ get_monitors(Z_R1); __ compareU64_and_branch(Rfirst_monitor, Z_R1, Assembler::bcondEqual, ok); reentry = __ stop_chain_static(reentry, "native_entry:unlock: inconsistent z_ijava_state.monitors"); __ bind(ok); diff --git a/src/hotspot/cpu/s390/templateTable_s390.cpp b/src/hotspot/cpu/s390/templateTable_s390.cpp index 4262c77ecd7..2b39cc8318c 100644 --- a/src/hotspot/cpu/s390/templateTable_s390.cpp +++ b/src/hotspot/cpu/s390/templateTable_s390.cpp @@ -65,7 +65,8 @@ // The actual size of each block heavily depends on the CPU capabilities and, // of course, on the logic implemented in each block. #ifdef ASSERT - #define BTB_MINSIZE 256 +// With introduced assert in get_monitor() & set_monitor(), required block size is now 322. + #define BTB_MINSIZE 512 #else #define BTB_MINSIZE 64 #endif @@ -91,7 +92,8 @@ if (len > alignment) { \ tty->print_cr("%4d of %4d @ " INTPTR_FORMAT ": Block len for %s", \ len, alignment, e_addr-len, name); \ - guarantee(len <= alignment, "block too large"); \ + guarantee(len <= alignment, "block too large, len = %d, alignment = %d", \ + len, alignment); \ } \ guarantee(len == e_addr-b_addr, "block len mismatch"); \ } @@ -112,7 +114,8 @@ if (len > alignment) { \ tty->print_cr("%4d of %4d @ " INTPTR_FORMAT ": Block len for %s", \ len, alignment, e_addr-len, name); \ - guarantee(len <= alignment, "block too large"); \ + guarantee(len <= alignment, "block too large, len = %d, alignment = %d", \ + len, alignment); \ } \ guarantee(len == e_addr-b_addr, "block len mismatch"); \ } diff --git a/src/hotspot/cpu/s390/vm_version_s390.cpp b/src/hotspot/cpu/s390/vm_version_s390.cpp index 157b945e6e1..8261fbd083a 100644 --- a/src/hotspot/cpu/s390/vm_version_s390.cpp +++ b/src/hotspot/cpu/s390/vm_version_s390.cpp @@ -90,7 +90,7 @@ static const char* z_features[] = {" ", void VM_Version::initialize() { determine_features(); // Get processor capabilities. - set_features_string(); // Set a descriptive feature indication. + set_cpu_info_string(); // Set a descriptive feature indication. if (Verbose || PrintAssembly || PrintStubCode) { print_features_internal("CPU Version as detected internally:", PrintAssembly || PrintStubCode); @@ -388,9 +388,9 @@ int VM_Version::get_model_index() { } -void VM_Version::set_features_string() { - // A note on the _features_string format: - // There are jtreg tests checking the _features_string for various properties. 
+void VM_Version::set_cpu_info_string() { + // A note on the _cpu_info_string format: + // There are jtreg tests checking the _cpu_info_string for various properties. // For some strange reason, these tests require the string to contain // only _lowercase_ characters. Keep that in mind when being surprised // about the unusual notation of features - and when adding new ones. @@ -412,29 +412,29 @@ void VM_Version::set_features_string() { _model_string = "unknown model"; strcpy(buf, "z/Architecture (ambiguous detection)"); } - _features_string = os::strdup(buf); + _cpu_info_string = os::strdup(buf); if (has_Crypto_AES()) { - assert(strlen(_features_string) + 3*8 < sizeof(buf), "increase buffer size"); + assert(strlen(_cpu_info_string) + 3*8 < sizeof(buf), "increase buffer size"); jio_snprintf(buf, sizeof(buf), "%s%s%s%s", - _features_string, + _cpu_info_string, has_Crypto_AES128() ? ", aes128" : "", has_Crypto_AES192() ? ", aes192" : "", has_Crypto_AES256() ? ", aes256" : ""); - os::free((void *)_features_string); - _features_string = os::strdup(buf); + os::free((void *)_cpu_info_string); + _cpu_info_string = os::strdup(buf); } if (has_Crypto_SHA()) { - assert(strlen(_features_string) + 6 + 2*8 + 7 < sizeof(buf), "increase buffer size"); + assert(strlen(_cpu_info_string) + 6 + 2*8 + 7 < sizeof(buf), "increase buffer size"); jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s", - _features_string, + _cpu_info_string, has_Crypto_SHA1() ? ", sha1" : "", has_Crypto_SHA256() ? ", sha256" : "", has_Crypto_SHA512() ? ", sha512" : "", has_Crypto_GHASH() ? ", ghash" : ""); - os::free((void *)_features_string); - _features_string = os::strdup(buf); + os::free((void *)_cpu_info_string); + _cpu_info_string = os::strdup(buf); } } @@ -464,7 +464,7 @@ bool VM_Version::test_feature_bit(unsigned long* featureBuffer, int featureNum, } void VM_Version::print_features_internal(const char* text, bool print_anyway) { - tty->print_cr("%s %s", text, features_string()); + tty->print_cr("%s %s", text, cpu_info_string()); tty->cr(); if (Verbose || print_anyway) { @@ -906,7 +906,7 @@ void VM_Version::set_features_from(const char* march) { err = true; } if (!err) { - set_features_string(); + set_cpu_info_string(); if (prt || PrintAssembly) { print_features_internal("CPU Version as set by cmdline option:", prt); } @@ -1542,6 +1542,6 @@ void VM_Version::initialize_cpu_information(void) { _no_of_threads = _no_of_cores; _no_of_sockets = _no_of_cores; snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE, "s390 %s", VM_Version::get_model_string()); - snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "s390 %s", features_string()); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "s390 %s", cpu_info_string()); _initialized = true; } diff --git a/src/hotspot/cpu/s390/vm_version_s390.hpp b/src/hotspot/cpu/s390/vm_version_s390.hpp index 49e6f5686f6..6c6eb76bf7b 100644 --- a/src/hotspot/cpu/s390/vm_version_s390.hpp +++ b/src/hotspot/cpu/s390/vm_version_s390.hpp @@ -148,7 +148,7 @@ class VM_Version: public Abstract_VM_Version { static bool test_feature_bit(unsigned long* featureBuffer, int featureNum, unsigned int bufLen); static int get_model_index(); - static void set_features_string(); + static void set_cpu_info_string(); static void print_features_internal(const char* text, bool print_anyway=false); static void determine_features(); static long call_getFeatures(unsigned long* buffer, int buflen, int functionCode); diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 6853af9e746..897b06e94df 100644 --- 
a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -280,15 +280,14 @@ void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) { emit_int24(op1, (op2 | encode(dst)), imm8); } - -void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { +void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32, bool optimize_rax_dst) { assert(isByte(op1) && isByte(op2), "wrong opcode"); assert(op1 == 0x81, "Unexpected opcode"); if (is8bit(imm32)) { emit_int24(op1 | 0x02, // set sign bit op2 | encode(dst), imm32 & 0xFF); - } else if (dst == rax) { + } else if (optimize_rax_dst && dst == rax) { switch (op2) { case 0xD0: emit_int8(0x15); break; // adc case 0xC0: emit_int8(0x05); break; // add @@ -307,21 +306,6 @@ void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { } } -void Assembler::emit_arith_ndd(int op1, int op2, Register dst, int32_t imm32) { - assert(isByte(op1) && isByte(op2), "wrong opcode"); - assert(op1 == 0x81, "Unexpected opcode"); - // This code cache friendly optimization saves 3 bytes per encoding, which offsets the EVEX encoding penalty. - if (is8bit(imm32)) { - emit_int24(op1 | 0x02, // set sign bit - op2 | encode(dst), - imm32 & 0xFF); - } - else { - emit_int16(op1, (op2 | encode(dst))); - emit_int32(imm32); - } -} - // Force generation of a 4 byte immediate value even if it fits into 8bit void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) { assert(isByte(op1) && isByte(op2), "wrong opcode"); @@ -801,7 +785,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { address ip = inst; bool is_64bit = false; - debug_only(bool has_disp32 = false); + DEBUG_ONLY(bool has_disp32 = false); int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn again_after_prefix: @@ -859,7 +843,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0x8A: // movb r, a case 0x8B: // movl r, a case 0x8F: // popl a - debug_only(has_disp32 = true); + DEBUG_ONLY(has_disp32 = true); break; case 0x68: // pushq #32 @@ -898,10 +882,10 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0x8B: // movw r, a case 0x89: // movw a, r - debug_only(has_disp32 = true); + DEBUG_ONLY(has_disp32 = true); break; case 0xC7: // movw a, #16 - debug_only(has_disp32 = true); + DEBUG_ONLY(has_disp32 = true); tail_size = 2; // the imm16 break; case 0x0F: // several SSE/SSE2 variants @@ -923,7 +907,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0x69: // imul r, a, #32 case 0xC7: // movl a, #32(oop?) tail_size = 4; - debug_only(has_disp32 = true); // has both kinds of operands! + DEBUG_ONLY(has_disp32 = true); // has both kinds of operands! break; case 0x0F: // movx..., etc. @@ -932,11 +916,11 @@ address Assembler::locate_operand(address inst, WhichOperand which) { tail_size = 1; case 0x38: // ptest, pmovzxbw ip++; // skip opcode - debug_only(has_disp32 = true); // has both kinds of operands! + DEBUG_ONLY(has_disp32 = true); // has both kinds of operands! break; case 0x70: // pshufd r, r/a, #8 - debug_only(has_disp32 = true); // has both kinds of operands! + DEBUG_ONLY(has_disp32 = true); // has both kinds of operands! 
case 0x73: // psrldq r, #8 tail_size = 1; break; @@ -961,7 +945,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush case 0xD6: // movq case 0xFE: // paddd - debug_only(has_disp32 = true); + DEBUG_ONLY(has_disp32 = true); break; case 0xAD: // shrd r, a, %cl @@ -976,18 +960,18 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0xC1: // xaddl case 0xC7: // cmpxchg8 case REP16(0x90): // setcc a - debug_only(has_disp32 = true); + DEBUG_ONLY(has_disp32 = true); // fall out of the switch to decode the address break; case 0xC4: // pinsrw r, a, #8 - debug_only(has_disp32 = true); + DEBUG_ONLY(has_disp32 = true); case 0xC5: // pextrw r, r, #8 tail_size = 1; // the imm8 break; case 0xAC: // shrd r, a, #8 - debug_only(has_disp32 = true); + DEBUG_ONLY(has_disp32 = true); tail_size = 1; // the imm8 break; @@ -1004,12 +988,12 @@ address Assembler::locate_operand(address inst, WhichOperand which) { // also: orl, adcl, sbbl, andl, subl, xorl, cmpl // on 32bit in the case of cmpl, the imm might be an oop tail_size = 4; - debug_only(has_disp32 = true); // has both kinds of operands! + DEBUG_ONLY(has_disp32 = true); // has both kinds of operands! break; case 0x83: // addl a, #8; addl r, #8 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl - debug_only(has_disp32 = true); // has both kinds of operands! + DEBUG_ONLY(has_disp32 = true); // has both kinds of operands! tail_size = 1; break; @@ -1026,7 +1010,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0x9B: switch (0xFF & *ip++) { case 0xD9: // fnstcw a - debug_only(has_disp32 = true); + DEBUG_ONLY(has_disp32 = true); break; default: ShouldNotReachHere(); @@ -1045,7 +1029,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0x87: // xchg r, a case REP4(0x38): // cmp... case 0x85: // test r, a - debug_only(has_disp32 = true); // has both kinds of operands! + DEBUG_ONLY(has_disp32 = true); // has both kinds of operands! break; case 0xA8: // testb rax, #8 @@ -1057,7 +1041,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0xC6: // movb a, #8 case 0x80: // cmpb a, #8 case 0x6B: // imul r, a, #8 - debug_only(has_disp32 = true); // has both kinds of operands! + DEBUG_ONLY(has_disp32 = true); // has both kinds of operands! tail_size = 1; // the imm8 break; @@ -1109,7 +1093,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { break; } ip++; // skip opcode - debug_only(has_disp32 = true); // has both kinds of operands! + DEBUG_ONLY(has_disp32 = true); // has both kinds of operands! break; case 0x62: // EVEX_4bytes @@ -1135,7 +1119,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { break; } ip++; // skip opcode - debug_only(has_disp32 = true); // has both kinds of operands! + DEBUG_ONLY(has_disp32 = true); // has both kinds of operands! 
break; case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 @@ -1147,7 +1131,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a - debug_only(has_disp32 = true); + DEBUG_ONLY(has_disp32 = true); break; case 0xE8: // call rdisp32 @@ -1184,7 +1168,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { default: ip++; } - debug_only(has_disp32 = true); // has both kinds of operands! + DEBUG_ONLY(has_disp32 = true); // has both kinds of operands! break; default: @@ -1364,7 +1348,7 @@ void Assembler::eaddl(Register dst, Address src, int32_t imm32, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_operand(0x81, rax, src, imm32); } @@ -1416,7 +1400,7 @@ void Assembler::eaddl(Register dst, Address src1, Register src2, bool no_flags) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x01); emit_operand(src2, src1, 0); } @@ -1427,9 +1411,7 @@ void Assembler::addl(Register dst, int32_t imm32) { } void Assembler::eaddl(Register dst, Register src, int32_t imm32, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_arith_ndd(0x81, 0xC0, src, imm32); + emit_eevex_prefix_or_demote_arith_ndd(dst, src, imm32, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x81, 0xC0, no_flags); } void Assembler::addl(Register dst, Address src) { @@ -1441,11 +1423,7 @@ void Assembler::addl(Register dst, Address src) { void Assembler::eaddl(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x03); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x03, no_flags); } void Assembler::addl(Register dst, Register src) { @@ -1457,7 +1435,7 @@ void Assembler::eaddl(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ 
false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - (void)evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void)emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith(0x03, 0xC0, src1, src2); } @@ -1647,7 +1625,7 @@ void Assembler::eandl(Register dst, Address src, int32_t imm32, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_operand(0x81, rsp, src, imm32); } @@ -1657,9 +1635,7 @@ void Assembler::andl(Register dst, int32_t imm32) { } void Assembler::eandl(Register dst, Register src, int32_t imm32, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_arith_ndd(0x81, 0xE0, src, imm32); + emit_eevex_prefix_or_demote_arith_ndd(dst, src, imm32, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x81, 0xE0, no_flags); } void Assembler::andl(Address dst, Register src) { @@ -1678,11 +1654,7 @@ void Assembler::andl(Register dst, Address src) { void Assembler::eandl(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x23); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x23, no_flags); } void Assembler::andl(Register dst, Register src) { @@ -1694,7 +1666,7 @@ void Assembler::eandl(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. 
- (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith(0x23, 0xC0, src1, src2); } @@ -1841,9 +1813,7 @@ void Assembler::cmovl(Condition cc, Register dst, Register src) { } void Assembler::ecmovl(Condition cc, Register dst, Register src1, Register src2) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes); - emit_int16((0x40 | cc), (0xC0 | encode)); + emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x40 | cc, false /* no_flags */, true /* is_map1 */, true /* swap */); } void Assembler::cmovl(Condition cc, Register dst, Address src) { @@ -1855,11 +1825,7 @@ void Assembler::cmovl(Condition cc, Register dst, Address src) { void Assembler::ecmovl(Condition cc, Register dst, Register src1, Address src2) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes); - emit_int8((0x40 | cc)); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, (0x40 | cc) , false /* no_flags */, true /* is_map1 */); } void Assembler::cmpb(Address dst, Register reg) { @@ -2010,6 +1976,11 @@ void Assembler::cpuid() { emit_int16(0x0F, (unsigned char)0xA2); } +void Assembler::serialize() { + assert(VM_Version::supports_serialize(), ""); + emit_int24(0x0F, 0x01, 0xE8); +} + // Opcode / Instruction Op / En 64 - Bit Mode Compat / Leg Mode Description Implemented // F2 0F 38 F0 / r CRC32 r32, r / m8 RM Valid Valid Accumulate CRC32 on r / m8. v // F2 REX 0F 38 F0 / r CRC32 r32, r / m8* RM Valid N.E. Accumulate CRC32 on r / m8. - @@ -2024,7 +1995,7 @@ void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) { assert(VM_Version::supports_sse4_2(), ""); if (needs_eevex(crc, v)) { InstructionAttr attributes(AVX_128bit, /* rex_w */ sizeInBytes == 8, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = vex_prefix_and_encode(crc->encoding(), 0, v->encoding(), sizeInBytes == 2 ? VEX_SIMD_66 : VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, true); + int encode = vex_prefix_and_encode(crc->encoding(), 0, v->encoding(), sizeInBytes == 2 ? VEX_SIMD_66 : VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, true); emit_int16(sizeInBytes == 1 ? 
(unsigned char)0xF0 : (unsigned char)0xF1, (0xC0 | encode)); } else { int8_t w = 0x01; @@ -2071,7 +2042,7 @@ void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) { if (needs_eevex(crc, adr.base(), adr.index())) { InstructionAttr attributes(AVX_128bit, /* vex_w */ sizeInBytes == 8, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - vex_prefix(adr, 0, crc->encoding(), sizeInBytes == 2 ? VEX_SIMD_66 : VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes); + vex_prefix(adr, 0, crc->encoding(), sizeInBytes == 2 ? VEX_SIMD_66 : VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes); emit_int8(sizeInBytes == 1 ? (unsigned char)0xF0 : (unsigned char)0xF1); emit_operand(crc, adr, 0); } else { @@ -2468,7 +2439,7 @@ void Assembler::edecl(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xFF); emit_operand(rcx, src, 0); } @@ -2516,7 +2487,7 @@ void Assembler::idivl(Register src) { void Assembler::eidivl(Register src, bool no_flags) { // Signed InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF7, (0xF8 | encode)); } @@ -2527,7 +2498,7 @@ void Assembler::divl(Register src) { // Unsigned void Assembler::edivl(Register src, bool no_flags) { // Unsigned InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF7, (0xF0 | encode)); } @@ -2538,7 +2509,7 @@ void Assembler::imull(Register src) { void Assembler::eimull(Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF7, (0xE8 | encode)); } @@ -2548,9 +2519,7 @@ void Assembler::imull(Register dst, Register src) { } void Assembler::eimull(Register dst, Register src1, Register src2, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, 
&attributes, no_flags); - emit_int16((unsigned char)0xAF, (0xC0 | encode)); + emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0xAF, no_flags, true /* is_map1 */, true /* swap */); } void Assembler::imull(Register dst, Address src, int32_t value) { @@ -2571,7 +2540,7 @@ void Assembler::eimull(Register dst, Address src, int32_t value, bool no_flags) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (is8bit(value)) { emit_int8((unsigned char)0x6B); emit_operand(dst, src, 1); @@ -2595,7 +2564,7 @@ void Assembler::imull(Register dst, Register src, int value) { void Assembler::eimull(Register dst, Register src, int value, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (is8bit(value)) { emit_int24(0x6B, (0xC0 | encode), value & 0xFF); } else { @@ -2613,11 +2582,7 @@ void Assembler::imull(Register dst, Address src) { void Assembler::eimull(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8((unsigned char)0xAF); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, (unsigned char)0xAF, no_flags, true /* is_map1 */); } void Assembler::incl(Address dst) { @@ -2633,7 +2598,7 @@ void Assembler::eincl(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xFF); emit_operand(rax, src, 0); } @@ -2825,7 +2790,7 @@ void Assembler::lzcntl(Register dst, Register src) { void Assembler::elzcntl(Register dst, Register src, bool no_flags) { assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = 
eevex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF5, (0xC0 | encode)); } @@ -2843,7 +2808,7 @@ void Assembler::elzcntl(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xF5); emit_operand(dst, src, 0); } @@ -2862,6 +2827,17 @@ void Assembler::mov(Register dst, Register src) { movq(dst, src); } +void Assembler::movapd(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_rex_vex_w_reverted(); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x28); + emit_operand(dst, src, 0); +} + void Assembler::movapd(XMMRegister dst, XMMRegister src) { int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit; InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -4050,7 +4026,7 @@ void Assembler::emull(Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_nf(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xF7); emit_operand(rsp, src, 0); } @@ -4062,7 +4038,7 @@ void Assembler::mull(Register src) { void Assembler::emull(Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF7, (0xE0 | encode)); } @@ -4105,7 +4081,7 @@ void Assembler::negl(Register dst) { void Assembler::enegl(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF7, (0xD8 | encode)); } @@ -4120,7 +4096,7 @@ void Assembler::enegl(Register dst, Address src, bool no_flags) { InstructionMark 
im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xF7); emit_operand(as_Register(3), src, 0); } @@ -4438,7 +4414,7 @@ void Assembler::notl(Register dst) { void Assembler::enotl(Register dst, Register src) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes); emit_int16((unsigned char)0xF7, (0xD0 | encode)); } @@ -4446,7 +4422,7 @@ void Assembler::eorw(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_66, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith(0x0B, 0xC0, src1, src2); } @@ -4460,7 +4436,7 @@ void Assembler::eorl(Register dst, Address src, int32_t imm32, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_operand(0x81, rcx, src, imm32); } @@ -4470,9 +4446,7 @@ void Assembler::orl(Register dst, int32_t imm32) { } void Assembler::eorl(Register dst, Register src, int32_t imm32, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_arith_ndd(0x81, 0xC8, src, imm32); + emit_eevex_prefix_or_demote_arith_ndd(dst, src, imm32, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x81, 0xC8, no_flags); } void Assembler::orl(Register dst, Address src) { @@ -4484,11 +4458,7 @@ void Assembler::orl(Register dst, Address src) { void Assembler::eorl(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src2, dst->encoding(), 
src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x0B); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x0B, no_flags); } void Assembler::orl(Register dst, Register src) { @@ -4500,7 +4470,7 @@ void Assembler::eorl(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith(0x0B, 0xC0, src1, src2); } @@ -4515,7 +4485,7 @@ void Assembler::eorl(Register dst, Address src1, Register src2, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x09); emit_operand(src2, src1, 0); } @@ -4532,7 +4502,7 @@ void Assembler::eorb(Register dst, Address src, int imm8, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0x80); emit_operand(rcx, src, 1); emit_int8(imm8); @@ -4549,7 +4519,7 @@ void Assembler::eorb(Register dst, Address src1, Register src2, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x08); emit_operand(src2, src1, 0); } @@ -4584,7 +4554,7 @@ void Assembler::packuswb(XMMRegister dst, Address src) { assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); simd_prefix(dst, dst, src, VEX_SIMD_66, 
VEX_OPCODE_0F, &attributes); emit_int8(0x67); emit_operand(dst, src, 0); @@ -4648,6 +4618,7 @@ void Assembler::vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector InstructionMark im(this); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0x8D); emit_operand(dst, src, 0); @@ -4678,6 +4649,7 @@ void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector // VEX.NDS.256.66.0F38.W0 36 /r InstructionMark im(this); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x36); emit_operand(dst, src, 0); @@ -5387,7 +5359,7 @@ void Assembler::evpmovzxbd(XMMRegister dst, KRegister mask, Address src, int vec assert(dst != xnoreg, "sanity"); InstructionMark im(this); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_QVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); @@ -5675,7 +5647,7 @@ void Assembler::epopcntl(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0x88); emit_operand(dst, src, 0); } @@ -5690,7 +5662,7 @@ void Assembler::popcntl(Register dst, Register src) { void Assembler::epopcntl(Register dst, Register src, bool no_flags) { assert(VM_Version::supports_popcnt(), "must support"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0x88, (0xC0 | encode)); } @@ -6241,7 +6213,7 @@ void Assembler::rcll(Register dst, int imm8) { void Assembler::ercll(Register dst, Register src, int imm8) { assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes); + 
int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xD0 | encode)); } else { @@ -6324,7 +6296,7 @@ void Assembler::roll(Register dst, int imm8) { void Assembler::eroll(Register dst, Register src, int imm8, bool no_flags) { assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xC0 | encode)); } else { @@ -6339,7 +6311,7 @@ void Assembler::roll(Register dst) { void Assembler::eroll(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xD3, (0xC0 | encode)); } @@ -6356,7 +6328,7 @@ void Assembler::rorl(Register dst, int imm8) { void Assembler::erorl(Register dst, Register src, int imm8, bool no_flags) { assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xC8 | encode)); } else { @@ -6371,7 +6343,7 @@ void Assembler::rorl(Register dst) { void Assembler::erorl(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xD3, (0xC8 | encode)); } @@ -6382,7 +6354,7 @@ void Assembler::rorq(Register dst) { void Assembler::erorq(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_int16((unsigned char)0xD3, (0xC8 | encode)); } @@ -6399,7 +6371,7 @@ void Assembler::rorq(Register dst, int imm8) { void Assembler::erorq(Register dst, Register src, int imm8, bool no_flags) { 
assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xC8 | encode)); } else { @@ -6414,7 +6386,7 @@ void Assembler::rolq(Register dst) { void Assembler::erolq(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_int16((unsigned char)0xD3, (0xC0 | encode)); } @@ -6431,7 +6403,7 @@ void Assembler::rolq(Register dst, int imm8) { void Assembler::erolq(Register dst, Register src, int imm8, bool no_flags) { assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xC0 | encode)); } else { @@ -6459,7 +6431,7 @@ void Assembler::esall(Register dst, Address src, int imm8, bool no_flags) { assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int8((unsigned char)0xD1); emit_operand(as_Register(4), src, 0); @@ -6482,7 +6454,7 @@ void Assembler::esall(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xD3); emit_operand(as_Register(4), src, 0); } @@ -6500,7 +6472,7 @@ void Assembler::sall(Register dst, int imm8) { void Assembler::esall(Register dst, Register src, int imm8, bool no_flags) { assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = 
evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xE0 | encode)); } else { @@ -6515,7 +6487,7 @@ void Assembler::sall(Register dst) { void Assembler::esall(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xD3, (0xE0 | encode)); } @@ -6539,7 +6511,7 @@ void Assembler::esarl(Register dst, Address src, int imm8, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int8((unsigned char)0xD1); emit_operand(as_Register(7), src, 0); @@ -6562,7 +6534,7 @@ void Assembler::esarl(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xD3); emit_operand(as_Register(7), src, 0); } @@ -6580,7 +6552,7 @@ void Assembler::sarl(Register dst, int imm8) { void Assembler::esarl(Register dst, Register src, int imm8, bool no_flags) { assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xF8 | encode)); } else { @@ -6595,7 +6567,7 @@ void Assembler::sarl(Register dst) { void Assembler::esarl(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xD3, (0xF8 | encode)); } @@ -6736,7 +6708,7 @@ void Assembler::shll(Register 
dst, int imm8) { void Assembler::eshll(Register dst, Register src, int imm8, bool no_flags) { assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1 ) { emit_int16((unsigned char)0xD1, (0xE0 | encode)); } else { @@ -6751,7 +6723,7 @@ void Assembler::shll(Register dst) { void Assembler::eshll(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xD3, (0xE0 | encode)); } @@ -6769,7 +6741,7 @@ void Assembler::shrl(Register dst, int imm8) { void Assembler::eshrl(Register dst, Register src, int imm8, bool no_flags) { assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xE8 | encode)); } @@ -6785,7 +6757,7 @@ void Assembler::shrl(Register dst) { void Assembler::eshrl(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xD3, (0xE8 | encode)); } @@ -6800,7 +6772,7 @@ void Assembler::eshrl(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xD3); emit_operand(as_Register(5), src, 0); } @@ -6825,7 +6797,7 @@ void Assembler::eshrl(Register dst, Address src, int imm8, bool no_flags) { assert(isShiftCount(imm8), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, 
VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int8((unsigned char)0xD1); emit_operand(as_Register(5), src, 0); @@ -6843,11 +6815,7 @@ void Assembler::shldl(Register dst, Register src) { } void Assembler::eshldl(Register dst, Register src1, Register src2, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - // NDD shares its encoding bits with NDS bits for regular EVEX instruction. - // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - int encode = evex_prefix_and_encode_ndd(src2->encoding(), dst->encoding(), src1->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int16(0xA5, (0xC0 | encode)); + emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0xA5, no_flags, true /* is_map1 */); } void Assembler::shldl(Register dst, Register src, int8_t imm8) { @@ -6856,11 +6824,7 @@ void Assembler::shldl(Register dst, Register src, int8_t imm8) { } void Assembler::eshldl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - // NDD shares its encoding bits with NDS bits for regular EVEX instruction. - // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - int encode = evex_prefix_and_encode_ndd(src2->encoding(), dst->encoding(), src1->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int24(0x24, (0xC0 | encode), imm8); + emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), imm8, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x24, no_flags, true /* is_map1 */); } void Assembler::shrdl(Register dst, Register src) { @@ -6869,11 +6833,7 @@ void Assembler::shrdl(Register dst, Register src) { } void Assembler::eshrdl(Register dst, Register src1, Register src2, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - // NDD shares its encoding bits with NDS bits for regular EVEX instruction. - // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - int encode = evex_prefix_and_encode_ndd(src2->encoding(), dst->encoding(), src1->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int16(0xAD, (0xC0 | encode)); + emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0xAD, no_flags, true /* is_map1 */); } void Assembler::shrdl(Register dst, Register src, int8_t imm8) { @@ -6882,11 +6842,7 @@ void Assembler::shrdl(Register dst, Register src, int8_t imm8) { } void Assembler::eshrdl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - // NDD shares its encoding bits with NDS bits for regular EVEX instruction. - // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. 
- int encode = evex_prefix_and_encode_ndd(src2->encoding(), dst->encoding(), src1->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int24(0x2C, (0xC0 | encode), imm8); + emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), imm8, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x2C, no_flags, true /* is_map1 */); } void Assembler::shldq(Register dst, Register src, int8_t imm8) { @@ -6895,11 +6851,7 @@ void Assembler::shldq(Register dst, Register src, int8_t imm8) { } void Assembler::eshldq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - // NDD shares its encoding bits with NDS bits for regular EVEX instruction. - // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - int encode = evex_prefix_and_encode_ndd(src2->encoding(), dst->encoding(), src1->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int24(0x24, (0xC0 | encode), imm8); + emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), imm8, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x24, no_flags, true /* is_map1 */); } void Assembler::shrdq(Register dst, Register src, int8_t imm8) { @@ -6908,11 +6860,7 @@ void Assembler::shrdq(Register dst, Register src, int8_t imm8) { } void Assembler::eshrdq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - // NDD shares its encoding bits with NDS bits for regular EVEX instruction. - // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. 
- int encode = evex_prefix_and_encode_ndd(src2->encoding(), dst->encoding(), src1->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int24(0x2C, (0xC0 | encode), imm8); + emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), imm8, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x2C, no_flags, true /* is_map1 */); } // copies a single word from [esi] to [edi] @@ -7002,7 +6950,7 @@ void Assembler::esubl(Register dst, Address src, int32_t imm32, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_operand(0x81, rbp, src, imm32); } @@ -7017,7 +6965,7 @@ void Assembler::esubl(Register dst, Address src1, Register src2, bool no_flags) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x29); emit_operand(src2, src1, 0); } @@ -7028,9 +6976,7 @@ void Assembler::subl(Register dst, int32_t imm32) { } void Assembler::esubl(Register dst, Register src, int32_t imm32, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_arith_ndd(0x81, 0xE8, src, imm32); + emit_eevex_prefix_or_demote_arith_ndd(dst, src, imm32, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x81, 0xE8, no_flags); } // Force generation of a 4 byte immediate value even if it fits into 8bit @@ -7041,7 +6987,7 @@ void Assembler::subl_imm32(Register dst, int32_t imm32) { void Assembler::esubl_imm32(Register dst, Register src, int32_t imm32, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_imm32(0x81, 0xE8, src, imm32); } @@ -7054,11 +7000,7 @@ void Assembler::subl(Register dst, Address src) { void Assembler::esubl(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x2B); - 
emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x2B, no_flags); } void Assembler::subl(Register dst, Register src) { @@ -7070,7 +7012,7 @@ void Assembler::esubl(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith(0x2B, 0xC0, src1, src2); } @@ -7175,7 +7117,7 @@ void Assembler::tzcntl(Register dst, Register src) { void Assembler::etzcntl(Register dst, Register src, bool no_flags) { assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF4, (0xC0 | encode)); } @@ -7193,7 +7135,7 @@ void Assembler::etzcntl(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xF4); emit_operand(dst, src, 0); } @@ -7208,7 +7150,7 @@ void Assembler::tzcntq(Register dst, Register src) { void Assembler::etzcntq(Register dst, Register src, bool no_flags) { assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF4, (0xC0 | encode)); } @@ -7226,7 +7168,7 @@ void Assembler::etzcntq(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xF4); emit_operand(dst, src, 0); } @@ 
-7350,7 +7292,7 @@ void Assembler::exorl(Register dst, Address src, int32_t imm32, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_operand(0x81, as_Register(6), src, imm32); } @@ -7360,9 +7302,7 @@ void Assembler::xorl(Register dst, int32_t imm32) { } void Assembler::exorl(Register dst, Register src, int32_t imm32, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_arith_ndd(0x81, 0xF0, src, imm32); + emit_eevex_prefix_or_demote_arith_ndd(dst, src, imm32, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x81, 0xF0, no_flags); } void Assembler::xorl(Register dst, Address src) { @@ -7374,11 +7314,7 @@ void Assembler::xorl(Register dst, Address src) { void Assembler::exorl(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x33); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x33, no_flags); } void Assembler::xorl(Register dst, Register src) { @@ -7390,7 +7326,7 @@ void Assembler::exorl(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. 
- (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith(0x33, 0xC0, src1, src2); } @@ -7405,7 +7341,7 @@ void Assembler::exorl(Register dst, Address src1, Register src2, bool no_flags) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x31); emit_operand(src2, src1, 0); } @@ -7419,11 +7355,7 @@ void Assembler::xorb(Register dst, Address src) { void Assembler::exorb(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x32); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_8bit, 0x32, no_flags); } void Assembler::xorb(Address dst, Register src) { @@ -7437,7 +7369,7 @@ void Assembler::exorb(Register dst, Address src1, Register src2, bool no_flags) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x30); emit_operand(src2, src1, 0); } @@ -7452,13 +7384,7 @@ void Assembler::xorw(Register dst, Address src) { void Assembler::exorw(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_16bit); - // NDD shares its encoding bits with NDS bits for regular EVEX instruction. - // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. 
- evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_66, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x33); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_66, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_16bit, 0x33, no_flags); } // AVX 3-operands scalar float-point arithmetic instructions @@ -8065,6 +7991,14 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector emit_operand(dst, src, 0); } +void Assembler::orpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_rex_vex_w_reverted(); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x56, (0xC0 | encode)); +} + void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) { InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); @@ -8186,6 +8120,7 @@ void Assembler::paddd(XMMRegister dst, XMMRegister src) { void Assembler::paddd(XMMRegister dst, Address src) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xFE); emit_operand(dst, src, 0); @@ -8359,7 +8294,7 @@ void Assembler::vpaddsb(XMMRegister dst, XMMRegister nds, Address src, int vecto assert(!needs_evex(dst, nds) || VM_Version::supports_avx512bw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xEC); emit_operand(dst, src, 0); @@ -8378,7 +8313,7 @@ void Assembler::vpaddsw(XMMRegister dst, XMMRegister nds, Address src, int vecto assert(!needs_evex(dst, nds) || VM_Version::supports_avx512bw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xED); emit_operand(dst, src, 0); @@ -8397,7 +8332,7 @@ void Assembler::vpaddusb(XMMRegister dst, XMMRegister nds, Address src, int vect assert(!needs_evex(dst, nds) || VM_Version::supports_avx512bw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ 
EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xDC); emit_operand(dst, src, 0); @@ -8417,7 +8352,7 @@ void Assembler::vpaddusw(XMMRegister dst, XMMRegister nds, Address src, int vect assert(!needs_evex(dst, nds) || VM_Version::supports_avx512bw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xDD); emit_operand(dst, src, 0); @@ -8437,7 +8372,7 @@ void Assembler::vpsubsb(XMMRegister dst, XMMRegister nds, Address src, int vecto assert(!needs_evex(dst, nds) || VM_Version::supports_avx512bw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xE8); emit_operand(dst, src, 0); @@ -8456,7 +8391,7 @@ void Assembler::vpsubsw(XMMRegister dst, XMMRegister nds, Address src, int vecto assert(!needs_evex(dst, nds) || VM_Version::supports_avx512bw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xE9); emit_operand(dst, src, 0); @@ -8475,7 +8410,7 @@ void Assembler::vpsubusb(XMMRegister dst, XMMRegister nds, Address src, int vect assert(!needs_evex(dst, nds) || VM_Version::supports_avx512bw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xD8); emit_operand(dst, src, 0); @@ -8494,7 +8429,7 @@ void Assembler::vpsubusw(XMMRegister dst, XMMRegister nds, Address src, int vect assert(!needs_evex(dst, nds) || VM_Version::supports_avx512bw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), 
VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xD9); emit_operand(dst, src, 0); @@ -8862,7 +8797,7 @@ void Assembler::vpminub(XMMRegister dst, XMMRegister nds, Address src, int vecto assert(!needs_evex(dst, nds) || VM_Version::supports_avx512bw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xDA); emit_operand(dst, src, 0); @@ -8885,7 +8820,7 @@ void Assembler::evpminub(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { attributes.reset_is_clear_context(); @@ -8908,7 +8843,7 @@ void Assembler::vpminuw(XMMRegister dst, XMMRegister nds, Address src, int vecto assert(!needs_evex(dst, nds) || VM_Version::supports_avx512bw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0x3A); emit_operand(dst, src, 0); @@ -8931,7 +8866,7 @@ void Assembler::evpminuw(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { attributes.reset_is_clear_context(); @@ -9031,7 +8966,7 @@ void Assembler::vpmaxub(XMMRegister dst, XMMRegister nds, Address src, int vecto assert(UseAVX > 0 && (vector_len == Assembler::AVX_512bit || (!needs_evex(dst, nds) || VM_Version::supports_avx512vl())), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xDE); emit_operand(dst, src, 0); @@ -9054,7 +8989,7 @@ void Assembler::evpmaxub(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionMark 
im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { attributes.reset_is_clear_context(); @@ -9079,7 +9014,7 @@ void Assembler::vpmaxuw(XMMRegister dst, XMMRegister nds, Address src, int vecto assert(UseAVX > 0 && (vector_len == Assembler::AVX_512bit || (!needs_evex(dst, nds) || VM_Version::supports_avx512vl())), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0x3E); emit_operand(dst, src, 0); @@ -9102,7 +9037,7 @@ void Assembler::evpmaxuw(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { attributes.reset_is_clear_context(); @@ -9205,7 +9140,8 @@ void Assembler::pslld(XMMRegister dst, int shift) { } void Assembler::psllq(XMMRegister dst, int shift) { - InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_rex_vex_w_reverted(); // XMM6 is for /6 encoding: 66 0F 73 /6 ib int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int24(0x73, (0xC0 | encode), shift & 0xFF); @@ -10547,7 +10483,7 @@ void Assembler::evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, Addres InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -10574,7 +10510,7 @@ void Assembler::evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, Addres InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* 
tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -10717,7 +10653,7 @@ void Assembler::evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, Addres InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -10744,7 +10680,7 @@ void Assembler::evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, Addres InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -10887,7 +10823,7 @@ void Assembler::evpaddsb(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -10914,7 +10850,7 @@ void Assembler::evpaddsw(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -10941,7 +10877,7 @@ void Assembler::evpaddusb(XMMRegister dst, KRegister mask, XMMRegister nds, Addr InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); 
attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -10968,7 +10904,7 @@ void Assembler::evpaddusw(XMMRegister dst, KRegister mask, XMMRegister nds, Addr InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -10995,7 +10931,7 @@ void Assembler::evpsubsb(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -11022,7 +10958,7 @@ void Assembler::evpsubsw(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -11049,7 +10985,7 @@ void Assembler::evpsubusb(XMMRegister dst, KRegister mask, XMMRegister nds, Addr InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -11077,7 +11013,7 @@ void Assembler::evpsubusw(XMMRegister dst, KRegister mask, XMMRegister nds, Addr InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -11104,7 +11040,7 @@ void Assembler::evpmullw(XMMRegister dst, 
KRegister mask, XMMRegister nds, Addre InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -11370,7 +11306,6 @@ void Assembler::evdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src, EvexR void Assembler::evpabsb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -11385,7 +11320,7 @@ void Assembler::evpabsb(XMMRegister dst, KRegister mask, Address src, bool merge InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -11399,7 +11334,6 @@ void Assembler::evpabsb(XMMRegister dst, KRegister mask, Address src, bool merge void Assembler::evpabsw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -11414,7 +11348,7 @@ void Assembler::evpabsw(XMMRegister dst, KRegister mask, Address src, bool merge InstructionMark im(this); assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -11534,7 +11468,7 @@ void Assembler::evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, Ad assert(VM_Version::supports_evex(), ""); assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode 
*/ false, /* no_mask_reg */ false,/* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { @@ -11563,6 +11497,7 @@ void Assembler::evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, Addres InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); if (merge) { attributes.reset_is_clear_context(); } @@ -11589,6 +11524,7 @@ void Assembler::evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, Addres InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); if (merge) { attributes.reset_is_clear_context(); } @@ -11615,6 +11551,7 @@ void Assembler::evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, Addres InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); if (merge) { attributes.reset_is_clear_context(); } @@ -11641,6 +11578,7 @@ void Assembler::evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, Addres InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); if (merge) { attributes.reset_is_clear_context(); } @@ -12009,6 +11947,7 @@ void Assembler::evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); if (merge) { attributes.reset_is_clear_context(); } @@ -12034,6 +11973,7 @@ void Assembler::evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { attributes.reset_is_clear_context(); @@ -12063,6 +12003,7 @@ void Assembler::evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); 
attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); if (merge) { attributes.reset_is_clear_context(); } @@ -12091,6 +12032,7 @@ void Assembler::evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); if (merge) { attributes.reset_is_clear_context(); } @@ -12118,6 +12060,7 @@ void Assembler::evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); if (merge) { attributes.reset_is_clear_context(); } @@ -12144,6 +12087,7 @@ void Assembler::evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); if (merge) { attributes.reset_is_clear_context(); } @@ -12172,6 +12116,7 @@ void Assembler::evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); if (merge) { attributes.reset_is_clear_context(); } @@ -12199,6 +12144,7 @@ void Assembler::evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); attributes.set_embedded_opmask_register_specifier(mask); if (merge) { attributes.reset_is_clear_context(); @@ -12569,7 +12515,7 @@ void Assembler::evpgatherdq(XMMRegister dst, KRegister mask, Address src, int ve assert(mask != k0, "instruction will #UD if mask is in k0"); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); @@ -12587,7 +12533,7 @@ void Assembler::evgatherdpd(XMMRegister dst, KRegister mask, Address src, int ve assert(mask != k0, "instruction will #UD if mask is in k0"); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* 
legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); @@ -12634,7 +12580,7 @@ void Assembler::evpscatterdq(Address dst, KRegister mask, XMMRegister src, int v assert(mask != k0, "instruction will #UD if mask is in k0"); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); @@ -12662,7 +12608,7 @@ void Assembler::evscatterdpd(Address dst, KRegister mask, XMMRegister src, int v assert(mask != k0, "instruction will #UD if mask is in k0"); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); @@ -12940,12 +12886,35 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix } } -void Assembler::evex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags) { +void Assembler::eevex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags) { attributes->set_is_evex_instruction(); vex_prefix(adr, ndd_enc, xreg_enc, pre, opc, attributes, /* nds_is_ndd */ true, no_flags); } -void Assembler::evex_prefix_nf(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags) { +void Assembler::emit_eevex_or_demote(Register dst, Register src1, Address src2, VexSimdPrefix pre, VexOpcode opc, + int size, int opcode_byte, bool no_flags, bool is_map1) { + if (is_demotable(no_flags, dst->encoding(), src1->encoding())) { + if (size == EVEX_64bit) { + emit_prefix_and_int8(get_prefixq(src2, dst, is_map1), opcode_byte); + } else { + // For 32-bit, 16-bit and 8-bit + if (size == EVEX_16bit) { + emit_int8(0x66); + } + prefix(src2, dst, false, is_map1); + emit_int8(opcode_byte); + } + } else { + bool vex_w = (size == EVEX_64bit) ? 
true : false;
+ InstructionAttr attributes(AVX_128bit, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, size);
+ eevex_prefix_ndd(src2, dst->encoding(), src1->encoding(), pre, opc, &attributes, no_flags);
+ emit_int8(opcode_byte);
+ }
+ emit_operand(src1, src2, 0);
+}
+
+void Assembler::eevex_prefix_nf(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags) {
attributes->set_is_evex_instruction();
vex_prefix(adr, ndd_enc, xreg_enc, pre, opc, attributes, /* nds_is_ndd */ false, no_flags);
}
@@ -13007,18 +12976,98 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
return (((dst_enc & 7) << 3) | (src_enc & 7));
}
-int Assembler::evex_prefix_and_encode_ndd(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
- InstructionAttr *attributes, bool no_flags) {
+void Assembler::emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, int8_t imm8, VexSimdPrefix pre, VexOpcode opc,
+ int size, int opcode_byte, bool no_flags, bool is_map1) {
+ bool is_prefixq = (size == EVEX_64bit) ? true : false;
+ if (is_demotable(no_flags, dst_enc, nds_enc)) {
+ int encode = is_prefixq ? prefixq_and_encode(src_enc, dst_enc, is_map1) : prefix_and_encode(src_enc, dst_enc, is_map1);
+ emit_opcode_prefix_and_encoding((unsigned char)(opcode_byte | 0x80), 0xC0, encode, imm8);
+ } else {
+ InstructionAttr attributes(AVX_128bit, is_prefixq, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, size);
+ int encode = emit_eevex_prefix_or_demote_ndd(src_enc, dst_enc, nds_enc, pre, opc, &attributes, no_flags);
+ emit_int24(opcode_byte, (0xC0 | encode), imm8);
+ }
+}
+
+void Assembler::emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
+ int size, int opcode_byte, bool no_flags, bool is_map1, bool swap) {
+ int encode;
+ bool is_prefixq = (size == EVEX_64bit) ? true : false;
+ if (is_demotable(no_flags, dst_enc, nds_enc)) {
+ if (size == EVEX_16bit) {
+ emit_int8(0x66);
+ }
+
+ if (swap) {
+ encode = is_prefixq ? prefixq_and_encode(dst_enc, src_enc, is_map1) : prefix_and_encode(dst_enc, src_enc, is_map1);
+ } else {
+ encode = is_prefixq ? prefixq_and_encode(src_enc, dst_enc, is_map1) : prefix_and_encode(src_enc, dst_enc, is_map1);
+ }
+ emit_opcode_prefix_and_encoding((unsigned char)opcode_byte, 0xC0, encode);
+ } else {
+ InstructionAttr attributes(AVX_128bit, is_prefixq, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_is_evex_instruction();
+ if (swap) {
+ encode = vex_prefix_and_encode(nds_enc, dst_enc, src_enc, pre, opc, &attributes, /* src_is_gpr */ true, /* nds_is_ndd */ true, no_flags);
+ } else {
+ encode = vex_prefix_and_encode(src_enc, dst_enc, nds_enc, pre, opc, &attributes, /* src_is_gpr */ true, /* nds_is_ndd */ true, no_flags);
+ }
+ emit_int16(opcode_byte, (0xC0 | encode));
+ }
+}
+
+int Assembler::emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
+ InstructionAttr *attributes, bool no_flags, bool use_prefixq) {
+ if (is_demotable(no_flags, dst_enc, nds_enc)) {
+ if (pre == VEX_SIMD_66) {
+ emit_int8(0x66);
+ }
+ return use_prefixq ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
+ }
attributes->set_is_evex_instruction();
return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes, /* src_is_gpr */ true, /* nds_is_ndd */ true, no_flags);
}
-int Assembler::evex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
- InstructionAttr *attributes, bool no_flags) {
+int Assembler::emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, VexSimdPrefix pre, VexOpcode opc,
+ InstructionAttr *attributes, bool no_flags, bool use_prefixq) {
+ //Demote RegReg and RegRegImm instructions
+ if (is_demotable(no_flags, dst_enc, nds_enc)) {
+ return use_prefixq ? prefixq_and_encode(dst_enc) : prefix_and_encode(dst_enc);
+ }
+ attributes->set_is_evex_instruction();
+ return vex_prefix_and_encode(0, dst_enc, nds_enc, pre, opc, attributes, /* src_is_gpr */ true, /* nds_is_ndd */ true, no_flags);
+}
+
+int Assembler::emit_eevex_prefix_ndd(int dst_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags) {
+ attributes->set_is_evex_instruction();
+ return vex_prefix_and_encode(0, 0, dst_enc, pre, opc, attributes, /* src_is_gpr */ true, /* nds_is_ndd */ true, no_flags);
+}
+
+int Assembler::eevex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
+ InstructionAttr *attributes, bool no_flags) {
attributes->set_is_evex_instruction();
return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes, /* src_is_gpr */ true, /* nds_is_ndd */ false, no_flags);
}
+void Assembler::emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds, int32_t imm32, VexSimdPrefix pre, VexOpcode opc,
+ int size, int op1, int op2, bool no_flags) {
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->encoding();
+ bool demote = is_demotable(no_flags, dst_enc, nds_enc);
+ if (demote) {
+ (size == EVEX_64bit) ? (void) prefixq_and_encode(dst_enc) : (void) prefix_and_encode(dst_enc);
+ } else {
+ bool vex_w = (size == EVEX_64bit) ?
true : false; + InstructionAttr attributes(AVX_128bit, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + //attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, size); + attributes.set_is_evex_instruction(); + vex_prefix_and_encode(0, dst_enc, nds_enc, pre, opc, &attributes, /* src_is_gpr */ true, /* nds_is_ndd */ true, no_flags); + + } + emit_arith(op1, op2, nds, imm32, demote); +} + void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) { if (UseAVX > 0) { @@ -13044,6 +13093,10 @@ int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegis } } +bool Assembler::is_demotable(bool no_flags, int dst_enc, int nds_enc) { + return (!no_flags && dst_enc == nds_enc); +} + void Assembler::vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -14478,7 +14531,7 @@ void Assembler::eaddq(Register dst, Address src, int32_t imm32, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_operand(0x81, rax, src, imm32); } @@ -14492,7 +14545,7 @@ void Assembler::eaddq(Register dst, Address src1, Register src2, bool no_flags) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x01); emit_operand(src2, src1, 0); } @@ -14503,9 +14556,7 @@ void Assembler::addq(Register dst, int32_t imm32) { } void Assembler::eaddq(Register dst, Register src, int32_t imm32, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_arith_ndd(0x81, 0xC0, src, imm32); + emit_eevex_prefix_or_demote_arith_ndd(dst, src, imm32, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x81, 0xC0, no_flags); } void Assembler::addq(Register dst, Address src) { @@ -14516,11 +14567,7 @@ void Assembler::addq(Register dst, Address src) { void Assembler::eaddq(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x03); - 
emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x03, no_flags); } void Assembler::addq(Register dst, Register src) { @@ -14532,7 +14579,7 @@ void Assembler::eaddq(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_arith(0x03, 0xC0, src1, src2); } @@ -14540,7 +14587,7 @@ void Assembler::adcxq(Register dst, Register src) { //assert(VM_Version::supports_adx(), "adx instructions not supported"); if (needs_rex2(dst, src)) { InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3C, &attributes, true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, true); emit_int16((unsigned char)0x66, (0xC0 | encode)); } else { emit_int8(0x66); @@ -14553,16 +14600,19 @@ void Assembler::adcxq(Register dst, Register src) { } void Assembler::eadcxq(Register dst, Register src1, Register src2) { - InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3C, &attributes); - emit_int16((unsigned char)0x66, (0xC0 | encode)); + if (is_demotable(false, dst->encoding(), src1->encoding())) { + return adcxq(dst, src2); + } + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, false /* no_flags */, true /* use_prefixq */); + emit_int16((unsigned char)0x66, (0xC0 | encode)); } void Assembler::adoxq(Register dst, Register src) { //assert(VM_Version::supports_adx(), "adx instructions not supported"); if (needs_rex2(dst, src)) { InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_3C, &attributes, true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, true); emit_int16((unsigned char)0x66, (0xC0 | encode)); } else { emit_int8((unsigned char)0xF3); @@ -14575,9 +14625,12 @@ void Assembler::adoxq(Register dst, Register src) { } void Assembler::eadoxq(Register dst, Register src1, Register src2) { - InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(src1->encoding(), 
dst->encoding(), src2->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_3C, &attributes); - emit_int16((unsigned char)0x66, (0xC0 | encode)); + if (is_demotable(false, dst->encoding(), src1->encoding())) { + return adoxq(dst, src2); + } + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, false /* no_flags */, true /* use_prefixq */); + emit_int16((unsigned char)0x66, (0xC0 | encode)); } void Assembler::andq(Address dst, int32_t imm32) { @@ -14590,7 +14643,7 @@ void Assembler::eandq(Register dst, Address src, int32_t imm32, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_operand(0x81, as_Register(4), src, imm32); } @@ -14600,9 +14653,7 @@ void Assembler::andq(Register dst, int32_t imm32) { } void Assembler::eandq(Register dst, Register src, int32_t imm32, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_arith_ndd(0x81, 0xE0, src, imm32); + emit_eevex_prefix_or_demote_arith_ndd(dst, src, imm32, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x81, 0xE0, no_flags); } void Assembler::andq(Register dst, Address src) { @@ -14613,11 +14664,7 @@ void Assembler::andq(Register dst, Address src) { void Assembler::eandq(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x23); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x23, no_flags); } void Assembler::andq(Register dst, Register src) { @@ -14629,7 +14676,7 @@ void Assembler::eandq(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. 
- (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_arith(0x23, 0xC0, src1, src2); } @@ -14643,7 +14690,7 @@ void Assembler::eandq(Register dst, Address src1, Register src2, bool no_flags) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x21); emit_operand(src2, src1, 0); } @@ -14782,9 +14829,7 @@ void Assembler::cmovq(Condition cc, Register dst, Register src) { } void Assembler::ecmovq(Condition cc, Register dst, Register src1, Register src2) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes); - emit_int16((0x40 | cc), (0xC0 | encode)); + emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x40 | cc, false /* no_flags */, true /* is_map1 */, true /* swap */); } void Assembler::cmovq(Condition cc, Register dst, Address src) { @@ -14796,11 +14841,7 @@ void Assembler::cmovq(Condition cc, Register dst, Address src) { void Assembler::ecmovq(Condition cc, Register dst, Register src1, Address src2) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes); - emit_int8((0x40 | cc)); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, (0x40 | cc) , false /* no_flags */, true /* is_map1 */); } void Assembler::cmpq(Address dst, int32_t imm32) { @@ -14847,7 +14888,7 @@ void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_operand(dst, src, 0); @@ -14856,7 +14897,7 @@ void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ 
EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_operand(dst, src, 0); @@ -14899,7 +14940,7 @@ void Assembler::decl(Register dst) { void Assembler::edecl(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xFF, (0xC8 | encode)); } @@ -14912,7 +14953,7 @@ void Assembler::decq(Register dst) { void Assembler::edecq(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_int16((unsigned char)0xFF, (0xC8 | encode)); } @@ -14927,7 +14968,7 @@ void Assembler::edecq(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xFF); emit_operand(rcx, src, 0); } @@ -14967,7 +15008,7 @@ void Assembler::idivq(Register src) { void Assembler::eidivq(Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF7, (0xF8 | encode)); } @@ -14978,7 +15019,7 @@ void Assembler::divq(Register src) { void Assembler::edivq(Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF7, (0xF0 | encode)); } @@ -14988,15 +15029,16 @@ void Assembler::imulq(Register dst, Register src) { } void Assembler::eimulq(Register dst, Register src, bool no_flags) { + if (is_demotable(no_flags, dst->encoding(), src->encoding())) { + return imulq(dst); + } InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ 
false); - int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xAF, (0xC0 | encode)); } void Assembler::eimulq(Register dst, Register src1, Register src2, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int16((unsigned char)0xAF, (0xC0 | encode)); + emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0xAF, no_flags, true /* is_map1 */, true /* swap */); } void Assembler::imulq(Register src) { @@ -15006,7 +15048,7 @@ void Assembler::imulq(Register src) { void Assembler::eimulq(Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF7, (0xE8 | encode)); } @@ -15028,7 +15070,7 @@ void Assembler::eimulq(Register dst, Address src, int32_t value, bool no_flags) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (is8bit(value)) { emit_int8((unsigned char)0x6B); emit_operand(dst, src, 1); @@ -15052,7 +15094,7 @@ void Assembler::imulq(Register dst, Register src, int value) { void Assembler::eimulq(Register dst, Register src, int value, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (is8bit(value)) { emit_int24(0x6B, (0xC0 | encode), (value & 0xFF)); } else { @@ -15072,7 +15114,7 @@ void Assembler::eimulq(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xAF); emit_operand(dst, src, 0); @@ -15080,11 +15122,7 @@ void Assembler::eimulq(Register dst, Address 
src, bool no_flags) { void Assembler::eimulq(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8((unsigned char)0xAF); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, (unsigned char)0xAF, no_flags, true /* is_map1 */); } void Assembler::incl(Register dst) { @@ -15098,8 +15136,7 @@ void Assembler::eincl(Register dst, Register src, bool no_flags) { // Don't use it directly. Use MacroAssembler::incrementl() instead. // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - // int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xFF, (0xC0 | encode)); } @@ -15114,7 +15151,7 @@ void Assembler::eincq(Register dst, Register src, bool no_flags) { // Don't use it directly. Use MacroAssembler::incrementq() instead. // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_int16((unsigned char)0xFF, (0xC0 | encode)); } @@ -15130,7 +15167,7 @@ void Assembler::eincq(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char) 0xFF); emit_operand(rax, src, 0); } @@ -15206,7 +15243,7 @@ void Assembler::lzcntq(Register dst, Register src) { void Assembler::elzcntq(Register dst, Register src, bool no_flags) { assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned 
char)0xF5, (0xC0 | encode)); } @@ -15224,7 +15261,7 @@ void Assembler::elzcntq(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xF5); emit_operand(dst, src, 0); } @@ -15351,7 +15388,7 @@ void Assembler::emulq(Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_nf(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0xF7); emit_operand(rsp, src, 0); } @@ -15363,7 +15400,7 @@ void Assembler::mulq(Register src) { void Assembler::emulq(Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0xF7, (0xE0 | encode)); } @@ -15381,7 +15418,7 @@ void Assembler::negq(Register dst) { void Assembler::enegq(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_int16((unsigned char)0xF7, (0xD8 | encode)); } @@ -15395,7 +15432,7 @@ void Assembler::enegq(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xF7); emit_operand(as_Register(3), src, 0); } @@ -15407,7 +15444,7 @@ void Assembler::notq(Register dst) { void Assembler::enotq(Register dst, Register src) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, false /* no_flags */, true /* use_prefixq 
*/); emit_int16((unsigned char)0xF7, (0xD0 | encode)); } @@ -15451,7 +15488,7 @@ void Assembler::eorq(Register dst, Address src, int32_t imm32, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_operand(0x81, as_Register(1), src, imm32); } @@ -15465,7 +15502,7 @@ void Assembler::eorq(Register dst, Address src1, Register src2, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x09); emit_operand(src2, src1, 0); } @@ -15476,9 +15513,7 @@ void Assembler::orq(Register dst, int32_t imm32) { } void Assembler::eorq(Register dst, Register src, int32_t imm32, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_arith_ndd(0x81, 0xC8, src, imm32); + emit_eevex_prefix_or_demote_arith_ndd(dst, src, imm32, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x81, 0xC8, no_flags); } void Assembler::orq_imm32(Register dst, int32_t imm32) { @@ -15488,7 +15523,7 @@ void Assembler::orq_imm32(Register dst, int32_t imm32) { void Assembler::eorq_imm32(Register dst, Register src, int32_t imm32, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_arith_imm32(0x81, 0xC8, src, imm32); } @@ -15500,11 +15535,7 @@ void Assembler::orq(Register dst, Address src) { void Assembler::eorq(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x0B); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x0B, no_flags); } void Assembler::orq(Register dst, Register src) { @@ -15516,7 +15547,7 @@ void Assembler::eorq(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ 
false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_arith(0x0B, 0xC0, src1, src2); } @@ -15533,7 +15564,7 @@ void Assembler::epopcntq(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_nf(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char) 0x88); emit_operand(dst, src, 0); } @@ -15548,7 +15579,7 @@ void Assembler::popcntq(Register dst, Register src) { void Assembler::epopcntq(Register dst, Register src, bool no_flags) { assert(VM_Version::supports_popcnt(), "must support"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = eevex_prefix_and_encode_nf(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int16((unsigned char)0x88, (0xC0 | encode)); } @@ -15773,7 +15804,7 @@ void Assembler::rclq(Register dst, int imm8) { void Assembler::erclq(Register dst, Register src, int imm8) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, false /* no_flags */, true /* use_prefixq */); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xD0 | encode)); } else { @@ -15794,7 +15825,7 @@ void Assembler::rcrq(Register dst, int imm8) { void Assembler::ercrq(Register dst, Register src, int imm8) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, false /* no_flags */, true /* use_prefixq */); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xD8 | encode)); } else { @@ -15857,7 +15888,7 @@ void Assembler::esalq(Register dst, Address src, int imm8, bool no_flags) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg 
*/ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int8((unsigned char)0xD1); emit_operand(as_Register(4), src, 0); @@ -15879,7 +15910,7 @@ void Assembler::esalq(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xD3); emit_operand(as_Register(4), src, 0); } @@ -15897,7 +15928,7 @@ void Assembler::salq(Register dst, int imm8) { void Assembler::esalq(Register dst, Register src, int imm8, bool no_flags) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xE0 | encode)); } else { @@ -15912,7 +15943,7 @@ void Assembler::salq(Register dst) { void Assembler::esalq(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_int16((unsigned char)0xD3, (0xE0 | encode)); } @@ -15935,7 +15966,7 @@ void Assembler::esarq(Register dst, Address src, int imm8, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int8((unsigned char)0xD1); emit_operand(as_Register(7), src, 0); @@ -15957,7 +15988,7 @@ void Assembler::esarq(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, 
VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xD3); emit_operand(as_Register(7), src, 0); } @@ -15974,7 +16005,7 @@ void Assembler::sarq(Register dst, int imm8) { void Assembler::esarq(Register dst, Register src, int imm8, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xF8 | encode)); } else { @@ -15989,7 +16020,7 @@ void Assembler::sarq(Register dst) { void Assembler::esarq(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_int16((unsigned char)0xD3, (0xF8 | encode)); } @@ -16028,7 +16059,7 @@ void Assembler::shlq(Register dst, int imm8) { void Assembler::eshlq(Register dst, Register src, int imm8, bool no_flags) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); if (imm8 == 1 ) { emit_int16((unsigned char)0xD1, (0xE0 | encode)); } else { @@ -16043,7 +16074,7 @@ void Assembler::shlq(Register dst) { void Assembler::eshlq(Register dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_int16((unsigned char)0xD3, (0xE0 | encode)); } @@ -16061,7 +16092,7 @@ void Assembler::shrq(Register dst, int imm8) { void Assembler::eshrq(Register dst, Register src, int imm8, bool no_flags) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); if (imm8 == 1) { emit_int16((unsigned char)0xD1, (0xE8 | encode)); } @@ -16077,7 +16108,7 @@ void Assembler::shrq(Register dst) { void Assembler::eshrq(Register 
dst, Register src, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + int encode = emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_int16((unsigned char)0xD3, (0xE8 | encode)); } @@ -16091,7 +16122,7 @@ void Assembler::eshrq(Register dst, Address src, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8((unsigned char)0xD3); emit_operand(as_Register(5), src, 0); } @@ -16115,7 +16146,7 @@ void Assembler::eshrq(Register dst, Address src, int imm8, bool no_flags) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); if (imm8 == 1) { emit_int8((unsigned char)0xD1); emit_operand(as_Register(5), src, 0); @@ -16137,7 +16168,7 @@ void Assembler::esubq(Register dst, Address src, int32_t imm32, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_operand(0x81, rbp, src, imm32); } @@ -16151,7 +16182,7 @@ void Assembler::esubq(Register dst, Address src1, Register src2, bool no_flags) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x29); emit_operand(src2, src1, 0); } @@ -16162,9 +16193,7 @@ void Assembler::subq(Register dst, int32_t imm32) { } void Assembler::esubq(Register dst, Register src, int32_t imm32, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - 
emit_arith_ndd(0x81, 0xE8, src, imm32); + emit_eevex_prefix_or_demote_arith_ndd(dst, src, imm32, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x81, 0xE8, no_flags); } // Force generation of a 4 byte immediate value even if it fits into 8bit @@ -16175,7 +16204,7 @@ void Assembler::subq_imm32(Register dst, int32_t imm32) { void Assembler::esubq_imm32(Register dst, Register src, int32_t imm32, bool no_flags) { InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_arith_imm32(0x81, 0xE8, src, imm32); } @@ -16187,11 +16216,7 @@ void Assembler::subq(Register dst, Address src) { void Assembler::esubq(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x2B); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x2B, no_flags); } void Assembler::subq(Register dst, Register src) { @@ -16203,7 +16228,7 @@ void Assembler::esubq(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. - (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_arith(0x2B, 0xC0, src1, src2); } @@ -16268,7 +16293,7 @@ void Assembler::exorq(Register dst, Register src1, Register src2, bool no_flags) InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // NDD shares its encoding bits with NDS bits for regular EVEX instruction. // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine. 
- (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); + (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */); emit_arith(0x33, 0xC0, src1, src2); } @@ -16280,11 +16305,7 @@ void Assembler::xorq(Register dst, Address src) { void Assembler::exorq(Register dst, Register src1, Address src2, bool no_flags) { InstructionMark im(this); - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x33); - emit_operand(src1, src2, 0); + emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x33, no_flags); } void Assembler::xorq(Register dst, int32_t imm32) { @@ -16293,9 +16314,7 @@ void Assembler::xorq(Register dst, int32_t imm32) { } void Assembler::exorq(Register dst, Register src, int32_t imm32, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_arith_ndd(0x81, 0xF0, src, imm32); + emit_eevex_prefix_or_demote_arith_ndd(dst, src, imm32, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x81, 0xF0, no_flags); } void Assembler::xorq(Address dst, int32_t imm32) { @@ -16308,7 +16327,7 @@ void Assembler::exorq(Register dst, Address src, int32_t imm32, bool no_flags) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src, dst->encoding(), 0, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_arith_operand(0x81, as_Register(6), src, imm32); } @@ -16323,7 +16342,7 @@ void Assembler::esetzucc(Condition cc, Register dst) { assert(0 <= cc && cc < 16, "illegal cc"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // Encoding Format : eevex_prefix (4 bytes) | opcode_cc | modrm - int encode = evex_prefix_and_encode_ndd(0, 0, dst->encoding(), VEX_SIMD_F2, /* MAP4 */VEX_OPCODE_0F_3C, &attributes); + int encode = emit_eevex_prefix_ndd(dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3C /* MAP4 */, &attributes); // demotion disabled emit_opcode_prefix_and_encoding((0x40 | cc), 0xC0, encode); } @@ -16331,7 +16350,7 @@ void Assembler::exorq(Register dst, Address src1, Register src2, bool no_flags) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit); - evex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); + eevex_prefix_ndd(src1, dst->encoding(), 
src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags); emit_int8(0x31); emit_operand(src2, src1, 0); } @@ -16438,7 +16457,7 @@ void Assembler::evaddph(XMMRegister dst, XMMRegister nds, Address src, int vecto InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); - attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes); emit_int8(0x58); emit_operand(dst, src, 0); @@ -16459,7 +16478,7 @@ void Assembler::evsubph(XMMRegister dst, XMMRegister nds, Address src, int vecto InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); - attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes); emit_int8(0x5C); emit_operand(dst, src, 0); @@ -16480,7 +16499,7 @@ void Assembler::evmulph(XMMRegister dst, XMMRegister nds, Address src, int vecto InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); - attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes); emit_int8(0x59); emit_operand(dst, src, 0); @@ -16501,7 +16520,7 @@ void Assembler::evminph(XMMRegister dst, XMMRegister nds, Address src, int vecto InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); - attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes); emit_int8(0x5D); emit_operand(dst, src, 0); @@ -16522,7 +16541,7 @@ void Assembler::evmaxph(XMMRegister dst, XMMRegister nds, Address src, int vecto InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); - attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes); emit_int8(0x5F); emit_operand(dst, src, 0); @@ -16543,7 +16562,7 @@ void Assembler::evdivph(XMMRegister dst, XMMRegister nds, Address src, int vecto InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg 
*/ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); - attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes); emit_int8(0x5E); emit_operand(dst, src, 0); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 6395be02f27..b1959e23722 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -772,25 +772,42 @@ class Assembler : public AbstractAssembler { void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_v, bool evex_r, bool evex_b, bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool no_flags = false); - void evex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, - InstructionAttr *attributes, bool no_flags = false); + void eevex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, + InstructionAttr *attributes, bool no_flags = false); - void evex_prefix_nf(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, - InstructionAttr *attributes, bool no_flags = false); + void eevex_prefix_nf(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, + InstructionAttr *attributes, bool no_flags = false); void vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool nds_is_ndd = false, bool no_flags = false); - int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, + int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool src_is_gpr = false, bool nds_is_ndd = false, bool no_flags = false); - int evex_prefix_and_encode_ndd(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, - InstructionAttr *attributes, bool no_flags = false); - - int evex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, + int eevex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags = false); + int emit_eevex_prefix_ndd(int dst_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags = false); + + int emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, + InstructionAttr *attributes, bool no_flags = false, bool use_prefixq = false); + + int emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, VexSimdPrefix pre, VexOpcode opc, + InstructionAttr *attributes, bool no_flags = false, bool use_prefixq = false); + + void emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds, int32_t imm32, VexSimdPrefix pre, VexOpcode opc, + int size, int op1, int op2, bool no_flags); + + void emit_eevex_or_demote(Register dst, Register src1, Address src2, VexSimdPrefix pre, VexOpcode opc, + int size, int opcode_byte, bool no_flags = false, bool is_map1 = false); + + void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, + int size, int opcode_byte, bool no_flags, bool is_map1 = false, bool swap = false); + + void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, int8_t imm8, VexSimdPrefix pre, VexOpcode opc, + int size, int opcode_byte, bool no_flags, bool 
is_map1 = false); + void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes); @@ -798,10 +815,10 @@ class Assembler : public AbstractAssembler { VexOpcode opc, InstructionAttr *attributes, bool src_is_gpr = false); // Helper functions for groups of instructions + bool is_demotable(bool no_flags, int dst_enc, int nds_enc); void emit_arith_b(int op1, int op2, Register dst, int imm8); - void emit_arith(int op1, int op2, Register dst, int32_t imm32); - void emit_arith_ndd(int op1, int op2, Register dst, int32_t imm32); + void emit_arith(int op1, int op2, Register dst, int32_t imm32, bool optimize_rax_dst = true); // Force generation of a 4 byte immediate value even if it fits into 8bit void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32); void emit_arith(int op1, int op2, Register dst, Register src); @@ -950,6 +967,7 @@ class Assembler : public AbstractAssembler { // New cpus require use of movaps and movapd to avoid partial register stall // when moving between registers. void movaps(XMMRegister dst, XMMRegister src); + void movapd(XMMRegister dst, Address src); void movapd(XMMRegister dst, XMMRegister src); // End avoid using directly @@ -1232,6 +1250,9 @@ class Assembler : public AbstractAssembler { // Identify processor type and features void cpuid(); + // Serialize instruction stream + void serialize(); + // CRC32C void crc32(Register crc, Register v, int8_t sizeInBytes); void crc32(Register crc, Address adr, int8_t sizeInBytes); @@ -2447,6 +2468,9 @@ class Assembler : public AbstractAssembler { void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + // Bitwise Logical OR of Packed Floating-Point Values + void orpd(XMMRegister dst, XMMRegister src); + void unpckhpd(XMMRegister dst, XMMRegister src); void unpcklpd(XMMRegister dst, XMMRegister src); diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 73262b21365..7c0d3ff624d 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -68,7 +68,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); return; } @@ -88,7 +88,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ call(RuntimeAddress(Runtime1::entry_for(stub_id))); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { @@ -101,7 +101,7 @@ void PredicateFailedStub::emit_code(LIR_Assembler* ce) { __ call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } void DivByZeroStub::emit_code(LIR_Assembler* ce) { @@ -111,7 +111,7 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::throw_div0_exception_id))); ce->add_call_info_here(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } @@ -399,7 +399,7 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { __ call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - debug_only(__ 
should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } @@ -413,7 +413,7 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { } __ call(RuntimeAddress(Runtime1::entry_for(_stub))); ce->add_call_info_here(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 60ce3419dfb..3ea2e99fe57 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -720,7 +720,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || - x->id() == vmIntrinsics::_dlog10 || x->id() == vmIntrinsics::_dtanh + x->id() == vmIntrinsics::_dlog10 || x->id() == vmIntrinsics::_dtanh || + x->id() == vmIntrinsics::_dcbrt ) { do_LibmIntrinsic(x); return; @@ -807,7 +808,7 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { } break; case vmIntrinsics::_dpow: - if (StubRoutines::dpow() != nullptr) { + if (StubRoutines::dpow() != nullptr) { __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); } else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); @@ -828,18 +829,24 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { } break; case vmIntrinsics::_dtan: - if (StubRoutines::dtan() != nullptr) { + if (StubRoutines::dtan() != nullptr) { __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); } else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); } break; case vmIntrinsics::_dtanh: - assert(StubRoutines::dtanh() != nullptr, "tanh intrinsic not found"); - if (StubRoutines::dtanh() != nullptr) { + assert(StubRoutines::dtanh() != nullptr, "tanh intrinsic not found"); + if (StubRoutines::dtanh() != nullptr) { __ call_runtime_leaf(StubRoutines::dtanh(), getThreadTemp(), result_reg, cc->args()); } break; + case vmIntrinsics::_dcbrt: + assert(StubRoutines::dcbrt() != nullptr, "cbrt intrinsic not found"); + if (StubRoutines::dcbrt() != nullptr) { + __ call_runtime_leaf(StubRoutines::dcbrt(), getThreadTemp(), result_reg, cc->args()); + } + break; default: ShouldNotReachHere(); } diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index b8873758b61..684347e35fa 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -55,16 +55,17 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr null_check_offset = offset(); - if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(hdr, obj, rscratch1); - testb(Address(hdr, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class); - jcc(Assembler::notZero, slow_case); - } - if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(disp_hdr, obj, hdr, tmp, slow_case); } else if (LockingMode == LM_LEGACY) { Label done; + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(hdr, obj, rscratch1); + testb(Address(hdr, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class); + jcc(Assembler::notZero, slow_case); + } + // Load object header movptr(hdr, Address(obj, hdr_offset)); // and mark it as unlocked diff --git 
a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index a7967d83a4e..177be6e59f7 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -476,7 +476,7 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist Label slow_path; if (UseObjectMonitorTable) { - // Clear cache in case fast locking succeeds. + // Clear cache in case fast locking succeeds or we need to take the slow-path. movptr(Address(box, BasicLock::object_monitor_cache_offset_in_bytes()), 0); } @@ -787,6 +787,119 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // C2 uses the value of ZF to determine the continuation. } +static void abort_verify_int_in_range(uint idx, jint val, jint lo, jint hi) { + fatal("Invalid CastII, idx: %u, val: %d, lo: %d, hi: %d", idx, val, lo, hi); +} + +static void reconstruct_frame_pointer_helper(MacroAssembler* masm, Register dst) { + const int framesize = Compile::current()->output()->frame_size_in_bytes(); + masm->movptr(dst, rsp); + if (framesize > 2 * wordSize) { + masm->addptr(dst, framesize - 2 * wordSize); + } +} + +void C2_MacroAssembler::reconstruct_frame_pointer(Register rtmp) { + if (PreserveFramePointer) { + // frame pointer is valid +#ifdef ASSERT + // Verify frame pointer value in rbp. + reconstruct_frame_pointer_helper(this, rtmp); + Label L_success; + cmpq(rbp, rtmp); + jccb(Assembler::equal, L_success); + STOP("frame pointer mismatch"); + bind(L_success); +#endif // ASSERT + } else { + reconstruct_frame_pointer_helper(this, rbp); + } +} + +void C2_MacroAssembler::verify_int_in_range(uint idx, const TypeInt* t, Register val) { + jint lo = t->_lo; + jint hi = t->_hi; + assert(lo < hi, "type should not be empty or constant, idx: %u, lo: %d, hi: %d", idx, lo, hi); + if (t == TypeInt::INT) { + return; + } + + BLOCK_COMMENT("CastII {"); + Label fail; + Label succeed; + if (hi == max_jint) { + cmpl(val, lo); + jccb(Assembler::greaterEqual, succeed); + } else { + if (lo != min_jint) { + cmpl(val, lo); + jccb(Assembler::less, fail); + } + cmpl(val, hi); + jccb(Assembler::lessEqual, succeed); + } + + bind(fail); + movl(c_rarg0, idx); + movl(c_rarg1, val); + movl(c_rarg2, lo); + movl(c_rarg3, hi); + reconstruct_frame_pointer(rscratch1); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, abort_verify_int_in_range))); + hlt(); + bind(succeed); + BLOCK_COMMENT("} // CastII"); +} + +static void abort_verify_long_in_range(uint idx, jlong val, jlong lo, jlong hi) { + fatal("Invalid CastLL, idx: %u, val: " JLONG_FORMAT ", lo: " JLONG_FORMAT ", hi: " JLONG_FORMAT, idx, val, lo, hi); +} + +void C2_MacroAssembler::verify_long_in_range(uint idx, const TypeLong* t, Register val, Register tmp) { + jlong lo = t->_lo; + jlong hi = t->_hi; + assert(lo < hi, "type should not be empty or constant, idx: %u, lo: " JLONG_FORMAT ", hi: " JLONG_FORMAT, idx, lo, hi); + if (t == TypeLong::LONG) { + return; + } + + BLOCK_COMMENT("CastLL {"); + Label fail; + Label succeed; + + auto cmp_val = [&](jlong bound) { + if (is_simm32(bound)) { + cmpq(val, checked_cast<int>(bound)); + } else { + mov64(tmp, bound); + cmpq(val, tmp); + } + }; + + if (hi == max_jlong) { + cmp_val(lo); + jccb(Assembler::greaterEqual, succeed); + } else { + if (lo != min_jlong) { + cmp_val(lo); + jccb(Assembler::less, fail); + } + cmp_val(hi); + jccb(Assembler::lessEqual, succeed); + } + + bind(fail); + movl(c_rarg0, idx); + movq(c_rarg1, val); + mov64(c_rarg2, lo); + mov64(c_rarg3, hi); + 
reconstruct_frame_pointer(rscratch1); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, abort_verify_long_in_range))); + hlt(); + bind(succeed); + BLOCK_COMMENT("} // CastLL"); +} + //------------------------------------------------------------------------------------------- // Generic instructions support for use in .ad files C2 code generation @@ -5618,7 +5731,7 @@ void C2_MacroAssembler::vector_compress_expand_avx2(int opcode, XMMRegister dst, // in a permute table row contains either a valid permute index or a -1 (default) // value, this can potentially be used as a blending mask after // compressing/expanding the source vector lanes. - vblendvps(dst, dst, xtmp, permv, vec_enc, false, permv); + vblendvps(dst, dst, xtmp, permv, vec_enc, true, permv); } void C2_MacroAssembler::vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask, diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index dd2880d88c3..713eb73d68f 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -44,6 +44,9 @@ Register t, Register thread); void fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread); + void verify_int_in_range(uint idx, const TypeInt* t, Register val); + void verify_long_in_range(uint idx, const TypeLong* t, Register val, Register tmp); + // Generic instructions support for use in .ad files C2 code generation void vabsnegd(int opcode, XMMRegister dst, XMMRegister src); void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len); @@ -574,4 +577,7 @@ void scalar_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2); + + void reconstruct_frame_pointer(Register rtmp); + #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/compressedKlass_x86.cpp b/src/hotspot/cpu/x86/compressedKlass_x86.cpp index 8a06a7ba3d5..e88b7a3d4e1 100644 --- a/src/hotspot/cpu/x86/compressedKlass_x86.cpp +++ b/src/hotspot/cpu/x86/compressedKlass_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, Red Hat, Inc. All rights reserved. + * Copyright (c) 2023, 2025, Red Hat, Inc. All rights reserved. * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
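For readers unfamiliar with the new CastII/CastLL verification above, the emitted assembly boils down to a bounds check followed by a fatal call. A standalone C++ sketch of the equivalent check (illustrative only, not part of the patch):

// Illustrative sketch only: the semantic check that verify_int_in_range emits for a
// CastII node. The generated code uses cmpl/jcc and calls abort_verify_int_in_range
// (which fatal()s) on failure; bounds equal to the type extremes need no comparison,
// which is why the assembly special-cases hi == max_jint and lo == min_jint.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

static void verify_int_in_range_sketch(unsigned idx, int32_t val, int32_t lo, int32_t hi) {
  bool ok = (lo == INT32_MIN || val >= lo) && (hi == INT32_MAX || val <= hi);
  if (!ok) {
    std::fprintf(stderr, "Invalid CastII, idx: %u, val: %d, lo: %d, hi: %d\n", idx, val, lo, hi);
    std::abort();
  }
}

int main() {
  verify_int_in_range_sketch(1, 42, 0, 100);   // in range: returns normally
  return 0;
}

The long variant follows the same shape, with the added detail that bounds outside the signed 32-bit immediate range are first materialized into a temporary register.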
* @@ -25,6 +25,7 @@ #ifdef _LP64 +#include "memory/metaspace.hpp" #include "oops/compressedKlass.hpp" #include "utilities/globalDefinitions.hpp" @@ -32,15 +33,25 @@ char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size char* result = nullptr; - // Optimize for unscaled encoding; failing that, for zero-based encoding: - if (optimize_for_zero_base) { - result = reserve_address_space_for_unscaled_encoding(size, aslr); - if (result == nullptr) { - result = reserve_address_space_for_zerobased_encoding(size, aslr); - } - } // end: low-address reservation + assert(CompressedKlassPointers::narrow_klass_pointer_bits() == 32 || + CompressedKlassPointers::narrow_klass_pointer_bits() == 22, "Rethink if we ever use different nKlass bit sizes"); + + // Unconditionally attempting to reserve in lower 4G first makes always sense: + // -CDS -COH: Try to get unscaled mode (zero base, zero shift) + // +CDS -COH: No zero base possible (CDS prevents it); but we still benefit from small base pointers (imm32 movabs) + // -CDS +COH: No zero base possible (22bit nKlass + zero base zero shift = 4MB encoding range, way too small); + // but we still benefit from small base pointers (imm32 movabs) + // +CDS +COH: No zero base possible for multiple reasons (CDS prevents it and encoding range too small); + // but we still benefit from small base pointers (imm32 movabs) + + result = reserve_address_space_below_4G(size, aslr); + + if (result == nullptr && optimize_for_zero_base) { + // Failing that, if we are running without CDS, attempt to allocate below 32G. + // This allows us to use zero-based encoding with a non-zero shift. + result = reserve_address_space_for_zerobased_encoding(size, aslr); + } - // Nothing more to optimize for on x64. If base != 0, we will always emit the full 64-bit immediate. return result; } diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp index a5700134f60..b9a2ef35f10 100644 --- a/src/hotspot/cpu/x86/frame_x86.cpp +++ b/src/hotspot/cpu/x86/frame_x86.cpp @@ -701,7 +701,6 @@ void JavaFrameAnchor::make_walkable() { if (last_Java_sp() == nullptr) return; // already walkable? if (walkable()) return; - vmassert(last_Java_pc() == nullptr, "already walkable"); _last_Java_pc = (address)_last_Java_sp[-1]; vmassert(walkable(), "something went wrong"); } diff --git a/src/hotspot/cpu/x86/frame_x86.inline.hpp b/src/hotspot/cpu/x86/frame_x86.inline.hpp index c74731d0410..afc4ab8767b 100644 --- a/src/hotspot/cpu/x86/frame_x86.inline.hpp +++ b/src/hotspot/cpu/x86/frame_x86.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
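The new reservation logic above amounts to a two-step preference order; a simplified sketch using the same helper names as the hunk (declarations stand in for the real CompressedKlassPointers helpers, error handling and assertions omitted):

#include <cstddef>
// Stand-in declarations for the helpers referenced in the hunk above.
char* reserve_address_space_below_4G(size_t size, bool aslr);
char* reserve_address_space_for_zerobased_encoding(size_t size, bool aslr);

// A base below 4 GB is preferred even when zero-based encoding is impossible, because
// the base then fits a 32-bit immediate; only if that fails, and zero base is still an
// option, fall back to the below-32 GB range that allows zero base with a non-zero shift.
char* reserve_sketch(size_t size, bool aslr, bool optimize_for_zero_base) {
  char* result = reserve_address_space_below_4G(size, aslr);
  if (result == nullptr && optimize_for_zero_base) {
    result = reserve_address_space_for_zerobased_encoding(size, aslr);
  }
  return result;
}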
 * * This code is free software; you can redistribute it and/or modify it @@ -35,6 +35,53 @@ // Inline functions for Intel frames: +#if INCLUDE_JFR + +// Static helper routines + +inline address frame::interpreter_bcp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<address>(fp[frame::interpreter_frame_bcp_offset]); +} + +inline address frame::interpreter_return_address(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<address>(fp[frame::return_addr_offset]); +} + +inline intptr_t* frame::interpreter_sender_sp(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(fp[frame::interpreter_frame_sender_sp_offset]); +} + +inline bool frame::is_interpreter_frame_setup_at(const intptr_t* fp, const void* sp) { + assert(fp != nullptr, "invariant"); + assert(sp != nullptr, "invariant"); + return sp <= fp + frame::interpreter_frame_initial_sp_offset; +} + +inline intptr_t* frame::sender_sp(intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return fp + frame::sender_sp_offset; +} + +inline intptr_t* frame::link(const intptr_t* fp) { + assert(fp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(fp[frame::link_offset]); +} + +inline address frame::return_address(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast<address>(sp[-1]); +} + +inline intptr_t* frame::fp(const intptr_t* sp) { + assert(sp != nullptr, "invariant"); + return reinterpret_cast<intptr_t*>(sp[-2]); +} + +#endif // INCLUDE_JFR + // Constructors: inline frame::frame() { diff --git a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp index 298e5640b27..66fb4cbb8c7 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp @@ -26,9 +26,9 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/shared/gc_globals.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" #define __ masm->masm()-> diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp index 45e4c46161f..deb8111adad 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp @@ -23,6 +23,8 @@ * */ +#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" +#include "gc/shenandoah/mode/shenandoahMode.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" #include "gc/shenandoah/shenandoahForwarding.hpp" @@ -30,8 +32,6 @@ #include "gc/shenandoah/shenandoahHeapRegion.hpp" #include "gc/shenandoah/shenandoahRuntime.hpp" #include "gc/shenandoah/shenandoahThreadLocalData.hpp" -#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" -#include "gc/shenandoah/mode/shenandoahMode.hpp" #include "interpreter/interpreter.hpp" #include "runtime/javaThread.hpp" #include "runtime/sharedRuntime.hpp" diff --git a/src/hotspot/cpu/x86/gc/z/zAddress_x86.cpp b/src/hotspot/cpu/x86/gc/z/zAddress_x86.cpp index 6b5b64d3036..db35a4efe08 100644 --- a/src/hotspot/cpu/x86/gc/z/zAddress_x86.cpp +++ b/src/hotspot/cpu/x86/gc/z/zAddress_x86.cpp @@ -30,11 +30,15 
@@ size_t ZPointerLoadShift; size_t ZPlatformAddressOffsetBits() { +#ifdef ADDRESS_SANITIZER + return 44; +#else const size_t min_address_offset_bits = 42; // 4TB const size_t max_address_offset_bits = 44; // 16TB const size_t address_offset = ZGlobalsPointers::min_address_offset_request(); const size_t address_offset_bits = log2i_exact(address_offset); return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); +#endif } size_t ZPlatformAddressHeapBaseShift() { diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp index 9cdf0b229c0..fe7f19e1260 100644 --- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp @@ -78,15 +78,48 @@ class ZRuntimeCallSpill { void save() { MacroAssembler* masm = _masm; - __ push(rax); - __ push(rcx); - __ push(rdx); - __ push(rdi); - __ push(rsi); - __ push(r8); - __ push(r9); - __ push(r10); - __ push(r11); + if (VM_Version::supports_apx_f()) { + if (_result != rax) { + __ pushp(rax); + } + __ pushp(rcx); + // Save current stack pointer into rcx + __ movptr(rcx, rsp); + // Align stack pointer to 16 byte boundary. This is hard constraint + // for push2/pop2 with PPX hints. + __ andptr(rsp, -StackAlignmentInBytes); + // Push original stack pointer. + __ push(rcx); + // Restore the original contents of RCX register. + __ movptr(rcx, Address(rcx)); + // Now push remaining caller save GPRs and EGPRs on 16B aligned stack. + // Note: For PPX to work properly, a PPX-marked PUSH2 (respectively, POP2) should always + // be matched with a PPX-marked POP2 (PUSH2), not with two PPX-marked POPs (PUSHs). + __ pushp(rdx); + __ push2p(rdi, rsi); + __ push2p(r8, r9); + __ push2p(r10, r11); + __ push2p(r16, r17); + __ push2p(r18, r19); + __ push2p(r20, r21); + __ push2p(r22, r23); + __ push2p(r24, r25); + __ push2p(r26, r27); + __ push2p(r28, r29); + __ push2p(r30, r31); + } else { + if (_result != rax) { + __ push(rax); + } + __ push(rcx); + __ push(rdx); + __ push(rdi); + __ push(rsi); + __ push(r8); + __ push(r9); + __ push(r10); + __ push(r11); + } if (_xmm_spill_size != 0) { __ subptr(rsp, _xmm_spill_size); @@ -139,21 +172,43 @@ class ZRuntimeCallSpill { __ addptr(rsp, _xmm_spill_size); } - __ pop(r11); - __ pop(r10); - __ pop(r9); - __ pop(r8); - __ pop(rsi); - __ pop(rdi); - __ pop(rdx); - __ pop(rcx); - if (_result == noreg) { - __ pop(rax); - } else if (_result == rax) { - __ addptr(rsp, wordSize); + if (VM_Version::supports_apx_f()) { + __ pop2p(r31, r30); + __ pop2p(r29, r28); + __ pop2p(r27, r26); + __ pop2p(r25, r24); + __ pop2p(r23, r22); + __ pop2p(r21, r20); + __ pop2p(r19, r18); + __ pop2p(r17, r16); + __ pop2p(r11, r10); + __ pop2p(r9, r8); + __ pop2p(rsi, rdi); + __ popp(rdx); + // Re-instantiate original stack pointer. 
+ __ movptr(rsp, Address(rsp)); + __ popp(rcx); + if (_result != rax) { + if (_result != noreg) { + __ movptr(_result, rax); + } + __ popp(rax); + } } else { - __ movptr(_result, rax); - __ pop(rax); + __ pop(r11); + __ pop(r10); + __ pop(r9); + __ pop(r8); + __ pop(rsi); + __ pop(rdi); + __ pop(rdx); + __ pop(rcx); + if (_result != rax) { + if (_result != noreg) { + __ movptr(_result, rax); + } + __ pop(rax); + } } } @@ -1328,7 +1383,13 @@ void ZBarrierSetAssembler::patch_barrier_relocation(address addr, int format) { const uint16_t value = patch_barrier_relocation_value(format); uint8_t* const patch_addr = (uint8_t*)addr + offset; if (format == ZBarrierRelocationFormatLoadGoodBeforeShl) { - *patch_addr = (uint8_t)value; + if (VM_Version::supports_apx_f()) { + NativeInstruction* instruction = nativeInstruction_at(addr); + uint8_t* const rex2_patch_addr = patch_addr + (instruction->has_rex2_prefix() ? 1 : 0); + *rex2_patch_addr = (uint8_t)value; + } else { + *patch_addr = (uint8_t)value; + } } else { *(uint16_t*)patch_addr = value; } diff --git a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp index 873cfbdcea0..3c1474ae861 100644 --- a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp +++ b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp @@ -34,9 +34,7 @@ const bool CCallingConventionRequiresIntsAsLongs = false; #define SUPPORTS_NATIVE_CX8 -#ifdef _LP64 #define SUPPORT_MONITOR_COUNT -#endif #define CPU_MULTI_COPY_ATOMIC @@ -44,15 +42,11 @@ const bool CCallingConventionRequiresIntsAsLongs = false; #define DEFAULT_CACHE_LINE_SIZE 64 // The default padding size for data structures to avoid false sharing. -#ifdef _LP64 // The common wisdom is that adjacent cache line prefetchers on some hardware // may pull two cache lines on access, so we have to pessimistically assume twice // the cache line size for padding. TODO: Check if this is still true for modern // hardware. If not, DEFAULT_CACHE_LINE_SIZE might as well suffice. #define DEFAULT_PADDING_SIZE (DEFAULT_CACHE_LINE_SIZE*2) -#else -#define DEFAULT_PADDING_SIZE DEFAULT_CACHE_LINE_SIZE -#endif #if defined(LINUX) || defined(__APPLE__) #define SUPPORT_RESERVED_STACK_AREA diff --git a/src/hotspot/cpu/x86/globals_x86.hpp b/src/hotspot/cpu/x86/globals_x86.hpp index 54888a9f849..a1d4a71874f 100644 --- a/src/hotspot/cpu/x86/globals_x86.hpp +++ b/src/hotspot/cpu/x86/globals_x86.hpp @@ -61,29 +61,19 @@ define_pd_global(intx, InlineSmallCode, 1000); #define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES #define MIN_STACK_RESERVED_PAGES (0) -#ifdef _LP64 // Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the -// stack if compiled for unix and LP64. To pass stack overflow tests we need -// 20 shadow pages. +// stack if compiled for unix. To pass stack overflow tests we need 20 shadow pages. 
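The alignment dance in the APX spill/restore path above (save the unaligned SP, align, push the old SP, later reload it through the aligned SP) can be illustrated with a small standalone sketch:

// Standalone illustration (not HotSpot code) of the stack-pointer handling used by the
// APX push2p/pop2p spill above: push2/pop2 with PPX hints require a 16-byte aligned
// stack, so the unaligned SP is stashed on the aligned stack and reloaded on restore.
#include <cassert>
#include <cstdint>

int main() {
  alignas(16) unsigned char stack[256];
  uintptr_t sp = reinterpret_cast<uintptr_t>(stack) + sizeof(stack) - 8;  // some unaligned SP

  uintptr_t old_sp = sp;                       // movptr(rcx, rsp)
  sp &= ~uintptr_t(15);                        // andptr(rsp, -StackAlignmentInBytes)
  sp -= sizeof(uintptr_t);                     // push(rcx): store the unaligned SP
  *reinterpret_cast<uintptr_t*>(sp) = old_sp;

  // ... remaining single pushes and push2p/pop2p pairs happen here ...

  sp = *reinterpret_cast<uintptr_t*>(sp);      // movptr(rsp, Address(rsp)) on restore
  assert(sp == old_sp);
  return 0;
}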
#define DEFAULT_STACK_SHADOW_PAGES (NOT_WIN64(20) WIN64_ONLY(8) DEBUG_ONLY(+4)) // For those clients that do not use write socket, we allow // the min range value to be below that of the default #define MIN_STACK_SHADOW_PAGES (NOT_WIN64(10) WIN64_ONLY(8) DEBUG_ONLY(+4)) -#else -#define DEFAULT_STACK_SHADOW_PAGES (4 DEBUG_ONLY(+5)) -#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES -#endif // _LP64 define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); -#ifdef _LP64 define_pd_global(bool, VMContinuations, true); -#else -define_pd_global(bool, VMContinuations, false); -#endif define_pd_global(bool, RewriteBytecodes, true); define_pd_global(bool, RewriteFrequentPairs, true); @@ -191,6 +181,15 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); product(bool, IntelJccErratumMitigation, true, DIAGNOSTIC, \ "Turn off JVM mitigations related to Intel micro code " \ "mitigations for the Intel JCC erratum") \ + \ + product(int, X86ICacheSync, -1, DIAGNOSTIC, \ + "Select the X86 ICache sync mechanism: -1 = auto-select; " \ + "0 = none (dangerous); 1 = CLFLUSH loop; 2 = CLFLUSHOPT loop; "\ + "3 = CLWB loop; 4 = single CPUID; 5 = single SERIALIZE. " \ + "Explicitly selected mechanism will fail at startup if " \ + "hardware does not support it.") \ + range(-1, 5) \ + \ // end of ARCH_FLAGS #endif // CPU_X86_GLOBALS_X86_HPP diff --git a/src/hotspot/cpu/x86/icache_x86.cpp b/src/hotspot/cpu/x86/icache_x86.cpp index 45679332eca..889cfb32931 100644 --- a/src/hotspot/cpu/x86/icache_x86.cpp +++ b/src/hotspot/cpu/x86/icache_x86.cpp @@ -23,15 +23,63 @@ */ #include "asm/macroAssembler.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/globals_extension.hpp" #include "runtime/icache.hpp" #define __ _masm-> +void x86_generate_icache_fence(MacroAssembler* _masm) { + switch (X86ICacheSync) { + case 0: + break; + case 1: + __ mfence(); + break; + case 2: + case 3: + __ sfence(); + break; + case 4: + __ push(rax); + __ push(rbx); + __ push(rcx); + __ push(rdx); + __ xorptr(rax, rax); + __ cpuid(); + __ pop(rdx); + __ pop(rcx); + __ pop(rbx); + __ pop(rax); + break; + case 5: + __ serialize(); + break; + default: + ShouldNotReachHere(); + } +} + +void x86_generate_icache_flush_insn(MacroAssembler* _masm, Register addr) { + switch (X86ICacheSync) { + case 1: + __ clflush(Address(addr, 0)); + break; + case 2: + __ clflushopt(Address(addr, 0)); + break; + case 3: + __ clwb(Address(addr, 0)); + break; + default: + ShouldNotReachHere(); + } +} + void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { - StubCodeMark mark(this, "ICache", "flush_icache_stub"); + StubCodeMark mark(this, "ICache", _stub_name); address start = __ pc(); -#ifdef AMD64 const Register addr = c_rarg0; const Register lines = c_rarg1; @@ -40,26 +88,22 @@ void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flu Label flush_line, done; __ testl(lines, lines); - __ jcc(Assembler::zero, done); + __ jccb(Assembler::zero, done); - // Force ordering wrt cflush. - // Other fence and sync instructions won't do the job. 
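Because X86ICacheSync is declared DIAGNOSTIC, selecting a mechanism explicitly requires unlocking diagnostic options. An illustrative invocation:

java -XX:+UnlockDiagnosticVMOptions -XX:X86ICacheSync=5 -version

Per the flag description above, startup fails if the hardware does not support the explicitly selected mechanism (for example SERIALIZE).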
- __ mfence(); + x86_generate_icache_fence(_masm); - __ bind(flush_line); - __ clflush(Address(addr, 0)); - __ addptr(addr, ICache::line_size); - __ decrementl(lines); - __ jcc(Assembler::notZero, flush_line); + if (1 <= X86ICacheSync && X86ICacheSync <= 3) { + __ bind(flush_line); + x86_generate_icache_flush_insn(_masm, addr); + __ addptr(addr, ICache::line_size); + __ decrementl(lines); + __ jccb(Assembler::notZero, flush_line); - __ mfence(); + x86_generate_icache_fence(_masm); + } __ bind(done); -#else - const Address magic(rsp, 3*wordSize); - __ lock(); __ addl(Address(rsp, 0), 0); -#endif // AMD64 __ movptr(rax, magic); // Handshake with caller to make sure it happened! __ ret(0); @@ -67,4 +111,22 @@ void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flu *flush_icache_stub = (ICache::flush_icache_stub_t)start; } +void ICache::initialize(int phase) { + switch (phase) { + case 1: { + // Initial phase, we assume only CLFLUSH is available. + IntFlagSetting fs(X86ICacheSync, 1); + AbstractICache::initialize(phase); + break; + } + case 2: { + // Final phase, generate the stub again. + AbstractICache::initialize(phase); + break; + } + default: + ShouldNotReachHere(); + } +} + #undef __ diff --git a/src/hotspot/cpu/x86/icache_x86.hpp b/src/hotspot/cpu/x86/icache_x86.hpp index 48286a7e3b3..805022fbb32 100644 --- a/src/hotspot/cpu/x86/icache_x86.hpp +++ b/src/hotspot/cpu/x86/icache_x86.hpp @@ -40,21 +40,13 @@ class ICache : public AbstractICache { public: -#ifdef AMD64 enum { stub_size = 64, // Size of the icache flush stub in bytes line_size = 64, // Icache line size in bytes log2_line_size = 6 // log2(line_size) }; - // Use default implementation -#else - enum { - stub_size = 16, // Size of the icache flush stub in bytes - line_size = BytesPerWord, // conservative - log2_line_size = LogBytesPerWord // log2(line_size) - }; -#endif // AMD64 + static void initialize(int phase); }; #endif // CPU_X86_ICACHE_X86_HPP diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp index bd029f2e4ac..92233ee0d07 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.cpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp @@ -778,9 +778,10 @@ void InterpreterMacroAssembler::narrow(Register result) { // remove activation // -// Apply stack watermark barrier. // Unlock the receiver if this is a synchronized method. // Unlock any Java monitors from synchronized blocks. +// Apply stack watermark barrier. +// Notify JVMTI. // Remove the activation from the stack. // // If there are locked Java monitors @@ -790,12 +791,11 @@ void InterpreterMacroAssembler::narrow(Register result) { // installs IllegalMonitorStateException // Else // no error processing -void InterpreterMacroAssembler::remove_activation( - TosState state, - Register ret_addr, - bool throw_monitor_exception, - bool install_monitor_exception, - bool notify_jvmdi) { +void InterpreterMacroAssembler::remove_activation(TosState state, + Register ret_addr, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { // Note: Registers rdx xmm0 may be in use for the // result check if synchronized method Label unlocked, unlock, no_unlock; @@ -804,21 +804,6 @@ void InterpreterMacroAssembler::remove_activation( const Register robj = c_rarg1; const Register rmon = c_rarg1; - // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, - // that would normally not be safe to use. 
Such bad returns into unsafe territory of - // the stack, will call InterpreterRuntime::at_unwind. - Label slow_path; - Label fast_path; - safepoint_poll(slow_path, true /* at_return */, false /* in_nmethod */); - jmp(fast_path); - bind(slow_path); - push(state); - set_last_Java_frame(noreg, rbp, (address)pc(), rscratch1); - super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread); - reset_last_Java_frame(true); - pop(state); - bind(fast_path); - // get the value of _do_not_unlock_if_synchronized into rdx const Address do_not_unlock_if_synchronized(rthread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); @@ -940,7 +925,24 @@ void InterpreterMacroAssembler::remove_activation( bind(no_unlock); - // jvmti support + JFR_ONLY(enter_jfr_critical_section();) + + // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, + // that would normally not be safe to use. Such bad returns into unsafe territory of + // the stack, will call InterpreterRuntime::at_unwind. + Label slow_path; + Label fast_path; + safepoint_poll(slow_path, true /* at_return */, false /* in_nmethod */); + jmp(fast_path); + bind(slow_path); + push(state); + set_last_Java_frame(noreg, rbp, (address)pc(), rscratch1); + super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), r15_thread); + reset_last_Java_frame(true); + pop(state); + bind(fast_path); + + // JVMTI support. Make sure the safepoint poll test is issued prior. if (notify_jvmdi) { notify_method_exit(state, NotifyJVMTI); // preserve TOSCA } else { @@ -964,6 +966,8 @@ void InterpreterMacroAssembler::remove_activation( cmpptr(rbx, Address(rthread, JavaThread::reserved_stack_activation_offset())); jcc(Assembler::lessEqual, no_reserved_zone_enabling); + JFR_ONLY(leave_jfr_critical_section();) + call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), rthread); call_VM(noreg, CAST_FROM_FN_PTR(address, @@ -972,12 +976,29 @@ void InterpreterMacroAssembler::remove_activation( bind(no_reserved_zone_enabling); } + leave(); // remove frame anchor + + JFR_ONLY(leave_jfr_critical_section();) + pop(ret_addr); // get return address mov(rsp, rbx); // set sp to sender sp pop_cont_fastpath(); + } +#if INCLUDE_JFR +void InterpreterMacroAssembler::enter_jfr_critical_section() { + const Address sampling_critical_section(r15_thread, in_bytes(SAMPLING_CRITICAL_SECTION_OFFSET_JFR)); + movbool(sampling_critical_section, true); +} + +void InterpreterMacroAssembler::leave_jfr_critical_section() { + const Address sampling_critical_section(r15_thread, in_bytes(SAMPLING_CRITICAL_SECTION_OFFSET_JFR)); + movbool(sampling_critical_section, false); +} +#endif // INCLUDE_JFR + void InterpreterMacroAssembler::get_method_counters(Register method, Register mcs, Label& skip) { Label has_counters; @@ -1023,15 +1044,15 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { // Load object pointer into obj_reg movptr(obj_reg, Address(lock_reg, obj_offset)); - if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(tmp_reg, obj_reg, rklass_decode_tmp); - testb(Address(tmp_reg, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class); - jcc(Assembler::notZero, slow_case); - } - if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(lock_reg, obj_reg, swap_reg, tmp_reg, slow_case); } else if (LockingMode == LM_LEGACY) { + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp_reg, obj_reg, rklass_decode_tmp); + testb(Address(tmp_reg, Klass::misc_flags_offset()), 
KlassFlags::_misc_is_value_based_class); + jcc(Assembler::notZero, slow_case); + } + // Load immediate 1 into swap_reg %rax movl(swap_reg, 1); @@ -1254,46 +1275,19 @@ void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, - int constant, - bool decrement) { - // Counter address - Address data(mdp_in, constant); - - increment_mdp_data_at(data, decrement); -} - -void InterpreterMacroAssembler::increment_mdp_data_at(Address data, - bool decrement) { + int constant) { assert(ProfileInterpreter, "must be profiling interpreter"); - // %%% this does 64bit counters at best it is wasting space - // at worst it is a rare bug when counters overflow - - if (decrement) { - // Decrement the register. Set condition codes. - addptr(data, -DataLayout::counter_increment); - // If the decrement causes the counter to overflow, stay negative - Label L; - jcc(Assembler::negative, L); - addptr(data, DataLayout::counter_increment); - bind(L); - } else { - assert(DataLayout::counter_increment == 1, - "flow-free idiom only works with 1"); - // Increment the register. Set carry flag. - addptr(data, DataLayout::counter_increment); - // If the increment causes the counter to overflow, pull back by 1. - sbbptr(data, 0); - } + Address data(mdp_in, constant); + addptr(data, DataLayout::counter_increment); } void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, - Register reg, - int constant, - bool decrement) { - Address data(mdp_in, reg, Address::times_1, constant); - - increment_mdp_data_at(data, decrement); + Register index, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, index, Address::times_1, constant); + addptr(data, DataLayout::counter_increment); } void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, @@ -1361,25 +1355,15 @@ void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { } -void InterpreterMacroAssembler::profile_taken_branch(Register mdp, - Register bumped_count) { +void InterpreterMacroAssembler::profile_taken_branch(Register mdp) { if (ProfileInterpreter) { Label profile_continue; // If no method data exists, go to profile_continue. - // Otherwise, assign to mdp test_method_data_pointer(mdp, profile_continue); // We are taking a branch. Increment the taken count. - // We inline increment_mdp_data_at to return bumped_count in a register - //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); - Address data(mdp, in_bytes(JumpData::taken_offset())); - movptr(bumped_count, data); - assert(DataLayout::counter_increment == 1, - "flow-free idiom only works with 1"); - addptr(bumped_count, DataLayout::counter_increment); - sbbptr(bumped_count, 0); - movptr(data, bumped_count); // Store back out + increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); // The method data pointer needs to be updated to reflect the new target. update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); @@ -1395,7 +1379,7 @@ void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); - // We are taking a branch. Increment the not taken count. + // We are not taking a branch. Increment the not taken count. 
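The profile-counter change above drops the old carry-based saturating increment in favor of a plain add. As arithmetic, the difference is the following (standalone sketch with illustrative values; presumably acceptable because the counters are pointer-sized on x64, so wrap-around is not a practical concern):

// Old idiom (addptr + sbbptr): increment and subtract the carry so the counter sticks at max.
// New code (addptr only): plain increment.
#include <cassert>
#include <cstdint>

uint64_t saturating_increment(uint64_t c) {   // old "flow-free" idiom
  uint64_t inc = c + 1;
  uint64_t carry = (inc == 0) ? 1 : 0;        // carry flag from the add
  return inc - carry;                         // sticks at UINT64_MAX
}

uint64_t plain_increment(uint64_t c) {        // new code
  return c + 1;
}

int main() {
  assert(saturating_increment(UINT64_MAX) == UINT64_MAX);
  assert(plain_increment(UINT64_MAX) == 0);
  return 0;
}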
increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); // The method data pointer needs to be updated to correspond to diff --git a/src/hotspot/cpu/x86/interp_masm_x86.hpp b/src/hotspot/cpu/x86/interp_masm_x86.hpp index 308d700ff4f..a36a697eebf 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.hpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -212,11 +212,8 @@ class InterpreterMacroAssembler: public MacroAssembler { void verify_method_data_pointer(); void set_mdp_data_at(Register mdp_in, int constant, Register value); - void increment_mdp_data_at(Address data, bool decrement = false); - void increment_mdp_data_at(Register mdp_in, int constant, - bool decrement = false); - void increment_mdp_data_at(Register mdp_in, Register reg, int constant, - bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant); + void increment_mdp_data_at(Register mdp_in, Register index, int constant); void increment_mask_and_jump(Address counter_addr, Address mask, Register scratch, Label* where); void set_mdp_flag_at(Register mdp_in, int flag_constant); @@ -239,7 +236,7 @@ class InterpreterMacroAssembler: public MacroAssembler { void update_mdp_by_constant(Register mdp_in, int constant); void update_mdp_for_ret(Register return_bci); - void profile_taken_branch(Register mdp, Register bumped_count); + void profile_taken_branch(Register mdp); void profile_not_taken_branch(Register mdp); void profile_call(Register mdp); void profile_final_call(Register mdp); @@ -265,6 +262,9 @@ class InterpreterMacroAssembler: public MacroAssembler { void notify_method_entry(); void notify_method_exit(TosState state, NotifyMethodExitMode mode); + JFR_ONLY(void enter_jfr_critical_section();) + JFR_ONLY(void leave_jfr_critical_section();) + private: Register _locals_register; // register that contains the pointer to the locals diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 25de76a7e40..803bce48945 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -24,6 +24,7 @@ #include "asm/assembler.hpp" #include "asm/assembler.inline.hpp" +#include "code/aotCodeCache.hpp" #include "code/compiledIC.hpp" #include "compiler/compiler_globals.hpp" #include "compiler/disassembler.hpp" @@ -366,7 +367,9 @@ void MacroAssembler::stop(const char* msg) { lea(c_rarg1, InternalAddress(rip)); movq(c_rarg2, rsp); // pass pointer to regs array } - lea(c_rarg0, ExternalAddress((address) msg)); + // Skip AOT caching C strings in scratch buffer. + const char* str = (code_section()->scratch_emit()) ? 
msg : AOTCodeCache::add_C_string(msg); + lea(c_rarg0, ExternalAddress((address) str)); andq(rsp, -16); // align stack as required by ABI call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); hlt(); @@ -2247,6 +2250,16 @@ void MacroAssembler::evmovdqaq(XMMRegister dst, AddressLiteral src, int vector_l } } +void MacroAssembler::movapd(XMMRegister dst, AddressLiteral src, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + + if (reachable(src)) { + Assembler::movapd(dst, as_Address(src)); + } else { + lea(rscratch, src); + Assembler::movapd(dst, Address(rscratch, 0)); + } +} void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src, Register rscratch) { assert(rscratch != noreg || always_reachable(src), "missing"); @@ -5399,20 +5412,27 @@ void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { } void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { + BLOCK_COMMENT("encode_klass_not_null {"); assert_different_registers(r, tmp); if (CompressedKlassPointers::base() != nullptr) { - mov64(tmp, (int64_t)CompressedKlassPointers::base()); + if (AOTCodeCache::is_on_for_dump()) { + movptr(tmp, ExternalAddress(CompressedKlassPointers::base_addr())); + } else { + movptr(tmp, (intptr_t)CompressedKlassPointers::base()); + } subq(r, tmp); } if (CompressedKlassPointers::shift() != 0) { shrq(r, CompressedKlassPointers::shift()); } + BLOCK_COMMENT("} encode_klass_not_null"); } void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src) { + BLOCK_COMMENT("encode_and_move_klass_not_null {"); assert_different_registers(src, dst); if (CompressedKlassPointers::base() != nullptr) { - mov64(dst, -(int64_t)CompressedKlassPointers::base()); + movptr(dst, -(intptr_t)CompressedKlassPointers::base()); addq(dst, src); } else { movptr(dst, src); @@ -5420,9 +5440,11 @@ void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src) if (CompressedKlassPointers::shift() != 0) { shrq(dst, CompressedKlassPointers::shift()); } + BLOCK_COMMENT("} encode_and_move_klass_not_null"); } void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { + BLOCK_COMMENT("decode_klass_not_null {"); assert_different_registers(r, tmp); // Note: it will change flags assert(UseCompressedClassPointers, "should only be used for compressed headers"); @@ -5433,12 +5455,18 @@ void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { shlq(r, CompressedKlassPointers::shift()); } if (CompressedKlassPointers::base() != nullptr) { - mov64(tmp, (int64_t)CompressedKlassPointers::base()); + if (AOTCodeCache::is_on_for_dump()) { + movptr(tmp, ExternalAddress(CompressedKlassPointers::base_addr())); + } else { + movptr(tmp, (intptr_t)CompressedKlassPointers::base()); + } addq(r, tmp); } + BLOCK_COMMENT("} decode_klass_not_null"); } void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) { + BLOCK_COMMENT("decode_and_move_klass_not_null {"); assert_different_registers(src, dst); // Note: it will change flags assert (UseCompressedClassPointers, "should only be used for compressed headers"); @@ -5454,7 +5482,7 @@ void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) } else { if (CompressedKlassPointers::shift() <= Address::times_8) { if (CompressedKlassPointers::base() != nullptr) { - mov64(dst, (int64_t)CompressedKlassPointers::base()); + movptr(dst, (intptr_t)CompressedKlassPointers::base()); } else { xorq(dst, dst); } @@ -5466,9 +5494,9 @@ void 
MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) } } else { if (CompressedKlassPointers::base() != nullptr) { - const uint64_t base_right_shifted = - (uint64_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift(); - mov64(dst, base_right_shifted); + const intptr_t base_right_shifted = + (intptr_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift(); + movptr(dst, base_right_shifted); } else { xorq(dst, dst); } @@ -5476,6 +5504,7 @@ void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) shlq(dst, CompressedKlassPointers::shift()); } } + BLOCK_COMMENT("} decode_and_move_klass_not_null"); } void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { @@ -9584,10 +9613,16 @@ void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Registe movptr(reg_rax, Address(obj, oopDesc::mark_offset_in_bytes())); if (UseObjectMonitorTable) { - // Clear cache in case fast locking succeeds. + // Clear cache in case fast locking succeeds or we need to take the slow-path. movptr(Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes()))), 0); } + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp, obj, rscratch1); + testb(Address(tmp, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class); + jcc(Assembler::notZero, slow); + } + // Load top. movl(top, Address(thread, JavaThread::lock_stack_top_offset())); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index efd1a4c154f..f7ac6fb4297 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
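For reference, the arithmetic behind encode_klass_not_null / decode_klass_not_null above is narrow = (klass - base) >> shift and klass = base + (narrow << shift); a standalone sketch with illustrative base and shift values (not HotSpot code):

#include <cassert>
#include <cstdint>

int main() {
  const uintptr_t base  = 0x0000000080000000ull;  // example base below 4 GB, fits an imm32
  const unsigned  shift = 3;                      // example shift

  uintptr_t klass  = base + (uintptr_t(12345) << shift);   // some encodable Klass*
  uint32_t  narrow = uint32_t((klass - base) >> shift);    // encode_klass_not_null
  uintptr_t back   = base + (uintptr_t(narrow) << shift);  // decode_klass_not_null
  assert(back == klass);
  return 0;
}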
* * This code is free software; you can redistribute it and/or modify it @@ -995,6 +995,8 @@ class MacroAssembler: public Assembler { void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } void andpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); + void andnpd(XMMRegister dst, XMMRegister src) { Assembler::andnpd(dst, src); } + void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } void andps(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); @@ -1007,6 +1009,8 @@ class MacroAssembler: public Assembler { void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } void comisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); + void orpd(XMMRegister dst, XMMRegister src) { Assembler::orpd(dst, src); } + void cmp32_mxcsr_std(Address mxcsr_save, Register tmp, Register rscratch = noreg); void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } void ldmxcsr(AddressLiteral src, Register rscratch = noreg); @@ -1241,6 +1245,9 @@ class MacroAssembler: public Assembler { void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); void evmovdqaq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); + using Assembler::movapd; + void movapd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); + // Move Aligned Double Quadword void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); } void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp index 432f9277549..9f0232075cd 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp @@ -1511,7 +1511,7 @@ void MacroAssembler::sha512_update_ni_x1(Register arg_hash, Register arg_msg, Re //ymm13 = A B E F, ymm14 = C D G H lea(rax, ExternalAddress(K512_W)); - align(32); + align(CodeEntryAlignment); bind(block_loop); vmovdqu(xmm11, xmm13);//ABEF vmovdqu(xmm12, xmm14);//CDGH diff --git a/src/hotspot/cpu/x86/methodHandles_x86.cpp b/src/hotspot/cpu/x86/methodHandles_x86.cpp index ee4dc26ae40..f3683e7d09c 100644 --- a/src/hotspot/cpu/x86/methodHandles_x86.cpp +++ b/src/hotspot/cpu/x86/methodHandles_x86.cpp @@ -122,17 +122,64 @@ void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Registe __ bind(L); } -#endif //ASSERT +void MethodHandles::verify_method(MacroAssembler* _masm, Register method, Register temp, vmIntrinsics::ID iid) { + BLOCK_COMMENT("verify_method {"); + __ verify_method_ptr(method); + if (VerifyMethodHandles) { + Label L_ok; + assert_different_registers(method, temp); + + const Register method_holder = temp; + __ load_method_holder(method_holder, method); + __ push(method_holder); // keep holder around for diagnostic purposes + + switch (iid) { + case vmIntrinsicID::_invokeBasic: + // Require compiled LambdaForm class to be fully initialized. 
+ __ cmpb(Address(method_holder, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); + __ jccb(Assembler::equal, L_ok); + break; + + case vmIntrinsicID::_linkToStatic: + __ clinit_barrier(method_holder, &L_ok); + break; + + case vmIntrinsicID::_linkToVirtual: + case vmIntrinsicID::_linkToSpecial: + case vmIntrinsicID::_linkToInterface: + // Class initialization check is too strong here. Just ensure that initialization has been initiated. + __ cmpb(Address(method_holder, InstanceKlass::init_state_offset()), InstanceKlass::being_initialized); + __ jcc(Assembler::greaterEqual, L_ok); + + // init_state check failed, but it may be an abstract interface method + __ load_unsigned_short(temp, Address(method, Method::access_flags_offset())); + __ testl(temp, JVM_ACC_ABSTRACT); + __ jccb(Assembler::notZero, L_ok); + break; + + default: + fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); + } + + // clinit check failed for a concrete method + __ STOP("Method holder klass is not initialized"); + + __ BIND(L_ok); + __ pop(method_holder); // restore stack layout + } + BLOCK_COMMENT("} verify_method"); +} +#endif // ASSERT void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, - bool for_compiler_entry) { + bool for_compiler_entry, vmIntrinsics::ID iid) { assert(method == rbx, "interpreter calling convention"); Label L_no_such_method; __ testptr(rbx, rbx); __ jcc(Assembler::zero, L_no_such_method); - __ verify_method_ptr(method); + verify_method(_masm, method, temp, iid); if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { Label run_compiled_code; @@ -193,7 +240,7 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, __ BIND(L); } - jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry, vmIntrinsics::_invokeBasic); BLOCK_COMMENT("} jump_to_lambda_form"); } @@ -485,8 +532,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, // After figuring out which concrete method to call, jump into it. // Note that this works in the interpreter with no data motion. // But the compiled version will require that rcx_recv be shifted out. - __ verify_method_ptr(rbx_method); - jump_from_method_handle(_masm, rbx_method, temp1, for_compiler_entry); + jump_from_method_handle(_masm, rbx_method, temp1, for_compiler_entry, iid); if (iid == vmIntrinsics::_linkToInterface) { __ bind(L_incompatible_class_change_error); diff --git a/src/hotspot/cpu/x86/methodHandles_x86.hpp b/src/hotspot/cpu/x86/methodHandles_x86.hpp index 9ffe5e198ac..6ba9b5f6a4f 100644 --- a/src/hotspot/cpu/x86/methodHandles_x86.hpp +++ b/src/hotspot/cpu/x86/methodHandles_x86.hpp @@ -38,6 +38,8 @@ enum /* platform_dependent_constants */ { Register obj, vmClassID klass_id, const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + static void verify_method(MacroAssembler* _masm, Register method, Register temp, vmIntrinsics::ID iid) NOT_DEBUG_RETURN; + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { verify_klass(_masm, mh_reg, VM_CLASS_ID(MethodHandle_klass), "reference is a MH"); @@ -48,7 +50,7 @@ enum /* platform_dependent_constants */ { // Similar to InterpreterMacroAssembler::jump_from_interpreted. // Takes care of special dispatch from single stepping too. 
static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, - bool for_compiler_entry); + bool for_compiler_entry, vmIntrinsics::ID iid); static void jump_to_lambda_form(MacroAssembler* _masm, Register recv, Register method_temp, diff --git a/src/hotspot/cpu/x86/nativeInst_x86.cpp b/src/hotspot/cpu/x86/nativeInst_x86.cpp index 4ee741077dc..c3345be2172 100644 --- a/src/hotspot/cpu/x86/nativeInst_x86.cpp +++ b/src/hotspot/cpu/x86/nativeInst_x86.cpp @@ -67,9 +67,7 @@ void NativeCall::print() { // Inserts a native call instruction at a given pc void NativeCall::insert(address code_pos, address entry) { intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4); -#ifdef AMD64 guarantee(disp == (intptr_t)(jint)disp, "must be 32-bit offset"); -#endif // AMD64 *code_pos = instruction_code; *((int32_t *)(code_pos+1)) = (int32_t) disp; ICache::invalidate_range(code_pos, instruction_size); @@ -140,7 +138,7 @@ bool NativeCall::is_displacement_aligned() { // Used in the runtime linkage of calls; see class CompiledIC. // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) void NativeCall::set_destination_mt_safe(address dest) { - debug_only(verify()); + DEBUG_ONLY(verify()); // Make sure patching code is locked. No two threads can patch at the same // time but one may be executing this code. assert(CodeCache_lock->is_locked() || SafepointSynchronize::is_at_safepoint() || @@ -157,7 +155,6 @@ void NativeCall::set_destination_mt_safe(address dest) { void NativeMovConstReg::verify() { -#ifdef AMD64 // make sure code pattern is actually a mov reg64, imm64 instruction bool valid_rex_prefix = ubyte_at(0) == Assembler::REX_W || ubyte_at(0) == Assembler::REX_WB; bool valid_rex2_prefix = ubyte_at(0) == Assembler::REX2 && @@ -169,12 +166,6 @@ void NativeMovConstReg::verify() { print(); fatal("not a REX.W[B] mov reg64, imm64"); } -#else - // make sure code pattern is actually a mov reg, imm32 instruction - u_char test_byte = *(u_char*)instruction_address(); - u_char test_byte_2 = test_byte & ( 0xff ^ register_mask); - if (test_byte_2 != instruction_code) fatal("not a mov reg, imm32"); -#endif // AMD64 } @@ -192,12 +183,10 @@ int NativeMovRegMem::instruction_start() const { // See comment in Assembler::locate_operand() about VEX prefixes. if (instr_0 == instruction_VEX_prefix_2bytes) { assert((UseAVX > 0), "shouldn't have VEX prefix"); - NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions")); return 2; } if (instr_0 == instruction_VEX_prefix_3bytes) { assert((UseAVX > 0), "shouldn't have VEX prefix"); - NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions")); return 3; } if (instr_0 == instruction_EVEX_prefix_4bytes) { @@ -313,8 +302,7 @@ void NativeMovRegMem::print() { void NativeLoadAddress::verify() { // make sure code pattern is actually a mov [reg+offset], reg instruction u_char test_byte = *(u_char*)instruction_address(); - if ( ! 
((test_byte == lea_instruction_code) - LP64_ONLY(|| (test_byte == mov64_instruction_code) ))) { + if ((test_byte != lea_instruction_code) && (test_byte != mov64_instruction_code)) { fatal ("not a lea reg, [reg+offs] instruction"); } } @@ -340,9 +328,7 @@ void NativeJump::verify() { void NativeJump::insert(address code_pos, address entry) { intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4); -#ifdef AMD64 guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset"); -#endif // AMD64 *code_pos = instruction_code; *((int32_t*)(code_pos + 1)) = (int32_t)disp; @@ -355,11 +341,7 @@ void NativeJump::check_verified_entry_alignment(address entry, address verified_ // in use. The patching in that instance must happen only when certain // alignment restrictions are true. These guarantees check those // conditions. -#ifdef AMD64 const int linesize = 64; -#else - const int linesize = 32; -#endif // AMD64 // Must be wordSize aligned guarantee(((uintptr_t) verified_entry & (wordSize -1)) == 0, @@ -386,7 +368,6 @@ void NativeJump::check_verified_entry_alignment(address entry, address verified_ // void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { // complete jump instruction (to be inserted) is in code_buffer; -#ifdef _LP64 union { jlong cb_long; unsigned char code_buffer[8]; @@ -402,43 +383,6 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add Atomic::store((jlong *) verified_entry, u.cb_long); ICache::invalidate_range(verified_entry, 8); - -#else - unsigned char code_buffer[5]; - code_buffer[0] = instruction_code; - intptr_t disp = (intptr_t)dest - ((intptr_t)verified_entry + 1 + 4); - *(int32_t*)(code_buffer + 1) = (int32_t)disp; - - check_verified_entry_alignment(entry, verified_entry); - - // Can't call nativeJump_at() because it's asserts jump exists - NativeJump* n_jump = (NativeJump*) verified_entry; - - //First patch dummy jmp in place - - unsigned char patch[4]; - assert(sizeof(patch)==sizeof(int32_t), "sanity check"); - patch[0] = 0xEB; // jmp rel8 - patch[1] = 0xFE; // jmp to self - patch[2] = 0xEB; - patch[3] = 0xFE; - - // First patch dummy jmp in place - *(int32_t*)verified_entry = *(int32_t *)patch; - - n_jump->wrote(0); - - // Patch 5th byte (from jump instruction) - verified_entry[4] = code_buffer[4]; - - n_jump->wrote(4); - - // Patch bytes 0-3 (from jump instruction) - *(int32_t*)verified_entry = *(int32_t *)code_buffer; - // Invalidate. Opteron requires a flush after every write. 
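A standalone sketch (illustrative only) of the remaining 64-bit patching strategy: the 5-byte "jmp rel32" is assembled into an 8-byte word and published with one aligned 64-bit store, mirroring the union plus Atomic::store in the hunk above. The real code additionally checks alignment (check_verified_entry_alignment) and invalidates the ICache afterwards.

#include <atomic>
#include <cstdint>
#include <cstring>

void patch_entry_sketch(std::atomic<uint64_t>* entry, int32_t disp) {
  uint64_t word = entry->load(std::memory_order_relaxed);   // keep the trailing 3 bytes
  uint8_t bytes[8];
  std::memcpy(bytes, &word, sizeof(word));
  bytes[0] = 0xE9;                                          // jmp rel32 opcode
  std::memcpy(bytes + 1, &disp, sizeof(disp));              // 32-bit displacement
  std::memcpy(&word, bytes, sizeof(word));
  entry->store(word, std::memory_order_relaxed);            // single 8-byte publish
}

int main() {
  std::atomic<uint64_t> entry{0x9090909090909090ull};       // 8 NOPs to start with
  patch_entry_sketch(&entry, 0x1234);
  return 0;
}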
- n_jump->wrote(0); -#endif // _LP64 - } void NativeIllegalInstruction::insert(address code_pos) { @@ -455,9 +399,7 @@ void NativeGeneralJump::verify() { void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4); -#ifdef AMD64 guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset"); -#endif // AMD64 *code_pos = unconditional_long_jump; *((int32_t *)(code_pos+1)) = (int32_t) disp; diff --git a/src/hotspot/cpu/x86/nativeInst_x86.hpp b/src/hotspot/cpu/x86/nativeInst_x86.hpp index d02387aa9ff..b2448cb99fd 100644 --- a/src/hotspot/cpu/x86/nativeInst_x86.hpp +++ b/src/hotspot/cpu/x86/nativeInst_x86.hpp @@ -126,10 +126,8 @@ class NativeCall: public NativeInstruction { address return_address() const { return addr_at(return_address_offset); } address destination() const; void set_destination(address dest) { -#ifdef AMD64 intptr_t disp = dest - return_address(); guarantee(disp == (intptr_t)(jint)disp, "must be 32-bit offset"); -#endif // AMD64 set_int_at(displacement_offset, (int)(dest - return_address())); } // Returns whether the 4-byte displacement operand is 4-byte aligned. @@ -211,15 +209,9 @@ class NativeCallReg: public NativeInstruction { // Instruction format for implied addressing mode immediate operand move to register instruction: // [REX/REX2] [OPCODE] [IMM32] class NativeMovConstReg: public NativeInstruction { -#ifdef AMD64 static const bool has_rex = true; static const int rex_size = 1; static const int rex2_size = 2; -#else - static const bool has_rex = false; - static const int rex_size = 0; - static const int rex2_size = 0; -#endif // AMD64 public: enum Intel_specific_constants { instruction_code = 0xB8, @@ -390,13 +382,8 @@ inline NativeMovRegMem* nativeMovRegMem_at (address address) { // leal reg, [reg + offset] class NativeLoadAddress: public NativeMovRegMem { -#ifdef AMD64 static const bool has_rex = true; static const int rex_size = 1; -#else - static const bool has_rex = false; - static const int rex_size = 0; -#endif // AMD64 public: enum Intel_specific_constants { instruction_prefix_wide = Assembler::REX_W, @@ -447,9 +434,7 @@ class NativeJump: public NativeInstruction { if (dest == (address) -1) { val = -5; // jump to self } -#ifdef AMD64 assert((labs(val) & 0xFFFFFFFF00000000) == 0 || dest == (address)-1, "must be 32bit offset or -1"); -#endif // AMD64 set_int_at(data_offset, (jint)val); } @@ -503,7 +488,7 @@ class NativeGeneralJump: public NativeInstruction { inline NativeGeneralJump* nativeGeneralJump_at(address address) { NativeGeneralJump* jump = (NativeGeneralJump*)(address); - debug_only(jump->verify();) + DEBUG_ONLY(jump->verify();) return jump; } @@ -572,19 +557,14 @@ inline bool NativeInstruction::is_jump_reg() { inline bool NativeInstruction::is_cond_jump() { return (int_at(0) & 0xF0FF) == 0x800F /* long jump */ || (ubyte_at(0) & 0xF0) == 0x70; /* short jump */ } inline bool NativeInstruction::is_safepoint_poll() { -#ifdef AMD64 const bool has_rex_prefix = ubyte_at(0) == NativeTstRegMem::instruction_rex_b_prefix; const int test_offset = has_rex2_prefix() ? 2 : (has_rex_prefix ? 
1 : 0); -#else - const int test_offset = 0; -#endif const bool is_test_opcode = ubyte_at(test_offset) == NativeTstRegMem::instruction_code_memXregl; const bool is_rax_target = (ubyte_at(test_offset + 1) & NativeTstRegMem::modrm_mask) == NativeTstRegMem::modrm_reg; return is_test_opcode && is_rax_target; } inline bool NativeInstruction::is_mov_literal64() { -#ifdef AMD64 bool valid_rex_prefix = ubyte_at(0) == Assembler::REX_W || ubyte_at(0) == Assembler::REX_WB; bool valid_rex2_prefix = ubyte_at(0) == Assembler::REX2 && (ubyte_at(1) == Assembler::REX2BIT_W || @@ -593,9 +573,6 @@ inline bool NativeInstruction::is_mov_literal64() { int opcode = has_rex2_prefix() ? ubyte_at(2) : ubyte_at(1); return ((valid_rex_prefix || valid_rex2_prefix) && (opcode & (0xff ^ NativeMovConstReg::register_mask)) == 0xB8); -#else - return false; -#endif // AMD64 } class NativePostCallNop: public NativeInstruction { diff --git a/src/hotspot/cpu/x86/runtime_x86_64.cpp b/src/hotspot/cpu/x86/runtime_x86_64.cpp index a063c7aeb37..5865bec2e39 100644 --- a/src/hotspot/cpu/x86/runtime_x86_64.cpp +++ b/src/hotspot/cpu/x86/runtime_x86_64.cpp @@ -25,6 +25,7 @@ #ifdef COMPILER2 #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "code/aotCodeCache.hpp" #include "code/vmreg.hpp" #include "interpreter/interpreter.hpp" #include "opto/runtime.hpp" @@ -56,11 +57,19 @@ class SimpleRuntimeFrame { //------------------------------generate_uncommon_trap_blob-------------------- UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() { + const char* name = OptoRuntime::stub_name(OptoStubId::uncommon_trap_id); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::C2Blob, (uint)OptoStubId::uncommon_trap_id, name); + if (blob != nullptr) { + return blob->as_uncommon_trap_blob(); + } + // Allocate space for the code ResourceMark rm; // Setup code generation tools - const char* name = OptoRuntime::stub_name(OptoStubId::uncommon_trap_id); CodeBuffer buffer(name, 2048, 1024); + if (buffer.blob() == nullptr) { + return nullptr; + } MacroAssembler* masm = new MacroAssembler(&buffer); assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); @@ -225,8 +234,10 @@ UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() { // Make sure all code is generated masm->flush(); - return UncommonTrapBlob::create(&buffer, oop_maps, - SimpleRuntimeFrame::framesize >> 1); + UncommonTrapBlob *ut_blob = UncommonTrapBlob::create(&buffer, oop_maps, + SimpleRuntimeFrame::framesize >> 1); + AOTCodeCache::store_code_blob(*ut_blob, AOTCodeEntry::C2Blob, (uint)OptoStubId::uncommon_trap_id, name); + return ut_blob; } //------------------------------generate_exception_blob--------------------------- @@ -262,11 +273,19 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() { assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + const char* name = OptoRuntime::stub_name(OptoStubId::exception_id); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::C2Blob, (uint)OptoStubId::exception_id, name); + if (blob != nullptr) { + return blob->as_exception_blob(); + } + // Allocate space for the code ResourceMark rm; // Setup code generation tools - const char* name = OptoRuntime::stub_name(OptoStubId::exception_id); CodeBuffer buffer(name, 2048, 1024); + if (buffer.blob() == nullptr) { + return nullptr; + } MacroAssembler* masm = new MacroAssembler(&buffer); @@ -357,6 +376,8 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() { masm->flush(); // Set exception blob - return 
ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); + ExceptionBlob* ex_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); + AOTCodeCache::store_code_blob(*ex_blob, AOTCodeEntry::C2Blob, (uint)OptoStubId::exception_id, name); + return ex_blob; } #endif // COMPILER2 diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp index 621340964ac..78259157dfa 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp @@ -27,6 +27,7 @@ #endif #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "code/aotCodeCache.hpp" #include "code/compiledIC.hpp" #include "code/debugInfoRec.hpp" #include "code/nativeInst.hpp" @@ -675,7 +676,6 @@ static void patch_callers_callsite(MacroAssembler *masm) { __ bind(L); } - static void gen_c2i_adapter(MacroAssembler *masm, int total_args_passed, int comp_args_on_stack, @@ -826,19 +826,6 @@ static void gen_c2i_adapter(MacroAssembler *masm, __ jmp(rcx); } -static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, - address code_start, address code_end, - Label& L_ok) { - Label L_fail; - __ lea(temp_reg, AddressLiteral(code_start, relocInfo::none)); - __ cmpptr(pc_reg, temp_reg); - __ jcc(Assembler::belowEqual, L_fail); - __ lea(temp_reg, AddressLiteral(code_end, relocInfo::none)); - __ cmpptr(pc_reg, temp_reg); - __ jcc(Assembler::below, L_ok); - __ bind(L_fail); -} - void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int total_args_passed, int comp_args_on_stack, @@ -871,41 +858,6 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // If this happens, control eventually transfers back to the compiled // caller, but with an uncorrected stack, causing delayed havoc. - if (VerifyAdapterCalls && - (Interpreter::code() != nullptr || StubRoutines::final_stubs_code() != nullptr)) { - // So, let's test for cascading c2i/i2c adapters right now. - // assert(Interpreter::contains($return_addr) || - // StubRoutines::contains($return_addr), - // "i2c adapter must return to an interpreter frame"); - __ block_comment("verify_i2c { "); - // Pick up the return address - __ movptr(rax, Address(rsp, 0)); - Label L_ok; - if (Interpreter::code() != nullptr) { - range_check(masm, rax, r11, - Interpreter::code()->code_start(), - Interpreter::code()->code_end(), - L_ok); - } - if (StubRoutines::initial_stubs_code() != nullptr) { - range_check(masm, rax, r11, - StubRoutines::initial_stubs_code()->code_begin(), - StubRoutines::initial_stubs_code()->code_end(), - L_ok); - } - if (StubRoutines::final_stubs_code() != nullptr) { - range_check(masm, rax, r11, - StubRoutines::final_stubs_code()->code_begin(), - StubRoutines::final_stubs_code()->code_end(), - L_ok); - } - const char* msg = "i2c adapter must return to an interpreter frame"; - __ block_comment(msg); - __ stop(msg); - __ bind(L_ok); - __ block_comment("} verify_i2ce "); - } - // Must preserve original SP for loading incoming arguments because // we need to align the outgoing SP for compiled code. 
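The same load-from-cache-or-generate-and-store shape recurs for each blob above and for the shared-runtime stubs below. A generic standalone sketch of the pattern (stand-in types and container, not the AOTCodeCache API):

#include <functional>
#include <map>
#include <string>

struct Blob { std::string name; };

static std::map<unsigned, Blob> aot_cache;   // stand-in for the AOT code cache

Blob* load_or_generate(unsigned id, const std::string& name,
                       const std::function<Blob()>& generate) {
  auto it = aot_cache.find(id);
  if (it != aot_cache.end()) {
    return &it->second;                          // cache hit: skip generation entirely
  }
  auto res = aot_cache.emplace(id, generate());  // miss: generate, then store for next time
  return &res.first->second;
}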
__ movptr(r11, rsp); @@ -1050,12 +1002,12 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } // --------------------------------------------------------------- -AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterHandlerEntry* handler) { address i2c_entry = __ pc(); gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); @@ -1117,7 +1069,8 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + handler->set_entry_points(i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + return; } int SharedRuntime::c_calling_convention(const BasicType *sig_bt, @@ -2472,6 +2425,23 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ leave(); +#if INCLUDE_JFR + // We need to do a poll test after unwind in case the sampler + // managed to sample the native frame after returning to Java. + Label L_return; + address poll_test_pc = __ pc(); + __ relocate(relocInfo::poll_return_type); + __ testb(Address(r15_thread, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit()); + __ jccb(Assembler::zero, L_return); + __ lea(rscratch1, InternalAddress(poll_test_pc)); + __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1); + assert(SharedRuntime::polling_page_return_handler_blob() != nullptr, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + __ jump(RuntimeAddress(stub)); + __ bind(L_return); +#endif // INCLUDE_JFR + // Any exception pending? __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); __ jcc(Assembler::notEqual, exception_pending); @@ -2648,6 +2618,12 @@ void SharedRuntime::generate_deopt_blob() { } #endif const char* name = SharedRuntime::stub_name(SharedStubId::deopt_id); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, (uint)SharedStubId::deopt_id, name); + if (blob != nullptr) { + _deopt_blob = blob->as_deoptimization_blob(); + return; + } + CodeBuffer buffer(name, 2560+pad, 1024); MacroAssembler* masm = new MacroAssembler(&buffer); int frame_size_in_words; @@ -2999,6 +2975,8 @@ void SharedRuntime::generate_deopt_blob() { _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); } #endif + + AOTCodeCache::store_code_blob(*_deopt_blob, AOTCodeEntry::SharedBlob, (uint)SharedStubId::deopt_id, name); } //------------------------------generate_handler_blob------ @@ -3011,12 +2989,16 @@ SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address cal "must be generated before"); assert(is_polling_page_id(id), "expected a polling page stub id"); + // Allocate space for the code. Setup code generation tools. 
+ const char* name = SharedRuntime::stub_name(id); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, (uint)id, name); + if (blob != nullptr) { + return blob->as_safepoint_blob(); + } + ResourceMark rm; OopMapSet *oop_maps = new OopMapSet(); OopMap* map; - - // Allocate space for the code. Setup code generation tools. - const char* name = SharedRuntime::stub_name(id); CodeBuffer buffer(name, 2548, 1024); MacroAssembler* masm = new MacroAssembler(&buffer); @@ -3176,7 +3158,10 @@ SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address cal masm->flush(); // Fill-out other meta info - return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); + SafepointBlob* sp_blob = SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); + + AOTCodeCache::store_code_blob(*sp_blob, AOTCodeEntry::SharedBlob, (uint)id, name); + return sp_blob; } // @@ -3191,10 +3176,14 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address desti assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before"); assert(is_resolve_id(id), "expected a resolve stub id"); + const char* name = SharedRuntime::stub_name(id); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, (uint)id, name); + if (blob != nullptr) { + return blob->as_runtime_stub(); + } + // allocate space for the code ResourceMark rm; - - const char* name = SharedRuntime::stub_name(id); CodeBuffer buffer(name, 1552, 512); MacroAssembler* masm = new MacroAssembler(&buffer); @@ -3263,7 +3252,10 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address desti // return the blob // frame_size_words or bytes?? - return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); + RuntimeStub* rs_blob = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); + + AOTCodeCache::store_code_blob(*rs_blob, AOTCodeEntry::SharedBlob, (uint)id, name); + return rs_blob; } // Continuation point for throwing of implicit exceptions that are @@ -3301,10 +3293,15 @@ RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address ru int insts_size = 512; int locs_size = 64; - ResourceMark rm; const char* timer_msg = "SharedRuntime generate_throw_exception"; TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime)); + CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, (uint)id, name); + if (blob != nullptr) { + return blob->as_runtime_stub(); + } + + ResourceMark rm; CodeBuffer code(name, insts_size, locs_size); OopMapSet* oop_maps = new OopMapSet(); MacroAssembler* masm = new MacroAssembler(&code); @@ -3362,6 +3359,8 @@ RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address ru frame_complete, (framesize >> (LogBytesPerWord - LogBytesPerInt)), oop_maps, false); + AOTCodeCache::store_code_blob(*stub, AOTCodeEntry::SharedBlob, (uint)id, name); + return stub; } diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index b88a2bd1f8e..1014c1c376f 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -3692,6 +3692,9 @@ void StubGenerator::generate_libm_stubs() { if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtanh)) { StubRoutines::_dtanh = generate_libmTanh(); // from stubGenerator_x86_64_tanh.cpp } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcbrt)) { + StubRoutines::_dcbrt = 
generate_libmCbrt(); // from stubGenerator_x86_64_cbrt.cpp + } if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) { StubRoutines::_dexp = generate_libmExp(); // from stubGenerator_x86_64_exp.cpp } @@ -4204,6 +4207,8 @@ void StubGenerator::generate_compiler_stubs() { generate_chacha_stubs(); + generate_kyber_stubs(); + generate_dilithium_stubs(); generate_sha3_stubs(); @@ -4333,70 +4338,6 @@ void StubGenerator::generate_compiler_stubs() { } } - // Get svml stub routine addresses - void *libjsvml = nullptr; - char ebuf[1024]; - char dll_name[JVM_MAXPATHLEN]; - if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "jsvml")) { - libjsvml = os::dll_load(dll_name, ebuf, sizeof ebuf); - } - if (libjsvml != nullptr) { - // SVML method naming convention - // All the methods are named as __jsvml_op _ha_ - // Where: - // ha stands for high accuracy - // is optional to indicate float/double - // Set to f for vector float operation - // Omitted for vector double operation - // is the number of elements in the vector - // 1, 2, 4, 8, 16 - // e.g. 128 bit float vector has 4 float elements - // indicates the avx/sse level: - // z0 is AVX512, l9 is AVX2, e9 is AVX1 and ex is for SSE2 - // e.g. __jsvml_expf16_ha_z0 is the method for computing 16 element vector float exp using AVX 512 insns - // __jsvml_exp8_ha_z0 is the method for computing 8 element vector double exp using AVX 512 insns - - log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml)); - if (UseAVX > 2) { - for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { - int vop = VectorSupport::VECTOR_OP_MATH_START + op; - if ((!VM_Version::supports_avx512dq()) && - (vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) { - continue; - } - snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::mathname[op]); - StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf); - - snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::mathname[op]); - StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf); - } - } - const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? 
"e9" : "ex"); - for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { - int vop = VectorSupport::VECTOR_OP_MATH_START + op; - if (vop == VectorSupport::VECTOR_OP_POW) { - continue; - } - snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str); - StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf); - - snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str); - StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf); - - snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::mathname[op], avx_sse_str); - StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf); - - snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::mathname[op], avx_sse_str); - StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf); - - snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::mathname[op], avx_sse_str); - StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf); - - snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::mathname[op], avx_sse_str); - StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf); - } - } - #endif // COMPILER2 #endif // COMPILER2_OR_JVMCI } diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp index c08b0168796..2f1e46f3132 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp @@ -490,10 +490,13 @@ class StubGenerator: public StubCodeGenerator { // SHA3 stubs void generate_sha3_stubs(); - // Dilithium stubs and helper functions + // Kyber stubs + void generate_kyber_stubs(); + + // Dilithium stubs void generate_dilithium_stubs(); - // BASE64 stubs + // BASE64 stubs address base64_shuffle_addr(); address base64_avx2_shuffle_addr(); address base64_avx2_input_mask_addr(); @@ -553,6 +556,7 @@ class StubGenerator: public StubCodeGenerator { address generate_libmCos(); address generate_libmTan(); address generate_libmTanh(); + address generate_libmCbrt(); address generate_libmExp(); address generate_libmPow(); address generate_libmLog(); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp new file mode 100644 index 00000000000..da60a9be276 --- /dev/null +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp @@ -0,0 +1,366 @@ +/* + * Copyright (c) 2025, Intel Corporation. All rights reserved. + * Intel Math Library (LIBM) Source Code + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "macroAssembler_x86.hpp" +#include "stubGenerator_x86_64.hpp" + +/******************************************************************************/ +// ALGORITHM DESCRIPTION +// --------------------- +// +// x=2^{3*k+j} * 1.b1 b2 ... b5 b6 ... b52 +// Let r=(x*2^{-3k-j} - 1.b1 b2 ... b5 1)* rcp[b1 b2 ..b5], +// where rcp[b1 b2 .. b5]=1/(1.b1 b2 b3 b4 b5 1) in double precision +// cbrt(2^j * 1. b1 b2 .. b5 1) is approximated as T[j][b1..b5]+D[j][b1..b5] +// (T stores the high 53 bits, D stores the low order bits) +// Result=2^k*T+(2^k*T*r)*P+2^k*D +// where P=p1+p2*r+..+p8*r^7 +// +// Special cases: +// cbrt(NaN) = quiet NaN +// cbrt(+/-INF) = +/-INF +// cbrt(+/-0) = +/-0 +// +/******************************************************************************/ + +ATTRIBUTE_ALIGNED(4) static const juint _SIG_MASK[] = +{ + 0, 1032192 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _EXP_MASK[] = +{ + 0, 3220176896 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _EXP_MSK2[] = +{ + 0, 3220193280 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _EXP_MSK3[] = +{ + 4294967295, 1048575 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _SCALE63[] = +{ + 0, 1138753536 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _ZERON[] = +{ + 0, 2147483648 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _INF[] = +{ + 0, 2146435072 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _NEG_INF[] = +{ + 0, 4293918720 +}; + +ATTRIBUTE_ALIGNED(16) static const juint _coeff_table[] = +{ + 1553778919, 3213899486, 3534952507, 3215266280, 1646371399, + 3214412045, 477218588, 3216798151, 3582521621, 1066628362, + 1007461464, 1068473053, 889629714, 1067378449, 1431655765, + 1070945621 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _rcp_table[] = +{ + 528611360, 3220144632, 2884679527, 3220082993, 1991868891, 3220024928, + 2298714891, 3219970134, 58835168, 3219918343, 3035110223, 3219869313, + 1617585086, 3219822831, 2500867033, 3219778702, 4241943008, 3219736752, + 258732970, 3219696825, 404232216, 3219658776, 2172167368, 3219622476, + 1544257904, 3219587808, 377579543, 3219554664, 1616385542, 3219522945, + 813783277, 3219492562, 3940743189, 3219463431, 2689777499, 3219435478, + 1700977147, 3219408632, 3169102082, 3219382828, 327235604, 3219358008, + 1244336319, 3219334115, 1300311200, 3219311099, 3095471925, 3219288912, + 2166487928, 3219267511, 2913108253, 3219246854, 293672978, 3219226904, + 288737297, 3219207624, 1810275472, 3219188981, 174592167, 3219170945, + 3539053052, 3219153485, 2164392968, 3219136576 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _cbrt_table[] = +{ + 572345495, 1072698681, 1998204467, 1072709382, 3861501553, 1072719872, + 2268192434, 1072730162, 2981979308, 1072740260, 270859143, 1072750176, + 2958651392, 1072759916, 313113243, 1072769490, 919449400, 1072778903, + 2809328903, 1072788162, 2222981587, 1072797274, 2352530781, 1072806244, + 594152517, 1072815078, 1555767199, 1072823780, 4282421314, 1072832355, + 2355578597, 1072840809, 1162590619, 1072849145, 797864051, 1072857367, + 431273680, 1072865479, 2669831148, 1072873484, 733477752, 1072881387, + 4280220604, 1072889189, 801961634, 1072896896, 2915370760, 
1072904508, + 1159613482, 1072912030, 2689944798, 1072919463, 1248687822, 1072926811, + 2967951030, 1072934075, 630170432, 1072941259, 3760898254, 1072948363, + 0, 1072955392, 2370273294, 1072962345, 1261754802, 1072972640, + 546334065, 1072986123, 1054893830, 1072999340, 1571187597, 1073012304, + 1107975175, 1073025027, 3606909377, 1073037519, 1113616747, 1073049792, + 4154744632, 1073061853, 3358931423, 1073073713, 4060702372, 1073085379, + 747576176, 1073096860, 3023138255, 1073108161, 1419988548, 1073119291, + 1914185305, 1073130255, 294389948, 1073141060, 3761802570, 1073151710, + 978281566, 1073162213, 823148820, 1073172572, 2420954441, 1073182792, + 3815449908, 1073192878, 2046058587, 1073202835, 1807524753, 1073212666, + 2628681401, 1073222375, 3225667357, 1073231966, 1555307421, 1073241443, + 3454043099, 1073250808, 1208137896, 1073260066, 3659916772, 1073269218, + 1886261264, 1073278269, 3593647839, 1073287220, 3086012205, 1073296075, + 2769796922, 1073304836, 888716057, 1073317807, 2201465623, 1073334794, + 164369365, 1073351447, 3462666733, 1073367780, 2773905457, 1073383810, + 1342879088, 1073399550, 2543933975, 1073415012, 1684477781, 1073430209, + 3532178543, 1073445151, 1147747300, 1073459850, 1928031793, 1073474314, + 2079717015, 1073488553, 4016765315, 1073502575, 3670431139, 1073516389, + 3549227225, 1073530002, 11637607, 1073543422, 588220169, 1073556654, + 2635407503, 1073569705, 2042029317, 1073582582, 1925128962, 1073595290, + 4136375664, 1073607834, 759964600, 1073620221, 4257606771, 1073632453, + 297278907, 1073644538, 3655053093, 1073656477, 2442253172, 1073668277, + 1111876799, 1073679941, 3330973139, 1073691472, 3438879452, 1073702875, + 3671565478, 1073714153, 1317849547, 1073725310, 1642364115, 1073736348 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _D_table[] = +{ + 4050900474, 1014427190, 1157977860, 1016444461, 1374568199, 1017271387, + 2809163288, 1016882676, 3742377377, 1013168191, 3101606597, 1017541672, + 65224358, 1017217597, 2691591250, 1017266643, 4020758549, 1017689313, + 1316310992, 1018030788, 1031537856, 1014090882, 3261395239, 1016413641, + 886424999, 1016313335, 3114776834, 1014195875, 1681120620, 1017825416, + 1329600273, 1016625740, 465474623, 1017097119, 4251633980, 1017169077, + 1986990133, 1017710645, 752958613, 1017159641, 2216216792, 1018020163, + 4282860129, 1015924861, 1557627859, 1016039538, 3889219754, 1018086237, + 3684996408, 1017353275, 723532103, 1017717141, 2951149676, 1012528470, + 831890937, 1017830553, 1031212645, 1017387331, 2741737450, 1017604974, + 2863311531, 1003776682, 4276736099, 1013153088, 4111778382, 1015673686, + 1728065769, 1016413986, 2708718031, 1018078833, 1069335005, 1015291224, + 700037144, 1016482032, 2904566452, 1017226861, 4074156649, 1017622651, + 25019565, 1015245366, 3601952608, 1015771755, 3267129373, 1017904664, + 503203103, 1014921629, 2122011730, 1018027866, 3927295461, 1014189456, + 2790625147, 1016024251, 1330460186, 1016940346, 4033568463, 1015538390, + 3695818227, 1017509621, 257573361, 1017208868, 3227697852, 1017337964, + 234118548, 1017169577, 4009025803, 1017278524, 1948343394, 1017749310, + 678398162, 1018144239, 3083864863, 1016669086, 2415453452, 1017890370, + 175467344, 1017330033, 3197359580, 1010339928, 2071276951, 1015941358, + 268372543, 1016737773, 938132959, 1017389108, 1816750559, 1017337448, + 4119203749, 1017152174, 2578653878, 1013108497, 2470331096, 1014678606, + 123855735, 1016553320, 1265650889, 1014782687, 3414398172, 1017182638, + 1040773369, 1016158401, 3483628886, 
1016886550, 4140499405, 1016191425, + 3893477850, 1016964495, 3935319771, 1009634717, 2978982660, 1015027112, + 2452709923, 1017990229, 3190365712, 1015835149, 4237588139, 1015832925, + 2610678389, 1017962711, 2127316774, 1017405770, 824267502, 1017959463, + 2165924042, 1017912225, 2774007076, 1013257418, 4123916326, 1017582284, + 1976417958, 1016959909, 4092806412, 1017711279, 119251817, 1015363631, + 3475418768, 1017675415, 1972580503, 1015470684, 815541017, 1017517969, + 2429917451, 1017397776, 4062888482, 1016749897, 68284153, 1017925678, + 2207779246, 1016320298, 1183466520, 1017408657, 143326427, 1017060403 +}; + +#define __ _masm-> + +address StubGenerator::generate_libmCbrt() { + StubGenStubId stub_id = StubGenStubId::dcbrt_id; + StubCodeMark mark(this, stub_id); + address start = __ pc(); + + Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; + Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1; + Label B1_1, B1_2, B1_4; + + address SIG_MASK = (address)_SIG_MASK; + address EXP_MASK = (address)_EXP_MASK; + address EXP_MSK2 = (address)_EXP_MSK2; + address EXP_MSK3 = (address)_EXP_MSK3; + address SCALE63 = (address)_SCALE63; + address ZERON = (address)_ZERON; + address INF = (address)_INF; + address NEG_INF = (address)_NEG_INF; + address coeff_table = (address)_coeff_table; + address rcp_table = (address)_rcp_table; + address cbrt_table = (address)_cbrt_table; + address D_table = (address)_D_table; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ bind(B1_1); + __ subq(rsp, 24); + __ movsd(Address(rsp), xmm0); + + __ bind(B1_2); + __ movq(xmm7, xmm0); + __ movl(rdx, 524032); + __ movsd(xmm5, ExternalAddress(EXP_MSK3), r11 /*rscratch*/); + __ movsd(xmm3, ExternalAddress(EXP_MSK2), r11 /*rscratch*/); + __ psrlq(xmm7, 44); + __ pextrw(rcx, xmm7, 0); + __ movdl(rax, xmm7); + __ movsd(xmm1, ExternalAddress(EXP_MASK), r11 /*rscratch*/); + __ movsd(xmm2, ExternalAddress(SIG_MASK), r11 /*rscratch*/); + __ andl(rcx, 248); + __ lea(r8, ExternalAddress(rcp_table)); + __ movsd(xmm4, Address(rcx, r8, Address::times_1)); + __ movq(r9, rax); + __ andl(rdx, rax); + __ cmpl(rdx, 0); + __ jcc(Assembler::equal, L_2TAG_PACKET_0_0_1); // Branch only if |x| is denormalized + __ cmpl(rdx, 524032); + __ jcc(Assembler::equal, L_2TAG_PACKET_1_0_1); // Branch only if |x| is INF or NaN + __ shrl(rdx, 8); + __ shrq(r9, 8); + __ andpd(xmm2, xmm0); + __ andpd(xmm0, xmm5); + __ orpd(xmm3, xmm2); + __ orpd(xmm1, xmm0); + __ movapd(xmm5, ExternalAddress(coeff_table), r11 /*rscratch*/); + __ movl(rax, 5462); + __ movapd(xmm6, ExternalAddress(coeff_table + 16), r11 /*rscratch*/); + __ mull(rdx); + __ movq(rdx, r9); + __ andq(r9, 2047); + __ shrl(rax, 14); + __ andl(rdx, 2048); + __ subq(r9, rax); + __ subq(r9, rax); + __ subq(r9, rax); + __ shlq(r9, 8); + __ addl(rax, 682); + __ orl(rax, rdx); + __ movdl(xmm7, rax); + __ addq(rcx, r9); + __ psllq(xmm7, 52); + + __ bind(L_2TAG_PACKET_2_0_1); + __ movapd(xmm2, ExternalAddress(coeff_table + 32), r11 /*rscratch*/); + __ movapd(xmm0, ExternalAddress(coeff_table + 48), r11 /*rscratch*/); + __ subsd(xmm1, xmm3); + __ movq(xmm3, xmm7); + __ lea(r8, ExternalAddress(cbrt_table)); + __ mulsd(xmm7, Address(rcx, r8, Address::times_1)); + __ mulsd(xmm1, xmm4); + __ lea(r8, ExternalAddress(D_table)); + __ mulsd(xmm3, Address(rcx, r8, Address::times_1)); + __ movapd(xmm4, xmm1); + __ unpcklpd(xmm1, xmm1); + __ mulpd(xmm5, xmm1); + __ mulpd(xmm6, xmm1); + __ mulpd(xmm1, xmm1); + __ addpd(xmm2, xmm5); 
+ __ addpd(xmm0, xmm6); + __ mulpd(xmm2, xmm1); + __ mulpd(xmm1, xmm1); + __ mulsd(xmm4, xmm7); + __ addpd(xmm0, xmm2); + __ mulsd(xmm1, xmm0); + __ unpckhpd(xmm0, xmm0); + __ addsd(xmm0, xmm1); + __ mulsd(xmm0, xmm4); + __ addsd(xmm0, xmm3); + __ addsd(xmm0, xmm7); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_0_0_1); + __ mulsd(xmm0, ExternalAddress(SCALE63), r11 /*rscratch*/); + __ movq(xmm7, xmm0); + __ movl(rdx, 524032); + __ psrlq(xmm7, 44); + __ pextrw(rcx, xmm7, 0); + __ movdl(rax, xmm7); + __ andl(rcx, 248); + __ lea(r8, ExternalAddress(rcp_table)); + __ movsd(xmm4, Address(rcx, r8, Address::times_1)); + __ movq(r9, rax); + __ andl(rdx, rax); + __ shrl(rdx, 8); + __ shrq(r9, 8); + __ cmpl(rdx, 0); + __ jcc(Assembler::equal, L_2TAG_PACKET_3_0_1); // Branch only if |x| is zero + __ andpd(xmm2, xmm0); + __ andpd(xmm0, xmm5); + __ orpd(xmm3, xmm2); + __ orpd(xmm1, xmm0); + __ movapd(xmm5, ExternalAddress(coeff_table), r11 /*rscratch*/); + __ movl(rax, 5462); + __ movapd(xmm6, ExternalAddress(coeff_table + 16), r11 /*rscratch*/); + __ mull(rdx); + __ movq(rdx, r9); + __ andq(r9, 2047); + __ shrl(rax, 14); + __ andl(rdx, 2048); + __ subq(r9, rax); + __ subq(r9, rax); + __ subq(r9, rax); + __ shlq(r9, 8); + __ addl(rax, 661); + __ orl(rax, rdx); + __ movdl(xmm7, rax); + __ addq(rcx, r9); + __ psllq(xmm7, 52); + __ jmp(L_2TAG_PACKET_2_0_1); + + __ bind(L_2TAG_PACKET_3_0_1); + __ cmpq(r9, 0); + __ jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1); // Branch only if x is negative zero + __ xorpd(xmm0, xmm0); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_4_0_1); + __ movsd(xmm0, ExternalAddress(ZERON), r11 /*rscratch*/); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_1_0_1); + __ movl(rax, Address(rsp, 4)); + __ movl(rdx, Address(rsp)); + __ movl(rcx, rax); + __ andl(rcx, 2147483647); + __ cmpl(rcx, 2146435072); + __ jcc(Assembler::above, L_2TAG_PACKET_5_0_1); // Branch only if |x| is NaN + __ cmpl(rdx, 0); + __ jcc(Assembler::notEqual, L_2TAG_PACKET_5_0_1); // Branch only if |x| is NaN + __ cmpl(rax, 2146435072); + __ jcc(Assembler::notEqual, L_2TAG_PACKET_6_0_1); // Branch only if x is negative INF + __ movsd(xmm0, ExternalAddress(INF), r11 /*rscratch*/); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_6_0_1); + __ movsd(xmm0, ExternalAddress(NEG_INF), r11 /*rscratch*/); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_5_0_1); + __ movsd(xmm0, Address(rsp)); + __ addsd(xmm0, xmm0); + __ movq(Address(rsp, 8), xmm0); + + __ bind(B1_4); + __ addq(rsp, 24); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; +} + +#undef __ diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp new file mode 100644 index 00000000000..91c005e92de --- /dev/null +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp @@ -0,0 +1,952 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "macroAssembler_x86.hpp" +#include "stubGenerator_x86_64.hpp" + +#define __ _masm-> + +#define xmm(i) as_XMMRegister(i) + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif // PRODUCT + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Constants +// +ATTRIBUTE_ALIGNED(64) static const uint16_t kyberAvx512Consts[] = { + 0xF301, 0xF301, 0xF301, 0xF301, // q^-1 mod montR + 0x0D01, 0x0D01, 0x0D01, 0x0D01, // q + 0x4EBF, 0x4EBF, 0x4EBF, 0x4EBF, // Barrett multiplier + 0x0200, 0x0200, 0x0200, 0x0200, //(dim/2)^-1 mod q + 0x0549, 0x0549, 0x0549, 0x0549, // montR^2 mod q + 0x0F00, 0x0F00, 0x0F00, 0x0F00 // mask for kyber12to16 + }; + +static int qInvModROffset = 0; +static int qOffset = 8; +static int barretMultiplierOffset = 16; +static int dimHalfInverseOffset = 24; +static int montRSquareModqOffset = 32; +static int f00Offset = 40; + +static address kyberAvx512ConstsAddr(int offset) { + return ((address) kyberAvx512Consts) + offset; +} + +const Register scratch = r10; + +ATTRIBUTE_ALIGNED(64) static const uint16_t kyberAvx512NttPerms[] = { +// 0 + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, +// 128 + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, +// 256 + 0x00, 0x01, 0x02, 0x03, 0x20, 0x21, 0x22, 0x23, + 0x08, 0x09, 0x0A, 0x0B, 0x28, 0x29, 0x2A, 0x2B, + 0x10, 0x11, 0x12, 0x13, 0x30, 0x31, 0x32, 0x33, + 0x18, 0x19, 0x1A, 0x1B, 0x38, 0x39, 0x3A, 0x3B, + 0x04, 0x05, 0x06, 0x07, 0x24, 0x25, 0x26, 0x27, + 0x0C, 0x0D, 0x0E, 0x0F, 0x2C, 0x2D, 0x2E, 0x2F, + 0x14, 0x15, 0x16, 0x17, 0x34, 0x35, 0x36, 0x37, + 0x1C, 0x1D, 0x1E, 0x1F, 0x3C, 0x3D, 0x3E, 0x3F, +// 384 + 0x00, 0x01, 0x20, 0x21, 0x04, 0x05, 0x24, 0x25, + 0x08, 0x09, 0x28, 0x29, 0x0C, 0x0D, 0x2C, 0x2D, + 0x10, 0x11, 0x30, 0x31, 0x14, 0x15, 0x34, 0x35, + 0x18, 0x19, 0x38, 0x39, 0x1C, 0x1D, 0x3C, 0x3D, + 0x02, 0x03, 0x22, 0x23, 0x06, 0x07, 0x26, 0x27, + 0x0A, 0x0B, 0x2A, 0x2B, 0x0E, 0x0F, 0x2E, 0x2F, + 0x12, 0x13, 0x32, 0x33, 0x16, 0x17, 0x36, 0x37, + 0x1A, 0x1B, 0x3A, 0x3B, 0x1E, 0x1F, 0x3E, 0x3F, +// 512 + 0x10, 0x11, 0x30, 0x31, 0x12, 0x13, 0x32, 0x33, + 0x14, 0x15, 0x34, 0x35, 0x16, 0x17, 0x36, 0x37, + 0x18, 0x19, 0x38, 0x39, 0x1A, 0x1B, 0x3A, 0x3B, + 0x1C, 0x1D, 0x3C, 0x3D, 0x1E, 0x1F, 0x3E, 0x3F, + 0x00, 0x01, 0x20, 0x21, 0x02, 0x03, 0x22, 0x23, + 0x04, 
0x05, 0x24, 0x25, 0x06, 0x07, 0x26, 0x27, + 0x08, 0x09, 0x28, 0x29, 0x0A, 0x0B, 0x2A, 0x2B, + 0x0C, 0x0D, 0x2C, 0x2D, 0x0E, 0x0F, 0x2E, 0x2F + }; + +static address kyberAvx512NttPermsAddr() { + return (address) kyberAvx512NttPerms; +} + +ATTRIBUTE_ALIGNED(64) static const uint16_t kyberAvx512InverseNttPerms[] = { +// 0 + 0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F, + 0x22, 0x23, 0x26, 0x27, 0x2A, 0x2B, 0x2E, 0x2F, + 0x32, 0x33, 0x36, 0x37, 0x3A, 0x3B, 0x3E, 0x3F, + 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, + 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D, + 0x20, 0x21, 0x24, 0x25, 0x28, 0x29, 0x2C, 0x2D, + 0x30, 0x31, 0x34, 0x35, 0x38, 0x39, 0x3C, 0x3D, +// 128 + 0x00, 0x01, 0x20, 0x21, 0x04, 0x05, 0x24, 0x25, + 0x08, 0x09, 0x28, 0x29, 0x0C, 0x0D, 0x2C, 0x2D, + 0x10, 0x11, 0x30, 0x31, 0x14, 0x15, 0x34, 0x35, + 0x18, 0x19, 0x38, 0x39, 0x1C, 0x1D, 0x3C, 0x3D, + 0x02, 0x03, 0x22, 0x23, 0x06, 0x07, 0x26, 0x27, + 0x0A, 0x0B, 0x2A, 0x2B, 0x0E, 0x0F, 0x2E, 0x2F, + 0x12, 0x13, 0x32, 0x33, 0x16, 0x17, 0x36, 0x37, + 0x1A, 0x1B, 0x3A, 0x3B, 0x1E, 0x1F, 0x3E, 0x3F, +// 256 + 0x00, 0x01, 0x02, 0x03, 0x20, 0x21, 0x22, 0x23, + 0x08, 0x09, 0x0A, 0x0B, 0x28, 0x29, 0x2A, 0x2B, + 0x10, 0x11, 0x12, 0x13, 0x30, 0x31, 0x32, 0x33, + 0x18, 0x19, 0x1A, 0x1B, 0x38, 0x39, 0x3A, 0x3B, + 0x04, 0x05, 0x06, 0x07, 0x24, 0x25, 0x26, 0x27, + 0x0C, 0x0D, 0x0E, 0x0F, 0x2C, 0x2D, 0x2E, 0x2F, + 0x14, 0x15, 0x16, 0x17, 0x34, 0x35, 0x36, 0x37, + 0x1C, 0x1D, 0x1E, 0x1F, 0x3C, 0x3D, 0x3E, 0x3F, +// 384 + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, +// 512 + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F + }; + +static address kyberAvx512InverseNttPermsAddr() { + return (address) kyberAvx512InverseNttPerms; +} + +ATTRIBUTE_ALIGNED(64) static const uint16_t kyberAvx512_nttMultPerms[] = { + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2A, 0x2C, 0x2E, + 0x30, 0x32, 0x34, 0x36, 0x38, 0x3A, 0x3C, 0x3E, + + 0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F, + 0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F, + 0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F, + + 0x00, 0x20, 0x01, 0x21, 0x02, 0x22, 0x03, 0x23, + 0x04, 0x24, 0x05, 0x25, 0x06, 0x26, 0x07, 0x27, + 0x08, 0x28, 0x09, 0x29, 0x0A, 0x2A, 0x0B, 0x2B, + 0x0C, 0x2C, 0x0D, 0x2D, 0x0E, 0x2E, 0x0F, 0x2F, + + 0x10, 0x30, 0x11, 0x31, 0x12, 0x32, 0x13, 0x33, + 0x14, 0x34, 0x15, 0x35, 0x16, 0x36, 0x17, 0x37, + 0x18, 0x38, 0x19, 0x39, 0x1A, 0x3A, 0x1B, 0x3B, + 0x1C, 0x3C, 0x1D, 0x3D, 0x1E, 0x3E, 0x1F, 0x3F + }; + +static address kyberAvx512_nttMultPermsAddr() { + return (address) kyberAvx512_nttMultPerms; +} + + ATTRIBUTE_ALIGNED(64) static const uint16_t kyberAvx512_12To16Perms[] = { +// 0 + 0x00, 0x03, 0x06, 
0x09, 0x0C, 0x0F, 0x12, 0x15, + 0x18, 0x1B, 0x1E, 0x21, 0x24, 0x27, 0x2A, 0x2D, + 0x30, 0x33, 0x36, 0x39, 0x3C, 0x3F, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x04, 0x07, 0x0A, 0x0D, 0x10, 0x13, 0x16, + 0x19, 0x1C, 0x1F, 0x22, 0x25, 0x28, 0x2B, 0x2E, + 0x31, 0x34, 0x37, 0x3A, 0x3D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 128 + 0x02, 0x05, 0x08, 0x0B, 0x0E, 0x11, 0x14, 0x17, + 0x1A, 0x1D, 0x20, 0x23, 0x26, 0x29, 0x2C, 0x2F, + 0x32, 0x35, 0x38, 0x3B, 0x3E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x22, 0x25, + 0x28, 0x2B, 0x2E, 0x31, 0x34, 0x37, 0x3A, 0x3D, +// 256 + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x20, 0x23, 0x26, + 0x29, 0x2C, 0x2F, 0x32, 0x35, 0x38, 0x3B, 0x3E, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x21, 0x24, 0x27, + 0x2A, 0x2D, 0x30, 0x33, 0x36, 0x39, 0x3C, 0x3F, +// 384 + 0x00, 0x20, 0x01, 0x21, 0x02, 0x22, 0x03, 0x23, + 0x04, 0x24, 0x05, 0x25, 0x06, 0x26, 0x07, 0x27, + 0x08, 0x28, 0x09, 0x29, 0x0A, 0x2A, 0x0B, 0x2B, + 0x0C, 0x2C, 0x0D, 0x2D, 0x0E, 0x2E, 0x0F, 0x2F, + 0x10, 0x30, 0x11, 0x31, 0x12, 0x32, 0x13, 0x33, + 0x14, 0x34, 0x15, 0x35, 0x16, 0x36, 0x17, 0x37, + 0x18, 0x38, 0x19, 0x39, 0x1A, 0x3A, 0x1B, 0x3B, + 0x1C, 0x3C, 0x1D, 0x3D, 0x1E, 0x3E, 0x1F, 0x3F + }; + +static address kyberAvx512_12To16PermsAddr() { + return (address) kyberAvx512_12To16Perms; +} + +static void load4regs(int destRegs[], Register address, int offset, + MacroAssembler *_masm) { + for (int i = 0; i < 4; i++) { + __ evmovdquw(xmm(destRegs[i]), Address(address, offset + i * 64), + Assembler::AVX_512bit); + } +} + +// For z = montmul(a,b), z will be between -q and q and congruent +// to a * b * R^-1 mod q, where R > 2 * q, R is a power of 2, +// -R/2 * q <= a * b < R/2 * q. +// (See e.g. Algorithm 3 in https://eprint.iacr.org/2018/039.pdf) +// For the Java code, we use R = 2^20 and for the intrinsic, R = 2^16. +// In our computations, b is always c * R mod q, so the montmul() really +// computes a * c mod q. In the Java code, we use 32-bit numbers for the +// computations, and we use R = 2^20 because that way the a * b numbers +// that occur during all computations stay in the required range. +// For the intrinsics, we use R = 2^16, because this way we can do twice +// as much work in parallel, the only drawback is that we should do some Barrett +// reductions in kyberInverseNtt so that the numbers stay in the required range. 
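// Editor's illustration (not part of this patch): a minimal scalar model of the
// 16-bit signed Montgomery multiplication that montmul() below performs lane-wise
// with AVX-512, assuming q = 3329 and R = 2^16 as encoded in kyberAvx512Consts.
// The result is congruent to a * b * R^-1 mod q and lies strictly between -q and q.
static inline int16_t montmul_scalar_sketch(int16_t a, int16_t b) {
  const int32_t q = 3329;                            // 0x0D01 in kyberAvx512Consts
  const int16_t q_inv = -3327;                       // q^-1 mod 2^16, i.e. 0xF301 above
  int32_t prod = (int32_t)a * b;                     // full signed product
  int16_t m = (int16_t)((int16_t)prod * q_inv);      // low halves, like the evpmullw pair
  return (int16_t)((prod - (int32_t)m * q) >> 16);   // high half, like evpmulhw/evpsubw
}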
+static void montmul(int outputRegs[], int inputRegs1[], int inputRegs2[], + int scratchRegs1[], int scratchRegs2[], MacroAssembler *_masm) { + for (int i = 0; i < 4; i++) { + __ evpmullw(xmm(scratchRegs1[i]), k0, xmm(inputRegs1[i]), + xmm(inputRegs2[i]), false, Assembler::AVX_512bit); + } + for (int i = 0; i < 4; i++) { + __ evpmulhw(xmm(scratchRegs2[i]), k0, xmm(inputRegs1[i]), + xmm(inputRegs2[i]), false, Assembler::AVX_512bit); + } + for (int i = 0; i < 4; i++) { + __ evpmullw(xmm(scratchRegs1[i]), k0, xmm(scratchRegs1[i]), + xmm31, false, Assembler::AVX_512bit); + } + for (int i = 0; i < 4; i++) { + __ evpmulhw(xmm(scratchRegs1[i]), k0, xmm(scratchRegs1[i]), + xmm30, false, Assembler::AVX_512bit); + } + for (int i = 0; i < 4; i++) { + __ evpsubw(xmm(outputRegs[i]), k0, xmm(scratchRegs2[i]), + xmm(scratchRegs1[i]), false, Assembler::AVX_512bit); + } +} + +static void sub_add(int subResult[], int addResult[], int input1[], int input2[], + MacroAssembler *_masm) { + for (int i = 0; i < 4; i++) { + __ evpsubw(xmm(subResult[i]), k0, xmm(input1[i]), xmm(input2[i]), + false, Assembler::AVX_512bit); + __ evpaddw(xmm(addResult[i]), k0, xmm(input1[i]), xmm(input2[i]), + false, Assembler::AVX_512bit); + } +} + +// result2 also acts as input1 +// result1 also acts as perm1 +static void permute(int result1[], int result2[], int input2[], int perm2, + MacroAssembler *_masm) { + + for (int i = 1; i < 4; i++) { + __ evmovdquw(xmm(result1[i]), xmm(result1[0]), Assembler::AVX_512bit); + } + + for (int i = 0; i < 4; i++) { + __ evpermi2w(xmm(result1[i]), xmm(result2[i]), xmm(input2[i]), + Assembler::AVX_512bit); + __ evpermt2w(xmm(result2[i]), xmm(perm2), xmm(input2[i]), + Assembler::AVX_512bit); + } +} + +static void store4regs(Register address, int offset, int sourceRegs[], + MacroAssembler *_masm) { + for (int i = 0; i < 4; i++) { + __ evmovdquw(Address(address, offset + i * 64), xmm(sourceRegs[i]), + Assembler::AVX_512bit); + } +} + +// In all 3 invocations of this function we use the same registers: +// xmm0-xmm7 for the input and the result, +// xmm8-xmm15 as scratch registers and +// xmm16-xmm17 for the constants, +// so we don't pass register arguments. 
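// Editor's illustration (not part of this patch): a scalar model of the lane-wise
// Barrett reduction performed by barrettReduce() below. The multiplier 0x4EBF
// (= 20159) is 2^26 / q rounded to the nearest integer for q = 3329; the result
// stays congruent to the input mod q and small enough for the following montmuls.
static inline int16_t barrett_reduce_scalar_sketch(int16_t a) {
  const int16_t q = 3329;
  const int16_t v = 20159;                           // the Barrett multiplier above
  int16_t t = (int16_t)(((int32_t)a * v) >> 16);     // evpmulhw
  t = (int16_t)(t >> 10);                            // evpsraw by 10, so t ~ a / q
  return (int16_t)(a - t * q);                       // evpmullw + evpsubw
}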
+static void barrettReduce(MacroAssembler *_masm) { + for (int i = 0; i < 8; i++) { + __ evpmulhw(xmm(i + 8), k0, xmm(i), xmm16, false, Assembler::AVX_512bit); + } + + for (int i = 0; i < 8; i++) { + __ evpsraw(xmm(i + 8), k0, xmm(i + 8), 10, false, Assembler::AVX_512bit); + } + + for (int i = 0; i < 8; i++) { + __ evpmullw(xmm(i + 8), k0, xmm(i + 8), xmm17, false, Assembler::AVX_512bit); + } + + for (int i = 0; i < 8; i++) { + __ evpsubw(xmm(i), k0, xmm(i), xmm(i + 8), false, Assembler::AVX_512bit); + } +} + +static int xmm0_3[] = {0, 1, 2, 3}; +static int xmm0145[] = {0, 1, 4, 5}; +static int xmm0246[] = {0, 2, 4, 6}; +static int xmm0829[] = {0, 8, 2, 9}; +static int xmm1001[] = {1, 0, 0, 1}; +static int xmm1357[] = {1, 3, 5, 7}; +static int xmm2367[] = {2, 3, 6, 7}; +static int xmm2_0_10_8[] = {2, 0, 10, 8}; +static int xmm3223[] = {3, 2, 2, 3}; +static int xmm4_7[] = {4, 5, 6, 7}; +static int xmm5454[] = {5, 4, 5, 4}; +static int xmm7676[] = {7, 6, 7, 6}; +static int xmm8_11[] = {8, 9, 10, 11}; +static int xmm12_15[] = {12, 13, 14, 15}; +static int xmm16_19[] = {16, 17, 18, 19}; +static int xmm20_23[] = {20, 21, 22, 23}; +static int xmm23_23[] = {23, 23, 23, 23}; +static int xmm24_27[] = {24, 25, 26, 27}; +static int xmm26_29[] = {26, 27, 28, 29}; +static int xmm28_31[] = {28, 29, 30, 31}; +static int xmm29_29[] = {29, 29, 29, 29}; + +// Kyber NTT function. +// +// coeffs (short[256]) = c_rarg0 +// ntt_zetas (short[256]) = c_rarg1 +address generate_kyberNtt_avx512(StubGenerator *stubgen, + MacroAssembler *_masm) { + __ align(CodeEntryAlignment); + StubGenStubId stub_id = kyberNtt_id; + StubCodeMark mark(stubgen, stub_id); + address start = __ pc(); + __ enter(); + + const Register coeffs = c_rarg0; + const Register zetas = c_rarg1; + + const Register perms = r11; + + __ lea(perms, ExternalAddress(kyberAvx512NttPermsAddr())); + + load4regs(xmm4_7, coeffs, 256, _masm); + load4regs(xmm20_23, zetas, 0, _masm); + + __ vpbroadcastq(xmm30, + ExternalAddress(kyberAvx512ConstsAddr(qOffset)), + Assembler::AVX_512bit, scratch); // q + __ vpbroadcastq(xmm31, + ExternalAddress(kyberAvx512ConstsAddr(qInvModROffset)), + Assembler::AVX_512bit, scratch); // q^-1 mod montR + + load4regs(xmm0_3, coeffs, 0, _masm); + + // Each level represents one iteration of the outer for loop of the Java version. 
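// Editor's illustration (not part of this patch): the scalar loop that the seven
// "level" sections below unroll and vectorize, in the spirit of a textbook
// Kyber/ML-KEM forward NTT. The real code differs in its zeta table layout and in
// the register permutations between levels; montmul_scalar_sketch is the
// illustrative helper sketched further up in this file.
auto ntt_level_sketch = [](int16_t coeffs[256], const int16_t* zetas) {
  int k = 0;
  for (int len = 128; len >= 2; len >>= 1) {         // one iteration == one "level"
    for (int start = 0; start < 256; start += 2 * len) {
      int16_t zeta = zetas[k++];
      for (int j = start; j < start + len; j++) {    // Cooley-Tukey butterfly
        int16_t t = montmul_scalar_sketch(zeta, coeffs[j + len]);
        coeffs[j + len] = (int16_t)(coeffs[j] - t);
        coeffs[j]       = (int16_t)(coeffs[j] + t);
      }
    }
  }
};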
+ // level 0 + montmul(xmm8_11, xmm4_7, xmm20_23, xmm8_11, xmm4_7, _masm); + load4regs(xmm20_23, zetas, 256, _masm); + sub_add(xmm4_7, xmm0_3, xmm0_3, xmm8_11, _masm); + + //level 1 + montmul(xmm12_15, xmm2367, xmm20_23, xmm12_15, xmm8_11, _masm); + load4regs(xmm20_23, zetas, 512, _masm); + sub_add(xmm2367, xmm0145, xmm0145, xmm12_15, _masm); + + // level 2 + montmul(xmm8_11, xmm1357, xmm20_23, xmm12_15, xmm8_11, _masm); + __ evmovdquw(xmm12, Address(perms, 0), Assembler::AVX_512bit); + __ evmovdquw(xmm16, Address(perms, 64), Assembler::AVX_512bit); + load4regs(xmm20_23, zetas, 768, _masm); + sub_add(xmm1357, xmm0246, xmm0246, xmm8_11, _masm); + + //level 3 + permute(xmm12_15, xmm0246, xmm1357, 16, _masm); + montmul(xmm8_11, xmm12_15, xmm20_23, xmm16_19, xmm8_11, _masm); + __ evmovdquw(xmm16, Address(perms, 128), Assembler::AVX_512bit); + __ evmovdquw(xmm24, Address(perms, 192), Assembler::AVX_512bit); + load4regs(xmm20_23, zetas, 1024, _masm); + sub_add(xmm1357, xmm0246, xmm0246, xmm8_11, _masm); + + // level 4 + permute(xmm16_19, xmm0246, xmm1357, 24, _masm); + montmul(xmm8_11, xmm0246, xmm20_23, xmm24_27, xmm8_11, _masm); + __ evmovdquw(xmm1, Address(perms, 256), Assembler::AVX_512bit); + __ evmovdquw(xmm24, Address(perms, 320), Assembler::AVX_512bit); + load4regs(xmm20_23, zetas, 1280, _masm); + sub_add(xmm12_15, xmm0246, xmm16_19, xmm8_11, _masm); + + // level 5 + permute(xmm1357, xmm0246, xmm12_15, 24, _masm); + montmul(xmm16_19, xmm0246, xmm20_23, xmm16_19, xmm8_11, _masm); + + __ evmovdquw(xmm12, Address(perms, 384), Assembler::AVX_512bit); + __ evmovdquw(xmm8, Address(perms, 448), Assembler::AVX_512bit); + + load4regs(xmm20_23, zetas, 1536, _masm); + sub_add(xmm24_27, xmm0246, xmm1357, xmm16_19, _masm); + + // level 6 + permute(xmm12_15, xmm0246, xmm24_27, 8, _masm); + + __ evmovdquw(xmm1, Address(perms, 512), Assembler::AVX_512bit); + __ evmovdquw(xmm24, Address(perms, 576), Assembler::AVX_512bit); + + montmul(xmm16_19, xmm0246, xmm20_23, xmm16_19, xmm8_11, _masm); + sub_add(xmm20_23, xmm0246, xmm12_15, xmm16_19, _masm); + + permute(xmm1357, xmm0246, xmm20_23, 24, _masm); + + store4regs(coeffs, 0, xmm0_3, _masm); + store4regs(coeffs, 256, xmm4_7, _masm); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ mov64(rax, 0); // return 0 + __ ret(0); + + return start; +} + +// Kyber Inverse NTT function +// +// coeffs (short[256]) = c_rarg0 +// ntt_zetas (short[256]) = c_rarg1 +address generate_kyberInverseNtt_avx512(StubGenerator *stubgen, + MacroAssembler *_masm) { + + __ align(CodeEntryAlignment); + StubGenStubId stub_id = kyberInverseNtt_id; + StubCodeMark mark(stubgen, stub_id); + address start = __ pc(); + __ enter(); + + const Register coeffs = c_rarg0; + const Register zetas = c_rarg1; + + const Register perms = r11; + + __ lea(perms, ExternalAddress(kyberAvx512InverseNttPermsAddr())); + __ evmovdquw(xmm12, Address(perms, 0), Assembler::AVX_512bit); + __ evmovdquw(xmm16, Address(perms, 64), Assembler::AVX_512bit); + + __ vpbroadcastq(xmm31, + ExternalAddress(kyberAvx512ConstsAddr(qInvModROffset)), + Assembler::AVX_512bit, scratch); // q^-1 mod montR + __ vpbroadcastq(xmm30, + ExternalAddress(kyberAvx512ConstsAddr(qOffset)), + Assembler::AVX_512bit, scratch); // q + __ vpbroadcastq(xmm29, + ExternalAddress(kyberAvx512ConstsAddr(dimHalfInverseOffset)), + Assembler::AVX_512bit, scratch); // (dim/2)^-1 mod q + + load4regs(xmm0_3, coeffs, 0, _masm); + load4regs(xmm4_7, coeffs, 256, _masm); + + // Each level represents one iteration of the outer for loop of 
the Java version. + // level 0 + load4regs(xmm8_11, zetas, 0, _masm); + permute(xmm12_15, xmm0246, xmm1357, 16, _masm); + + __ evmovdquw(xmm1, Address(perms, 128), Assembler::AVX_512bit); + __ evmovdquw(xmm20, Address(perms, 192), Assembler::AVX_512bit); + + sub_add(xmm16_19, xmm0246, xmm0246, xmm12_15, _masm); + montmul(xmm12_15, xmm16_19, xmm8_11, xmm12_15, xmm8_11, _masm); + + // level 1 + load4regs(xmm8_11, zetas, 256, _masm); + permute(xmm1357, xmm0246, xmm12_15, 20, _masm); + sub_add(xmm16_19, xmm0246, xmm1357, xmm0246, _masm); + + __ evmovdquw(xmm1, Address(perms, 256), Assembler::AVX_512bit); + __ evmovdquw(xmm20, Address(perms, 320), Assembler::AVX_512bit); + + montmul(xmm12_15, xmm16_19, xmm8_11, xmm12_15, xmm8_11, _masm); + + // level2 + load4regs(xmm8_11, zetas, 512, _masm); + permute(xmm1357, xmm0246, xmm12_15, 20, _masm); + sub_add(xmm16_19, xmm0246, xmm1357, xmm0246,_masm); + + __ evmovdquw(xmm1, Address(perms, 384), Assembler::AVX_512bit); + __ evmovdquw(xmm20, Address(perms, 448), Assembler::AVX_512bit); + + montmul(xmm12_15, xmm16_19, xmm8_11, xmm12_15, xmm8_11, _masm); + + __ vpbroadcastq(xmm16, + ExternalAddress(kyberAvx512ConstsAddr(barretMultiplierOffset)), + Assembler::AVX_512bit, scratch); // Barrett multiplier + __ vpbroadcastq(xmm17, + ExternalAddress(kyberAvx512ConstsAddr(qOffset)), + Assembler::AVX_512bit, scratch); // q + + permute(xmm1357, xmm0246, xmm12_15, 20, _masm); + barrettReduce(_masm); + +// level 3 + load4regs(xmm8_11, zetas, 768, _masm); + sub_add(xmm16_19, xmm0246, xmm1357, xmm0246, _masm); + + __ evmovdquw(xmm1, Address(perms, 512), Assembler::AVX_512bit); + __ evmovdquw(xmm20, Address(perms, 576), Assembler::AVX_512bit); + + montmul(xmm12_15, xmm16_19, xmm8_11, xmm12_15, xmm8_11, _masm); + permute(xmm1357, xmm0246, xmm12_15, 20, _masm); + + // level 4 + load4regs(xmm8_11, zetas, 1024, _masm); + + __ vpbroadcastq(xmm16, + ExternalAddress(kyberAvx512ConstsAddr(barretMultiplierOffset)), + Assembler::AVX_512bit, scratch); // Barrett multiplier + __ vpbroadcastq(xmm17, + ExternalAddress(kyberAvx512ConstsAddr(qOffset)), + Assembler::AVX_512bit, scratch); // q + + sub_add(xmm12_15, xmm0246, xmm0246, xmm1357, _masm); + montmul(xmm1357, xmm12_15, xmm8_11, xmm1357, xmm8_11, _masm); + barrettReduce(_masm); + + // level 5 + load4regs(xmm8_11, zetas, 1280, _masm); + sub_add(xmm12_15, xmm0145, xmm0145, xmm2367, _masm); + montmul(xmm2367, xmm12_15, xmm8_11, xmm2367, xmm8_11, _masm); + + // level 6 + load4regs(xmm8_11, zetas, 1536, _masm); + sub_add(xmm12_15, xmm0_3, xmm0_3, xmm4_7, _masm); + montmul(xmm4_7, xmm12_15, xmm8_11, xmm4_7, xmm8_11, _masm); + + montmul(xmm8_11, xmm29_29, xmm0_3, xmm8_11, xmm0_3, _masm); + montmul(xmm12_15, xmm29_29, xmm4_7, xmm12_15, xmm4_7, _masm); + + store4regs(coeffs, 0, xmm8_11, _masm); + store4regs(coeffs, 256, xmm12_15, _masm); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ mov64(rax, 0); // return 0 + __ ret(0); + + return start; +} + +// Kyber multiply polynomials in the NTT domain. 
+// +// result (short[256]) = c_rarg0 +// ntta (short[256]) = c_rarg1 +// nttb (short[256]) = c_rarg2 +// zetas (short[128]) = c_rarg3 +address generate_kyberNttMult_avx512(StubGenerator *stubgen, + MacroAssembler *_masm) { + + __ align(CodeEntryAlignment); + StubGenStubId stub_id = kyberNttMult_id; + StubCodeMark mark(stubgen, stub_id); + address start = __ pc(); + __ enter(); + + const Register result = c_rarg0; + const Register ntta = c_rarg1; + const Register nttb = c_rarg2; + const Register zetas = c_rarg3; + + const Register perms = r11; + const Register loopCnt = r12; + + __ push(r12); + __ movl(loopCnt, 2); + + Label Loop; + + __ lea(perms, ExternalAddress(kyberAvx512_nttMultPermsAddr())); + + + load4regs(xmm26_29, perms, 0, _masm); + __ vpbroadcastq(xmm31, + ExternalAddress(kyberAvx512ConstsAddr(qInvModROffset)), + Assembler::AVX_512bit, scratch); // q^-1 mod montR + __ vpbroadcastq(xmm30, + ExternalAddress(kyberAvx512ConstsAddr(qOffset)), + Assembler::AVX_512bit, scratch); // q + __ vpbroadcastq(xmm23, + ExternalAddress(kyberAvx512ConstsAddr(montRSquareModqOffset)), + Assembler::AVX_512bit, scratch); // montR^2 mod q + + __ BIND(Loop); + + __ evmovdquw(xmm1, Address(ntta, 0), Assembler::AVX_512bit); + __ evmovdquw(xmm8, Address(ntta, 64), Assembler::AVX_512bit); + __ evmovdquw(xmm3, Address(ntta, 128), Assembler::AVX_512bit); + __ evmovdquw(xmm9, Address(ntta, 192), Assembler::AVX_512bit); + + __ evmovdquw(xmm5, Address(nttb, 0), Assembler::AVX_512bit); + __ evmovdquw(xmm10, Address(nttb, 64), Assembler::AVX_512bit); + __ evmovdquw(xmm7, Address(nttb, 128), Assembler::AVX_512bit); + __ evmovdquw(xmm11, Address(nttb, 192), Assembler::AVX_512bit); + + __ evmovdquw(xmm0, xmm26, Assembler::AVX_512bit); + __ evmovdquw(xmm2, xmm26, Assembler::AVX_512bit); + __ evmovdquw(xmm4, xmm26, Assembler::AVX_512bit); + __ evmovdquw(xmm6, xmm26, Assembler::AVX_512bit); + + __ evpermi2w(xmm0, xmm1, xmm8, Assembler::AVX_512bit); + __ evpermt2w(xmm1, xmm27, xmm8, Assembler::AVX_512bit); + __ evpermi2w(xmm2, xmm3, xmm9, Assembler::AVX_512bit); + __ evpermt2w(xmm3, xmm27, xmm9, Assembler::AVX_512bit); + + __ evpermi2w(xmm4, xmm5, xmm10, Assembler::AVX_512bit); + __ evpermt2w(xmm5, xmm27, xmm10, Assembler::AVX_512bit); + __ evpermi2w(xmm6, xmm7, xmm11, Assembler::AVX_512bit); + __ evpermt2w(xmm7, xmm27, xmm11, Assembler::AVX_512bit); + + __ evmovdquw(xmm24, Address(zetas, 0), Assembler::AVX_512bit); + __ evmovdquw(xmm25, Address(zetas, 64), Assembler::AVX_512bit); + + montmul(xmm16_19, xmm1001, xmm5454, xmm16_19, xmm12_15, _masm); + + montmul(xmm0145, xmm3223, xmm7676, xmm0145, xmm12_15, _masm); + + __ evpmullw(xmm2, k0, xmm16, xmm24, false, Assembler::AVX_512bit); + __ evpmullw(xmm3, k0, xmm0, xmm25, false, Assembler::AVX_512bit); + __ evpmulhw(xmm12, k0, xmm16, xmm24, false, Assembler::AVX_512bit); + __ evpmulhw(xmm13, k0, xmm0, xmm25, false, Assembler::AVX_512bit); + + __ evpmullw(xmm2, k0, xmm2, xmm31, false, Assembler::AVX_512bit); + __ evpmullw(xmm3, k0, xmm3, xmm31, false, Assembler::AVX_512bit); + __ evpmulhw(xmm2, k0, xmm30, xmm2, false, Assembler::AVX_512bit); + __ evpmulhw(xmm3, k0, xmm30, xmm3, false, Assembler::AVX_512bit); + + __ evpsubw(xmm2, k0, xmm12, xmm2, false, Assembler::AVX_512bit); + __ evpsubw(xmm3, k0, xmm13, xmm3, false, Assembler::AVX_512bit); + + __ evpaddw(xmm0, k0, xmm2, xmm17, false, Assembler::AVX_512bit); + __ evpaddw(xmm8, k0, xmm3, xmm1, false, Assembler::AVX_512bit); + __ evpaddw(xmm2, k0, xmm18, xmm19, false, Assembler::AVX_512bit); + __ evpaddw(xmm9, k0, xmm4, xmm5, 
false, Assembler::AVX_512bit); + + montmul(xmm1357, xmm0829, xmm23_23, xmm1357, xmm0829, _masm); + + __ evmovdquw(xmm0, xmm28, Assembler::AVX_512bit); + __ evmovdquw(xmm2, xmm28, Assembler::AVX_512bit); + __ evpermi2w(xmm0, xmm1, xmm5, Assembler::AVX_512bit); + __ evpermt2w(xmm1, xmm29, xmm5, Assembler::AVX_512bit); + __ evpermi2w(xmm2, xmm3, xmm7, Assembler::AVX_512bit); + __ evpermt2w(xmm3, xmm29, xmm7, Assembler::AVX_512bit); + + store4regs(result, 0, xmm0_3, _masm); + + __ addptr(ntta, 256); + __ addptr(nttb, 256); + __ addptr(result, 256); + __ addptr(zetas, 128); + __ subl(loopCnt, 1); + __ jcc(Assembler::greater, Loop); + + __ pop(r12); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ mov64(rax, 0); // return 0 + __ ret(0); + + return start; +} + +// Kyber add 2 polynomials. +// +// result (short[256]) = c_rarg0 +// a (short[256]) = c_rarg1 +// b (short[256]) = c_rarg2 +address generate_kyberAddPoly_2_avx512(StubGenerator *stubgen, + MacroAssembler *_masm) { + + __ align(CodeEntryAlignment); + StubGenStubId stub_id = kyberAddPoly_2_id; + StubCodeMark mark(stubgen, stub_id); + address start = __ pc(); + __ enter(); + + const Register result = c_rarg0; + const Register a = c_rarg1; + const Register b = c_rarg2; + + __ vpbroadcastq(xmm31, + ExternalAddress(kyberAvx512ConstsAddr(qOffset)), + Assembler::AVX_512bit, scratch); // q + + for (int i = 0; i < 8; i++) { + __ evmovdquw(xmm(i), Address(a, 64 * i), Assembler::AVX_512bit); + __ evmovdquw(xmm(i + 8), Address(b, 64 * i), Assembler::AVX_512bit); + } + + for (int i = 0; i < 8; i++) { + __ evpaddw(xmm(i), k0, xmm(i), xmm(i + 8), false, Assembler::AVX_512bit); + } + + for (int i = 0; i < 8; i++) { + __ evpaddw(xmm(i), k0, xmm(i), xmm31, false, Assembler::AVX_512bit); + } + + store4regs(result, 0, xmm0_3, _masm); + store4regs(result, 256, xmm4_7, _masm); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ mov64(rax, 0); // return 0 + __ ret(0); + + return start; +} + +// Kyber add 3 polynomials. 
+// +// result (short[256]) = c_rarg0 +// a (short[256]) = c_rarg1 +// b (short[256]) = c_rarg2 +// c (short[256]) = c_rarg3 +address generate_kyberAddPoly_3_avx512(StubGenerator *stubgen, + MacroAssembler *_masm) { + + __ align(CodeEntryAlignment); + StubGenStubId stub_id = kyberAddPoly_3_id; + StubCodeMark mark(stubgen, stub_id); + address start = __ pc(); + __ enter(); + + const Register result = c_rarg0; + const Register a = c_rarg1; + const Register b = c_rarg2; + const Register c = c_rarg3; + + __ vpbroadcastq(xmm31, + ExternalAddress(kyberAvx512ConstsAddr(qOffset)), + Assembler::AVX_512bit, scratch); // q + + for (int i = 0; i < 8; i++) { + __ evmovdquw(xmm(i), Address(a, 64 * i), Assembler::AVX_512bit); + __ evmovdquw(xmm(i + 8), Address(b, 64 * i), Assembler::AVX_512bit); + __ evmovdquw(xmm(i + 16), Address(c, 64 * i), Assembler::AVX_512bit); + } + + __ evpaddw(xmm31, k0, xmm31, xmm31, false, Assembler::AVX_512bit); + + for (int i = 0; i < 8; i++) { + __ evpaddw(xmm(i), k0, xmm(i), xmm(i + 8), false, Assembler::AVX_512bit); + } + + for (int i = 0; i < 8; i++) { + __ evpaddw(xmm(i), k0, xmm(i), xmm(i + 16), false, Assembler::AVX_512bit); + } + + for (int i = 0; i < 8; i++) { + __ evpaddw(xmm(i), k0, xmm(i), xmm31, false, Assembler::AVX_512bit); + } + + store4regs(result, 0, xmm0_3, _masm); + store4regs(result, 256, xmm4_7, _masm); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ mov64(rax, 0); // return 0 + __ ret(0); + + return start; +} + +// Kyber parse XOF output to polynomial coefficient candidates. +// +// condensed (byte[168]) = c_rarg0 +// condensedOffs (int) = c_rarg1 +// parsed (short[112]) = c_rarg2 +// parsedLength (int) = c_rarg3 +address generate_kyber12To16_avx512(StubGenerator *stubgen, + MacroAssembler *_masm) { + + __ align(CodeEntryAlignment); + StubGenStubId stub_id = kyber12To16_id; + StubCodeMark mark(stubgen, stub_id); + address start = __ pc(); + __ enter(); + + const Register condensed = c_rarg0; + const Register condensedOffs = c_rarg1; + const Register parsed = c_rarg2; + const Register parsedLength = c_rarg3; + + const Register perms = r11; + + Label Loop; + + __ addptr(condensed, condensedOffs); + + __ lea(perms, ExternalAddress(kyberAvx512_12To16PermsAddr())); + + load4regs(xmm24_27, perms, 0, _masm); + load4regs(xmm28_31, perms, 256, _masm); + __ vpbroadcastq(xmm23, + ExternalAddress(kyberAvx512ConstsAddr(f00Offset)), + Assembler::AVX_512bit, scratch); // 0xF00 + + __ BIND(Loop); + __ evmovdqub(xmm0, Address(condensed, 0),Assembler::AVX_256bit); + __ evmovdqub(xmm1, Address(condensed, 32),Assembler::AVX_256bit); + __ evmovdqub(xmm2, Address(condensed, 64),Assembler::AVX_256bit); + __ evmovdqub(xmm8, Address(condensed, 96),Assembler::AVX_256bit); + __ evmovdqub(xmm9, Address(condensed, 128),Assembler::AVX_256bit); + __ evmovdqub(xmm10, Address(condensed, 160),Assembler::AVX_256bit); + __ vpmovzxbw(xmm0, xmm0, Assembler::AVX_512bit); + __ vpmovzxbw(xmm1, xmm1, Assembler::AVX_512bit); + __ vpmovzxbw(xmm2, xmm2, Assembler::AVX_512bit); + __ vpmovzxbw(xmm8, xmm8, Assembler::AVX_512bit); + __ vpmovzxbw(xmm9, xmm9, Assembler::AVX_512bit); + __ vpmovzxbw(xmm10, xmm10, Assembler::AVX_512bit); + __ evmovdquw(xmm3, xmm24, Assembler::AVX_512bit); + __ evmovdquw(xmm4, xmm25, Assembler::AVX_512bit); + __ evmovdquw(xmm5, xmm26, Assembler::AVX_512bit); + __ evmovdquw(xmm11, xmm24, Assembler::AVX_512bit); + __ evmovdquw(xmm12, xmm25, Assembler::AVX_512bit); + __ evmovdquw(xmm13, xmm26, Assembler::AVX_512bit); + __ evpermi2w(xmm3, xmm0, 
xmm1, Assembler::AVX_512bit); + __ evpermi2w(xmm4, xmm0, xmm1, Assembler::AVX_512bit); + __ evpermi2w(xmm5, xmm0, xmm1, Assembler::AVX_512bit); + __ evpermi2w(xmm11, xmm8, xmm9, Assembler::AVX_512bit); + __ evpermi2w(xmm12, xmm8, xmm9, Assembler::AVX_512bit); + __ evpermi2w(xmm13, xmm8, xmm9, Assembler::AVX_512bit); + __ evpermt2w(xmm3, xmm27, xmm2, Assembler::AVX_512bit); + __ evpermt2w(xmm4, xmm28, xmm2, Assembler::AVX_512bit); + __ evpermt2w(xmm5, xmm29, xmm2, Assembler::AVX_512bit); + __ evpermt2w(xmm11, xmm27, xmm10, Assembler::AVX_512bit); + __ evpermt2w(xmm12, xmm28, xmm10, Assembler::AVX_512bit); + __ evpermt2w(xmm13, xmm29, xmm10, Assembler::AVX_512bit); + + __ evpsraw(xmm2, k0, xmm4, 4, false, Assembler::AVX_512bit); + __ evpsllw(xmm0, k0, xmm4, 8, false, Assembler::AVX_512bit); + __ evpsllw(xmm1, k0, xmm5, 4, false, Assembler::AVX_512bit); + __ evpsllw(xmm8, k0, xmm12, 8, false, Assembler::AVX_512bit); + __ evpsraw(xmm10, k0, xmm12, 4, false, Assembler::AVX_512bit); + __ evpsllw(xmm9, k0, xmm13, 4, false, Assembler::AVX_512bit); + __ evpandq(xmm0, k0, xmm0, xmm23, false, Assembler::AVX_512bit); + __ evpandq(xmm8, k0, xmm8, xmm23, false, Assembler::AVX_512bit); + __ evpaddw(xmm1, k0, xmm1, xmm2, false, Assembler::AVX_512bit); + __ evpaddw(xmm0, k0, xmm0, xmm3, false, Assembler::AVX_512bit); + __ evmovdquw(xmm2, xmm30, Assembler::AVX_512bit); + __ evpaddw(xmm9, k0, xmm9, xmm10, false, Assembler::AVX_512bit); + __ evpaddw(xmm8, k0, xmm8, xmm11, false, Assembler::AVX_512bit); + __ evmovdquw(xmm10, xmm30, Assembler::AVX_512bit); + __ evpermi2w(xmm2, xmm0, xmm1, Assembler::AVX_512bit); + __ evpermt2w(xmm0, xmm31, xmm1, Assembler::AVX_512bit); + __ evpermi2w(xmm10, xmm8, xmm9, Assembler::AVX_512bit); + __ evpermt2w(xmm8, xmm31, xmm9, Assembler::AVX_512bit); + + store4regs(parsed, 0, xmm2_0_10_8, _masm); + + __ addptr(condensed, 192); + __ addptr(parsed, 256); + __ subl(parsedLength, 128); + __ jcc(Assembler::greater, Loop); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ mov64(rax, 0); // return 0 + __ ret(0); + + return start; +} + + +// Kyber barrett reduce function. 
+// +// coeffs (short[256]) = c_rarg0 +address generate_kyberBarrettReduce_avx512(StubGenerator *stubgen, + MacroAssembler *_masm) { + + __ align(CodeEntryAlignment); + StubGenStubId stub_id = kyberBarrettReduce_id; + StubCodeMark mark(stubgen, stub_id); + address start = __ pc(); + __ enter(); + + const Register coeffs = c_rarg0; + + __ vpbroadcastq(xmm16, + ExternalAddress(kyberAvx512ConstsAddr(barretMultiplierOffset)), + Assembler::AVX_512bit, scratch); // Barrett multiplier + __ vpbroadcastq(xmm17, + ExternalAddress(kyberAvx512ConstsAddr(qOffset)), + Assembler::AVX_512bit, scratch); // q + + load4regs(xmm0_3, coeffs, 0, _masm); + load4regs(xmm4_7, coeffs, 256, _masm); + + barrettReduce(_masm); + + store4regs(coeffs, 0, xmm0_3, _masm); + store4regs(coeffs, 256, xmm4_7, _masm); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ mov64(rax, 0); // return 0 + __ ret(0); + + return start; +} + +void StubGenerator::generate_kyber_stubs() { + // Generate Kyber intrinsics code + if (UseKyberIntrinsics) { + if (VM_Version::supports_evex()) { + StubRoutines::_kyberNtt = generate_kyberNtt_avx512(this, _masm); + StubRoutines::_kyberInverseNtt = generate_kyberInverseNtt_avx512(this, _masm); + StubRoutines::_kyberNttMult = generate_kyberNttMult_avx512(this, _masm); + StubRoutines::_kyberAddPoly_2 = generate_kyberAddPoly_2_avx512(this, _masm); + StubRoutines::_kyberAddPoly_3 = generate_kyberAddPoly_3_avx512(this, _masm); + StubRoutines::_kyber12To16 = generate_kyber12To16_avx512(this, _masm); + StubRoutines::_kyberBarrettReduce = generate_kyberBarrettReduce_avx512(this, _masm); + } + } +} diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp index d13809bfcd9..52ce2731b1f 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2024, Intel Corporation. All rights reserved. +* Copyright (c) 2024, 2025, Intel Corporation. All rights reserved. * Intel Math Library (LIBM) Source Code * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
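(Note on the Kyber stubs above, before the tanh changes continue.) generate_kyberBarrettReduce_avx512 vectorizes the standard Barrett reduction for the ML-KEM modulus q = 3329. Below is a minimal scalar sketch of that reduction using the reference-implementation constant 20159 = ((1 << 26) + q/2) / q; the multiplier and shift the stub actually loads from kyberAvx512ConstsAddr(barretMultiplierOffset) may be encoded differently for vpmulhw, so treat the constants here as illustrative assumptions rather than the stub's exact values.

// Scalar sketch of Barrett reduction for ML-KEM (Kyber), q = 3329.
// Assumption: constants follow the ML-KEM reference code; the AVX-512 stub
// above applies the equivalent reduction to 32 shorts per register.
#include <cstdint>

static const int16_t kKyberQ   = 3329;
static const int16_t kBarrettV = 20159;   // ((1 << 26) + kKyberQ / 2) / kKyberQ

// Returns a small representative congruent to a mod q.
static int16_t barrett_reduce(int16_t a) {
  int16_t t = (int16_t)(((int32_t)kBarrettV * a + (1 << 25)) >> 26);
  return (int16_t)(a - t * kKyberQ);
}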
@@ -46,7 +46,7 @@ // for |x| in [23/64,3*2^7) // e^{-2*|x|}=2^{-k-f}*2^{-r} ~ 2^{-k}*(Tn+Dn)*(1+p)=(T0+D0)*(1+p) // -// For |x| in [2^{-4},2^5): +// For |x| in [2^{-4},22): // 2^{-r}-1 ~ p=c1*r+c2*r^2+..+c5*r^5 // Let R=1/(1+T0+p*T0), truncated to 35 significant bits // R=1/(1+T0+D0+p*(T0+D0))*(1+eps), |eps|<2^{-33} @@ -66,11 +66,11 @@ // // For |x|<2^{-64}: x is returned // -// For |x|>=2^32: return +/-1 +// For |x|>=22: return +/-1 // // Special cases: // tanh(NaN) = quiet NaN, and raise invalid exception -// tanh(INF) = that INF +// tanh(+/-INF) = +/-1 // tanh(+/-0) = +/-0 // /******************************************************************************/ @@ -324,6 +324,12 @@ address StubGenerator::generate_libmTanh() { __ enter(); // required for proper stackwalking of RuntimeStub frame __ bind(B1_2); + __ pextrw(rcx, xmm0, 3); + __ movl(rdx, 32768); + __ andl(rdx, rcx); + __ andl(rcx, 32767); + __ cmpl(rcx, 16438); + __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1); // Branch only if |x| >= 22 __ movsd(xmm3, ExternalAddress(HALFMASK), r11 /*rscratch*/); __ xorpd(xmm4, xmm4); __ movsd(xmm1, ExternalAddress(L2E), r11 /*rscratch*/); @@ -331,16 +337,12 @@ address StubGenerator::generate_libmTanh() { __ movl(rax, 32768); __ pinsrw(xmm4, rax, 3); __ movsd(xmm6, ExternalAddress(Shifter), r11 /*rscratch*/); - __ pextrw(rcx, xmm0, 3); __ andpd(xmm3, xmm0); __ andnpd(xmm4, xmm0); __ pshufd(xmm5, xmm4, 68); - __ movl(rdx, 32768); - __ andl(rdx, rcx); - __ andl(rcx, 32767); __ subl(rcx, 16304); - __ cmpl(rcx, 144); - __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1); + __ cmpl(rcx, 134); + __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1); // Branch only if |x| is not in [2^{-4},22) __ subsd(xmm4, xmm3); __ mulsd(xmm3, xmm1); __ mulsd(xmm2, xmm5); @@ -427,8 +429,8 @@ address StubGenerator::generate_libmTanh() { __ bind(L_2TAG_PACKET_0_0_1); __ addl(rcx, 960); - __ cmpl(rcx, 1104); - __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1); + __ cmpl(rcx, 1094); + __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1); // Branch only if |x| not in [2^{-64}, 2^{-4}) __ movdqu(xmm2, ExternalAddress(pv), r11 /*rscratch*/); __ pshufd(xmm1, xmm0, 68); __ movdqu(xmm3, ExternalAddress(pv + 16), r11 /*rscratch*/); @@ -449,11 +451,8 @@ address StubGenerator::generate_libmTanh() { __ jmp(B1_4); __ bind(L_2TAG_PACKET_1_0_1); - __ addl(rcx, 15344); - __ cmpl(rcx, 16448); - __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1); __ cmpl(rcx, 16); - __ jcc(Assembler::below, L_2TAG_PACKET_3_0_1); + __ jcc(Assembler::below, L_2TAG_PACKET_3_0_1); // Branch only if |x| is denormalized __ xorpd(xmm2, xmm2); __ movl(rax, 17392); __ pinsrw(xmm2, rax, 3); @@ -468,7 +467,7 @@ address StubGenerator::generate_libmTanh() { __ bind(L_2TAG_PACKET_2_0_1); __ cmpl(rcx, 32752); - __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1); + __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1); // Branch only if |x| is INF or NaN __ xorpd(xmm2, xmm2); __ movl(rcx, 15344); __ pinsrw(xmm2, rcx, 3); @@ -489,7 +488,7 @@ address StubGenerator::generate_libmTanh() { __ movdl(rcx, xmm2); __ orl(rcx, rax); __ cmpl(rcx, 0); - __ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1); + __ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1); // Branch only if |x| is not NaN __ addsd(xmm0, xmm0); __ bind(B1_4); diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp index 45e30a8b4fb..bd061d45fbd 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp +++ 
b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp @@ -1147,6 +1147,30 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ bind(L); } +#if INCLUDE_JFR + __ enter_jfr_critical_section(); + + // This poll test is to uphold the invariant that a JFR sampled frame + // must not return to its caller without a prior safepoint poll check. + // The earlier poll check in this routine is insufficient for this purpose + // because the thread has transitioned back to Java. + + Label slow_path; + Label fast_path; + __ safepoint_poll(slow_path, true /* at_return */, false /* in_nmethod */); + __ jmp(fast_path); + __ bind(slow_path); + __ push(dtos); + __ push(ltos); + __ set_last_Java_frame(noreg, rbp, (address)__ pc(), rscratch1); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), r15_thread); + __ reset_last_Java_frame(true); + __ pop(ltos); + __ pop(dtos); + __ bind(fast_path); + +#endif // INCLUDE_JFR + // jvmti support // Note: This must happen _after_ handling/throwing any exceptions since // the exception handler code notifies the runtime of method exits @@ -1169,8 +1193,12 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp __ leave(); // remove frame anchor + + JFR_ONLY(__ leave_jfr_critical_section();) + __ pop(rdi); // get return address __ mov(rsp, t); // set sp to sender sp + __ jmp(rdi); if (inc_counter) { @@ -1413,6 +1441,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { Interpreter::_remove_activation_preserving_args_entry = __ pc(); __ empty_expression_stack(); + __ restore_bcp(); // We could have returned from deoptimizing this frame, so restore rbcp. 
// Set the popframe_processing bit in pending_popframe_condition // indicating that we are currently handling popframe, so that // call_VMs that may happen later do not trigger new popframe diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp index 6be702f2699..9ea4aeeccfa 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp @@ -468,6 +468,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M assert(StubRoutines::dtanh() != nullptr, "not initialized"); __ movdbl(xmm0, Address(rsp, wordSize)); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtanh()))); + } else if (kind == Interpreter::java_lang_math_cbrt) { + assert(StubRoutines::dcbrt() != nullptr, "not initialized"); + __ movdbl(xmm0, Address(rsp, wordSize)); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcbrt()))); } else if (kind == Interpreter::java_lang_math_abs) { assert(StubRoutines::x86::double_sign_mask() != nullptr, "not initialized"); __ movdbl(xmm0, Address(rsp, wordSize)); diff --git a/src/hotspot/cpu/x86/templateTable_x86.cpp b/src/hotspot/cpu/x86/templateTable_x86.cpp index 43da80f4082..82ca18d8a1f 100644 --- a/src/hotspot/cpu/x86/templateTable_x86.cpp +++ b/src/hotspot/cpu/x86/templateTable_x86.cpp @@ -1687,8 +1687,7 @@ void TemplateTable::float_cmp(bool is_float, int unordered_result) { void TemplateTable::branch(bool is_jsr, bool is_wide) { __ get_method(rcx); // rcx holds method - __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx - // holds bumped taken count + __ profile_taken_branch(rax); // rax holds updated MDP const ByteSize be_offset = MethodCounters::backedge_counter_offset() + InvocationCounter::counter_offset(); @@ -1739,7 +1738,6 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { if (UseLoopCounter) { // increment backedge counter for backward branches // rax: MDO - // rbx: MDO bumped taken-count // rcx: method // rdx: target offset // r13: target bcp @@ -1825,6 +1823,8 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { // it will be preserved in rbx. __ mov(rbx, rax); + JFR_ONLY(__ enter_jfr_critical_section();) + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); // rax is OSR buffer, move it to expected parameter location @@ -1839,14 +1839,12 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { // pop the interpreter frame __ movptr(sender_sp, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp __ leave(); // remove frame anchor + JFR_ONLY(__ leave_jfr_critical_section();) __ pop(retaddr); // get return address - __ mov(rsp, sender_sp); // set sp to sender sp + __ mov(rsp, sender_sp); // set sp to sender sp // Ensure compiled code always sees stack at proper alignment __ andptr(rsp, -(StackAlignmentInBytes)); - // unlike x86 we need no specialized return from compiled code - // to the interpreter or the call stub. - // push the return address __ push(retaddr); diff --git a/src/hotspot/cpu/x86/vmStructs_x86.hpp b/src/hotspot/cpu/x86/vmStructs_x86.hpp index d894d8b09a7..b8089a6413e 100644 --- a/src/hotspot/cpu/x86/vmStructs_x86.hpp +++ b/src/hotspot/cpu/x86/vmStructs_x86.hpp @@ -29,15 +29,20 @@ // constants required by the Serviceability Agent. This file is // referenced by vmStructs.cpp. 
-#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field) \ - volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field) \ + volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ + static_field(VM_Version, _features, VM_Version::VM_Features) \ + nonstatic_field(VM_Version::VM_Features, _features_bitmap[0], uint64_t) \ + static_field(VM_Version::VM_Features, _features_bitmap_size, int) #define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type) \ + declare_toplevel_type(VM_Version::VM_Features) #define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant) \ - LP64_ONLY(declare_constant(frame::arg_reg_save_area_bytes)) \ - declare_constant(frame::interpreter_frame_sender_sp_offset) \ - declare_constant(frame::interpreter_frame_last_sp_offset) + declare_constant(frame::arg_reg_save_area_bytes) \ + declare_constant(frame::interpreter_frame_sender_sp_offset) \ + declare_constant(frame::interpreter_frame_last_sp_offset) \ + declare_constant(frame::entry_frame_call_wrapper_offset) #define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant) diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index 32e6e33d133..152866e65f3 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -63,6 +63,11 @@ address VM_Version::_cpuinfo_cont_addr_apx = nullptr; static BufferBlob* stub_blob; static const int stub_size = 2000; +int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong; + +VM_Version::VM_Features VM_Version::_features; +VM_Version::VM_Features VM_Version::_cpu_features; + extern "C" { typedef void (*get_cpu_info_stub_t)(void*); typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*); @@ -72,8 +77,6 @@ static get_cpu_info_stub_t get_cpu_info_stub = nullptr; static detect_virt_stub_t detect_virt_stub = nullptr; static clear_apx_test_state_t clear_apx_test_state_stub = nullptr; -#ifdef _LP64 - bool VM_Version::supports_clflush() { // clflush should always be available on x86_64 // if not we are in real trouble because we rely on it @@ -84,10 +87,9 @@ bool VM_Version::supports_clflush() { // up. Assembler::flush calls this routine to check that clflush // is allowed. So, we give the caller a free pass if Universe init // is still in progress. - assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available"); + assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available"); return true; } -#endif #define CPUID_STANDARD_FN 0x0 #define CPUID_STANDARD_FN_1 0x1 @@ -107,7 +109,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator { VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} -#if defined(_LP64) address clear_apx_test_state() { # define __ _masm-> address start = __ pc(); @@ -126,7 +127,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ ret(0); return start; } -#endif address generate_get_cpu_info() { // Flags to test CPU type. 
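The hunk that follows adds a CPUID leaf 0x24 (AVX10 converged vector ISA main leaf) query to the get_cpu_info stub; the EBX result is decoded later in this patch by the new StdCpuid24MainLeafEbx union in vm_version_x86.hpp (ISA version number in bits 7:0, 512-bit vector length support at bit 18). A small hedged sketch of that decoding, with the bit positions taken from the union as declared in this patch rather than independently verified:

// Sketch: decoding CPUID.(EAX=0x24, ECX=0):EBX the way StdCpuid24MainLeafEbx does.
//   bits 7:0 -> avx10_converged_isa_version
//   bit  18  -> avx10_vlen_512 (512-bit vector length supported)
#include <cstdint>

struct Avx10Info {
  uint32_t isa_version;
  bool     vlen_512;
};

static Avx10Info decode_cpuid24_ebx(uint32_t ebx) {
  Avx10Info info;
  info.isa_version = ebx & 0xFF;              // avx10_converged_isa_version : 8
  info.vlen_512    = ((ebx >> 18) & 1) != 0;  // avx10_vlen_512 at bit 18
  return info;
}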
@@ -138,7 +138,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator { const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2); - Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; + Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24; Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7; Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning; Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check; @@ -151,14 +151,10 @@ class VM_Version_StubGenerator: public StubCodeGenerator { // // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info); // - // LP64: rcx and rdx are first and second argument registers on windows + // rcx and rdx are first and second argument registers on windows __ push(rbp); -#ifdef _LP64 __ mov(rbp, c_rarg0); // cpuid_info address -#else - __ movptr(rbp, Address(rsp, 8)); // cpuid_info address -#endif __ push(rbx); __ push(rsi); __ pushf(); // preserve rbx, and flags @@ -341,6 +337,17 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movl(Address(rsi, 0), rax); __ movl(Address(rsi, 4), rdx); + // + // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0). + // + __ bind(std_cpuid24); + __ movl(rax, 0x24); + __ movl(rcx, 0); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + // // Extended cpuid(0x80000000) // @@ -418,7 +425,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movl(Address(rsi, 8), rcx); __ movl(Address(rsi,12), rdx); -#if defined(_LP64) // // Check if OS has enabled XGETBV instruction to access XCR0 // (OSXSAVE feature flag) and CPU supports APX @@ -428,13 +434,11 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); __ movl(rax, 0x200000); __ andl(rax, Address(rsi, 4)); - __ cmpl(rax, 0x200000); - __ jcc(Assembler::notEqual, vector_save_restore); + __ jcc(Assembler::equal, vector_save_restore); // check _cpuid_info.xem_xcr0_eax.bits.apx_f __ movl(rax, 0x80000); __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f - __ cmpl(rax, 0x80000); - __ jcc(Assembler::notEqual, vector_save_restore); + __ jcc(Assembler::equal, vector_save_restore); #ifndef PRODUCT bool save_apx = UseAPX; @@ -453,7 +457,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movq(Address(rsi, 8), r31); UseAPX = save_apx; -#endif #endif __ bind(vector_save_restore); // @@ -488,11 +491,15 @@ class VM_Version_StubGenerator: public StubCodeGenerator { // If UseAVX is uninitialized or is set by the user to include EVEX if (use_evex) { // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f + // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); __ movl(rax, 0x10000); - __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm - __ cmpl(rax, 0x10000); - __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported + __ andl(rax, Address(rsi, 4)); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset()))); + __ movl(rbx, 0x80000); + __ andl(rbx, Address(rsi, 4)); + __ orl(rax, rbx); + __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported // check _cpuid_info.xem_xcr0_eax.bits.opmask // check _cpuid_info.xem_xcr0_eax.bits.zmm512 // check 
_cpuid_info.xem_xcr0_eax.bits.zmm32 @@ -527,10 +534,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movdl(xmm0, rcx); __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit); __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit); -#ifdef _LP64 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit); __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit); -#endif VM_Version::clean_cpuFeatures(); __ jmp(save_restore_except); } @@ -556,10 +561,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ pshufd(xmm0, xmm0, 0x00); __ vinsertf128_high(xmm0, xmm0); __ vmovdqu(xmm7, xmm0); -#ifdef _LP64 __ vmovdqu(xmm8, xmm0); __ vmovdqu(xmm15, xmm0); -#endif VM_Version::clean_cpuFeatures(); __ bind(save_restore_except); @@ -577,8 +580,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); __ movl(rax, 0x10000); __ andl(rax, Address(rsi, 4)); - __ cmpl(rax, 0x10000); - __ jcc(Assembler::notEqual, legacy_save_restore); + __ jcc(Assembler::equal, legacy_save_restore); // check _cpuid_info.xem_xcr0_eax.bits.opmask // check _cpuid_info.xem_xcr0_eax.bits.zmm512 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 @@ -600,10 +602,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset()))); __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit); __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit); -#ifdef _LP64 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit); __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit); -#endif #ifdef _WINDOWS __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit); @@ -628,10 +628,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); __ vmovdqu(Address(rsi, 0), xmm0); __ vmovdqu(Address(rsi, 32), xmm7); -#ifdef _LP64 __ vmovdqu(Address(rsi, 64), xmm8); __ vmovdqu(Address(rsi, 96), xmm15); -#endif #ifdef _WINDOWS __ vmovdqu(xmm15, Address(rsp, 0)); @@ -687,13 +685,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ push(rbx); __ push(rsi); // for Windows -#ifdef _LP64 __ mov(rax, c_rarg0); // CPUID leaf __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx) -#else - __ movptr(rax, Address(rsp, 16)); // CPUID leaf - __ movptr(rsi, Address(rsp, 20)); // register array address -#endif __ cpuid(); @@ -734,14 +727,10 @@ class VM_Version_StubGenerator: public StubCodeGenerator { // // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info); // - // LP64: rcx and rdx are first and second argument registers on windows + // rcx and rdx are first and second argument registers on windows __ push(rbp); -#ifdef _LP64 __ mov(rbp, c_rarg0); // cpuid_info address -#else - __ movptr(rbp, Address(rsp, 8)); // cpuid_info address -#endif __ push(rbx); __ push(rsi); __ pushf(); // preserve rbx, and flags @@ -863,7 +852,6 @@ void VM_Version::get_processor_features() { _cpu = 4; // 486 by default _model = 0; _stepping = 0; - _features = 0; _logical_processors_per_package = 1; // i486 internal cache is both I&D and has a 16-byte line size _L1_data_cache_line_size = 16; @@ -879,7 +867,7 @@ void VM_Version::get_processor_features() { if (cpu_family() > 4) { // it supports CPUID _features = _cpuid_info.feature_flags(); // These can be changed by VM settings - _cpu_features = _features; // Preserve features + _cpu_features = _features; // Preserve features // Logical processors are only available on P4s and above, // and 
only if hyperthreading is available. _logical_processors_per_package = logical_processor_count(); @@ -889,19 +877,16 @@ void VM_Version::get_processor_features() { // xchg and xadd instructions _supports_atomic_getset4 = true; _supports_atomic_getadd4 = true; - LP64_ONLY(_supports_atomic_getset8 = true); - LP64_ONLY(_supports_atomic_getadd8 = true); + _supports_atomic_getset8 = true; + _supports_atomic_getadd8 = true; -#ifdef _LP64 // OS should support SSE for x64 and hardware should support at least SSE2. if (!VM_Version::supports_sse2()) { vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); } // in 64 bit the use of SSE2 is the minimum if (UseSSE < 2) UseSSE = 2; -#endif -#ifdef AMD64 // flush_icache_stub have to be generated first. // That is why Icache line size is hard coded in ICache class, // see icache_x86.hpp. It is also the reason why we can't use @@ -913,9 +898,7 @@ void VM_Version::get_processor_features() { guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported"); // clflush_size is size in quadwords (8 bytes). guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported"); -#endif -#ifdef _LP64 // assigning this field effectively enables Unsafe.writebackMemory() // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero // that is only implemented on x86_64 and only if the OS plays ball @@ -924,31 +907,30 @@ void VM_Version::get_processor_features() { // let if default to zero thereby disabling writeback _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8; } -#endif // Check if processor has Intel Ecore - if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 && + if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() && (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF || _model == 0xCC || _model == 0xDD)) { FLAG_SET_DEFAULT(EnableX86ECoreOpts, true); } if (UseSSE < 4) { - _features &= ~CPU_SSE4_1; - _features &= ~CPU_SSE4_2; + _features.clear_feature(CPU_SSE4_1); + _features.clear_feature(CPU_SSE4_2); } if (UseSSE < 3) { - _features &= ~CPU_SSE3; - _features &= ~CPU_SSSE3; - _features &= ~CPU_SSE4A; + _features.clear_feature(CPU_SSE3); + _features.clear_feature(CPU_SSSE3); + _features.clear_feature(CPU_SSE4A); } if (UseSSE < 2) - _features &= ~CPU_SSE2; + _features.clear_feature(CPU_SSE2); if (UseSSE < 1) - _features &= ~CPU_SSE; + _features.clear_feature(CPU_SSE); //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0. 
if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { @@ -1014,21 +996,25 @@ void VM_Version::get_processor_features() { } if (UseAVX < 3) { - _features &= ~CPU_AVX512F; - _features &= ~CPU_AVX512DQ; - _features &= ~CPU_AVX512CD; - _features &= ~CPU_AVX512BW; - _features &= ~CPU_AVX512VL; - _features &= ~CPU_AVX512_VPOPCNTDQ; - _features &= ~CPU_AVX512_VPCLMULQDQ; - _features &= ~CPU_AVX512_VAES; - _features &= ~CPU_AVX512_VNNI; - _features &= ~CPU_AVX512_VBMI; - _features &= ~CPU_AVX512_VBMI2; - _features &= ~CPU_AVX512_BITALG; - _features &= ~CPU_AVX512_IFMA; - _features &= ~CPU_APX_F; - _features &= ~CPU_AVX512_FP16; + _features.clear_feature(CPU_AVX512F); + _features.clear_feature(CPU_AVX512DQ); + _features.clear_feature(CPU_AVX512CD); + _features.clear_feature(CPU_AVX512BW); + _features.clear_feature(CPU_AVX512ER); + _features.clear_feature(CPU_AVX512PF); + _features.clear_feature(CPU_AVX512VL); + _features.clear_feature(CPU_AVX512_VPOPCNTDQ); + _features.clear_feature(CPU_AVX512_VPCLMULQDQ); + _features.clear_feature(CPU_AVX512_VAES); + _features.clear_feature(CPU_AVX512_VNNI); + _features.clear_feature(CPU_AVX512_VBMI); + _features.clear_feature(CPU_AVX512_VBMI2); + _features.clear_feature(CPU_AVX512_BITALG); + _features.clear_feature(CPU_AVX512_IFMA); + _features.clear_feature(CPU_APX_F); + _features.clear_feature(CPU_AVX512_FP16); + _features.clear_feature(CPU_AVX10_1); + _features.clear_feature(CPU_AVX10_2); } // Currently APX support is only enabled for targets supporting AVX512VL feature. @@ -1041,45 +1027,47 @@ void VM_Version::get_processor_features() { } if (!UseAPX) { - _features &= ~CPU_APX_F; + _features.clear_feature(CPU_APX_F); } if (UseAVX < 2) { - _features &= ~CPU_AVX2; - _features &= ~CPU_AVX_IFMA; + _features.clear_feature(CPU_AVX2); + _features.clear_feature(CPU_AVX_IFMA); } if (UseAVX < 1) { - _features &= ~CPU_AVX; - _features &= ~CPU_VZEROUPPER; - _features &= ~CPU_F16C; - _features &= ~CPU_SHA512; + _features.clear_feature(CPU_AVX); + _features.clear_feature(CPU_VZEROUPPER); + _features.clear_feature(CPU_F16C); + _features.clear_feature(CPU_SHA512); } if (logical_processors_per_package() == 1) { // HT processor could be installed on a system which doesn't support HT. 
- _features &= ~CPU_HT; + _features.clear_feature(CPU_HT); } if (is_intel()) { // Intel cpus specific settings if (is_knights_family()) { - _features &= ~CPU_VZEROUPPER; - _features &= ~CPU_AVX512BW; - _features &= ~CPU_AVX512VL; - _features &= ~CPU_AVX512DQ; - _features &= ~CPU_AVX512_VNNI; - _features &= ~CPU_AVX512_VAES; - _features &= ~CPU_AVX512_VPOPCNTDQ; - _features &= ~CPU_AVX512_VPCLMULQDQ; - _features &= ~CPU_AVX512_VBMI; - _features &= ~CPU_AVX512_VBMI2; - _features &= ~CPU_CLWB; - _features &= ~CPU_FLUSHOPT; - _features &= ~CPU_GFNI; - _features &= ~CPU_AVX512_BITALG; - _features &= ~CPU_AVX512_IFMA; - _features &= ~CPU_AVX_IFMA; - _features &= ~CPU_AVX512_FP16; + _features.clear_feature(CPU_VZEROUPPER); + _features.clear_feature(CPU_AVX512BW); + _features.clear_feature(CPU_AVX512VL); + _features.clear_feature(CPU_AVX512DQ); + _features.clear_feature(CPU_AVX512_VNNI); + _features.clear_feature(CPU_AVX512_VAES); + _features.clear_feature(CPU_AVX512_VPOPCNTDQ); + _features.clear_feature(CPU_AVX512_VPCLMULQDQ); + _features.clear_feature(CPU_AVX512_VBMI); + _features.clear_feature(CPU_AVX512_VBMI2); + _features.clear_feature(CPU_CLWB); + _features.clear_feature(CPU_FLUSHOPT); + _features.clear_feature(CPU_GFNI); + _features.clear_feature(CPU_AVX512_BITALG); + _features.clear_feature(CPU_AVX512_IFMA); + _features.clear_feature(CPU_AVX_IFMA); + _features.clear_feature(CPU_AVX512_FP16); + _features.clear_feature(CPU_AVX10_1); + _features.clear_feature(CPU_AVX10_2); } } @@ -1089,16 +1077,44 @@ void VM_Version::get_processor_features() { _has_intel_jcc_erratum = IntelJccErratumMitigation; } - char buf[1024]; - int res = jio_snprintf( + assert(supports_clflush(), "Always present"); + if (X86ICacheSync == -1) { + // Auto-detect, choosing the best performant one that still flushes + // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward. + if (supports_clwb()) { + FLAG_SET_ERGO(X86ICacheSync, 3); + } else if (supports_clflushopt()) { + FLAG_SET_ERGO(X86ICacheSync, 2); + } else { + FLAG_SET_ERGO(X86ICacheSync, 1); + } + } else { + if ((X86ICacheSync == 2) && !supports_clflushopt()) { + vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2"); + } + if ((X86ICacheSync == 3) && !supports_clwb()) { + vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3"); + } + if ((X86ICacheSync == 5) && !supports_serialize()) { + vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5"); + } + } + + char buf[2048]; + size_t cpu_info_size = jio_snprintf( buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, os::cpu_microcode_revision()); - assert(res > 0, "not enough temporary space allocated"); - insert_features_names(buf + res, sizeof(buf) - res, _features_names); + assert(cpu_info_size > 0, "not enough temporary space allocated"); - _features_string = os::strdup(buf); + insert_features_names(_features, buf + cpu_info_size, sizeof(buf) - cpu_info_size); + + _cpu_info_string = os::strdup(buf); + + _features_string = extract_features_string(_cpu_info_string, + strnlen(_cpu_info_string, sizeof(buf)), + cpu_info_size); // Use AES instructions if available. 
if (supports_aes()) { @@ -1182,7 +1198,6 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); } -#ifdef _LP64 if (supports_avx2()) { if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { UseAdler32Intrinsics = true; @@ -1193,12 +1208,6 @@ void VM_Version::get_processor_features() { } FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); } -#else - if (UseAdler32Intrinsics) { - warning("Adler32Intrinsics not available on this CPU."); - FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); - } -#endif if (supports_sse4_2() && supports_clmul()) { if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { @@ -1222,7 +1231,6 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); } -#ifdef _LP64 // ChaCha20 Intrinsics // As long as the system supports AVX as a baseline we can do a // SIMD-enabled block function. StubGenerator makes the determination @@ -1238,24 +1246,28 @@ void VM_Version::get_processor_features() { } FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false); } -#else - // No support currently for ChaCha20 intrinsics on 32-bit platforms - if (UseChaCha20Intrinsics) { - warning("ChaCha20 intrinsics are not available on this CPU."); - FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false); + + // Kyber Intrinsics + // Currently we only have them for AVX512 +#ifdef _LP64 + if (supports_evex() && supports_avx512bw()) { + if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) { + UseKyberIntrinsics = true; + } + } else +#endif + if (UseKyberIntrinsics) { + warning("Intrinsics for ML-KEM are not available on this CPU."); + FLAG_SET_DEFAULT(UseKyberIntrinsics, false); } -#endif // _LP64 // Dilithium Intrinsics // Currently we only have them for AVX512 -#ifdef _LP64 if (supports_evex() && supports_avx512bw()) { if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) { UseDilithiumIntrinsics = true; } - } else -#endif - if (UseDilithiumIntrinsics) { + } else if (UseDilithiumIntrinsics) { warning("Intrinsics for ML-DSA are not available on this CPU."); FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false); } @@ -1284,7 +1296,7 @@ void VM_Version::get_processor_features() { UseMD5Intrinsics = true; } - if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) { + if (supports_sha() || (supports_avx2() && supports_bmi2())) { if (FLAG_IS_DEFAULT(UseSHA)) { UseSHA = true; } @@ -1311,27 +1323,20 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); } -#ifdef _LP64 - // These are only supported on 64-bit if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) { if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); } - } else -#endif - if (UseSHA512Intrinsics) { + } else if (UseSHA512Intrinsics) { warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } -#ifdef _LP64 if (supports_evex() && supports_avx512bw()) { if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) { UseSHA3Intrinsics = true; } - } else -#endif - if (UseSHA3Intrinsics) { + } else if (UseSHA3Intrinsics) { warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); } @@ -1353,11 +1358,7 @@ void VM_Version::get_processor_features() { max_vector_size = 64; } -#ifdef _LP64 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit -#else - int min_vector_size = 0; -#endif if (!FLAG_IS_DEFAULT(MaxVectorSize)) { if (MaxVectorSize < min_vector_size) { @@ 
-1381,7 +1382,7 @@ void VM_Version::get_processor_features() { if (MaxVectorSize > 0) { if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { tty->print_cr("State of YMM registers after signal handle:"); - int nreg = 2 LP64_ONLY(+2); + int nreg = 4; const char* ymm_name[4] = {"0", "7", "8", "15"}; for (int i = 0; i < nreg; i++) { tty->print("YMM%s:", ymm_name[i]); @@ -1394,31 +1395,24 @@ void VM_Version::get_processor_features() { } #endif // COMPILER2 && ASSERT -#ifdef _LP64 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) { if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) { FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true); } - } else -#endif - if (UsePoly1305Intrinsics) { + } else if (UsePoly1305Intrinsics) { warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU."); FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false); } -#ifdef _LP64 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) { if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) { FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true); } - } else -#endif - if (UseIntPolyIntrinsics) { + } else if (UseIntPolyIntrinsics) { warning("Intrinsics for Polynomial crypto functions not available on this CPU."); FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false); } -#ifdef _LP64 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { UseMultiplyToLenIntrinsic = true; } @@ -1434,38 +1428,6 @@ void VM_Version::get_processor_features() { if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { UseMontgomerySquareIntrinsic = true; } -#else - if (UseMultiplyToLenIntrinsic) { - if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { - warning("multiplyToLen intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false); - } - if (UseMontgomeryMultiplyIntrinsic) { - if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { - warning("montgomeryMultiply intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false); - } - if (UseMontgomerySquareIntrinsic) { - if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { - warning("montgomerySquare intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false); - } - if (UseSquareToLenIntrinsic) { - if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { - warning("squareToLen intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false); - } - if (UseMulAddIntrinsic) { - if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { - warning("mulAdd intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseMulAddIntrinsic, false); - } -#endif // _LP64 #endif // COMPILER2_OR_JVMCI // On new cpus instructions which update whole XMM register should be used @@ -1632,7 +1594,7 @@ void VM_Version::get_processor_features() { if (FLAG_IS_DEFAULT(UseStoreImmI16)) { UseStoreImmI16 = false; // don't use it on Intel cpus } - if (cpu_family() == 6 || cpu_family() == 15) { + if (is_intel_server_family() || cpu_family() == 15) { if (FLAG_IS_DEFAULT(UseAddressNop)) { // Use it on all Intel cpus starting from PentiumPro UseAddressNop = true; @@ -1648,7 +1610,7 @@ void VM_Version::get_processor_features() { UseXmmRegToRegMoveAll = false; } } - if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus + if (is_intel_server_family() && supports_sse3()) { // New Intel cpus #ifdef COMPILER2 if (FLAG_IS_DEFAULT(MaxLoopPad)) { // For new Intel cpus do the next optimization: @@ -1742,7 +1704,6 @@ void VM_Version::get_processor_features() { } 
#endif -#ifdef _LP64 if (UseSSE42Intrinsics) { if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { UseVectorizedMismatchIntrinsic = true; @@ -1759,20 +1720,6 @@ void VM_Version::get_processor_features() { warning("vectorizedHashCode intrinsics are not available on this CPU"); FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); } -#else - if (UseVectorizedMismatchIntrinsic) { - if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { - warning("vectorizedMismatch intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); - } - if (UseVectorizedHashCodeIntrinsic) { - if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) { - warning("vectorizedHashCode intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); - } -#endif // _LP64 // Use count leading zeros count instruction if available. if (supports_lzcnt()) { @@ -1901,7 +1848,7 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); } - if (is_intel() && cpu_family() == 6 && supports_sse3()) { + if (is_intel() && is_intel_server_family() && supports_sse3()) { if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && supports_sse4_2() && supports_ht()) { // Nehalem based cpus FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); @@ -1921,7 +1868,6 @@ void VM_Version::get_processor_features() { #endif } -#ifdef _LP64 // Prefetch settings // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from @@ -1940,7 +1886,6 @@ void VM_Version::get_processor_features() { if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); } -#endif if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) @@ -2171,16 +2116,15 @@ int VM_Version::avx3_threshold() { FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold; } -#if defined(_LP64) void VM_Version::clear_apx_test_state() { clear_apx_test_state_stub(); } -#endif static bool _vm_version_initialized = false; void VM_Version::initialize() { ResourceMark rm; + // Making this stub must be FIRST use of assembler stub_blob = BufferBlob::create("VM_Version stub", stub_size); if (stub_blob == nullptr) { @@ -2193,14 +2137,11 @@ void VM_Version::initialize() { g.generate_get_cpu_info()); detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t, g.generate_detect_virt()); - -#if defined(_LP64) clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t, g.clear_apx_test_state()); -#endif get_processor_features(); - LP64_ONLY(Assembler::precompute_instructions();) + Assembler::precompute_instructions(); if (VM_Version::supports_hv()) { // Supports hypervisor check_virtualizations(); @@ -2959,192 +2900,217 @@ int64_t VM_Version::maximum_qualified_cpu_frequency(void) { return _max_qualified_cpu_frequency; } -uint64_t VM_Version::CpuidInfo::feature_flags() const { - uint64_t result = 0; +VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const { + VM_Features vm_features; if (std_cpuid1_edx.bits.cmpxchg8 != 0) - result |= CPU_CX8; + vm_features.set_feature(CPU_CX8); if (std_cpuid1_edx.bits.cmov != 0) - result |= CPU_CMOV; + vm_features.set_feature(CPU_CMOV); if (std_cpuid1_edx.bits.clflush != 0) - result |= CPU_FLUSH; -#ifdef _LP64 + vm_features.set_feature(CPU_FLUSH); // clflush should always be available on x86_64 // if not we are in real trouble because we rely on it // to flush the code cache. 
- assert ((result & CPU_FLUSH) != 0, "clflush should be available"); -#endif + assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available"); if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && ext_cpuid1_edx.bits.fxsr != 0)) - result |= CPU_FXSR; + vm_features.set_feature(CPU_FXSR); // HT flag is set for multi-core processors also. if (threads_per_core() > 1) - result |= CPU_HT; + vm_features.set_feature(CPU_HT); if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && ext_cpuid1_edx.bits.mmx != 0)) - result |= CPU_MMX; + vm_features.set_feature(CPU_MMX); if (std_cpuid1_edx.bits.sse != 0) - result |= CPU_SSE; + vm_features.set_feature(CPU_SSE); if (std_cpuid1_edx.bits.sse2 != 0) - result |= CPU_SSE2; + vm_features.set_feature(CPU_SSE2); if (std_cpuid1_ecx.bits.sse3 != 0) - result |= CPU_SSE3; + vm_features.set_feature(CPU_SSE3); if (std_cpuid1_ecx.bits.ssse3 != 0) - result |= CPU_SSSE3; + vm_features.set_feature(CPU_SSSE3); if (std_cpuid1_ecx.bits.sse4_1 != 0) - result |= CPU_SSE4_1; + vm_features.set_feature(CPU_SSE4_1); if (std_cpuid1_ecx.bits.sse4_2 != 0) - result |= CPU_SSE4_2; + vm_features.set_feature(CPU_SSE4_2); if (std_cpuid1_ecx.bits.popcnt != 0) - result |= CPU_POPCNT; + vm_features.set_feature(CPU_POPCNT); if (sefsl1_cpuid7_edx.bits.apx_f != 0 && xem_xcr0_eax.bits.apx_f != 0) { - result |= CPU_APX_F; + vm_features.set_feature(CPU_APX_F); } if (std_cpuid1_ecx.bits.avx != 0 && std_cpuid1_ecx.bits.osxsave != 0 && xem_xcr0_eax.bits.sse != 0 && xem_xcr0_eax.bits.ymm != 0) { - result |= CPU_AVX; - result |= CPU_VZEROUPPER; + vm_features.set_feature(CPU_AVX); + vm_features.set_feature(CPU_VZEROUPPER); if (sefsl1_cpuid7_eax.bits.sha512 != 0) - result |= CPU_SHA512; + vm_features.set_feature(CPU_SHA512); if (std_cpuid1_ecx.bits.f16c != 0) - result |= CPU_F16C; + vm_features.set_feature(CPU_F16C); if (sef_cpuid7_ebx.bits.avx2 != 0) { - result |= CPU_AVX2; + vm_features.set_feature(CPU_AVX2); if (sefsl1_cpuid7_eax.bits.avx_ifma != 0) - result |= CPU_AVX_IFMA; + vm_features.set_feature(CPU_AVX_IFMA); } if (sef_cpuid7_ecx.bits.gfni != 0) - result |= CPU_GFNI; + vm_features.set_feature(CPU_GFNI); if (sef_cpuid7_ebx.bits.avx512f != 0 && xem_xcr0_eax.bits.opmask != 0 && xem_xcr0_eax.bits.zmm512 != 0 && xem_xcr0_eax.bits.zmm32 != 0) { - result |= CPU_AVX512F; + vm_features.set_feature(CPU_AVX512F); if (sef_cpuid7_ebx.bits.avx512cd != 0) - result |= CPU_AVX512CD; + vm_features.set_feature(CPU_AVX512CD); if (sef_cpuid7_ebx.bits.avx512dq != 0) - result |= CPU_AVX512DQ; + vm_features.set_feature(CPU_AVX512DQ); if (sef_cpuid7_ebx.bits.avx512ifma != 0) - result |= CPU_AVX512_IFMA; + vm_features.set_feature(CPU_AVX512_IFMA); if (sef_cpuid7_ebx.bits.avx512pf != 0) - result |= CPU_AVX512PF; + vm_features.set_feature(CPU_AVX512PF); if (sef_cpuid7_ebx.bits.avx512er != 0) - result |= CPU_AVX512ER; + vm_features.set_feature(CPU_AVX512ER); if (sef_cpuid7_ebx.bits.avx512bw != 0) - result |= CPU_AVX512BW; + vm_features.set_feature(CPU_AVX512BW); if (sef_cpuid7_ebx.bits.avx512vl != 0) - result |= CPU_AVX512VL; + vm_features.set_feature(CPU_AVX512VL); if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) - result |= CPU_AVX512_VPOPCNTDQ; + vm_features.set_feature(CPU_AVX512_VPOPCNTDQ); if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) - result |= CPU_AVX512_VPCLMULQDQ; + vm_features.set_feature(CPU_AVX512_VPCLMULQDQ); if (sef_cpuid7_ecx.bits.vaes != 0) - result |= CPU_AVX512_VAES; + vm_features.set_feature(CPU_AVX512_VAES); if (sef_cpuid7_ecx.bits.avx512_vnni != 0) - result |= CPU_AVX512_VNNI; + 
vm_features.set_feature(CPU_AVX512_VNNI); if (sef_cpuid7_ecx.bits.avx512_bitalg != 0) - result |= CPU_AVX512_BITALG; + vm_features.set_feature(CPU_AVX512_BITALG); if (sef_cpuid7_ecx.bits.avx512_vbmi != 0) - result |= CPU_AVX512_VBMI; + vm_features.set_feature(CPU_AVX512_VBMI); if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) - result |= CPU_AVX512_VBMI2; + vm_features.set_feature(CPU_AVX512_VBMI2); + } + if (is_intel()) { + if (sefsl1_cpuid7_edx.bits.avx10 != 0 && + std_cpuid24_ebx.bits.avx10_vlen_512 !=0 && + std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 && + xem_xcr0_eax.bits.opmask != 0 && + xem_xcr0_eax.bits.zmm512 != 0 && + xem_xcr0_eax.bits.zmm32 != 0) { + vm_features.set_feature(CPU_AVX10_1); + vm_features.set_feature(CPU_AVX512F); + vm_features.set_feature(CPU_AVX512CD); + vm_features.set_feature(CPU_AVX512DQ); + vm_features.set_feature(CPU_AVX512PF); + vm_features.set_feature(CPU_AVX512ER); + vm_features.set_feature(CPU_AVX512BW); + vm_features.set_feature(CPU_AVX512VL); + vm_features.set_feature(CPU_AVX512_VPOPCNTDQ); + vm_features.set_feature(CPU_AVX512_VPCLMULQDQ); + vm_features.set_feature(CPU_AVX512_VAES); + vm_features.set_feature(CPU_AVX512_VNNI); + vm_features.set_feature(CPU_AVX512_BITALG); + vm_features.set_feature(CPU_AVX512_VBMI); + vm_features.set_feature(CPU_AVX512_VBMI2); + if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) { + vm_features.set_feature(CPU_AVX10_2); + } + } } } + if (std_cpuid1_ecx.bits.hv != 0) - result |= CPU_HV; + vm_features.set_feature(CPU_HV); if (sef_cpuid7_ebx.bits.bmi1 != 0) - result |= CPU_BMI1; + vm_features.set_feature(CPU_BMI1); if (std_cpuid1_edx.bits.tsc != 0) - result |= CPU_TSC; + vm_features.set_feature(CPU_TSC); if (ext_cpuid7_edx.bits.tsc_invariance != 0) - result |= CPU_TSCINV_BIT; + vm_features.set_feature(CPU_TSCINV_BIT); if (std_cpuid1_ecx.bits.aes != 0) - result |= CPU_AES; + vm_features.set_feature(CPU_AES); if (ext_cpuid1_ecx.bits.lzcnt != 0) - result |= CPU_LZCNT; + vm_features.set_feature(CPU_LZCNT); if (ext_cpuid1_ecx.bits.prefetchw != 0) - result |= CPU_3DNOW_PREFETCH; + vm_features.set_feature(CPU_3DNOW_PREFETCH); if (sef_cpuid7_ebx.bits.erms != 0) - result |= CPU_ERMS; + vm_features.set_feature(CPU_ERMS); if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0) - result |= CPU_FSRM; + vm_features.set_feature(CPU_FSRM); if (std_cpuid1_ecx.bits.clmul != 0) - result |= CPU_CLMUL; + vm_features.set_feature(CPU_CLMUL); if (sef_cpuid7_ebx.bits.rtm != 0) - result |= CPU_RTM; + vm_features.set_feature(CPU_RTM); if (sef_cpuid7_ebx.bits.adx != 0) - result |= CPU_ADX; + vm_features.set_feature(CPU_ADX); if (sef_cpuid7_ebx.bits.bmi2 != 0) - result |= CPU_BMI2; + vm_features.set_feature(CPU_BMI2); if (sef_cpuid7_ebx.bits.sha != 0) - result |= CPU_SHA; + vm_features.set_feature(CPU_SHA); if (std_cpuid1_ecx.bits.fma != 0) - result |= CPU_FMA; + vm_features.set_feature(CPU_FMA); if (sef_cpuid7_ebx.bits.clflushopt != 0) - result |= CPU_FLUSHOPT; + vm_features.set_feature(CPU_FLUSHOPT); if (sef_cpuid7_ebx.bits.clwb != 0) - result |= CPU_CLWB; + vm_features.set_feature(CPU_CLWB); if (ext_cpuid1_edx.bits.rdtscp != 0) - result |= CPU_RDTSCP; + vm_features.set_feature(CPU_RDTSCP); if (sef_cpuid7_ecx.bits.rdpid != 0) - result |= CPU_RDPID; + vm_features.set_feature(CPU_RDPID); // AMD|Hygon additional features. if (is_amd_family()) { // PREFETCHW was checked above, check TDNOW here. 
if ((ext_cpuid1_edx.bits.tdnow != 0)) - result |= CPU_3DNOW_PREFETCH; + vm_features.set_feature(CPU_3DNOW_PREFETCH); if (ext_cpuid1_ecx.bits.sse4a != 0) - result |= CPU_SSE4A; + vm_features.set_feature(CPU_SSE4A); } // Intel additional features. if (is_intel()) { if (sef_cpuid7_edx.bits.serialize != 0) - result |= CPU_SERIALIZE; + vm_features.set_feature(CPU_SERIALIZE); if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0) - result |= CPU_AVX512_FP16; + vm_features.set_feature(CPU_AVX512_FP16); } // ZX additional features. if (is_zx()) { // We do not know if these are supported by ZX, so we cannot trust // common CPUID bit for them. - assert((result & CPU_CLWB) == 0, "Check if it is supported?"); - result &= ~CPU_CLWB; + assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?"); + vm_features.clear_feature(CPU_CLWB); } // Protection key features. if (sef_cpuid7_ecx.bits.pku != 0) { - result |= CPU_PKU; + vm_features.set_feature(CPU_PKU); } if (sef_cpuid7_ecx.bits.ospke != 0) { - result |= CPU_OSPKE; + vm_features.set_feature(CPU_OSPKE); } // Control flow enforcement (CET) features. if (sef_cpuid7_ecx.bits.cet_ss != 0) { - result |= CPU_CET_SS; + vm_features.set_feature(CPU_CET_SS); } if (sef_cpuid7_edx.bits.cet_ibt != 0) { - result |= CPU_CET_IBT; + vm_features.set_feature(CPU_CET_IBT); } // Composite features. if (supports_tscinv_bit() && ((is_amd_family() && !is_amd_Barcelona()) || is_intel_tsc_synched_at_init())) { - result |= CPU_TSCINV; + vm_features.set_feature(CPU_TSCINV); } - - return result; + return vm_features; } bool VM_Version::os_supports_avx_vectors() { bool retVal = false; - int nreg = 2 LP64_ONLY(+2); + int nreg = 4; if (supports_evex()) { // Verify that OS save/restore all bits of EVEX registers // during signal processing. 
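The feature_flags() conversion above replaces the old uint64_t feature mask with set_feature/clear_feature/supports_feature calls on the new VM_Features bitmap declared later in this patch in vm_version_x86.hpp: a feature number selects a 64-bit word via feature >> LogBitsPerLong and a bit via 1ULL << (feature & 63), which is what lets the feature list grow past 64 entries (CPU_AVX10_2 is bit 64). A minimal standalone sketch of the same indexing, assuming MAX_CPU_FEATURES == 65 as declared in this patch:

// Standalone sketch of the VM_Features bitmap indexing, assuming
// MAX_CPU_FEATURES == 65 (feature bits 0..64) as in this patch.
#include <cstdint>

const int kMaxCpuFeatures = 65;

class FeaturesSketch {
  // (65 / 64) + 1 == 2 words: features 0..63 land in word 0, feature 64 in word 1.
  uint64_t _bitmap[(kMaxCpuFeatures / 64) + 1] = {};

  static int index(int feature)         { return feature >> 6; }           // feature / 64
  static uint64_t bit_mask(int feature) { return 1ULL << (feature & 63); } // feature % 64

 public:
  void set_feature(int f)            { _bitmap[index(f)] |=  bit_mask(f); }
  void clear_feature(int f)          { _bitmap[index(f)] &= ~bit_mask(f); }
  bool supports_feature(int f) const { return (_bitmap[index(f)] & bit_mask(f)) != 0; }
};
// e.g. feature 64 (CPU_AVX10_2) selects word 1, bit 0.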
@@ -3296,19 +3262,15 @@ int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { return 128; // Athlon } } else { // Intel - if (supports_sse3() && cpu_family() == 6) { + if (supports_sse3() && is_intel_server_family()) { if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus return 192; } else if (use_watermark_prefetch) { // watermark prefetching on Core -#ifdef _LP64 return 384; -#else - return 320; -#endif } } if (supports_sse2()) { - if (cpu_family() == 6) { + if (is_intel_server_family()) { return 256; // Pentium M, Core, Core2 } else { return 512; // Pentium 4 @@ -3333,3 +3295,14 @@ bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { } return true; } + +void VM_Version::insert_features_names(VM_Version::VM_Features features, char* buf, size_t buflen) { + for (int i = 0; i < MAX_CPU_FEATURES; i++) { + if (features.supports_feature((VM_Version::Feature_Flag)i)) { + int res = jio_snprintf(buf, buflen, ", %s", _features_names[i]); + assert(res > 0, "not enough temporary space allocated"); + buf += res; + buflen -= res; + } + } +} diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp index cc5c6c1c639..3c8971e474b 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp @@ -295,12 +295,32 @@ class VM_Version : public Abstract_VM_Version { union SefCpuid7SubLeaf1Edx { uint32_t value; struct { - uint32_t : 21, + uint32_t : 19, + avx10 : 1, + : 1, apx_f : 1, : 10; } bits; }; + union StdCpuid24MainLeafEax { + uint32_t value; + struct { + uint32_t sub_leaves_cnt : 31; + } bits; + }; + + union StdCpuid24MainLeafEbx { + uint32_t value; + struct { + uint32_t avx10_converged_isa_version : 8, + : 8, + : 2, + avx10_vlen_512 : 1, + : 13; + } bits; + }; + union ExtCpuid1EEbx { uint32_t value; struct { @@ -342,9 +362,9 @@ class VM_Version : public Abstract_VM_Version { /* * Update following files when declaring new flags: * test/lib-test/jdk/test/whitebox/CPUInfoTest.java - * src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.amd64/src/jdk/vm/ci/amd64/AMD64.java + * src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/amd64/AMD64.java */ - enum Feature_Flag : uint64_t { + enum Feature_Flag { #define CPU_FEATURE_FLAGS(decl) \ decl(CX8, "cx8", 0) /* next bits are from cpuid 1 (EDX) */ \ decl(CMOV, "cmov", 1) \ @@ -420,15 +440,85 @@ class VM_Version : public Abstract_VM_Version { decl(AVX_IFMA, "avx_ifma", 59) /* 256-bit VEX-coded variant of AVX512-IFMA*/ \ decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/ \ decl(SHA512, "sha512", 61) /* SHA512 instructions*/ \ - decl(AVX512_FP16, "avx512_fp16", 62) /* AVX512 FP16 ISA support*/ + decl(AVX512_FP16, "avx512_fp16", 62) /* AVX512 FP16 ISA support*/ \ + decl(AVX10_1, "avx10_1", 63) /* AVX10 512 bit vector ISA Version 1 support*/ \ + decl(AVX10_2, "avx10_2", 64) /* AVX10 512 bit vector ISA Version 2 support*/ -#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit), +#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (bit), CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) #undef DECLARE_CPU_FEATURE_FLAG + MAX_CPU_FEATURES }; + class VM_Features { + friend class VMStructs; + friend class JVMCIVMStructs; + + private: + uint64_t _features_bitmap[(MAX_CPU_FEATURES / BitsPerLong) + 1]; + + STATIC_ASSERT(sizeof(_features_bitmap) * BitsPerByte >= MAX_CPU_FEATURES); + + // Number of 8-byte elements in _bitmap. 
+ constexpr static int features_bitmap_element_count() { + return sizeof(_features_bitmap) / sizeof(uint64_t); + } + + constexpr static int features_bitmap_element_shift_count() { + return LogBitsPerLong; + } + + constexpr static uint64_t features_bitmap_element_mask() { + return (1ULL << features_bitmap_element_shift_count()) - 1; + } + + static int index(Feature_Flag feature) { + int idx = feature >> features_bitmap_element_shift_count(); + assert(idx < features_bitmap_element_count(), "Features array index out of bounds"); + return idx; + } + + static uint64_t bit_mask(Feature_Flag feature) { + return (1ULL << (feature & features_bitmap_element_mask())); + } + + static int _features_bitmap_size; // for JVMCI purposes + public: + VM_Features() { + for (int i = 0; i < features_bitmap_element_count(); i++) { + _features_bitmap[i] = 0; + } + } + + void set_feature(Feature_Flag feature) { + int idx = index(feature); + _features_bitmap[idx] |= bit_mask(feature); + } + + void clear_feature(VM_Version::Feature_Flag feature) { + int idx = index(feature); + _features_bitmap[idx] &= ~bit_mask(feature); + } + + bool supports_feature(VM_Version::Feature_Flag feature) { + int idx = index(feature); + return (_features_bitmap[idx] & bit_mask(feature)) != 0; + } + }; + + // CPU feature flags vector, can be affected by VM settings. + static VM_Features _features; + + // Original CPU feature flags vector, not affected by VM settings. + static VM_Features _cpu_features; + static const char* _features_names[]; + static void clear_cpu_features() { + _features = VM_Features(); + _cpu_features = VM_Features(); + } + enum Extended_Family { // AMD CPU_FAMILY_AMD_11H = 0x11, @@ -492,6 +582,11 @@ class VM_Version : public Abstract_VM_Version { SefCpuid7SubLeaf1Eax sefsl1_cpuid7_eax; SefCpuid7SubLeaf1Edx sefsl1_cpuid7_edx; + // cpuid function 24 converged vector ISA main leaf + // eax = 24, ecx = 0 + StdCpuid24MainLeafEax std_cpuid24_eax; + StdCpuid24MainLeafEbx std_cpuid24_ebx; + // cpuid function 0xB (processor topology) // ecx = 0 uint32_t tpl_cpuidB0_eax; @@ -565,7 +660,7 @@ class VM_Version : public Abstract_VM_Version { // Space to save apx registers after signal handle jlong apx_save[2]; // Save r16 and r31 - uint64_t feature_flags() const; + VM_Features feature_flags() const; // Asserts void assert_is_initialized() const { @@ -611,6 +706,7 @@ class VM_Version : public Abstract_VM_Version { // Offsets for cpuid asm stub static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } + static ByteSize std_cpuid24_offset() { return byte_offset_of(CpuidInfo, std_cpuid24_eax); } static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); } static ByteSize sefsl1_cpuid7_offset() { return byte_offset_of(CpuidInfo, sefsl1_cpuid7_eax); } @@ -642,13 +738,31 @@ class VM_Version : public Abstract_VM_Version { static void set_cpuinfo_cont_addr_apx(address pc) { _cpuinfo_cont_addr_apx = pc; } static address cpuinfo_cont_addr_apx() { return _cpuinfo_cont_addr_apx; } - LP64_ONLY(static void clear_apx_test_state()); + static void clear_apx_test_state(); - static void clean_cpuFeatures() { _features = 0; } - static void set_avx_cpuFeatures() { _features |= (CPU_SSE | CPU_SSE2 | CPU_AVX | CPU_VZEROUPPER ); } - static void set_evex_cpuFeatures() { _features |= (CPU_AVX512F | CPU_SSE | CPU_SSE2 | 
CPU_VZEROUPPER ); } - static void set_apx_cpuFeatures() { _features |= CPU_APX_F; } - static void set_bmi_cpuFeatures() { _features |= (CPU_BMI1 | CPU_BMI2 | CPU_LZCNT | CPU_POPCNT); } + static void clean_cpuFeatures() { + VM_Version::clear_cpu_features(); + } + static void set_avx_cpuFeatures() { + _features.set_feature(CPU_SSE); + _features.set_feature(CPU_SSE2); + _features.set_feature(CPU_AVX); + _features.set_feature(CPU_VZEROUPPER); + } + static void set_evex_cpuFeatures() { + _features.set_feature(CPU_AVX10_1); + _features.set_feature(CPU_AVX512F); + _features.set_feature(CPU_SSE); + _features.set_feature(CPU_SSE2); + _features.set_feature(CPU_VZEROUPPER); + } + static void set_apx_cpuFeatures() { _features.set_feature(CPU_APX_F); } + static void set_bmi_cpuFeatures() { + _features.set_feature(CPU_BMI1); + _features.set_feature(CPU_BMI2); + _features.set_feature(CPU_LZCNT); + _features.set_feature(CPU_POPCNT); + } // Initialization static void initialize(); @@ -677,6 +791,7 @@ class VM_Version : public Abstract_VM_Version { static uint32_t cpu_stepping() { return _cpuid_info.cpu_stepping(); } static int cpu_family() { return _cpu;} static bool is_P6() { return cpu_family() >= 6; } + static bool is_intel_server_family() { return cpu_family() == 6 || cpu_family() == 19; } static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' static bool is_hygon() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x6F677948; } // 'ogyH' static bool is_amd_family() { return is_amd() || is_hygon(); } @@ -703,40 +818,39 @@ class VM_Version : public Abstract_VM_Version { // // Feature identification which can be affected by VM settings // - static bool supports_cpuid() { return _features != 0; } - static bool supports_cmov() { return (_features & CPU_CMOV) != 0; } - static bool supports_fxsr() { return (_features & CPU_FXSR) != 0; } - static bool supports_ht() { return (_features & CPU_HT) != 0; } - static bool supports_mmx() { return (_features & CPU_MMX) != 0; } - static bool supports_sse() { return (_features & CPU_SSE) != 0; } - static bool supports_sse2() { return (_features & CPU_SSE2) != 0; } - static bool supports_sse3() { return (_features & CPU_SSE3) != 0; } - static bool supports_ssse3() { return (_features & CPU_SSSE3)!= 0; } - static bool supports_sse4_1() { return (_features & CPU_SSE4_1) != 0; } - static bool supports_sse4_2() { return (_features & CPU_SSE4_2) != 0; } - static bool supports_popcnt() { return (_features & CPU_POPCNT) != 0; } - static bool supports_avx() { return (_features & CPU_AVX) != 0; } - static bool supports_avx2() { return (_features & CPU_AVX2) != 0; } - static bool supports_tsc() { return (_features & CPU_TSC) != 0; } - static bool supports_rdtscp() { return (_features & CPU_RDTSCP) != 0; } - static bool supports_rdpid() { return (_features & CPU_RDPID) != 0; } - static bool supports_aes() { return (_features & CPU_AES) != 0; } - static bool supports_erms() { return (_features & CPU_ERMS) != 0; } - static bool supports_fsrm() { return (_features & CPU_FSRM) != 0; } - static bool supports_clmul() { return (_features & CPU_CLMUL) != 0; } - static bool supports_rtm() { return (_features & CPU_RTM) != 0; } - static bool supports_bmi1() { return (_features & CPU_BMI1) != 0; } - static bool supports_bmi2() { return (_features & CPU_BMI2) != 0; } - static bool supports_adx() { return (_features & CPU_ADX) != 0; } - static bool supports_evex() { return (_features & CPU_AVX512F) != 0; } - static bool 
supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; } - static bool supports_avx512ifma() { return (_features & CPU_AVX512_IFMA) != 0; } - static bool supports_avxifma() { return (_features & CPU_AVX_IFMA) != 0; } - static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; } - static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; } - static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; } - static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; } - static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; } + static bool supports_cmov() { return _features.supports_feature(CPU_CMOV); } + static bool supports_fxsr() { return _features.supports_feature(CPU_FXSR); } + static bool supports_ht() { return _features.supports_feature(CPU_HT); } + static bool supports_mmx() { return _features.supports_feature(CPU_MMX); } + static bool supports_sse() { return _features.supports_feature(CPU_SSE); } + static bool supports_sse2() { return _features.supports_feature(CPU_SSE2); } + static bool supports_sse3() { return _features.supports_feature(CPU_SSE3); } + static bool supports_ssse3() { return _features.supports_feature(CPU_SSSE3); } + static bool supports_sse4_1() { return _features.supports_feature(CPU_SSE4_1); } + static bool supports_sse4_2() { return _features.supports_feature(CPU_SSE4_2); } + static bool supports_popcnt() { return _features.supports_feature(CPU_POPCNT); } + static bool supports_avx() { return _features.supports_feature(CPU_AVX); } + static bool supports_avx2() { return _features.supports_feature(CPU_AVX2); } + static bool supports_tsc() { return _features.supports_feature(CPU_TSC); } + static bool supports_rdtscp() { return _features.supports_feature(CPU_RDTSCP); } + static bool supports_rdpid() { return _features.supports_feature(CPU_RDPID); } + static bool supports_aes() { return _features.supports_feature(CPU_AES); } + static bool supports_erms() { return _features.supports_feature(CPU_ERMS); } + static bool supports_fsrm() { return _features.supports_feature(CPU_FSRM); } + static bool supports_clmul() { return _features.supports_feature(CPU_CLMUL); } + static bool supports_rtm() { return _features.supports_feature(CPU_RTM); } + static bool supports_bmi1() { return _features.supports_feature(CPU_BMI1); } + static bool supports_bmi2() { return _features.supports_feature(CPU_BMI2); } + static bool supports_adx() { return _features.supports_feature(CPU_ADX); } + static bool supports_evex() { return _features.supports_feature(CPU_AVX512F); } + static bool supports_avx512dq() { return _features.supports_feature(CPU_AVX512DQ); } + static bool supports_avx512ifma() { return _features.supports_feature(CPU_AVX512_IFMA); } + static bool supports_avxifma() { return _features.supports_feature(CPU_AVX_IFMA); } + static bool supports_avx512pf() { return _features.supports_feature(CPU_AVX512PF); } + static bool supports_avx512er() { return _features.supports_feature(CPU_AVX512ER); } + static bool supports_avx512cd() { return _features.supports_feature(CPU_AVX512CD); } + static bool supports_avx512bw() { return _features.supports_feature(CPU_AVX512BW); } + static bool supports_avx512vl() { return _features.supports_feature(CPU_AVX512VL); } static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); } static bool supports_avx512bwdq() { return (supports_evex() && supports_avx512bw() && supports_avx512dq()); } static bool supports_avx512vldq() { 
return (supports_evex() && supports_avx512dq() && supports_avx512vl()); } @@ -745,33 +859,39 @@ class VM_Version : public Abstract_VM_Version { static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); } static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); } static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); } - static bool supports_apx_f() { return (_features & CPU_APX_F) != 0; } + static bool supports_apx_f() { return _features.supports_feature(CPU_APX_F); } static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); } - static bool supports_sha() { return (_features & CPU_SHA) != 0; } - static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); } - static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; } - static bool supports_avx512_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; } - static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; } - static bool supports_avx512_vaes() { return (_features & CPU_AVX512_VAES) != 0; } - static bool supports_gfni() { return (_features & CPU_GFNI) != 0; } - static bool supports_avx512_vnni() { return (_features & CPU_AVX512_VNNI) != 0; } - static bool supports_avx512_bitalg() { return (_features & CPU_AVX512_BITALG) != 0; } - static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; } - static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; } - static bool supports_avx512_fp16() { return (_features & CPU_AVX512_FP16) != 0; } - static bool supports_hv() { return (_features & CPU_HV) != 0; } - static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; } - static bool supports_f16c() { return (_features & CPU_F16C) != 0; } - static bool supports_pku() { return (_features & CPU_PKU) != 0; } - static bool supports_ospke() { return (_features & CPU_OSPKE) != 0; } - static bool supports_cet_ss() { return (_features & CPU_CET_SS) != 0; } - static bool supports_cet_ibt() { return (_features & CPU_CET_IBT) != 0; } - static bool supports_sha512() { return (_features & CPU_SHA512) != 0; } + static bool supports_sha() { return _features.supports_feature(CPU_SHA); } + static bool supports_fma() { return _features.supports_feature(CPU_FMA) && supports_avx(); } + static bool supports_vzeroupper() { return _features.supports_feature(CPU_VZEROUPPER); } + static bool supports_avx512_vpopcntdq() { return _features.supports_feature(CPU_AVX512_VPOPCNTDQ); } + static bool supports_avx512_vpclmulqdq() { return _features.supports_feature(CPU_AVX512_VPCLMULQDQ); } + static bool supports_avx512_vaes() { return _features.supports_feature(CPU_AVX512_VAES); } + static bool supports_gfni() { return _features.supports_feature(CPU_GFNI); } + static bool supports_avx512_vnni() { return _features.supports_feature(CPU_AVX512_VNNI); } + static bool supports_avx512_bitalg() { return _features.supports_feature(CPU_AVX512_BITALG); } + static bool supports_avx512_vbmi() { return _features.supports_feature(CPU_AVX512_VBMI); } + static bool supports_avx512_vbmi2() { return _features.supports_feature(CPU_AVX512_VBMI2); } + static bool supports_avx512_fp16() { return _features.supports_feature(CPU_AVX512_FP16); } + static bool supports_hv() { return _features.supports_feature(CPU_HV); } + static bool supports_serialize() { return _features.supports_feature(CPU_SERIALIZE); } + static bool supports_f16c() { 
return _features.supports_feature(CPU_F16C); } + static bool supports_pku() { return _features.supports_feature(CPU_PKU); } + static bool supports_ospke() { return _features.supports_feature(CPU_OSPKE); } + static bool supports_cet_ss() { return _features.supports_feature(CPU_CET_SS); } + static bool supports_cet_ibt() { return _features.supports_feature(CPU_CET_IBT); } + static bool supports_sha512() { return _features.supports_feature(CPU_SHA512); } + + // Intel® AVX10 introduces a versioned approach for enumeration that is monotonically increasing, inclusive, + // and supporting all vector lengths. Feature set supported by an AVX10 vector ISA version is also supported + // by all the versions above it. + static bool supports_avx10_1() { return _features.supports_feature(CPU_AVX10_1);} + static bool supports_avx10_2() { return _features.supports_feature(CPU_AVX10_2);} // // Feature identification not affected by VM flags // - static bool cpu_supports_evex() { return (_cpu_features & CPU_AVX512F) != 0; } + static bool cpu_supports_evex() { return _cpu_features.supports_feature(CPU_AVX512F); } static bool supports_avx512_simd_sort() { if (supports_avx512dq()) { @@ -802,6 +922,8 @@ class VM_Version : public Abstract_VM_Version { static bool is_intel_tsc_synched_at_init(); + static void insert_features_names(VM_Version::VM_Features features, char* buf, size_t buflen); + // This checks if the JVM is potentially affected by an erratum on Intel CPUs (SKX102) // that causes unpredictable behaviour when jcc crosses 64 byte boundaries. Its microcode // mitigation causes regressions when jumps or fused conditional branches cross or end at @@ -809,23 +931,23 @@ class VM_Version : public Abstract_VM_Version { static bool has_intel_jcc_erratum() { return _has_intel_jcc_erratum; } // AMD features - static bool supports_3dnow_prefetch() { return (_features & CPU_3DNOW_PREFETCH) != 0; } - static bool supports_lzcnt() { return (_features & CPU_LZCNT) != 0; } - static bool supports_sse4a() { return (_features & CPU_SSE4A) != 0; } + static bool supports_3dnow_prefetch() { return _features.supports_feature(CPU_3DNOW_PREFETCH); } + static bool supports_lzcnt() { return _features.supports_feature(CPU_LZCNT); } + static bool supports_sse4a() { return _features.supports_feature(CPU_SSE4A); } static bool is_amd_Barcelona() { return is_amd() && extended_cpu_family() == CPU_FAMILY_AMD_11H; } // Intel and AMD newer cores support fast timestamps well static bool supports_tscinv_bit() { - return (_features & CPU_TSCINV_BIT) != 0; + return _features.supports_feature(CPU_TSCINV_BIT); } static bool supports_tscinv() { - return (_features & CPU_TSCINV) != 0; + return _features.supports_feature(CPU_TSCINV); } // Intel Core and newer cpus have fast IDIV instruction (excluding Atom). 
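The feature hunks above replace the old single-bitmask tests, (_features & CPU_X) != 0, with accessor calls on a feature-set object (_features.set_feature(...) / _features.supports_feature(...)), which removes the 64-feature ceiling of one uint64_t word and makes room for additions such as CPU_AVX10_1 and CPU_AVX10_2. As a rough illustration of the pattern only (the real VM_Features type is not reproduced in this excerpt), an accessor-based set can be backed by an array of words; the fast-IDIV predicate introduced by the comment just above continues immediately after this sketch.

#include <cstdint>
#include <cstring>

// Illustrative only: a fixed-size bit set with the same set/test surface as
// the accessors used in the hunks above; not the actual VM_Features class.
class CpuFeatureSet {
  static const int WORDS = 4;                       // room for 256 feature ids
  uint64_t _bits[WORDS];
public:
  CpuFeatureSet() { ::memset(_bits, 0, sizeof(_bits)); }
  void set_feature(int id)            { _bits[id / 64] |= (uint64_t)1 << (id % 64); }
  bool supports_feature(int id) const { return (_bits[id / 64] & ((uint64_t)1 << (id % 64))) != 0; }
  void clear()                        { ::memset(_bits, 0, sizeof(_bits)); }
};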
- static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && + static bool has_fast_idiv() { return is_intel() && is_intel_server_family() && supports_sse3() && _model != 0x1C; } static bool supports_compare_and_exchange() { return true; } @@ -839,12 +961,12 @@ class VM_Version : public Abstract_VM_Version { // x86_64 supports fast class initialization checks static bool supports_fast_class_init_checks() { - return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32 + return true; } // x86_64 supports secondary supers table constexpr static bool supports_secondary_supers_table() { - return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32 + return true; } constexpr static bool supports_stack_watermark_barrier() { @@ -879,15 +1001,11 @@ class VM_Version : public Abstract_VM_Version { // synchronize with other memory ops. so, it needs preceding // and trailing StoreStore fences. -#ifdef _LP64 static bool supports_clflush(); // Can't inline due to header file conflict -#else - static bool supports_clflush() { return ((_features & CPU_FLUSH) != 0); } -#endif // _LP64 // Note: CPU_FLUSHOPT and CPU_CLWB bits should always be zero for 32-bit - static bool supports_clflushopt() { return ((_features & CPU_FLUSHOPT) != 0); } - static bool supports_clwb() { return ((_features & CPU_CLWB) != 0); } + static bool supports_clflushopt() { return (_features.supports_feature(CPU_FLUSHOPT)); } + static bool supports_clwb() { return (_features.supports_feature(CPU_CLWB)); } // Old CPUs perform lea on AGU which causes additional latency transferring the // value from/to ALU for other operations diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 078150c61fb..22490ba7bb3 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -422,6 +422,18 @@ source_hpp %{ #include "peephole_x86_64.hpp" +bool castLL_is_imm32(const Node* n); + +%} + +source %{ + +bool castLL_is_imm32(const Node* n) { + assert(n->is_CastLL(), "must be a CastLL"); + const TypeLong* t = n->bottom_type()->is_long(); + return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi)); +} + %} // Register masks @@ -1584,14 +1596,11 @@ uint MachUEPNode::size(PhaseRegAlloc* ra_) const //============================================================================= bool Matcher::supports_vector_calling_convention(void) { - if (EnableVectorSupport && UseVectorStubs) { - return true; - } - return false; + return EnableVectorSupport; } OptoRegPair Matcher::vector_return_value(uint ideal_reg) { - assert(EnableVectorSupport && UseVectorStubs, "sanity"); + assert(EnableVectorSupport, "sanity"); int lo = XMM0_num; int hi = XMM0b_num; if (ideal_reg == Op_VecX) hi = XMM0d_num; @@ -1838,20 +1847,19 @@ encode %{ %} enc_class clear_avx %{ - debug_only(int off0 = __ offset()); + DEBUG_ONLY(int off0 = __ offset()); if (generate_vzeroupper(Compile::current())) { // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty // Clear upper bits of YMM registers when current compiled code uses // wide vectors to avoid AVX <-> SSE transition penalty during call. 
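The castLL_is_imm32() helper added to x86_64.ad above tests whether both type bounds of a CastLL node fit in a signed 32-bit immediate; it is what later selects between the castLL_checked_L32 and castLL_checked forms further down in this patch, where an out-of-range bound needs a spare register. A stand-alone restatement of the same range test, with hypothetical names rather than the actual HotSpot types, is shown below; the clear_avx encoding interrupted here resumes right after it with the vzeroupper() call.

#include <cstdint>

// Hypothetical restatement of the bounds test used by castLL_is_imm32: an
// "unbounded" end (min/max jlong) needs no compare at all, and any other
// bound can be folded into a cmpq immediate only if it fits in simm32.
static bool fits_simm32(int64_t v) {
  return v >= INT32_MIN && v <= INT32_MAX;
}

static bool long_range_check_uses_imm32(int64_t lo, int64_t hi) {
  return (lo == INT64_MIN || fits_simm32(lo)) &&
         (hi == INT64_MAX || fits_simm32(hi));
}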
__ vzeroupper(); } - debug_only(int off1 = __ offset()); + DEBUG_ONLY(int off1 = __ offset()); assert(off1 - off0 == clear_avx_size(), "correct size prediction"); %} enc_class Java_To_Runtime(method meth) %{ - // No relocation needed - __ mov64(r10, (int64_t) $meth$$method); + __ lea(r10, RuntimeAddress((address)$meth$$method)); __ call(r10); __ post_call_nop(); %} @@ -6258,13 +6266,14 @@ instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{ predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne); match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2))); + effect(TEMP dst); ins_cost(200); format %{ "ecmovpl $dst, $src1, $src2\n\t" - "ecmovnel $dst, $src1, $src2" %} + "cmovnel $dst, $src2" %} ins_encode %{ __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register); - __ ecmovl(Assembler::notEqual, $dst$$Register, $src1$$Register, $src2$$Register); + __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register); %} ins_pipe(pipe_cmov_reg); %} @@ -6274,6 +6283,7 @@ instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI s instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{ predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq); match(Set dst (CMoveI (Binary cop cr) (Binary src dst))); + effect(TEMP dst); ins_cost(200); // XXX format %{ "cmovpl $dst, $src\n\t" @@ -6289,14 +6299,15 @@ instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) // and parity flag bit is set if any of the operand is a NaN. instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{ predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq); - match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2))); + match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1))); + effect(TEMP dst); ins_cost(200); format %{ "ecmovpl $dst, $src1, $src2\n\t" - "ecmovnel $dst, $src1, $src2" %} + "cmovnel $dst, $src2" %} ins_encode %{ __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register); - __ ecmovl(Assembler::notEqual, $dst$$Register, $src1$$Register, $src2$$Register); + __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register); %} ins_pipe(pipe_cmov_reg); %} @@ -6539,6 +6550,7 @@ instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP %} instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{ + predicate(!UseAPX); match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); ins_cost(200); expand %{ @@ -6547,6 +6559,7 @@ instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{ %} instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{ + predicate(UseAPX); match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2))); ins_cost(200); format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %} @@ -6573,13 +6586,14 @@ instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{ predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne); match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2))); + effect(TEMP dst); ins_cost(200); format %{ "ecmovpq $dst, $src1, $src2\n\t" - "ecmovneq $dst, $src1, $src2" %} + "cmovneq $dst, $src2" %} ins_encode %{ __ 
ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register); - __ ecmovq(Assembler::notEqual, $dst$$Register, $src1$$Register, $src2$$Register); + __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register); %} ins_pipe(pipe_cmov_reg); %} @@ -6602,14 +6616,15 @@ instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{ predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq); - match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2))); + match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1))); + effect(TEMP dst); ins_cost(200); format %{ "ecmovpq $dst, $src1, $src2\n\t" - "ecmovneq $dst, $src1, $src2" %} + "cmovneq $dst, $src2" %} ins_encode %{ __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register); - __ ecmovq(Assembler::notEqual, $dst$$Register, $src1$$Register, $src2$$Register); + __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register); %} ins_pipe(pipe_cmov_reg); %} @@ -6772,13 +6787,14 @@ instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{ predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne); match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2))); + effect(TEMP dst); ins_cost(200); format %{ "ecmovpq $dst, $src1, $src2\n\t" - "ecmovneq $dst, $src1, $src2" %} + "cmovneq $dst, $src2" %} ins_encode %{ __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register); - __ ecmovq(Assembler::notEqual, $dst$$Register, $src1$$Register, $src2$$Register); + __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register); %} ins_pipe(pipe_cmov_reg); %} @@ -6801,14 +6817,15 @@ instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{ predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq); - match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2))); + match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1))); + effect(TEMP dst); ins_cost(200); format %{ "ecmovpq $dst, $src1, $src2\n\t" - "ecmovneq $dst, $src1, $src2" %} + "cmovneq $dst, $src2" %} ins_encode %{ __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register); - __ ecmovq(Assembler::notEqual, $dst$$Register, $src1$$Register, $src2$$Register); + __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register); %} ins_pipe(pipe_cmov_reg); %} @@ -7035,21 +7052,6 @@ instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) ins_pipe(ialu_reg_mem); %} -instruct addI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr) -%{ - predicate(UseAPX); - match(Set dst (AddI (LoadI src1) src2)); - effect(KILL cr); - flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag); - - ins_cost(150); - format %{ "eaddl $dst, $src1, $src2\t# int ndd" %} - ins_encode %{ - __ eaddl($dst$$Register, $src1$$Address, $src2$$Register, false); - %} - ins_pipe(ialu_reg_mem); -%} - instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{ predicate(UseAPX); @@ -7353,21 +7355,6 @@ instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr ins_pipe(ialu_reg_mem); %} -instruct addL_rReg_mem_rReg_ndd(rRegL dst, 
memory src1, rRegL src2, rFlagsReg cr) -%{ - predicate(UseAPX); - match(Set dst (AddL (LoadL src1) src2)); - effect(KILL cr); - flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag); - - ins_cost(150); - format %{ "eaddq $dst, $src1, $src2\t# long ndd" %} - ins_encode %{ - __ eaddq($dst$$Register, $src1$$Address, $src2$$Register, false); - %} - ins_pipe(ialu_reg_mem); -%} - instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{ match(Set dst (StoreL dst (AddL (LoadL dst) src))); @@ -7605,6 +7592,7 @@ instruct castPP(rRegP dst) instruct castII(rRegI dst) %{ + predicate(VerifyConstraintCasts == 0); match(Set dst (CastII dst)); size(0); @@ -7614,8 +7602,22 @@ instruct castII(rRegI dst) ins_pipe(empty); %} +instruct castII_checked(rRegI dst, rFlagsReg cr) +%{ + predicate(VerifyConstraintCasts > 0); + match(Set dst (CastII dst)); + + effect(KILL cr); + format %{ "# cast_checked_II $dst" %} + ins_encode %{ + __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + instruct castLL(rRegL dst) %{ + predicate(VerifyConstraintCasts == 0); match(Set dst (CastLL dst)); size(0); @@ -7625,6 +7627,32 @@ instruct castLL(rRegL dst) ins_pipe(empty); %} +instruct castLL_checked_L32(rRegL dst, rFlagsReg cr) +%{ + predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n)); + match(Set dst (CastLL dst)); + + effect(KILL cr); + format %{ "# cast_checked_LL $dst" %} + ins_encode %{ + __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg); + %} + ins_pipe(pipe_slow); +%} + +instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr) +%{ + predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n)); + match(Set dst (CastLL dst)); + + effect(KILL cr, TEMP tmp); + format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %} + ins_encode %{ + __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register); + %} + ins_pipe(pipe_slow); +%} + instruct castFF(regF dst) %{ match(Set dst (CastFF dst)); @@ -8538,7 +8566,6 @@ instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr) %{ - predicate(!UseAPX); match(Set dst (MulI src imm)); effect(KILL cr); @@ -8550,20 +8577,6 @@ instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr) ins_pipe(ialu_reg_reg_alu0); %} -instruct mulI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) -%{ - predicate(UseAPX); - match(Set dst (MulI src1 src2)); - effect(KILL cr); - - ins_cost(300); - format %{ "eimull $dst, $src1, $src2\t# int ndd" %} - ins_encode %{ - __ eimull($dst$$Register, $src1$$Register, $src2$$constant, false); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr) %{ predicate(!UseAPX); @@ -8594,7 +8607,6 @@ instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr) %{ - predicate(!UseAPX); match(Set dst (MulI (LoadI src) imm)); effect(KILL cr); @@ -8606,20 +8618,6 @@ instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr) ins_pipe(ialu_reg_mem_alu0); %} -instruct mulI_rReg_mem_imm(rRegI dst, memory src1, immI src2, rFlagsReg cr) -%{ - predicate(UseAPX); - match(Set dst (MulI (LoadI src1) src2)); - effect(KILL cr); - - ins_cost(300); - format %{ "eimull $dst, $src1, $src2\t# int ndd" %} - ins_encode %{ - __ eimull($dst$$Register, 
$src1$$Address, $src2$$constant, false); - %} - ins_pipe(ialu_reg_mem_alu0); -%} - instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr) %{ match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); @@ -8660,7 +8658,6 @@ instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr) %{ - predicate(!UseAPX); match(Set dst (MulL src imm)); effect(KILL cr); @@ -8672,20 +8669,6 @@ instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr) ins_pipe(ialu_reg_reg_alu0); %} -instruct mulL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) -%{ - predicate(UseAPX); - match(Set dst (MulL src1 src2)); - effect(KILL cr); - - ins_cost(300); - format %{ "eimulq $dst, $src1, $src2\t# long ndd" %} - ins_encode %{ - __ eimulq($dst$$Register, $src1$$Register, $src2$$constant, false); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr) %{ predicate(!UseAPX); @@ -8716,7 +8699,6 @@ instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr) %{ - predicate(!UseAPX); match(Set dst (MulL (LoadL src) imm)); effect(KILL cr); @@ -8728,20 +8710,6 @@ instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr) ins_pipe(ialu_reg_mem_alu0); %} -instruct mulL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) -%{ - predicate(UseAPX); - match(Set dst (MulL (LoadL src1) src2)); - effect(KILL cr); - - ins_cost(300); - format %{ "eimulq $dst, $src1, $src2\t# long ndd" %} - ins_encode %{ - __ eimulq($dst$$Register, $src1$$Address, $src2$$constant, false); - %} - ins_pipe(ialu_reg_mem_alu0); -%} - instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr) %{ match(Set dst (MulHiL src rax)); @@ -10631,21 +10599,6 @@ instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr ins_pipe(ialu_reg_mem); %} -instruct xorI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr) -%{ - predicate(UseAPX); - match(Set dst (XorI (LoadI src1) src2)); - effect(KILL cr); - flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag); - - ins_cost(150); - format %{ "exorl $dst, $src1, $src2\t# int ndd" %} - ins_encode %{ - __ exorl($dst$$Register, $src1$$Address, $src2$$Register, false); - %} - ins_pipe(ialu_reg_mem); -%} - // Xor Memory with Register instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{ @@ -10825,21 +10778,6 @@ instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr ins_pipe(ialu_reg_mem); %} -instruct andL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr) -%{ - predicate(UseAPX); - match(Set dst (AndL (LoadL src1) src2)); - effect(KILL cr); - flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag); - - ins_cost(150); - format %{ "eandq $dst, $src1, $src2\t# long ndd" %} - ins_encode %{ - __ eandq($dst$$Register, $src1$$Address, $src2$$Register, false); - %} - ins_pipe(ialu_reg_mem); -%} - // And Memory with Register instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{ @@ -11335,21 +11273,6 @@ instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr ins_pipe(ialu_reg_mem); %} -instruct xorL_rReg_mem_rReg_ndd(rRegL dst, memory 
src1, rRegL src2, rFlagsReg cr) -%{ - predicate(UseAPX); - match(Set dst (XorL (LoadL src1) src2)); - effect(KILL cr); - flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag); - - ins_cost(150); - format %{ "exorq $dst, $src1, $src2\t# long ndd" %} - ins_encode %{ - __ exorq($dst$$Register, $src1$$Address, $src2$$Register, false); - %} - ins_pipe(ialu_reg_mem); -%} - // Xor Memory with Register instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{ diff --git a/src/hotspot/cpu/zero/icache_zero.hpp b/src/hotspot/cpu/zero/icache_zero.hpp index b40e07d5e3b..781021a2b20 100644 --- a/src/hotspot/cpu/zero/icache_zero.hpp +++ b/src/hotspot/cpu/zero/icache_zero.hpp @@ -33,7 +33,7 @@ class ICache : public AbstractICache { public: - static void initialize() {} + static void initialize(int phase) {} static void invalidate_word(address addr) {} static void invalidate_range(address start, int nbytes) {} }; diff --git a/src/hotspot/cpu/zero/sharedRuntime_zero.cpp b/src/hotspot/cpu/zero/sharedRuntime_zero.cpp index f141135ff95..60a873ab31f 100644 --- a/src/hotspot/cpu/zero/sharedRuntime_zero.cpp +++ b/src/hotspot/cpu/zero/sharedRuntime_zero.cpp @@ -50,18 +50,17 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt, return 0; } -AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters( - MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint *fingerprint) { - return AdapterHandlerLibrary::new_entry( - fingerprint, - CAST_FROM_FN_PTR(address,zero_null_code_stub), - CAST_FROM_FN_PTR(address,zero_null_code_stub), - CAST_FROM_FN_PTR(address,zero_null_code_stub)); +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterHandlerEntry* handler) { + handler->set_entry_points(CAST_FROM_FN_PTR(address,zero_null_code_stub), + CAST_FROM_FN_PTR(address,zero_null_code_stub), + CAST_FROM_FN_PTR(address,zero_null_code_stub), + nullptr); + return; } nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, diff --git a/src/hotspot/cpu/zero/vm_version_zero.cpp b/src/hotspot/cpu/zero/vm_version_zero.cpp index e38561e19c5..3ce9227c193 100644 --- a/src/hotspot/cpu/zero/vm_version_zero.cpp +++ b/src/hotspot/cpu/zero/vm_version_zero.cpp @@ -151,6 +151,6 @@ void VM_Version::initialize_cpu_information(void) { _no_of_threads = _no_of_cores; _no_of_sockets = _no_of_cores; snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Zero VM"); - snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _features_string); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _cpu_info_string); _initialized = true; } diff --git a/src/hotspot/os/aix/libodm_aix.cpp b/src/hotspot/os/aix/libodm_aix.cpp index 9fe0fb7abd8..854fd5e2b79 100644 --- a/src/hotspot/os/aix/libodm_aix.cpp +++ b/src/hotspot/os/aix/libodm_aix.cpp @@ -30,6 +30,7 @@ #include #include "runtime/arguments.hpp" #include "runtime/os.hpp" +#include "utilities/permitForbiddenFunctions.hpp" dynamicOdm::dynamicOdm() { @@ -59,7 +60,7 @@ dynamicOdm::~dynamicOdm() { } -void odmWrapper::clean_data() { if (_data) { free(_data); _data = nullptr; } } +void odmWrapper::clean_data() { if (_data) { permit_forbidden_function::free(_data); _data = nullptr; } } int odmWrapper::class_offset(const char *field, bool is_aix_5) diff --git a/src/hotspot/os/aix/loadlib_aix.cpp 
b/src/hotspot/os/aix/loadlib_aix.cpp index 90a7271ad6d..e7dbd775e37 100644 --- a/src/hotspot/os/aix/loadlib_aix.cpp +++ b/src/hotspot/os/aix/loadlib_aix.cpp @@ -38,6 +38,7 @@ #include "logging/log.hpp" #include "utilities/debug.hpp" #include "utilities/ostream.hpp" +#include "utilities/permitForbiddenFunctions.hpp" // For loadquery() #include
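The two AIX hunks above route a raw free() call through permit_forbidden_function::free() and pull in utilities/permitForbiddenFunctions.hpp. The general shape of such a wrapper, sketched here with assumed contents (the real header is not shown in this patch), is to keep the libc call reachable only through an explicitly named namespace, so that stray direct uses of the forbidden name stand out while the few legitimate call sites opt in visibly.

#include <cstdlib>

// Sketch only: a forwarder that makes an otherwise "forbidden" libc function
// usable at a clearly marked call site. The actual permitForbiddenFunctions.hpp
// may declare more functions and use different machinery.
namespace permit_forbidden_function {
  inline void free(void* p) { ::free(p); }
}

// Call-site shape matching the libodm_aix.cpp hunk:
//   if (_data) { permit_forbidden_function::free(_data); _data = nullptr; }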