1 //
   2 // Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20 // CA 95054 USA or visit www.sun.com if you need additional information or
  21 // have any questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
// Each general register gets two reg_def entries that share one encoding:
// NAME is bound to reg->as_VMReg() and NAME_H to reg->as_VMReg()->next()
// (presumably the upper half of the 64-bit register -- confirm).  The fifth
// reg_def argument is that VMReg.
reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

// RBX: save-on-call for the allocator, save-on-entry in the C convention.
reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// RSP is no-save in both conventions.
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

// RSI and RDI differ only in their C-convention save type: SOE when _WIN64
// is defined, SOC otherwise.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

// R8-R11: save-on-call in both conventions.
reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15: save-on-call for the allocator, save-on-entry in the C convention.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
// Each XMM register gets two Op_RegF entries that share one encoding:
// XMMn is bound to xmmN->as_VMReg() and XMMn_H to xmmN->as_VMReg()->next().
// XMM0-XMM5 are save-on-call in both conventions.
reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());

reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());

reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());

reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());

reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());

reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());

// XMM6-XMM15: the C-convention save type is SOE when _WIN64 is defined and
// SOC otherwise; the allocator save type is SOC in both cases.
#ifdef _WIN64

reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());

reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());

reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());

reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());

reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());

reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());

reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());

reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());

reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());

reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());

#else

reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());

reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());

reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());

reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());

reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());

reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());

reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());

reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());

reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());

reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());

#endif // _WIN64
 227 
// Condition-code register: ideal register type 0, encoding 16, and no VMReg
// behind it (VMRegImpl::Bad()).
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
// chunk0: all general register pairs, in allocation-priority order (first
// entry is tried first).  R10/R11/R8/R9 lead the list; RSP is last.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);

// chunk1: all XMM register pairs, in ascending register number.
// XXX probably use 8-15 first on Linux
alloc_class chunk1(XMM0,  XMM0_H,
                   XMM1,  XMM1_H,
                   XMM2,  XMM2_H,
                   XMM3,  XMM3_H,
                   XMM4,  XMM4_H,
                   XMM5,  XMM5_H,
                   XMM6,  XMM6_H,
                   XMM7,  XMM7_H,
                   XMM8,  XMM8_H,
                   XMM9,  XMM9_H,
                   XMM10, XMM10_H,
                   XMM11, XMM11_H,
                   XMM12, XMM12_H,
                   XMM13, XMM13_H,
                   XMM14, XMM14_H,
                   XMM15, XMM15_H);

// chunk2: the flags register on its own.
alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
// Class for all pointer registers (including RSP).
// Lists all sixteen general register pairs, R12 and R15 included.
reg_class any_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);
 302 
// Class for all pointer registers except RSP.
// R12 and R15 are also left out of this list.
reg_class ptr_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R13, R13_H,
                  R14, R14_H);
 317 
// Class for all pointer registers except RAX and RSP.
// R15 is also left out of this list.
// NOTE(review): R12 is listed here although ptr_reg omits it -- confirm
// that this difference is intended.
reg_class ptr_no_rax_reg(RDX, RDX_H,
                         RBP, RBP_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R12, R12_H,
                         R13, R13_H,
                         R14, R14_H);
 332 
// Class for all pointer registers except RBP and RSP.
// R15 is also left out of this list.
// NOTE(review): R12 is listed here although ptr_reg omits it -- confirm
// that this difference is intended.
reg_class ptr_no_rbp_reg(RDX, RDX_H,
                         RAX, RAX_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R12, R12_H,
                         R13, R13_H,
                         R14, R14_H);
 346 
// Class for all pointer registers except RAX, RBX and RSP.
// R15 is also left out of this list.
// NOTE(review): R12 is listed here although ptr_reg omits it -- confirm
// that this difference is intended.
reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
                             RBP, RBP_H,
                             RDI, RDI_H,
                             RSI, RSI_H,
                             RCX, RCX_H,
                             R8,  R8_H,
                             R9,  R9_H,
                             R10, R10_H,
                             R11, R11_H,
                             R12, R12_H,
                             R13, R13_H,
                             R14, R14_H);
 360 
// Single-register pointer classes.  Each lists both entries (NAME, NAME_H)
// of exactly one general register.

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for stack pointer (RSP)
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer (R15)
reg_class ptr_r15_reg(R15, R15_H);
 381 
// Class for all long registers (except RSP).
// R12 and R15 are also left out of this list.
reg_class long_reg(RAX, RAX_H,
                   RDX, RDX_H,
                   RBP, RBP_H,
                   RDI, RDI_H,
                   RSI, RSI_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R10, R10_H,
                   R11, R11_H,
                   R13, R13_H,
                   R14, R14_H);
 396 
// Class for all long registers except RAX, RDX (and RSP).
// R12 and R15 are also left out of this list.
reg_class long_no_rax_rdx_reg(RBP, RBP_H,
                              RDI, RDI_H,
                              RSI, RSI_H,
                              RCX, RCX_H,
                              RBX, RBX_H,
                              R8,  R8_H,
                              R9,  R9_H,
                              R10, R10_H,
                              R11, R11_H,
                              R13, R13_H,
                              R14, R14_H);
 409 
// Class for all long registers except RCX (and RSP).
// R12 and R15 are also left out of this list.
reg_class long_no_rcx_reg(RBP, RBP_H,
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RAX, RAX_H,
                          RDX, RDX_H,
                          RBX, RBX_H,
                          R8,  R8_H,
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
                          R13, R13_H,
                          R14, R14_H);
 423 
// Class for all long registers except RAX (and RSP).
// R12 and R15 are also left out of this list.
reg_class long_no_rax_reg(RBP, RBP_H,
                          RDX, RDX_H,
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RCX, RCX_H,
                          RBX, RBX_H,
                          R8,  R8_H,
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
                          R13, R13_H,
                          R14, R14_H);
 437 
// Single-register long classes.  Each lists both entries (NAME, NAME_H) of
// exactly one general register.

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R12 long register
reg_class long_r12_reg(R12, R12_H);
 449 
// Class for all int registers (except RSP).
// Only the NAME entries are listed (no NAME_H); R12 and R15 are also left out.
reg_class int_reg(RAX,
                  RDX,
                  RBP,
                  RDI,
                  RSI,
                  RCX,
                  RBX,
                  R8,
                  R9,
                  R10,
                  R11,
                  R13,
                  R14);
 464 
// Class for all int registers except RCX (and RSP).
// Only the NAME entries are listed (no NAME_H); R12 and R15 are also left out.
reg_class int_no_rcx_reg(RAX,
                         RDX,
                         RBP,
                         RDI,
                         RSI,
                         RBX,
                         R8,
                         R9,
                         R10,
                         R11,
                         R13,
                         R14);
 478 
 479 // Class for all int registers except RAX, RDX (and RSP)
 480 reg_class int_no_rax_rdx_reg(RBP,
 481                              RDI
 482                              RSI,
 483                              RCX,
 484                              RBX,
 485                              R8,
 486                              R9,
 487                              R10,
 488                              R11,
 489                              R13,
 490                              R14);
 491 
// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 512 
// Class for all float registers.
// Lists only the XMMn entry of each of the sixteen XMM registers (no XMMn_H).
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7,
                    XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15);
 530 
// Class for all double registers.
// Lists both entries (XMMn, XMMn_H) of each of the sixteen XMM registers.
reg_class double_reg(XMM0,  XMM0_H,
                     XMM1,  XMM1_H,
                     XMM2,  XMM2_H,
                     XMM3,  XMM3_H,
                     XMM4,  XMM4_H,
                     XMM5,  XMM5_H,
                     XMM6,  XMM6_H,
                     XMM7,  XMM7_H,
                     XMM8,  XMM8_H,
                     XMM9,  XMM9_H,
                     XMM10, XMM10_H,
                     XMM11, XMM11_H,
                     XMM12, XMM12_H,
                     XMM13, XMM13_H,
                     XMM14, XMM14_H,
                     XMM15, XMM15_H);
 548 %}
 549 
 550 
 551 //----------SOURCE BLOCK-------------------------------------------------------
 552 // This is a block of C++ code which provides values, functions, and
 553 // definitions necessary in the rest of the architecture description
 554 source %{
// Short names for two Assembler operand-format constants.  RELOC_DISP32 is
// what this file passes as the `format` argument of emit_d32_reloc.
#define   RELOC_IMM64    Assembler::imm64_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Lets code write "__ insn(...)" for "_masm.insn(...)"; an object named
// _masm must be in scope wherever the shorthand is used.
#define __ _masm.
 559 
 560 // !!!!! Special hack to get all types of calls to specify the byte offset
 561 //       from the start of the call to the point where the return address
 562 //       will point.
 563 int MachCallStaticJavaNode::ret_addr_offset()
 564 {
 565   return 5; // 5 bytes from start of call to where return address points
 566 }
 567 
 568 int MachCallDynamicJavaNode::ret_addr_offset()
 569 {
 570   return 15; // 15 bytes from start of call to where return address points
 571 }
 572 
 573 // In os_cpu .ad file
 574 // int MachCallRuntimeNode::ret_addr_offset()
 575 
 576 // Indicate if the safepoint node needs the polling page as an input.
 577 // Since amd64 does not have absolute addressing but RIP-relative
 578 // addressing and the polling page is within 2G, it doesn't.
 579 bool SafePointNode::needs_polling_address_input()
 580 {
 581   return false;
 582 }
 583 
 584 //
 585 // Compute padding required for nodes which need alignment
 586 //
 587 
 588 // The address of the call instruction needs to be 4-byte aligned to
 589 // ensure that it does not span a cache line so that it can be patched.
 590 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 591 {
 592   current_offset += 1; // skip call opcode byte
 593   return round_to(current_offset, alignment_required()) - current_offset;
 594 }
 595 
 596 // The address of the call instruction needs to be 4-byte aligned to
 597 // ensure that it does not span a cache line so that it can be patched.
 598 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 599 {
 600   current_offset += 11; // skip movq instruction + call opcode byte
 601   return round_to(current_offset, alignment_required()) - current_offset;
 602 }
 603 
 604 #ifndef PRODUCT
 605 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 606 {
 607   st->print("INT3");
 608 }
 609 #endif
 610 
 611 // EMIT_RM()
 612 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
 613 {
 614   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 615   *(cbuf.code_end()) = c;
 616   cbuf.set_code_end(cbuf.code_end() + 1);
 617 }
 618 
 619 // EMIT_CC()
 620 void emit_cc(CodeBuffer &cbuf, int f1, int f2)
 621 {
 622   unsigned char c = (unsigned char) (f1 | f2);
 623   *(cbuf.code_end()) = c;
 624   cbuf.set_code_end(cbuf.code_end() + 1);
 625 }
 626 
 627 // EMIT_OPCODE()
 628 void emit_opcode(CodeBuffer &cbuf, int code)
 629 {
 630   *(cbuf.code_end()) = (unsigned char) code;
 631   cbuf.set_code_end(cbuf.code_end() + 1);
 632 }
 633 
 634 // EMIT_OPCODE() w/ relocation information
 635 void emit_opcode(CodeBuffer &cbuf,
 636                  int code, relocInfo::relocType reloc, int offset, int format)
 637 {
 638   cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
 639   emit_opcode(cbuf, code);
 640 }
 641 
 642 // EMIT_D8()
 643 void emit_d8(CodeBuffer &cbuf, int d8)
 644 {
 645   *(cbuf.code_end()) = (unsigned char) d8;
 646   cbuf.set_code_end(cbuf.code_end() + 1);
 647 }
 648 
 649 // EMIT_D16()
 650 void emit_d16(CodeBuffer &cbuf, int d16)
 651 {
 652   *((short *)(cbuf.code_end())) = d16;
 653   cbuf.set_code_end(cbuf.code_end() + 2);
 654 }
 655 
 656 // EMIT_D32()
 657 void emit_d32(CodeBuffer &cbuf, int d32)
 658 {
 659   *((int *)(cbuf.code_end())) = d32;
 660   cbuf.set_code_end(cbuf.code_end() + 4);
 661 }
 662 
 663 // EMIT_D64()
 664 void emit_d64(CodeBuffer &cbuf, int64_t d64)
 665 {
 666   *((int64_t*) (cbuf.code_end())) = d64;
 667   cbuf.set_code_end(cbuf.code_end() + 8);
 668 }
 669 
 670 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 671 void emit_d32_reloc(CodeBuffer& cbuf,
 672                     int d32,
 673                     relocInfo::relocType reloc,
 674                     int format)
 675 {
 676   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 677   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 678 
 679   *((int*) (cbuf.code_end())) = d32;
 680   cbuf.set_code_end(cbuf.code_end() + 4);
 681 }
 682 
 683 // emit 32 bit value and construct relocation entry from RelocationHolder
 684 void emit_d32_reloc(CodeBuffer& cbuf,
 685                     int d32,
 686                     RelocationHolder const& rspec,
 687                     int format)
 688 {
 689 #ifdef ASSERT
 690   if (rspec.reloc()->type() == relocInfo::oop_type &&
 691       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 692     assert(oop((intptr_t)d32)->is_oop() && oop((intptr_t)d32)->is_perm(), "cannot embed non-perm oops in code");
 693   }
 694 #endif
 695   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 696 
 697   *((int* )(cbuf.code_end())) = d32;
 698   cbuf.set_code_end(cbuf.code_end() + 4);
 699 }
 700 
 701 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 702   address next_ip = cbuf.code_end() + 4;
 703   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 704                  external_word_Relocation::spec(addr),
 705                  RELOC_DISP32);
 706 }
 707 
 708 
 709 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 710 void emit_d64_reloc(CodeBuffer& cbuf,
 711                     int64_t d64,
 712                     relocInfo::relocType reloc,
 713                     int format)
 714 {
 715   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 716 
 717   *((int64_t*) (cbuf.code_end())) = d64;
 718   cbuf.set_code_end(cbuf.code_end() + 8);
 719 }
 720 
 721 // emit 64 bit value and construct relocation entry from RelocationHolder
 722 void emit_d64_reloc(CodeBuffer& cbuf,
 723                     int64_t d64,
 724                     RelocationHolder const& rspec,
 725                     int format)
 726 {
 727 #ifdef ASSERT
 728   if (rspec.reloc()->type() == relocInfo::oop_type &&
 729       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 730     assert(oop(d64)->is_oop() && oop(d64)->is_perm(),
 731            "cannot embed non-perm oops in code");
 732   }
 733 #endif
 734   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 735 
 736   *((int64_t*) (cbuf.code_end())) = d64;
 737   cbuf.set_code_end(cbuf.code_end() + 8);
 738 }
 739 
 740 // Access stack slot for load or store
 741 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 742 {
 743   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 744   if (-0x80 <= disp && disp < 0x80) {
 745     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 746     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 747     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 748   } else {
 749     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 750     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 751     emit_d32(cbuf, disp);     // Displacement // R/M byte
 752   }
 753 }
 754 
 755    // rRegI ereg, memory mem) %{    // emit_reg_mem
// Emits the ModRM byte, the optional SIB byte and the optional displacement
// for a register/memory operand.  No opcode or prefix is emitted here.
//
//   reg          register number; its low 3 bits go in the ModRM reg field
//   base, index  register numbers; only their low 3 bits are encoded
//   scale        written unchanged into the top field of the SIB byte
//   disp         displacement; emitted as 8 or 32 bits, or not at all
//   disp_is_oop  must be false (asserted on entry)
//
// NOTE(review): because of the entry assert, the disp_is_oop branches below
// look unreachable in builds where assert is active -- confirm whether they
// are still needed.
void encode_RegMem(CodeBuffer &cbuf,
                   int reg,
                   int base, int index, int scale, int disp, bool disp_is_oop)
{
  assert(!disp_is_oop, "cannot have disp");
  // Only the low three bits fit in the ModRM/SIB fields.
  int regenc = reg & 7;
  int baseenc = base & 7;
  int indexenc = index & 7;

  // There is no index & no scale, use form without SIB byte
  // (not taken when base is RSP or R12; those go through the SIB path below)
  if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
    if (disp == 0 && base != RBP_enc && base != R13_enc) {
      emit_rm(cbuf, 0x0, regenc, baseenc); // *
    } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
      // If 8-bit displacement, mode 0x1
      emit_rm(cbuf, 0x1, regenc, baseenc); // *
      emit_d8(cbuf, disp);
    } else {
      // If 32-bit displacement
      if (base == -1) { // Special flag for absolute address
        // mode 0x0 with r/m 0x5, followed by a 32-bit displacement
        emit_rm(cbuf, 0x0, regenc, 0x5); // *
        if (disp_is_oop) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      } else {
        // Normal base + offset
        emit_rm(cbuf, 0x2, regenc, baseenc); // *
        if (disp_is_oop) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      }
    }
  } else {
    // Else, encode with the SIB byte (ModRM r/m field is 0x4 in every case)
    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
    if (disp == 0 && base != RBP_enc && base != R13_enc) {
      // If no displacement
      emit_rm(cbuf, 0x0, regenc, 0x4); // *
      emit_rm(cbuf, scale, indexenc, baseenc);
    } else {
      if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
        // If 8-bit displacement, mode 0x1
        emit_rm(cbuf, 0x1, regenc, 0x4); // *
        emit_rm(cbuf, scale, indexenc, baseenc);
        emit_d8(cbuf, disp);
      } else {
        // If 32-bit displacement
        // NOTE(review): when base == 0x04, baseenc is also 0x04, so both
        // arms emit the same two bytes.
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, regenc, 0x4);
          emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
        } else {
          emit_rm(cbuf, 0x2, regenc, 0x4);
          emit_rm(cbuf, scale, indexenc, baseenc); // *
        }
        if (disp_is_oop) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      }
    }
  }
}
 824 
 825 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 826 {
 827   if (dstenc != srcenc) {
 828     if (dstenc < 8) {
 829       if (srcenc >= 8) {
 830         emit_opcode(cbuf, Assembler::REX_B);
 831         srcenc -= 8;
 832       }
 833     } else {
 834       if (srcenc < 8) {
 835         emit_opcode(cbuf, Assembler::REX_R);
 836       } else {
 837         emit_opcode(cbuf, Assembler::REX_RB);
 838         srcenc -= 8;
 839       }
 840       dstenc -= 8;
 841     }
 842 
 843     emit_opcode(cbuf, 0x8B);
 844     emit_rm(cbuf, 0x3, dstenc, srcenc);
 845   }
 846 }
 847 
 848 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 849   if( dst_encoding == src_encoding ) {
 850     // reg-reg copy, use an empty encoding
 851   } else {
 852     MacroAssembler _masm(&cbuf);
 853 
 854     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 855   }
 856 }
 857 
 858 
 859 //=============================================================================
 860 #ifndef PRODUCT
 861 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 862 {
 863   Compile* C = ra_->C;
 864 
 865   int framesize = C->frame_slots() << LogBytesPerInt;
 866   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 867   // Remove wordSize for return adr already pushed
 868   // and another for the RBP we are going to save
 869   framesize -= 2*wordSize;
 870   bool need_nop = true;
 871 
 872   // Calls to C2R adapters often do not accept exceptional returns.
 873   // We require that their callers must bang for them.  But be
 874   // careful, because some VM calls (such as call site linkage) can
 875   // use several kilobytes of stack.  But the stack safety zone should
 876   // account for that.  See bugs 4446381, 4468289, 4497237.
 877   if (C->need_stack_bang(framesize)) {
 878     st->print_cr("# stack bang"); st->print("\t");
 879     need_nop = false;
 880   }
 881   st->print_cr("pushq   rbp"); st->print("\t");
 882 
 883   if (VerifyStackAtCalls) {
 884     // Majik cookie to verify stack depth
 885     st->print_cr("pushq   0xffffffffbadb100d"
 886                   "\t# Majik cookie for stack depth check");
 887     st->print("\t");
 888     framesize -= wordSize; // Remove 2 for cookie
 889     need_nop = false;
 890   }
 891 
 892   if (framesize) {
 893     st->print("subq    rsp, #%d\t# Create frame", framesize);
 894     if (framesize < 0x80 && need_nop) {
 895       st->print("\n\tnop\t# nop for patch_verified_entry");
 896     }
 897   }
 898 }
 899 #endif
 900 
 901 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 902 {
 903   Compile* C = ra_->C;
 904 
 905   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 906   // NativeJump::patch_verified_entry will be able to patch out the entry
 907   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 908   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 909   // 6 bytes. So if we don't do the fldcw or the push then we must
 910   // use the 6 byte frame allocation even if we have no frame. :-(
 911   // If method sets FPU control word do it now
 912 
 913   int framesize = C->frame_slots() << LogBytesPerInt;
 914   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 915   // Remove wordSize for return adr already pushed
 916   // and another for the RBP we are going to save
 917   framesize -= 2*wordSize;
 918   bool need_nop = true;
 919 
 920   // Calls to C2R adapters often do not accept exceptional returns.
 921   // We require that their callers must bang for them.  But be
 922   // careful, because some VM calls (such as call site linkage) can
 923   // use several kilobytes of stack.  But the stack safety zone should
 924   // account for that.  See bugs 4446381, 4468289, 4497237.
 925   if (C->need_stack_bang(framesize)) {
 926     MacroAssembler masm(&cbuf);
 927     masm.generate_stack_overflow_check(framesize);
 928     need_nop = false;
 929   }
 930 
 931   // We always push rbp so that on return to interpreter rbp will be
 932   // restored correctly and we can correct the stack.
 933   emit_opcode(cbuf, 0x50 | RBP_enc);
 934 
 935   if (VerifyStackAtCalls) {
 936     // Majik cookie to verify stack depth
 937     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 938     emit_d32(cbuf, 0xbadb100d);
 939     framesize -= wordSize; // Remove 2 for cookie
 940     need_nop = false;
 941   }
 942 
 943   if (framesize) {
 944     emit_opcode(cbuf, Assembler::REX_W);
 945     if (framesize < 0x80) {
 946       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 947       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 948       emit_d8(cbuf, framesize);
 949       if (need_nop) {
 950         emit_opcode(cbuf, 0x90); // nop
 951       }
 952     } else {
 953       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 954       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 955       emit_d32(cbuf, framesize);
 956     }
 957   }
 958 
 959   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
 960 
 961 #ifdef ASSERT
 962   if (VerifyStackAtCalls) {
 963     Label L;
 964     MacroAssembler masm(&cbuf);
 965     masm.pushq(rax);
 966     masm.movq(rax, rsp);
 967     masm.andq(rax, StackAlignmentInBytes-1);
 968     masm.cmpq(rax, StackAlignmentInBytes-wordSize);
 969     masm.popq(rax);
 970     masm.jcc(Assembler::equal, L);
 971     masm.stop("Stack is not properly aligned!");
 972     masm.bind(L);
 973   }
 974 #endif
 975 }
 976 
 977 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 978 {
 979   return MachNode::size(ra_); // too many variables; just compute it
 980                               // the hard way
 981 }
 982 
 983 int MachPrologNode::reloc() const
 984 {
 985   return 0; // a large enough number
 986 }
 987 
 988 //=============================================================================
 989 #ifndef PRODUCT
 990 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 991 {
 992   Compile* C = ra_->C;
 993   int framesize = C->frame_slots() << LogBytesPerInt;
 994   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 995   // Remove word for return adr already pushed
 996   // and RBP
 997   framesize -= 2*wordSize;
 998 
 999   if (framesize) {
1000     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
1001     st->print("\t");
1002   }
1003 
1004   st->print_cr("popq\trbp");
1005   if (do_polling() && C->is_method_compilation()) {
1006     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1007                   "# Safepoint: poll for GC");
1008     st->print("\t");
1009   }
1010 }
1011 #endif
1012 
1013 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1014 {
1015   Compile* C = ra_->C;
1016   int framesize = C->frame_slots() << LogBytesPerInt;
1017   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1018   // Remove word for return adr already pushed
1019   // and RBP
1020   framesize -= 2*wordSize;
1021 
1022   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1023 
1024   if (framesize) {
1025     emit_opcode(cbuf, Assembler::REX_W);
1026     if (framesize < 0x80) {
1027       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1028       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1029       emit_d8(cbuf, framesize);
1030     } else {
1031       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1032       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1033       emit_d32(cbuf, framesize);
1034     }
1035   }
1036 
1037   // popq rbp
1038   emit_opcode(cbuf, 0x58 | RBP_enc);
1039 
1040   if (do_polling() && C->is_method_compilation()) {
1041     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1042     // XXX reg_mem doesn't support RIP-relative addressing yet
1043     cbuf.set_inst_mark();
1044     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1045     emit_opcode(cbuf, 0x85); // testl
1046     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1047     // cbuf.inst_mark() is beginning of instruction
1048     emit_d32_reloc(cbuf, os::get_polling_page());
1049 //                    relocInfo::poll_return_type,
1050   }
1051 }
1052 
1053 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1054 {
1055   Compile* C = ra_->C;
1056   int framesize = C->frame_slots() << LogBytesPerInt;
1057   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1058   // Remove word for return adr already pushed
1059   // and RBP
1060   framesize -= 2*wordSize;
1061 
1062   uint size = 0;
1063 
1064   if (do_polling() && C->is_method_compilation()) {
1065     size += 6;
1066   }
1067 
1068   // count popq rbp
1069   size++;
1070 
1071   if (framesize) {
1072     if (framesize < 0x80) {
1073       size += 4;
1074     } else if (framesize) {
1075       size += 7;
1076     }
1077   }
1078 
1079   return size;
1080 }
1081 
1082 int MachEpilogNode::reloc() const
1083 {
1084   return 2; // a large enough number
1085 }
1086 
1087 const Pipeline* MachEpilogNode::pipeline() const
1088 {
1089   return MachNode::pipeline_class();
1090 }
1091 
1092 int MachEpilogNode::safepoint_offset() const
1093 {
1094   return 0;
1095 }
1096 
1097 //=============================================================================
1098 
// Register class of one half of a spill-copy operand, as computed by
// rc_class().
enum RC {
  rc_bad   = 0,  // not a valid OptoReg
  rc_int   = 1,  // VMReg that is a general purpose Register
  rc_float = 2,  // VMReg that is an XMM register
  rc_stack = 3   // stack slot
};
1105 
1106 static enum RC rc_class(OptoReg::Name reg)
1107 {
1108   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1109 
1110   if (OptoReg::is_stack(reg)) return rc_stack;
1111 
1112   VMReg r = OptoReg::as_VMReg(reg);
1113 
1114   if (r->is_Register()) return rc_int;
1115 
1116   assert(r->is_XMMRegister(), "must be");
1117   return rc_float;
1118 }
1119 
1120 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1121                                        PhaseRegAlloc* ra_,
1122                                        bool do_size,
1123                                        outputStream* st) const
1124 {
1125 
1126   // Get registers to move
1127   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1128   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1129   OptoReg::Name dst_second = ra_->get_reg_second(this);
1130   OptoReg::Name dst_first = ra_->get_reg_first(this);
1131 
1132   enum RC src_second_rc = rc_class(src_second);
1133   enum RC src_first_rc = rc_class(src_first);
1134   enum RC dst_second_rc = rc_class(dst_second);
1135   enum RC dst_first_rc = rc_class(dst_first);
1136 
1137   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1138          "must move at least 1 register" );
1139 
1140   if (src_first == dst_first && src_second == dst_second) {
1141     // Self copy, no move
1142     return 0;
1143   } else if (src_first_rc == rc_stack) {
1144     // mem ->
1145     if (dst_first_rc == rc_stack) {
1146       // mem -> mem
1147       assert(src_second != dst_first, "overlap");
1148       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1149           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1150         // 64-bit
1151         int src_offset = ra_->reg2offset(src_first);
1152         int dst_offset = ra_->reg2offset(dst_first);
1153         if (cbuf) {
1154           emit_opcode(*cbuf, 0xFF);
1155           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1156 
1157           emit_opcode(*cbuf, 0x8F);
1158           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1159 
1160 #ifndef PRODUCT
1161         } else if (!do_size) {
1162           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1163                      "popq    [rsp + #%d]",
1164                      src_offset,
1165                      dst_offset);
1166 #endif
1167         }
1168         return
1169           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1170           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1171       } else {
1172         // 32-bit
1173         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1174         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1175         // No pushl/popl, so:
1176         int src_offset = ra_->reg2offset(src_first);
1177         int dst_offset = ra_->reg2offset(dst_first);
1178         if (cbuf) {
1179           emit_opcode(*cbuf, Assembler::REX_W);
1180           emit_opcode(*cbuf, 0x89);
1181           emit_opcode(*cbuf, 0x44);
1182           emit_opcode(*cbuf, 0x24);
1183           emit_opcode(*cbuf, 0xF8);
1184 
1185           emit_opcode(*cbuf, 0x8B);
1186           encode_RegMem(*cbuf,
1187                         RAX_enc,
1188                         RSP_enc, 0x4, 0, src_offset,
1189                         false);
1190 
1191           emit_opcode(*cbuf, 0x89);
1192           encode_RegMem(*cbuf,
1193                         RAX_enc,
1194                         RSP_enc, 0x4, 0, dst_offset,
1195                         false);
1196 
1197           emit_opcode(*cbuf, Assembler::REX_W);
1198           emit_opcode(*cbuf, 0x8B);
1199           emit_opcode(*cbuf, 0x44);
1200           emit_opcode(*cbuf, 0x24);
1201           emit_opcode(*cbuf, 0xF8);
1202 
1203 #ifndef PRODUCT
1204         } else if (!do_size) {
1205           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1206                      "movl    rax, [rsp + #%d]\n\t"
1207                      "movl    [rsp + #%d], rax\n\t"
1208                      "movq    rax, [rsp - #8]",
1209                      src_offset,
1210                      dst_offset);
1211 #endif
1212         }
1213         return
1214           5 + // movq
1215           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1216           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1217           5; // movq
1218       }
1219     } else if (dst_first_rc == rc_int) {
1220       // mem -> gpr
1221       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1222           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1223         // 64-bit
1224         int offset = ra_->reg2offset(src_first);
1225         if (cbuf) {
1226           if (Matcher::_regEncode[dst_first] < 8) {
1227             emit_opcode(*cbuf, Assembler::REX_W);
1228           } else {
1229             emit_opcode(*cbuf, Assembler::REX_WR);
1230           }
1231           emit_opcode(*cbuf, 0x8B);
1232           encode_RegMem(*cbuf,
1233                         Matcher::_regEncode[dst_first],
1234                         RSP_enc, 0x4, 0, offset,
1235                         false);
1236 #ifndef PRODUCT
1237         } else if (!do_size) {
1238           st->print("movq    %s, [rsp + #%d]\t# spill",
1239                      Matcher::regName[dst_first],
1240                      offset);
1241 #endif
1242         }
1243         return
1244           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1245       } else {
1246         // 32-bit
1247         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1248         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1249         int offset = ra_->reg2offset(src_first);
1250         if (cbuf) {
1251           if (Matcher::_regEncode[dst_first] >= 8) {
1252             emit_opcode(*cbuf, Assembler::REX_R);
1253           }
1254           emit_opcode(*cbuf, 0x8B);
1255           encode_RegMem(*cbuf,
1256                         Matcher::_regEncode[dst_first],
1257                         RSP_enc, 0x4, 0, offset,
1258                         false);
1259 #ifndef PRODUCT
1260         } else if (!do_size) {
1261           st->print("movl    %s, [rsp + #%d]\t# spill",
1262                      Matcher::regName[dst_first],
1263                      offset);
1264 #endif
1265         }
1266         return
1267           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1268           ((Matcher::_regEncode[dst_first] < 8)
1269            ? 3
1270            : 4); // REX
1271       }
1272     } else if (dst_first_rc == rc_float) {
1273       // mem-> xmm
1274       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1275           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1276         // 64-bit
1277         int offset = ra_->reg2offset(src_first);
1278         if (cbuf) {
1279           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1280           if (Matcher::_regEncode[dst_first] >= 8) {
1281             emit_opcode(*cbuf, Assembler::REX_R);
1282           }
1283           emit_opcode(*cbuf, 0x0F);
1284           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1285           encode_RegMem(*cbuf,
1286                         Matcher::_regEncode[dst_first],
1287                         RSP_enc, 0x4, 0, offset,
1288                         false);
1289 #ifndef PRODUCT
1290         } else if (!do_size) {
1291           st->print("%s  %s, [rsp + #%d]\t# spill",
1292                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1293                      Matcher::regName[dst_first],
1294                      offset);
1295 #endif
1296         }
1297         return
1298           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1299           ((Matcher::_regEncode[dst_first] < 8)
1300            ? 5
1301            : 6); // REX
1302       } else {
1303         // 32-bit
1304         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1305         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1306         int offset = ra_->reg2offset(src_first);
1307         if (cbuf) {
1308           emit_opcode(*cbuf, 0xF3);
1309           if (Matcher::_regEncode[dst_first] >= 8) {
1310             emit_opcode(*cbuf, Assembler::REX_R);
1311           }
1312           emit_opcode(*cbuf, 0x0F);
1313           emit_opcode(*cbuf, 0x10);
1314           encode_RegMem(*cbuf,
1315                         Matcher::_regEncode[dst_first],
1316                         RSP_enc, 0x4, 0, offset,
1317                         false);
1318 #ifndef PRODUCT
1319         } else if (!do_size) {
1320           st->print("movss   %s, [rsp + #%d]\t# spill",
1321                      Matcher::regName[dst_first],
1322                      offset);
1323 #endif
1324         }
1325         return
1326           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1327           ((Matcher::_regEncode[dst_first] < 8)
1328            ? 5
1329            : 6); // REX
1330       }
1331     }
1332   } else if (src_first_rc == rc_int) {
1333     // gpr ->
1334     if (dst_first_rc == rc_stack) {
1335       // gpr -> mem
1336       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1337           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1338         // 64-bit
1339         int offset = ra_->reg2offset(dst_first);
1340         if (cbuf) {
1341           if (Matcher::_regEncode[src_first] < 8) {
1342             emit_opcode(*cbuf, Assembler::REX_W);
1343           } else {
1344             emit_opcode(*cbuf, Assembler::REX_WR);
1345           }
1346           emit_opcode(*cbuf, 0x89);
1347           encode_RegMem(*cbuf,
1348                         Matcher::_regEncode[src_first],
1349                         RSP_enc, 0x4, 0, offset,
1350                         false);
1351 #ifndef PRODUCT
1352         } else if (!do_size) {
1353           st->print("movq    [rsp + #%d], %s\t# spill",
1354                      offset,
1355                      Matcher::regName[src_first]);
1356 #endif
1357         }
1358         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1359       } else {
1360         // 32-bit
1361         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1362         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1363         int offset = ra_->reg2offset(dst_first);
1364         if (cbuf) {
1365           if (Matcher::_regEncode[src_first] >= 8) {
1366             emit_opcode(*cbuf, Assembler::REX_R);
1367           }
1368           emit_opcode(*cbuf, 0x89);
1369           encode_RegMem(*cbuf,
1370                         Matcher::_regEncode[src_first],
1371                         RSP_enc, 0x4, 0, offset,
1372                         false);
1373 #ifndef PRODUCT
1374         } else if (!do_size) {
1375           st->print("movl    [rsp + #%d], %s\t# spill",
1376                      offset,
1377                      Matcher::regName[src_first]);
1378 #endif
1379         }
1380         return
1381           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1382           ((Matcher::_regEncode[src_first] < 8)
1383            ? 3
1384            : 4); // REX
1385       }
1386     } else if (dst_first_rc == rc_int) {
1387       // gpr -> gpr
1388       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1389           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1390         // 64-bit
1391         if (cbuf) {
1392           if (Matcher::_regEncode[dst_first] < 8) {
1393             if (Matcher::_regEncode[src_first] < 8) {
1394               emit_opcode(*cbuf, Assembler::REX_W);
1395             } else {
1396               emit_opcode(*cbuf, Assembler::REX_WB);
1397             }
1398           } else {
1399             if (Matcher::_regEncode[src_first] < 8) {
1400               emit_opcode(*cbuf, Assembler::REX_WR);
1401             } else {
1402               emit_opcode(*cbuf, Assembler::REX_WRB);
1403             }
1404           }
1405           emit_opcode(*cbuf, 0x8B);
1406           emit_rm(*cbuf, 0x3,
1407                   Matcher::_regEncode[dst_first] & 7,
1408                   Matcher::_regEncode[src_first] & 7);
1409 #ifndef PRODUCT
1410         } else if (!do_size) {
1411           st->print("movq    %s, %s\t# spill",
1412                      Matcher::regName[dst_first],
1413                      Matcher::regName[src_first]);
1414 #endif
1415         }
1416         return 3; // REX
1417       } else {
1418         // 32-bit
1419         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1420         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1421         if (cbuf) {
1422           if (Matcher::_regEncode[dst_first] < 8) {
1423             if (Matcher::_regEncode[src_first] >= 8) {
1424               emit_opcode(*cbuf, Assembler::REX_B);
1425             }
1426           } else {
1427             if (Matcher::_regEncode[src_first] < 8) {
1428               emit_opcode(*cbuf, Assembler::REX_R);
1429             } else {
1430               emit_opcode(*cbuf, Assembler::REX_RB);
1431             }
1432           }
1433           emit_opcode(*cbuf, 0x8B);
1434           emit_rm(*cbuf, 0x3,
1435                   Matcher::_regEncode[dst_first] & 7,
1436                   Matcher::_regEncode[src_first] & 7);
1437 #ifndef PRODUCT
1438         } else if (!do_size) {
1439           st->print("movl    %s, %s\t# spill",
1440                      Matcher::regName[dst_first],
1441                      Matcher::regName[src_first]);
1442 #endif
1443         }
1444         return
1445           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1446           ? 2
1447           : 3; // REX
1448       }
1449     } else if (dst_first_rc == rc_float) {
1450       // gpr -> xmm
1451       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1452           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1453         // 64-bit
1454         if (cbuf) {
1455           emit_opcode(*cbuf, 0x66);
1456           if (Matcher::_regEncode[dst_first] < 8) {
1457             if (Matcher::_regEncode[src_first] < 8) {
1458               emit_opcode(*cbuf, Assembler::REX_W);
1459             } else {
1460               emit_opcode(*cbuf, Assembler::REX_WB);
1461             }
1462           } else {
1463             if (Matcher::_regEncode[src_first] < 8) {
1464               emit_opcode(*cbuf, Assembler::REX_WR);
1465             } else {
1466               emit_opcode(*cbuf, Assembler::REX_WRB);
1467             }
1468           }
1469           emit_opcode(*cbuf, 0x0F);
1470           emit_opcode(*cbuf, 0x6E);
1471           emit_rm(*cbuf, 0x3,
1472                   Matcher::_regEncode[dst_first] & 7,
1473                   Matcher::_regEncode[src_first] & 7);
1474 #ifndef PRODUCT
1475         } else if (!do_size) {
1476           st->print("movdq   %s, %s\t# spill",
1477                      Matcher::regName[dst_first],
1478                      Matcher::regName[src_first]);
1479 #endif
1480         }
1481         return 5; // REX
1482       } else {
1483         // 32-bit
1484         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1485         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1486         if (cbuf) {
1487           emit_opcode(*cbuf, 0x66);
1488           if (Matcher::_regEncode[dst_first] < 8) {
1489             if (Matcher::_regEncode[src_first] >= 8) {
1490               emit_opcode(*cbuf, Assembler::REX_B);
1491             }
1492           } else {
1493             if (Matcher::_regEncode[src_first] < 8) {
1494               emit_opcode(*cbuf, Assembler::REX_R);
1495             } else {
1496               emit_opcode(*cbuf, Assembler::REX_RB);
1497             }
1498           }
1499           emit_opcode(*cbuf, 0x0F);
1500           emit_opcode(*cbuf, 0x6E);
1501           emit_rm(*cbuf, 0x3,
1502                   Matcher::_regEncode[dst_first] & 7,
1503                   Matcher::_regEncode[src_first] & 7);
1504 #ifndef PRODUCT
1505         } else if (!do_size) {
1506           st->print("movdl   %s, %s\t# spill",
1507                      Matcher::regName[dst_first],
1508                      Matcher::regName[src_first]);
1509 #endif
1510         }
1511         return
1512           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1513           ? 4
1514           : 5; // REX
1515       }
1516     }
1517   } else if (src_first_rc == rc_float) {
1518     // xmm ->
1519     if (dst_first_rc == rc_stack) {
1520       // xmm -> mem
1521       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1522           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1523         // 64-bit
1524         int offset = ra_->reg2offset(dst_first);
1525         if (cbuf) {
1526           emit_opcode(*cbuf, 0xF2);
1527           if (Matcher::_regEncode[src_first] >= 8) {
1528               emit_opcode(*cbuf, Assembler::REX_R);
1529           }
1530           emit_opcode(*cbuf, 0x0F);
1531           emit_opcode(*cbuf, 0x11);
1532           encode_RegMem(*cbuf,
1533                         Matcher::_regEncode[src_first],
1534                         RSP_enc, 0x4, 0, offset,
1535                         false);
1536 #ifndef PRODUCT
1537         } else if (!do_size) {
1538           st->print("movsd   [rsp + #%d], %s\t# spill",
1539                      offset,
1540                      Matcher::regName[src_first]);
1541 #endif
1542         }
1543         return
1544           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1545           ((Matcher::_regEncode[src_first] < 8)
1546            ? 5
1547            : 6); // REX
1548       } else {
1549         // 32-bit
1550         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1551         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1552         int offset = ra_->reg2offset(dst_first);
1553         if (cbuf) {
1554           emit_opcode(*cbuf, 0xF3);
1555           if (Matcher::_regEncode[src_first] >= 8) {
1556               emit_opcode(*cbuf, Assembler::REX_R);
1557           }
1558           emit_opcode(*cbuf, 0x0F);
1559           emit_opcode(*cbuf, 0x11);
1560           encode_RegMem(*cbuf,
1561                         Matcher::_regEncode[src_first],
1562                         RSP_enc, 0x4, 0, offset,
1563                         false);
1564 #ifndef PRODUCT
1565         } else if (!do_size) {
1566           st->print("movss   [rsp + #%d], %s\t# spill",
1567                      offset,
1568                      Matcher::regName[src_first]);
1569 #endif
1570         }
1571         return
1572           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1573           ((Matcher::_regEncode[src_first] < 8)
1574            ? 5
1575            : 6); // REX
1576       }
1577     } else if (dst_first_rc == rc_int) {
1578       // xmm -> gpr
1579       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1580           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1581         // 64-bit
1582         if (cbuf) {
1583           emit_opcode(*cbuf, 0x66);
1584           if (Matcher::_regEncode[dst_first] < 8) {
1585             if (Matcher::_regEncode[src_first] < 8) {
1586               emit_opcode(*cbuf, Assembler::REX_W);
1587             } else {
1588               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1589             }
1590           } else {
1591             if (Matcher::_regEncode[src_first] < 8) {
1592               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1593             } else {
1594               emit_opcode(*cbuf, Assembler::REX_WRB);
1595             }
1596           }
1597           emit_opcode(*cbuf, 0x0F);
1598           emit_opcode(*cbuf, 0x7E);
1599           emit_rm(*cbuf, 0x3,
1600                   Matcher::_regEncode[dst_first] & 7,
1601                   Matcher::_regEncode[src_first] & 7);
1602 #ifndef PRODUCT
1603         } else if (!do_size) {
1604           st->print("movdq   %s, %s\t# spill",
1605                      Matcher::regName[dst_first],
1606                      Matcher::regName[src_first]);
1607 #endif
1608         }
1609         return 5; // REX
1610       } else {
1611         // 32-bit
1612         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1613         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1614         if (cbuf) {
1615           emit_opcode(*cbuf, 0x66);
1616           if (Matcher::_regEncode[dst_first] < 8) {
1617             if (Matcher::_regEncode[src_first] >= 8) {
1618               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1619             }
1620           } else {
1621             if (Matcher::_regEncode[src_first] < 8) {
1622               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1623             } else {
1624               emit_opcode(*cbuf, Assembler::REX_RB);
1625             }
1626           }
1627           emit_opcode(*cbuf, 0x0F);
1628           emit_opcode(*cbuf, 0x7E);
1629           emit_rm(*cbuf, 0x3,
1630                   Matcher::_regEncode[dst_first] & 7,
1631                   Matcher::_regEncode[src_first] & 7);
1632 #ifndef PRODUCT
1633         } else if (!do_size) {
1634           st->print("movdl   %s, %s\t# spill",
1635                      Matcher::regName[dst_first],
1636                      Matcher::regName[src_first]);
1637 #endif
1638         }
1639         return
1640           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1641           ? 4
1642           : 5; // REX
1643       }
1644     } else if (dst_first_rc == rc_float) {
1645       // xmm -> xmm
1646       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1647           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1648         // 64-bit
1649         if (cbuf) {
1650           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1651           if (Matcher::_regEncode[dst_first] < 8) {
1652             if (Matcher::_regEncode[src_first] >= 8) {
1653               emit_opcode(*cbuf, Assembler::REX_B);
1654             }
1655           } else {
1656             if (Matcher::_regEncode[src_first] < 8) {
1657               emit_opcode(*cbuf, Assembler::REX_R);
1658             } else {
1659               emit_opcode(*cbuf, Assembler::REX_RB);
1660             }
1661           }
1662           emit_opcode(*cbuf, 0x0F);
1663           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1664           emit_rm(*cbuf, 0x3,
1665                   Matcher::_regEncode[dst_first] & 7,
1666                   Matcher::_regEncode[src_first] & 7);
1667 #ifndef PRODUCT
1668         } else if (!do_size) {
1669           st->print("%s  %s, %s\t# spill",
1670                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1671                      Matcher::regName[dst_first],
1672                      Matcher::regName[src_first]);
1673 #endif
1674         }
1675         return
1676           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1677           ? 4
1678           : 5; // REX
1679       } else {
1680         // 32-bit
1681         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1682         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1683         if (cbuf) {
1684           if (!UseXmmRegToRegMoveAll)
1685             emit_opcode(*cbuf, 0xF3);
1686           if (Matcher::_regEncode[dst_first] < 8) {
1687             if (Matcher::_regEncode[src_first] >= 8) {
1688               emit_opcode(*cbuf, Assembler::REX_B);
1689             }
1690           } else {
1691             if (Matcher::_regEncode[src_first] < 8) {
1692               emit_opcode(*cbuf, Assembler::REX_R);
1693             } else {
1694               emit_opcode(*cbuf, Assembler::REX_RB);
1695             }
1696           }
1697           emit_opcode(*cbuf, 0x0F);
1698           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1699           emit_rm(*cbuf, 0x3,
1700                   Matcher::_regEncode[dst_first] & 7,
1701                   Matcher::_regEncode[src_first] & 7);
1702 #ifndef PRODUCT
1703         } else if (!do_size) {
1704           st->print("%s  %s, %s\t# spill",
1705                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1706                      Matcher::regName[dst_first],
1707                      Matcher::regName[src_first]);
1708 #endif
1709         }
1710         return
1711           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1712           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1713           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1714       }
1715     }
1716   }
1717 
1718   assert(0," foo ");
1719   Unimplemented();
1720 
1721   return 0;
1722 }
1723 
1724 #ifndef PRODUCT
1725 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1726 {
1727   implementation(NULL, ra_, false, st);
1728 }
1729 #endif
1730 
1731 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1732 {
1733   implementation(&cbuf, ra_, false, NULL);
1734 }
1735 
1736 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1737 {
1738   return implementation(NULL, ra_, true, NULL);
1739 }
1740 
1741 //=============================================================================
1742 #ifndef PRODUCT
1743 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1744 {
1745   st->print("nop \t# %d bytes pad for loops and calls", _count);
1746 }
1747 #endif
1748 
1749 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1750 {
1751   MacroAssembler _masm(&cbuf);
1752   __ nop(_count);
1753 }
1754 
1755 uint MachNopNode::size(PhaseRegAlloc*) const
1756 {
1757   return _count;
1758 }
1759 
1760 
1761 //=============================================================================
1762 #ifndef PRODUCT
1763 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1764 {
1765   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1766   int reg = ra_->get_reg_first(this);
1767   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1768             Matcher::regName[reg], offset);
1769 }
1770 #endif
1771 
1772 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1773 {
1774   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1775   int reg = ra_->get_encode(this);
1776   if (offset >= 0x80) {
1777     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1778     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1779     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1780     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1781     emit_d32(cbuf, offset);
1782   } else {
1783     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1784     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1785     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1786     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1787     emit_d8(cbuf, offset);
1788   }
1789 }
1790 
1791 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1792 {
1793   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1794   return (offset < 0x80) ? 5 : 8; // REX
1795 }
1796 
1797 //=============================================================================
1798 
1799 // emit call stub, compiled java to interpreter
1800 void emit_java_to_interp(CodeBuffer& cbuf)
1801 {
1802   // Stub is fixed up when the corresponding call is converted from
1803   // calling compiled code to calling interpreted code.
1804   // movq rbx, 0
1805   // jmp -5 # to self
1806 
1807   address mark = cbuf.inst_mark();  // get mark within main instrs section
1808 
1809   // Note that the code buffer's inst_mark is always relative to insts.
1810   // That's why we must use the macroassembler to generate a stub.
1811   MacroAssembler _masm(&cbuf);
1812 
1813   address base =
1814   __ start_a_stub(Compile::MAX_stubs_size);
1815   if (base == NULL)  return;  // CodeBuffer::expand failed
1816   // static stub relocation stores the instruction address of the call
1817   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1818   // static stub relocation also tags the methodOop in the code-stream.
1819   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1820   __ jump(RuntimeAddress(__ pc()));
1821 
1822   // Update current stubs pointer and restore code_end.
1823   __ end_a_stub();
1824 }
1825 
1826 // size of call stub, compiled java to interpretor
1827 uint size_java_to_interp()
1828 {
1829   return 15;  // movq (1+1+8); jmp (1+4)
1830 }
1831 
1832 // relocation entries for call stub, compiled java to interpretor
1833 uint reloc_java_to_interp()
1834 {
1835   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1836 }
1837 
1838 //=============================================================================
1839 #ifndef PRODUCT
1840 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1841 {
1842   if (UseCompressedOops) {
1843     st->print("subq    rax, r12_heapbase\t # encode_heap_oop_not_null(rax)\n\t");
1844     st->print("shrq    rax, %d\n\t", LogMinObjAlignmentInBytes);
1845     st->print_cr("cmpl    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1846                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1847   } else {
1848     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1849                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1850   }
1851   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1852 }
1853 #endif
1854 
1855 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1856 {
1857   MacroAssembler masm(&cbuf);
1858 #ifdef ASSERT
1859   uint code_size = cbuf.code_size();
1860 #endif
1861   if (UseCompressedOops) {
1862     // masm.encode_heap_oop_not_null(rax);
1863     masm.subq(rax, r12_heapbase);
1864     masm.shrq(rax, LogMinObjAlignmentInBytes);
1865     masm.cmpl(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1866   } else {
1867     masm.cmpq(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1868   }
1869 
1870   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1871 
1872   /* WARNING these NOPs are critical so that verified entry point is properly
1873      aligned (4 bytes) for patching by NativeJump::patch_verified_entry() */
1874 
1875   // C2 aligns Start block (verified entry point) to InteriorEntryAlignment
1876   assert((InteriorEntryAlignment & 0x3) == 0, "verified entry point should be aligned to 4 bytes");
1877 }
1878 
1879 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1880 {
1881   return MachNode::size(ra_); // too many variables; just compute it
1882                               // the hard way
1883 }
1884 
1885 
1886 //=============================================================================
1887 uint size_exception_handler()
1888 {
1889   // NativeCall instruction size is the same as NativeJump.
1890   // Note that this value is also credited (in output.cpp) to
1891   // the size of the code section.
1892   return NativeJump::instruction_size;
1893 }
1894 
1895 // Emit exception handler code.
1896 int emit_exception_handler(CodeBuffer& cbuf)
1897 {
1898 
1899   // Note that the code buffer's inst_mark is always relative to insts.
1900   // That's why we must use the macroassembler to generate a handler.
1901   MacroAssembler _masm(&cbuf);
1902   address base =
1903   __ start_a_stub(size_exception_handler());
1904   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1905   int offset = __ offset();
1906   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1907   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1908   __ end_a_stub();
1909   return offset;
1910 }
1911 
1912 uint size_deopt_handler()
1913 {
1914   // three 5 byte instructions
1915   return 15;
1916 }
1917 
1918 // Emit deopt handler code.
1919 int emit_deopt_handler(CodeBuffer& cbuf)
1920 {
1921 
1922   // Note that the code buffer's inst_mark is always relative to insts.
1923   // That's why we must use the macroassembler to generate a handler.
1924   MacroAssembler _masm(&cbuf);
1925   address base =
1926   __ start_a_stub(size_deopt_handler());
1927   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1928   int offset = __ offset();
1929   address the_pc = (address) __ pc();
1930   Label next;
1931   // push a "the_pc" on the stack without destroying any registers
1932   // as they all may be live.
1933 
1934   // push address of "next"
1935   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1936   __ bind(next);
1937   // adjust it so it matches "the_pc"
1938   __ subq(Address(rsp, 0), __ offset() - offset);
1939   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1940   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1941   __ end_a_stub();
1942   return offset;
1943 }
1944 
1945 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1946   int mark = cbuf.insts()->mark_off();
1947   MacroAssembler _masm(&cbuf);
1948   address double_address = __ double_constant(x);
1949   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1950   emit_d32_reloc(cbuf,
1951                  (int) (double_address - cbuf.code_end() - 4),
1952                  internal_word_Relocation::spec(double_address),
1953                  RELOC_DISP32);
1954 }
1955 
1956 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1957   int mark = cbuf.insts()->mark_off();
1958   MacroAssembler _masm(&cbuf);
1959   address float_address = __ float_constant(x);
1960   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1961   emit_d32_reloc(cbuf,
1962                  (int) (float_address - cbuf.code_end() - 4),
1963                  internal_word_Relocation::spec(float_address),
1964                  RELOC_DISP32);
1965 }
1966 
1967 
1968 int Matcher::regnum_to_fpu_offset(int regnum)
1969 {
1970   return regnum - 32; // The FP registers are in the second chunk
1971 }
1972 
1973 // This is UltraSparc specific, true just means we have fast l2f conversion
1974 const bool Matcher::convL2FSupported(void) {
1975   return true;
1976 }
1977 
1978 // Vector width in bytes
1979 const uint Matcher::vector_width_in_bytes(void) {
1980   return 8;
1981 }
1982 
1983 // Vector ideal reg
1984 const uint Matcher::vector_ideal_reg(void) {
1985   return Op_RegD;
1986 }
1987 
1988 // Is this branch offset short enough that a short branch can be used?
1989 //
1990 // NOTE: If the platform does not provide any short branch variants, then
1991 //       this method should return false for offset 0.
1992 bool Matcher::is_short_branch_offset(int offset)
1993 {
1994   return -0x80 <= offset && offset < 0x80;
1995 }
1996 
1997 const bool Matcher::isSimpleConstant64(jlong value) {
1998   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1999   //return value == (int) value;  // Cf. storeImmL and immL32.
2000 
2001   // Probably always true, even if a temp register is required.
2002   return true;
2003 }
2004 
2005 // The count given to rep stosq for the ClearArray node is in words, not bytes.
2006 const bool Matcher::init_array_count_is_in_bytes = false;
2007
2008 // Threshold size for cleararray: eight longs (8 * BytesPerLong).
2009 const int Matcher::init_array_short_size = 8 * BytesPerLong;
2010
2011 // Should the Matcher clone shifts on addressing modes, expecting them
2012 // to be subsumed into complex addressing expressions, or compute them
2013 // into registers?  True for Intel but false for most RISCs.
2014 const bool Matcher::clone_shift_expressions = true;
2015
2016 // Is it better to copy float constants, or load them directly from
2017 // memory?  Intel can load a float constant from a direct address,
2018 // requiring no extra registers.  Most RISCs will have to materialize
2019 // an address into a register first, so they would do better to copy
2020 // the constant from the stack.
2021 const bool Matcher::rematerialize_float_constants = true; // XXX
2022
2023 // If the CPU can load and store misaligned doubles directly then no
2024 // fixup is needed.  Otherwise the double is split into 2 integer pieces
2025 // and moved piece-by-piece.  This only happens when passing doubles into
2026 // C code, as the Java calling convention forces doubles to be aligned.
2027 const bool Matcher::misaligned_doubles_ok = true;
2028 
2029 // No-op on amd64
2030 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2031 
2032 // Advertise here whether the CPU requires explicit rounding operations
2033 // to implement the UseStrictFP mode.
2034 const bool Matcher::strict_fp_requires_explicit_rounding = true;
2035
2036 // Do floats take an entire double register or just half?
2037 const bool Matcher::float_in_double = true;
2038 // Do ints take an entire long register or just half?
2039 const bool Matcher::int_in_long = true;
2040 
2041 // Return whether or not this register is ever used as an argument.
2042 // This function is used on startup to build the trampoline stubs in
2043 // generateOptoStub.  Registers not mentioned will be killed by the VM
2044 // call in the trampoline, and arguments in those registers not be
2045 // available to the callee.
2046 bool Matcher::can_be_java_arg(int reg)
2047 {
2048   return
2049     reg ==  RDI_num || reg ==  RDI_H_num ||
2050     reg ==  RSI_num || reg ==  RSI_H_num ||
2051     reg ==  RDX_num || reg ==  RDX_H_num ||
2052     reg ==  RCX_num || reg ==  RCX_H_num ||
2053     reg ==   R8_num || reg ==   R8_H_num ||
2054     reg ==   R9_num || reg ==   R9_H_num ||
2055     reg ==  R12_num || reg ==  R12_H_num ||
2056     reg == XMM0_num || reg == XMM0_H_num ||
2057     reg == XMM1_num || reg == XMM1_H_num ||
2058     reg == XMM2_num || reg == XMM2_H_num ||
2059     reg == XMM3_num || reg == XMM3_H_num ||
2060     reg == XMM4_num || reg == XMM4_H_num ||
2061     reg == XMM5_num || reg == XMM5_H_num ||
2062     reg == XMM6_num || reg == XMM6_H_num ||
2063     reg == XMM7_num || reg == XMM7_H_num;
2064 }
2065 
2066 bool Matcher::is_spillable_arg(int reg)
2067 {
2068   return can_be_java_arg(reg);
2069 }
2070 
2071 // Register for DIVI projection of divmodI
2072 RegMask Matcher::divI_proj_mask() {
2073   return INT_RAX_REG_mask;
2074 }
2075 
2076 // Register for MODI projection of divmodI
2077 RegMask Matcher::modI_proj_mask() {
2078   return INT_RDX_REG_mask;
2079 }
2080 
2081 // Register for DIVL projection of divmodL
2082 RegMask Matcher::divL_proj_mask() {
2083   return LONG_RAX_REG_mask;
2084 }
2085 
2086 // Register for MODL projection of divmodL
2087 RegMask Matcher::modL_proj_mask() {
2088   return LONG_RDX_REG_mask;
2089 }
2090 
2091 static Address build_address(int b, int i, int s, int d) {
2092   Register index = as_Register(i);
2093   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2094   if (index == rsp) {
2095     index = noreg;
2096     scale = Address::no_scale;
2097   }
2098   Address addr(as_Register(b), index, scale, d);
2099   return addr;
2100 }
2101 
2102 %}
2103 
2104 //----------ENCODING BLOCK-----------------------------------------------------
2105 // This block specifies the encoding classes used by the compiler to
2106 // output byte streams.  Encoding classes are parameterized macros
2107 // used by Machine Instruction Nodes in order to generate the bit
2108 // encoding of the instruction.  Operands specify their base encoding
2109 // interface with the interface keyword.  Four interfaces are
2110 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
2111 // COND_INTER.  REG_INTER causes an operand to generate a function
2112 // which returns its register number when queried.  CONST_INTER causes
2113 // an operand to generate a function which returns the value of the
2114 // constant when queried.  MEMORY_INTER causes an operand to generate
2115 // four functions which return the Base Register, the Index Register,
2116 // the Scale Value, and the Offset Value of the operand when queried.
2117 // COND_INTER causes an operand to generate six functions which return
2118 // the encoding code (ie - encoding bits for the instruction)
2119 // associated with each basic boolean condition for a conditional
2120 // instruction.
2121 //
2122 // Instructions specify two basic values for encoding.  Again, a
2123 // function is available to check if the constant displacement is an
2124 // oop. They use the ins_encode keyword to specify their encoding
2125 // classes (which must be a sequence of enc_class names, and their
2126 // parameters, specified in the encoding block), and they use the
2127 // opcode keyword to specify, in order, their primary, secondary, and
2128 // tertiary opcode.  Only the opcode sections which a particular
2129 // instruction needs for encoding need to be specified.
2130 encode %{
2131   // Build emit functions for each basic byte or larger field in the
2132   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2133   // from C++ code in the enc_class source block.  Emit functions will
2134   // live in the main source block for now.  In future, we can
2135   // generalize this by adding a syntax that specifies the sizes of
2136   // fields in an order, so that the adlc can build the emit functions
2137   // automagically
2138 
2139   // Emit the primary opcode from the instruction's opcode declaration.
2140   enc_class OpcP
2141   %{
2142     emit_opcode(cbuf, $primary);
2143   %}
2144 
2145   // Emit the secondary opcode from the instruction's opcode declaration.
2146   enc_class OpcS
2147   %{
2148     emit_opcode(cbuf, $secondary);
2149   %}
2150 
2151   // Emit the tertiary opcode from the instruction's opcode declaration.
2152   enc_class OpcT
2153   %{
2154     emit_opcode(cbuf, $tertiary);
2155   %}
2156 
2157   // Emit an opcode passed in directly as the constant operand d8.
2158   enc_class Opcode(immI d8)
2159   %{
2160     emit_opcode(cbuf, $d8$$constant);
2161   %}
2162 
2163   // Emit the size prefix (0x66).
2164   enc_class SizePrefix
2165   %{
2166     emit_opcode(cbuf, 0x66);
2167   %}
2168 
2169   enc_class reg(rRegI reg)  // r/m byte built from 0x3, 0 and the low 3 bits of reg
2170   %{
2171     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7); // "& 7" keeps only the low 3 bits of the encoding
2172   %}
2173 
2174   enc_class reg_reg(rRegI dst, rRegI src)  // r/m byte built from 0x3 and the low 3 bits of dst and src
2175   %{
2176     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); // "& 7" keeps only the low 3 bits of each encoding
2177   %}
2178 
2179   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)  // constant opcode, then the same r/m byte as reg_reg
2180   %{
2181     emit_opcode(cbuf, $opcode$$constant);
2182     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); // "& 7" keeps only the low 3 bits of each encoding
2183   %}
2184 
2185   enc_class cmpfp_fixup()  // when the parity flag is set, rewrite the flags through the stack
2186   %{
2187     // jnp,s exit (0x0A skips pushfq 1 + andq 8 + popfq 1 bytes and lands on the nop)
2188     emit_opcode(cbuf, 0x7B);
2189     emit_d8(cbuf, 0x0A);
2190
2191     // pushfq
2192     emit_opcode(cbuf, 0x9C);
2193
2194     // andq $0xffffff2b, (%rsp): mask the flags image that was just pushed
2195     emit_opcode(cbuf, Assembler::REX_W);
2196     emit_opcode(cbuf, 0x81);
2197     emit_opcode(cbuf, 0x24);
2198     emit_opcode(cbuf, 0x24);
2199     emit_d32(cbuf, 0xffffff2b);
2200
2201     // popfq
2202     emit_opcode(cbuf, 0x9D);
2203
2204     // nop (target for branch to avoid branch to branch)
2205     emit_opcode(cbuf, 0x90);
2206   %}
2207 
2208   enc_class cmpfp3(rRegI dst)  // dst = -1, 0 or 1 according to the current condition flags
2209   %{
2210     int dstenc = $dst$$reg;
2211
2212     // movl $dst, -1
2213     if (dstenc >= 8) {
2214       emit_opcode(cbuf, Assembler::REX_B);
2215     }
2216     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2217     emit_d32(cbuf, -1);
2218
2219     // jp,s done (dst stays -1); skips jb (2) + setne + movzbl, each 3 bytes or 4 with a prefix
2220     emit_opcode(cbuf, 0x7A);
2221     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2222
2223     // jb,s done (dst stays -1); skips setne + movzbl
2224     emit_opcode(cbuf, 0x72);
2225     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2226
2227     // setne $dst (encodings 4 and up get a REX prefix, hence the larger skips above)
2228     if (dstenc >= 4) {
2229       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2230     }
2231     emit_opcode(cbuf, 0x0F);
2232     emit_opcode(cbuf, 0x95);
2233     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2234
2235     // movzbl $dst, $dst (leaves 0 or 1 in dst)
2236     if (dstenc >= 4) {
2237       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2238     }
2239     emit_opcode(cbuf, 0x0F);
2240     emit_opcode(cbuf, 0xB6);
2241     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2242   %}
2243 
2244   enc_class cdql_enc(no_rax_rdx_RegI div)
2245   %{
2246     // Full implementation of Java idiv and irem; checks for
2247     // special case as described in JVM spec., p.243 & p.271.
2248     //
2249     //         normal case                           special case
2250     //
2251     // input : rax: dividend                         min_int
2252     //         reg: divisor                          -1
2253     //
2254     // output: rax: quotient  (= rax idiv reg)       min_int
2255     //         rdx: remainder (= rax irem reg)       0
2256     //
2257     //  Code sequence:
2258     //
2259     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2260     //    5:   75 07/08                jne    e <normal>
2261     //    7:   33 d2                   xor    %edx,%edx
2262     //  [div >= 8 -> offset + 1]
2263     //  [REX_B]
2264     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2265     //    c:   74 03/04                je     11 <done>
2266     // 000000000000000e <normal>:
2267     //    e:   99                      cltd
2268     //  [div >= 8 -> offset + 1]
2269     //  [REX_B]
2270     //    f:   f7 f9                   idiv   $div
2271     // 0000000000000011 <done>:
2272
2273     // cmp    $0x80000000,%eax
2274     emit_opcode(cbuf, 0x3d);
2275     emit_d8(cbuf, 0x00);
2276     emit_d8(cbuf, 0x00);
2277     emit_d8(cbuf, 0x00);
2278     emit_d8(cbuf, 0x80);
2279
2280     // jne    e <normal>
2281     emit_opcode(cbuf, 0x75);
2282     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08); // skips xor (2) + cmp (3, or 4 with REX_B) + je (2)
2283
2284     // xor    %edx,%edx
2285     emit_opcode(cbuf, 0x33);
2286     emit_d8(cbuf, 0xD2);
2287
2288     // cmp    $0xffffffffffffffff,$div
2289     if ($div$$reg >= 8) {
2290       emit_opcode(cbuf, Assembler::REX_B);
2291     }
2292     emit_opcode(cbuf, 0x83);
2293     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2294     emit_d8(cbuf, 0xFF);
2295
2296     // je     11 <done>
2297     emit_opcode(cbuf, 0x74);
2298     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04); // skips cltd (1) + idiv (2, or 3 with REX_B)
2299
2300     // <normal>
2301     // cltd
2302     emit_opcode(cbuf, 0x99);
2303
2304     // idivl (note: must be emitted by the user of this rule)
2305     // <done>
2306   %}
2307 
2308   enc_class cdqq_enc(no_rax_rdx_RegL div)
2309   %{
2310     // Full implementation of Java ldiv and lrem; checks for
2311     // special case as described in JVM spec., p.243 & p.271.
2312     //
2313     //         normal case                           special case
2314     //
2315     // input : rax: dividend                         min_long
2316     //         reg: divisor                          -1
2317     //
2318     // output: rax: quotient  (= rax idiv reg)       min_long
2319     //         rdx: remainder (= rax irem reg)       0
2320     //
2321     //  Code sequence:
2322     //
2323     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2324     //    7:   00 00 80
2325     //    a:   48 39 d0                cmp    %rdx,%rax
2326     //    d:   75 08                   jne    17 <normal>
2327     //    f:   33 d2                   xor    %edx,%edx
2328     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2329     //   15:   74 05                   je     1c <done>
2330     // 0000000000000017 <normal>:
2331     //   17:   48 99                   cqto
2332     //   19:   48 f7 f9                idiv   $div
2333     // 000000000000001c <done>:
2334
2335     // mov    $0x8000000000000000,%rdx
2336     emit_opcode(cbuf, Assembler::REX_W);
2337     emit_opcode(cbuf, 0xBA);
2338     emit_d8(cbuf, 0x00);
2339     emit_d8(cbuf, 0x00);
2340     emit_d8(cbuf, 0x00);
2341     emit_d8(cbuf, 0x00);
2342     emit_d8(cbuf, 0x00);
2343     emit_d8(cbuf, 0x00);
2344     emit_d8(cbuf, 0x00);
2345     emit_d8(cbuf, 0x80);
2346
2347     // cmp    %rdx,%rax
2348     emit_opcode(cbuf, Assembler::REX_W);
2349     emit_opcode(cbuf, 0x39);
2350     emit_d8(cbuf, 0xD0);
2351
2352     // jne    17 <normal>
2353     emit_opcode(cbuf, 0x75);
2354     emit_d8(cbuf, 0x08); // skips xor (2) + cmp (4) + je (2)
2355
2356     // xor    %edx,%edx
2357     emit_opcode(cbuf, 0x33);
2358     emit_d8(cbuf, 0xD2);
2359
2360     // cmp    $0xffffffffffffffff,$div
2361     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2362     emit_opcode(cbuf, 0x83);
2363     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2364     emit_d8(cbuf, 0xFF);
2365
2366     // je     1c <done>
2367     emit_opcode(cbuf, 0x74);
2368     emit_d8(cbuf, 0x05); // skips cqto (2) + idiv (3)
2369
2370     // <normal>
2371     // cqto
2372     emit_opcode(cbuf, Assembler::REX_W);
2373     emit_opcode(cbuf, 0x99);
2374
2375     // idivq (note: must be emitted by the user of this rule)
2376     // <done>
2377   %}
2378 
2379   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
2380   enc_class OpcSE(immI imm)
2381   %{
2382     // Emit primary opcode and set sign-extend bit
2383     // Check for 8-bit immediate, and set sign extend bit in opcode
2384     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2385       emit_opcode(cbuf, $primary | 0x02); // immediate fits in a signed byte
2386     } else {
2387       // 32-bit immediate
2388       emit_opcode(cbuf, $primary);
2389     }
2390   %}
2391 
2392   enc_class OpcSErm(rRegI dst, immI imm)
2393   %{
2394     // OpcSEr/m: optional REX_B prefix, sign-extend-aware opcode, then the r/m byte
2395     int dstenc = $dst$$reg;
2396     if (dstenc >= 8) {
2397       emit_opcode(cbuf, Assembler::REX_B);
2398       dstenc -= 8; // only the low 3 bits go into the r/m byte below
2399     }
2400     // Emit primary opcode and set sign-extend bit
2401     // Check for 8-bit immediate, and set sign extend bit in opcode
2402     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2403       emit_opcode(cbuf, $primary | 0x02);
2404     } else {
2405       // 32-bit immediate
2406       emit_opcode(cbuf, $primary);
2407     }
2408     // Emit r/m byte with secondary opcode, after primary opcode.
2409     emit_rm(cbuf, 0x3, $secondary, dstenc);
2410   %}
2411 
2412   enc_class OpcSErm_wide(rRegL dst, immI imm)
2413   %{
2414     // OpcSEr/m with a mandatory prefix: REX_W, or REX_WB for encodings of 8 and up
2415     int dstenc = $dst$$reg;
2416     if (dstenc < 8) {
2417       emit_opcode(cbuf, Assembler::REX_W);
2418     } else {
2419       emit_opcode(cbuf, Assembler::REX_WB);
2420       dstenc -= 8; // only the low 3 bits go into the r/m byte below
2421     }
2422     // Emit primary opcode and set sign-extend bit
2423     // Check for 8-bit immediate, and set sign extend bit in opcode
2424     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2425       emit_opcode(cbuf, $primary | 0x02);
2426     } else {
2427       // 32-bit immediate
2428       emit_opcode(cbuf, $primary);
2429     }
2430     // Emit r/m byte with secondary opcode, after primary opcode.
2431     emit_rm(cbuf, 0x3, $secondary, dstenc);
2432   %}
2433 
2434   enc_class Con8or32(immI imm)
2435   %{
2436     // Emit the immediate itself: 8-bit form when it fits in a signed byte
2437     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2438       $$$emit8$imm$$constant;
2439     } else {
2440       // 32-bit immediate
2441       $$$emit32$imm$$constant;
2442     }
2443   %}
2444 
2445   enc_class Lbl(label labl)
2446   %{
2447     // JMP, CALL: 32-bit displacement to the label, or 0 when no label is given
2448     Label* l = $labl$$label;
2449     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0); // measured from the end of the 4-byte field
2450   %}
2451 
2452   enc_class LblShort(label labl)
2453   %{
2454     // Short JMP: 8-bit displacement to the label, or 0 when no label is given
2455     Label* l = $labl$$label;
2456     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0; // measured from the end of the 1-byte field
2457     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2458     emit_d8(cbuf, disp);
2459   %}
2460 
2461   enc_class opc2_reg(rRegI dst)
2462   %{
2463     // BSWAP: the secondary opcode and the register encoding are handed to emit_cc
2464     emit_cc(cbuf, $secondary, $dst$$reg);
2465   %}
2466 
2467   enc_class opc3_reg(rRegI dst)
2468   %{
2469     // BSWAP: the tertiary opcode and the register encoding are handed to emit_cc
2470     emit_cc(cbuf, $tertiary, $dst$$reg);
2471   %}
2472 
2473   enc_class reg_opc(rRegI div)
2474   %{
2475     // INC, DEC, IDIV, IMOD, JMP indirect, ...: r/m byte carrying the secondary opcode
2476     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7); // "& 7" keeps only the low 3 bits of the encoding
2477   %}
2478 
2479   enc_class Jcc(cmpOp cop, label labl)
2480   %{
2481     // JCC: primary opcode, secondary opcode with the condition code, 32-bit displacement
2482     Label* l = $labl$$label;
2483     $$$emit8$primary;
2484     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2485     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0); // from the end of the 4-byte field; 0 without a label
2486   %}
2487 
2488   enc_class JccShort (cmpOp cop, label labl)
2489   %{
2490   // JCC, short form: primary opcode with the condition code, 8-bit displacement
2491     Label *l = $labl$$label;
2492     emit_cc(cbuf, $primary, $cop$$cmpcode);
2493     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0; // from the end of the 1-byte field; 0 without a label
2494     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2495     emit_d8(cbuf, disp);
2496   %}
2497 
2498   enc_class enc_cmov(cmpOp cop)
2499   %{
2500     // CMOV: primary opcode, then the secondary opcode with the condition code
2501     $$$emit8$primary;
2502     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2503   %}
2504 
2505   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2506   %{
2507     // Invert sense of branch from sense of cmov: the short branch jumps over the move below
2508     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2509     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2510                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2511                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX; the value is the byte length of the move
2512     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2513     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2514     if ($dst$$reg < 8) {
2515       if ($src$$reg >= 8) {
2516         emit_opcode(cbuf, Assembler::REX_B);
2517       }
2518     } else {
2519       if ($src$$reg < 8) {
2520         emit_opcode(cbuf, Assembler::REX_R);
2521       } else {
2522         emit_opcode(cbuf, Assembler::REX_RB);
2523       }
2524     }
2525     emit_opcode(cbuf, 0x0F);
2526     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2527     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2528   %}
2529 
2530   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2531   %{
2532     // Invert sense of branch from sense of cmov
2533     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2534     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2535 
2536     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2537     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2538     if ($dst$$reg < 8) {
2539       if ($src$$reg >= 8) {
2540         emit_opcode(cbuf, Assembler::REX_B);
2541       }
2542     } else {
2543       if ($src$$reg < 8) {
2544         emit_opcode(cbuf, Assembler::REX_R);
2545       } else {
2546         emit_opcode(cbuf, Assembler::REX_RB);
2547       }
2548     }
2549     emit_opcode(cbuf, 0x0F);
2550     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2551     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2552   %}
2553 
  // Partial subtype check: scans the sub class's secondary-supers array for
  // the super class.  Uses fixed registers: RAX = super class, RSI = sub
  // class, RDI = result, RCX = scratch (killed).  On a hit the super class is
  // stored into the sub class's secondary-super cache and, when $primary is
  // non-zero, RDI is zeroed.  On a miss control reaches the end with RDI left
  // as the scan instruction left it (NOTE(review): presumably non-zero --
  // confirm against repne_scan semantics and the instruct that uses this).
  enc_class enc_PartialSubtypeCheck()
  %{
    Register Rrdi = as_Register(RDI_enc); // result register
    Register Rrax = as_Register(RAX_enc); // super class
    Register Rrcx = as_Register(RCX_enc); // killed
    Register Rrsi = as_Register(RSI_enc); // sub class
    Label hit, miss, cmiss;

    MacroAssembler _masm(&cbuf);
    // Compare super with sub directly, since super is not in its own SSA.
    // The compiler used to emit this test, but we fold it in here,
    // to allow platform-specific tweaking on sparc.
    __ cmpq(Rrax, Rrsi);
    __ jcc(Assembler::equal, hit);
#ifndef PRODUCT
    // Non-product builds count how often the slow scan below is reached.
    __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
    __ incrementl(Address(Rrcx, 0));
#endif //PRODUCT
    // RDI = sub class's secondary-supers array.
    __ movq(Rrdi, Address(Rrsi,
                          sizeof(oopDesc) +
                          Klass::secondary_supers_offset_in_bytes()));
    // RCX = array length; then advance RDI to the first element.
    __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
    __ addq(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
    if (UseCompressedOops) {
      // Encode RAX for the 32-bit scan; it is decoded again on both the hit
      // and the miss path so RAX holds the original value afterwards.
      __ encode_heap_oop(Rrax);
      __ repne_scanl();
      __ jcc(Assembler::notEqual, cmiss);
      __ decode_heap_oop(Rrax);
      // Hit: remember the super class in the secondary-super cache.
      __ movq(Address(Rrsi,
                      sizeof(oopDesc) +
                      Klass::secondary_super_cache_offset_in_bytes()),
              Rrax);
      __ jmp(hit);
      __ bind(cmiss);
      __ decode_heap_oop(Rrax);
      __ jmp(miss);
    } else {
      __ repne_scanq();
      __ jcc(Assembler::notEqual, miss);
      // Hit: remember the super class in the secondary-super cache.
      __ movq(Address(Rrsi,
                      sizeof(oopDesc) +
                      Klass::secondary_super_cache_offset_in_bytes()),
              Rrax);
    }
    __ bind(hit);
    // Only variants with a non-zero $primary clear the result register on a
    // hit.
    if ($primary) {
      __ xorq(Rrdi, Rrdi);
    }
    __ bind(miss);
  %}
2604 
2605   enc_class Java_To_Interpreter(method meth)
2606   %{
2607     // CALL Java_To_Interpreter
2608     // This is the instruction starting address for relocation info.
2609     cbuf.set_inst_mark();
2610     $$$emit8$primary;
2611     // CALL directly to the runtime
2612     emit_d32_reloc(cbuf,
2613                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2614                    runtime_call_Relocation::spec(),
2615                    RELOC_DISP32);
2616   %}
2617 
2618   enc_class Java_Static_Call(method meth)
2619   %{
2620     // JAVA STATIC CALL
2621     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2622     // determine who we intended to call.
2623     cbuf.set_inst_mark();
2624     $$$emit8$primary;
2625 
2626     if (!_method) {
2627       emit_d32_reloc(cbuf,
2628                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2629                      runtime_call_Relocation::spec(),
2630                      RELOC_DISP32);
2631     } else if (_optimized_virtual) {
2632       emit_d32_reloc(cbuf,
2633                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2634                      opt_virtual_call_Relocation::spec(),
2635                      RELOC_DISP32);
2636     } else {
2637       emit_d32_reloc(cbuf,
2638                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2639                      static_call_Relocation::spec(),
2640                      RELOC_DISP32);
2641     }
2642     if (_method) {
2643       // Emit stub for static call
2644       emit_java_to_interp(cbuf);
2645     }
2646   %}
2647 
  // Emits a two-instruction virtual call site: a 64-bit immediate load into
  // RAX holding a relocated placeholder value, followed by the call itself.
  // The call's relocation records the address of the preceding load.
  enc_class Java_Dynamic_Call(method meth)
  %{
    // JAVA DYNAMIC CALL
    // !!!!!
    // Generate  "movq rax, -1", placeholder instruction to load oop-info
    // emit_call_dynamic_prologue( cbuf );
    cbuf.set_inst_mark();

    // movq rax, -1
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0xB8 | RAX_enc);
    emit_d64_reloc(cbuf,
                   (int64_t) Universe::non_oop_word(),
                   oop_Relocation::spec_for_immediate(), RELOC_IMM64);
    // Capture the mark set above (start of the movq) before it is moved to
    // the call instruction below.
    address virtual_call_oop_addr = cbuf.inst_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_inst_mark();
    $$$emit8$primary;
    // Displacement is relative to the end of the 4-byte field being written.
    emit_d32_reloc(cbuf,
                   (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
                   virtual_call_Relocation::spec(virtual_call_oop_addr),
                   RELOC_DISP32);
  %}
2672 
2673   enc_class Java_Compiled_Call(method meth)
2674   %{
2675     // JAVA COMPILED CALL
2676     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2677 
2678     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2679     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2680 
2681     // callq *disp(%rax)
2682     cbuf.set_inst_mark();
2683     $$$emit8$primary;
2684     if (disp < 0x80) {
2685       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2686       emit_d8(cbuf, disp); // Displacement
2687     } else {
2688       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2689       emit_d32(cbuf, disp); // Displacement
2690     }
2691   %}
2692 
2693   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2694   %{
2695     // SAL, SAR, SHR
2696     int dstenc = $dst$$reg;
2697     if (dstenc >= 8) {
2698       emit_opcode(cbuf, Assembler::REX_B);
2699       dstenc -= 8;
2700     }
2701     $$$emit8$primary;
2702     emit_rm(cbuf, 0x3, $secondary, dstenc);
2703     $$$emit8$shift$$constant;
2704   %}
2705 
2706   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2707   %{
2708     // SAL, SAR, SHR
2709     int dstenc = $dst$$reg;
2710     if (dstenc < 8) {
2711       emit_opcode(cbuf, Assembler::REX_W);
2712     } else {
2713       emit_opcode(cbuf, Assembler::REX_WB);
2714       dstenc -= 8;
2715     }
2716     $$$emit8$primary;
2717     emit_rm(cbuf, 0x3, $secondary, dstenc);
2718     $$$emit8$shift$$constant;
2719   %}
2720 
2721   enc_class load_immI(rRegI dst, immI src)
2722   %{
2723     int dstenc = $dst$$reg;
2724     if (dstenc >= 8) {
2725       emit_opcode(cbuf, Assembler::REX_B);
2726       dstenc -= 8;
2727     }
2728     emit_opcode(cbuf, 0xB8 | dstenc);
2729     $$$emit32$src$$constant;
2730   %}
2731 
2732   enc_class load_immL(rRegL dst, immL src)
2733   %{
2734     int dstenc = $dst$$reg;
2735     if (dstenc < 8) {
2736       emit_opcode(cbuf, Assembler::REX_W);
2737     } else {
2738       emit_opcode(cbuf, Assembler::REX_WB);
2739       dstenc -= 8;
2740     }
2741     emit_opcode(cbuf, 0xB8 | dstenc);
2742     emit_d64(cbuf, $src$$constant);
2743   %}
2744 
2745   enc_class load_immUL32(rRegL dst, immUL32 src)
2746   %{
2747     // same as load_immI, but this time we care about zeroes in the high word
2748     int dstenc = $dst$$reg;
2749     if (dstenc >= 8) {
2750       emit_opcode(cbuf, Assembler::REX_B);
2751       dstenc -= 8;
2752     }
2753     emit_opcode(cbuf, 0xB8 | dstenc);
2754     $$$emit32$src$$constant;
2755   %}
2756 
2757   enc_class load_immL32(rRegL dst, immL32 src)
2758   %{
2759     int dstenc = $dst$$reg;
2760     if (dstenc < 8) {
2761       emit_opcode(cbuf, Assembler::REX_W);
2762     } else {
2763       emit_opcode(cbuf, Assembler::REX_WB);
2764       dstenc -= 8;
2765     }
2766     emit_opcode(cbuf, 0xC7);
2767     emit_rm(cbuf, 0x03, 0x00, dstenc);
2768     $$$emit32$src$$constant;
2769   %}
2770 
2771   enc_class load_immP31(rRegP dst, immP32 src)
2772   %{
2773     // same as load_immI, but this time we care about zeroes in the high word
2774     int dstenc = $dst$$reg;
2775     if (dstenc >= 8) {
2776       emit_opcode(cbuf, Assembler::REX_B);
2777       dstenc -= 8;
2778     }
2779     emit_opcode(cbuf, 0xB8 | dstenc);
2780     $$$emit32$src$$constant;
2781   %}
2782 
2783   enc_class load_immP(rRegP dst, immP src)
2784   %{
2785     int dstenc = $dst$$reg;
2786     if (dstenc < 8) {
2787       emit_opcode(cbuf, Assembler::REX_W);
2788     } else {
2789       emit_opcode(cbuf, Assembler::REX_WB);
2790       dstenc -= 8;
2791     }
2792     emit_opcode(cbuf, 0xB8 | dstenc);
2793     // This next line should be generated from ADLC
2794     if ($src->constant_is_oop()) {
2795       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2796     } else {
2797       emit_d64(cbuf, $src$$constant);
2798     }
2799   %}
2800 
  // Emits the operand part of a float-constant load: a ModRM byte with
  // mod = 00 and r/m = 101 whose reg field is the low three bits of $dst,
  // followed by whatever emit_float_constant writes for $con.  Opcode bytes
  // and any REX prefix are not emitted here.
  enc_class load_immF(regF dst, immF con)
  %{
    // XXX reg_mem doesn't support RIP-relative addressing yet
    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
    emit_float_constant(cbuf, $con$$constant);
  %}
2807 
  // Emits the operand part of a double-constant load: a ModRM byte with
  // mod = 00 and r/m = 101 whose reg field is the low three bits of $dst,
  // followed by whatever emit_double_constant writes for $con.  Opcode bytes
  // and any REX prefix are not emitted here.
  enc_class load_immD(regD dst, immD con)
  %{
    // XXX reg_mem doesn't support RIP-relative addressing yet
    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
    emit_double_constant(cbuf, $con$$constant);
  %}
2814 
2815   enc_class load_conF (regF dst, immF con) %{    // Load float constant
2816     emit_opcode(cbuf, 0xF3);
2817     if ($dst$$reg >= 8) {
2818       emit_opcode(cbuf, Assembler::REX_R);
2819     }
2820     emit_opcode(cbuf, 0x0F);
2821     emit_opcode(cbuf, 0x10);
2822     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2823     emit_float_constant(cbuf, $con$$constant);
2824   %}
2825 
2826   enc_class load_conD (regD dst, immD con) %{    // Load double constant
2827     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2828     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2829     if ($dst$$reg >= 8) {
2830       emit_opcode(cbuf, Assembler::REX_R);
2831     }
2832     emit_opcode(cbuf, 0x0F);
2833     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2834     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2835     emit_double_constant(cbuf, $con$$constant);
2836   %}
2837 
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  // All of the work is delegated to encode_copy, which receives the raw
  // encodings of $dst and $src.
  enc_class enc_copy(rRegI dst, rRegI src)
  %{
    encode_copy(cbuf, $dst$$reg, $src$$reg);
  %}
2843 
  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
  // All of the work is delegated to encode_CopyXD, which receives the raw
  // encodings of $dst and $src.
  enc_class enc_CopyXD( RegD dst, RegD src ) %{
    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
  %}
2848 
2849   enc_class enc_copy_always(rRegI dst, rRegI src)
2850   %{
2851     int srcenc = $src$$reg;
2852     int dstenc = $dst$$reg;
2853 
2854     if (dstenc < 8) {
2855       if (srcenc >= 8) {
2856         emit_opcode(cbuf, Assembler::REX_B);
2857         srcenc -= 8;
2858       }
2859     } else {
2860       if (srcenc < 8) {
2861         emit_opcode(cbuf, Assembler::REX_R);
2862       } else {
2863         emit_opcode(cbuf, Assembler::REX_RB);
2864         srcenc -= 8;
2865       }
2866       dstenc -= 8;
2867     }
2868 
2869     emit_opcode(cbuf, 0x8B);
2870     emit_rm(cbuf, 0x3, dstenc, srcenc);
2871   %}
2872 
2873   enc_class enc_copy_wide(rRegL dst, rRegL src)
2874   %{
2875     int srcenc = $src$$reg;
2876     int dstenc = $dst$$reg;
2877 
2878     if (dstenc != srcenc) {
2879       if (dstenc < 8) {
2880         if (srcenc < 8) {
2881           emit_opcode(cbuf, Assembler::REX_W);
2882         } else {
2883           emit_opcode(cbuf, Assembler::REX_WB);
2884           srcenc -= 8;
2885         }
2886       } else {
2887         if (srcenc < 8) {
2888           emit_opcode(cbuf, Assembler::REX_WR);
2889         } else {
2890           emit_opcode(cbuf, Assembler::REX_WRB);
2891           srcenc -= 8;
2892         }
2893         dstenc -= 8;
2894       }
2895       emit_opcode(cbuf, 0x8B);
2896       emit_rm(cbuf, 0x3, dstenc, srcenc);
2897     }
2898   %}
2899 
  // Appends $src as a 32-bit immediate via the ADLC emit32 directive.
  enc_class Con32(immI src)
  %{
    // Output immediate
    $$$emit32$src$$constant;
  %}
2905 
2906   enc_class Con64(immL src)
2907   %{
2908     // Output immediate
2909     emit_d64($src$$constant);
2910   %}
2911 
2912   enc_class Con32F_as_bits(immF src)
2913   %{
2914     // Output Float immediate bits
2915     jfloat jf = $src$$constant;
2916     jint jf_as_bits = jint_cast(jf);
2917     emit_d32(cbuf, jf_as_bits);
2918   %}
2919 
  // Appends $src as a 16-bit immediate via the ADLC emit16 directive.
  enc_class Con16(immI src)
  %{
    // Output immediate
    $$$emit16$src$$constant;
  %}
2925 
  // How is this different from Con32??? XXX
  // Appends $src as a 32-bit value by calling emit_d32 directly, where Con32
  // goes through the ADLC emit32 directive.
  enc_class Con_d32(immI src)
  %{
    emit_d32(cbuf,$src$$constant);
  %}
2931 
2932   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2933     // Output immediate memory reference
2934     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2935     emit_d32(cbuf, 0x00);
2936   %}
2937 
2938   enc_class jump_enc(rRegL switch_val, rRegI dest) %{
2939     MacroAssembler masm(&cbuf);
2940 
2941     Register switch_reg = as_Register($switch_val$$reg);
2942     Register dest_reg   = as_Register($dest$$reg);
2943     address table_base  = masm.address_table_constant(_index2label);
2944 
2945     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2946     // to do that and the compiler is using that register as one it can allocate.
2947     // So we build it all by hand.
2948     // Address index(noreg, switch_reg, Address::times_1);
2949     // ArrayAddress dispatch(table, index);
2950 
2951     Address dispatch(dest_reg, switch_reg, Address::times_1);
2952 
2953     masm.lea(dest_reg, InternalAddress(table_base));
2954     masm.jmp(dispatch);
2955   %}
2956 
2957   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
2958     MacroAssembler masm(&cbuf);
2959 
2960     Register switch_reg = as_Register($switch_val$$reg);
2961     Register dest_reg   = as_Register($dest$$reg);
2962     address table_base  = masm.address_table_constant(_index2label);
2963 
2964     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2965     // to do that and the compiler is using that register as one it can allocate.
2966     // So we build it all by hand.
2967     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2968     // ArrayAddress dispatch(table, index);
2969 
2970     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2971 
2972     masm.lea(dest_reg, InternalAddress(table_base));
2973     masm.jmp(dispatch);
2974   %}
2975 
2976   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
2977     MacroAssembler masm(&cbuf);
2978 
2979     Register switch_reg = as_Register($switch_val$$reg);
2980     Register dest_reg   = as_Register($dest$$reg);
2981     address table_base  = masm.address_table_constant(_index2label);
2982 
2983     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2984     // to do that and the compiler is using that register as one it can allocate.
2985     // So we build it all by hand.
2986     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2987     // ArrayAddress dispatch(table, index);
2988 
2989     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2990     masm.lea(dest_reg, InternalAddress(table_base));
2991     masm.jmp(dispatch);
2992 
2993   %}
2994 
  // Emits the LOCK prefix byte (0xF0), but only when os::is_MP() reports a
  // multiprocessor; otherwise nothing is emitted.
  enc_class lock_prefix()
  %{
    if (os::is_MP()) {
      emit_opcode(cbuf, 0xF0); // lock
    }
  %}
3001 
3002   enc_class REX_mem(memory mem)
3003   %{
3004     if ($mem$$base >= 8) {
3005       if ($mem$$index < 8) {
3006         emit_opcode(cbuf, Assembler::REX_B);
3007       } else {
3008         emit_opcode(cbuf, Assembler::REX_XB);
3009       }
3010     } else {
3011       if ($mem$$index >= 8) {
3012         emit_opcode(cbuf, Assembler::REX_X);
3013       }
3014     }
3015   %}
3016 
3017   enc_class REX_mem_wide(memory mem)
3018   %{
3019     if ($mem$$base >= 8) {
3020       if ($mem$$index < 8) {
3021         emit_opcode(cbuf, Assembler::REX_WB);
3022       } else {
3023         emit_opcode(cbuf, Assembler::REX_WXB);
3024       }
3025     } else {
3026       if ($mem$$index < 8) {
3027         emit_opcode(cbuf, Assembler::REX_W);
3028       } else {
3029         emit_opcode(cbuf, Assembler::REX_WX);
3030       }
3031     }
3032   %}
3033 
3034   // for byte regs
3035   enc_class REX_breg(rRegI reg)
3036   %{
3037     if ($reg$$reg >= 4) {
3038       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3039     }
3040   %}
3041 
3042   // for byte regs
3043   enc_class REX_reg_breg(rRegI dst, rRegI src)
3044   %{
3045     if ($dst$$reg < 8) {
3046       if ($src$$reg >= 4) {
3047         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3048       }
3049     } else {
3050       if ($src$$reg < 8) {
3051         emit_opcode(cbuf, Assembler::REX_R);
3052       } else {
3053         emit_opcode(cbuf, Assembler::REX_RB);
3054       }
3055     }
3056   %}
3057 
3058   // for byte regs
3059   enc_class REX_breg_mem(rRegI reg, memory mem)
3060   %{
3061     if ($reg$$reg < 8) {
3062       if ($mem$$base < 8) {
3063         if ($mem$$index >= 8) {
3064           emit_opcode(cbuf, Assembler::REX_X);
3065         } else if ($reg$$reg >= 4) {
3066           emit_opcode(cbuf, Assembler::REX);
3067         }
3068       } else {
3069         if ($mem$$index < 8) {
3070           emit_opcode(cbuf, Assembler::REX_B);
3071         } else {
3072           emit_opcode(cbuf, Assembler::REX_XB);
3073         }
3074       }
3075     } else {
3076       if ($mem$$base < 8) {
3077         if ($mem$$index < 8) {
3078           emit_opcode(cbuf, Assembler::REX_R);
3079         } else {
3080           emit_opcode(cbuf, Assembler::REX_RX);
3081         }
3082       } else {
3083         if ($mem$$index < 8) {
3084           emit_opcode(cbuf, Assembler::REX_RB);
3085         } else {
3086           emit_opcode(cbuf, Assembler::REX_RXB);
3087         }
3088       }
3089     }
3090   %}
3091 
3092   enc_class REX_reg(rRegI reg)
3093   %{
3094     if ($reg$$reg >= 8) {
3095       emit_opcode(cbuf, Assembler::REX_B);
3096     }
3097   %}
3098 
3099   enc_class REX_reg_wide(rRegI reg)
3100   %{
3101     if ($reg$$reg < 8) {
3102       emit_opcode(cbuf, Assembler::REX_W);
3103     } else {
3104       emit_opcode(cbuf, Assembler::REX_WB);
3105     }
3106   %}
3107 
3108   enc_class REX_reg_reg(rRegI dst, rRegI src)
3109   %{
3110     if ($dst$$reg < 8) {
3111       if ($src$$reg >= 8) {
3112         emit_opcode(cbuf, Assembler::REX_B);
3113       }
3114     } else {
3115       if ($src$$reg < 8) {
3116         emit_opcode(cbuf, Assembler::REX_R);
3117       } else {
3118         emit_opcode(cbuf, Assembler::REX_RB);
3119       }
3120     }
3121   %}
3122 
3123   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3124   %{
3125     if ($dst$$reg < 8) {
3126       if ($src$$reg < 8) {
3127         emit_opcode(cbuf, Assembler::REX_W);
3128       } else {
3129         emit_opcode(cbuf, Assembler::REX_WB);
3130       }
3131     } else {
3132       if ($src$$reg < 8) {
3133         emit_opcode(cbuf, Assembler::REX_WR);
3134       } else {
3135         emit_opcode(cbuf, Assembler::REX_WRB);
3136       }
3137     }
3138   %}
3139 
3140   enc_class REX_reg_mem(rRegI reg, memory mem)
3141   %{
3142     if ($reg$$reg < 8) {
3143       if ($mem$$base < 8) {
3144         if ($mem$$index >= 8) {
3145           emit_opcode(cbuf, Assembler::REX_X);
3146         }
3147       } else {
3148         if ($mem$$index < 8) {
3149           emit_opcode(cbuf, Assembler::REX_B);
3150         } else {
3151           emit_opcode(cbuf, Assembler::REX_XB);
3152         }
3153       }
3154     } else {
3155       if ($mem$$base < 8) {
3156         if ($mem$$index < 8) {
3157           emit_opcode(cbuf, Assembler::REX_R);
3158         } else {
3159           emit_opcode(cbuf, Assembler::REX_RX);
3160         }
3161       } else {
3162         if ($mem$$index < 8) {
3163           emit_opcode(cbuf, Assembler::REX_RB);
3164         } else {
3165           emit_opcode(cbuf, Assembler::REX_RXB);
3166         }
3167       }
3168     }
3169   %}
3170 
3171   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3172   %{
3173     if ($reg$$reg < 8) {
3174       if ($mem$$base < 8) {
3175         if ($mem$$index < 8) {
3176           emit_opcode(cbuf, Assembler::REX_W);
3177         } else {
3178           emit_opcode(cbuf, Assembler::REX_WX);
3179         }
3180       } else {
3181         if ($mem$$index < 8) {
3182           emit_opcode(cbuf, Assembler::REX_WB);
3183         } else {
3184           emit_opcode(cbuf, Assembler::REX_WXB);
3185         }
3186       }
3187     } else {
3188       if ($mem$$base < 8) {
3189         if ($mem$$index < 8) {
3190           emit_opcode(cbuf, Assembler::REX_WR);
3191         } else {
3192           emit_opcode(cbuf, Assembler::REX_WRX);
3193         }
3194       } else {
3195         if ($mem$$index < 8) {
3196           emit_opcode(cbuf, Assembler::REX_WRB);
3197         } else {
3198           emit_opcode(cbuf, Assembler::REX_WRXB);
3199         }
3200       }
3201     }
3202   %}
3203 
3204   enc_class reg_mem(rRegI ereg, memory mem)
3205   %{
3206     // High registers handle in encode_RegMem
3207     int reg = $ereg$$reg;
3208     int base = $mem$$base;
3209     int index = $mem$$index;
3210     int scale = $mem$$scale;
3211     int disp = $mem$$disp;
3212     bool disp_is_oop = $mem->disp_is_oop();
3213 
3214     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3215   %}
3216 
3217   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3218   %{
3219     int rm_byte_opcode = $rm_opcode$$constant;
3220 
3221     // High registers handle in encode_RegMem
3222     int base = $mem$$base;
3223     int index = $mem$$index;
3224     int scale = $mem$$scale;
3225     int displace = $mem$$disp;
3226 
3227     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3228                                             // working with static
3229                                             // globals
3230     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3231                   disp_is_oop);
3232   %}
3233 
3234   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3235   %{
3236     int reg_encoding = $dst$$reg;
3237     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3238     int index        = 0x04;            // 0x04 indicates no index
3239     int scale        = 0x00;            // 0x00 indicates no scale
3240     int displace     = $src1$$constant; // 0x00 indicates no displacement
3241     bool disp_is_oop = false;
3242     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3243                   disp_is_oop);
3244   %}
3245 
3246   enc_class neg_reg(rRegI dst)
3247   %{
3248     int dstenc = $dst$$reg;
3249     if (dstenc >= 8) {
3250       emit_opcode(cbuf, Assembler::REX_B);
3251       dstenc -= 8;
3252     }
3253     // NEG $dst
3254     emit_opcode(cbuf, 0xF7);
3255     emit_rm(cbuf, 0x3, 0x03, dstenc);
3256   %}
3257 
3258   enc_class neg_reg_wide(rRegI dst)
3259   %{
3260     int dstenc = $dst$$reg;
3261     if (dstenc < 8) {
3262       emit_opcode(cbuf, Assembler::REX_W);
3263     } else {
3264       emit_opcode(cbuf, Assembler::REX_WB);
3265       dstenc -= 8;
3266     }
3267     // NEG $dst
3268     emit_opcode(cbuf, 0xF7);
3269     emit_rm(cbuf, 0x3, 0x03, dstenc);
3270   %}
3271 
3272   enc_class setLT_reg(rRegI dst)
3273   %{
3274     int dstenc = $dst$$reg;
3275     if (dstenc >= 8) {
3276       emit_opcode(cbuf, Assembler::REX_B);
3277       dstenc -= 8;
3278     } else if (dstenc >= 4) {
3279       emit_opcode(cbuf, Assembler::REX);
3280     }
3281     // SETLT $dst
3282     emit_opcode(cbuf, 0x0F);
3283     emit_opcode(cbuf, 0x9C);
3284     emit_rm(cbuf, 0x3, 0x0, dstenc);
3285   %}
3286 
3287   enc_class setNZ_reg(rRegI dst)
3288   %{
3289     int dstenc = $dst$$reg;
3290     if (dstenc >= 8) {
3291       emit_opcode(cbuf, Assembler::REX_B);
3292       dstenc -= 8;
3293     } else if (dstenc >= 4) {
3294       emit_opcode(cbuf, Assembler::REX);
3295     }
3296     // SETNZ $dst
3297     emit_opcode(cbuf, 0x0F);
3298     emit_opcode(cbuf, 0x95);
3299     emit_rm(cbuf, 0x3, 0x0, dstenc);
3300   %}
3301 
3302   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3303                        rcx_RegI tmp)
3304   %{
3305     // cadd_cmpLT
3306 
3307     int tmpReg = $tmp$$reg;
3308 
3309     int penc = $p$$reg;
3310     int qenc = $q$$reg;
3311     int yenc = $y$$reg;
3312 
3313     // subl $p,$q
3314     if (penc < 8) {
3315       if (qenc >= 8) {
3316         emit_opcode(cbuf, Assembler::REX_B);
3317       }
3318     } else {
3319       if (qenc < 8) {
3320         emit_opcode(cbuf, Assembler::REX_R);
3321       } else {
3322         emit_opcode(cbuf, Assembler::REX_RB);
3323       }
3324     }
3325     emit_opcode(cbuf, 0x2B);
3326     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3327 
3328     // sbbl $tmp, $tmp
3329     emit_opcode(cbuf, 0x1B);
3330     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3331 
3332     // andl $tmp, $y
3333     if (yenc >= 8) {
3334       emit_opcode(cbuf, Assembler::REX_B);
3335     }
3336     emit_opcode(cbuf, 0x23);
3337     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3338 
3339     // addl $p,$tmp
3340     if (penc >= 8) {
3341         emit_opcode(cbuf, Assembler::REX_R);
3342     }
3343     emit_opcode(cbuf, 0x03);
3344     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3345   %}
3346 
  // Compare the longs and set -1, 0, or 1 into dst.
  // Emitted sequence: cmpq src1, src2; movl dst, -1; jl,s done;
  // setne dst; movzbl dst, dst.  The short branch skips the last two
  // instructions, so its displacement must equal their combined length.
  enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
  %{
    int src1enc = $src1$$reg;
    int src2enc = $src2$$reg;
    int dstenc = $dst$$reg;

    // cmpq $src1, $src2
    // REX.W is always needed; R extends src1 (reg field), B extends src2.
    if (src1enc < 8) {
      if (src2enc < 8) {
        emit_opcode(cbuf, Assembler::REX_W);
      } else {
        emit_opcode(cbuf, Assembler::REX_WB);
      }
    } else {
      if (src2enc < 8) {
        emit_opcode(cbuf, Assembler::REX_WR);
      } else {
        emit_opcode(cbuf, Assembler::REX_WRB);
      }
    }
    emit_opcode(cbuf, 0x3B);
    emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);

    // movl $dst, -1
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
    emit_d32(cbuf, -1);

    // jl,s done
    // setne and movzbl below are 3 bytes each, or 4 each when they carry a
    // REX prefix (dstenc >= 4), hence a skip distance of 6 or 8.
    emit_opcode(cbuf, 0x7C);
    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);

    // setne $dst
    // Byte-register access: bare REX for encodings 4-7, REX.B for 8 and up.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x95);
    emit_opcode(cbuf, 0xC0 | (dstenc & 7));

    // movzbl $dst, $dst
    // dst appears in both the reg and r/m fields, so a high register needs
    // both R and B.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0xB6);
    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
  %}
3398 
  // Moves an x87 result into the XMM register $dst through the stack slot at
  // [RSP]: FSTP to [RSP], load $dst from [RSP], then add 8 to RSP.
  enc_class Push_ResultXD(regD dst) %{
    int dstenc = $dst$$reg;

    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]

    // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    // REX.R extends the reg field for a high XMM register.
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_R);
    }
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
    encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);

    // add rsp,8
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf,0x83);
    emit_rm(cbuf,0x3, 0x0, RSP_enc);
    emit_d8(cbuf,0x08);
  %}
3419 
  // Pushes the XMM register $src onto the x87 stack through memory: subtract
  // 8 from RSP, store $src to [RSP], then load [RSP] onto the x87 stack.
  // RSP is left adjusted; this block does not restore it.
  enc_class Push_SrcXD(regD src) %{
    int srcenc = $src$$reg;

    // subq rsp,#8
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
    emit_d8(cbuf, 0x8);

    // movsd [rsp],src
    emit_opcode(cbuf, 0xF2);
    // REX.R extends the reg field for a high XMM register.
    if (srcenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_R);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x11);
    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);

    // fldd [rsp]
    // NOTE(review): a 0x66 prefix byte is emitted ahead of the 0xDD opcode;
    // the reason is not visible here -- confirm before changing, since any
    // size() declared by users of this encoding would depend on it.
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0xDD);
    encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
  %}
3443 
3444 
3445   enc_class movq_ld(regD dst, memory mem) %{
3446     MacroAssembler _masm(&cbuf);
3447     Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
3448     __ movq(as_XMMRegister($dst$$reg), madr);
3449   %}
3450 
3451   enc_class movq_st(memory mem, regD src) %{
3452     MacroAssembler _masm(&cbuf);
3453     Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
3454     __ movq(madr, as_XMMRegister($src$$reg));
3455   %}
3456 
3457   enc_class pshufd_8x8(regF dst, regF src) %{
3458     MacroAssembler _masm(&cbuf);
3459 
3460     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3461     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3462     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3463   %}
3464 
3465   enc_class pshufd_4x16(regF dst, regF src) %{
3466     MacroAssembler _masm(&cbuf);
3467 
3468     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3469   %}
3470 
3471   enc_class pshufd(regD dst, regD src, int mode) %{
3472     MacroAssembler _masm(&cbuf);
3473 
3474     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3475   %}
3476 
3477   enc_class pxor(regD dst, regD src) %{
3478     MacroAssembler _masm(&cbuf);
3479 
3480     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3481   %}
3482 
3483   enc_class mov_i2x(regD dst, rRegI src) %{
3484     MacroAssembler _masm(&cbuf);
3485 
3486     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3487   %}
3488 
3489   // obj: object to lock
3490   // box: box address (header location) -- killed
3491   // tmp: rax -- killed
3492   // scr: rbx -- killed
3493   //
3494   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3495   // from i486.ad.  See that file for comments.
3496   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3497   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3498 
3499 
3500   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3501   %{
         // Emits the inline monitor-enter sequence for $obj, using $box as the
         // on-stack lock record.  EmitSync picks one of three code shapes at the
         // time the code is generated:
         //   EmitSync & 1 : no lock attempt; only marks the box and compares rsp with 0
         //   EmitSync & 2 : stack-locking only (after biased locking, if enabled)
         //   otherwise    : stack-locking plus an inline attempt on an inflated monitor
         // NOTE(review): the outcome looks like it is reported through ZF, the way
         // the Fast_Unlock comments describe it (ZF=1 success, ZF=0 failure) --
         // confirm against the instruct that consumes this encoding.
3502     Register objReg = as_Register((int)$obj$$reg);
3503     Register boxReg = as_Register((int)$box$$reg);
3504     Register tmpReg = as_Register($tmp$$reg);
3505     Register scrReg = as_Register($scr$$reg);
3506     MacroAssembler masm(&cbuf);
3507 
3508     // Verify uniqueness of register assignments -- necessary but not sufficient
3509     assert (objReg != boxReg && objReg != tmpReg &&
3510             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3511 
         // Optional statistics: bump the total-entry counter on every pass.
3512     if (_counters != NULL) {
3513       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3514     }
3515     if (EmitSync & 1) {
             // Store unused_mark into the box, then compare rsp with 0
             // (presumably always "not equal", i.e. ZF=0 -- confirm).
3516         masm.movptr (Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())) ;
3517         masm.cmpq   (rsp, 0) ;
3518     } else
3519     if (EmitSync & 2) {
3520         Label DONE_LABEL;
3521         if (UseBiasedLocking) {
3522            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3523           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3524         }
             // tmp = (mark word | 1); keep that value in the box, then try to
             // swing the object's mark word from tmp to the box address.
3525         masm.movl(tmpReg, 0x1);
3526         masm.orq(tmpReg, Address(objReg, 0));
3527         masm.movq(Address(boxReg, 0), tmpReg);
3528         if (os::is_MP()) {
3529           masm.lock();
3530         }
3531         masm.cmpxchgq(boxReg, Address(objReg, 0)); // Updates tmpReg
3532         masm.jcc(Assembler::equal, DONE_LABEL);
3533 
3534         // Recursive locking
             // (mark - rsp) & (7 - page_size) is what gets stored in the box;
             // the andq also sets the flags seen at DONE_LABEL.
3535         masm.subq(tmpReg, rsp);
3536         masm.andq(tmpReg, 7 - os::vm_page_size());
3537         masm.movq(Address(boxReg, 0), tmpReg);
3538 
3539         masm.bind(DONE_LABEL);
3540         masm.nop(); // avoid branch to branch
3541     } else {
3542         Label DONE_LABEL, IsInflated, Egress;
3543 
             // Triage on the 0x02 bit of the mark word.
3544         masm.movq  (tmpReg, Address(objReg, 0)) ;
3545         masm.testq (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3546         masm.jcc   (Assembler::notZero, IsInflated) ;
3547 
3548         // it's stack-locked, biased or neutral
3549         // TODO: optimize markword triage order to reduce the number of
3550         // conditional branches in the most common cases.
3551         // Beware -- there's a subtle invariant that fetch of the markword
3552         // at [FETCH], below, will never observe a biased encoding (*101b).
3553         // If this invariant is not held we'll suffer exclusion (safety) failure.
3554 
3555         if (UseBiasedLocking) {
3556           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3557           masm.movq  (tmpReg, Address(objReg, 0)) ;        // [FETCH]
3558         }
3559 
             // Same stack-lock attempt as the EmitSync & 2 shape above, with
             // optional fast-path statistics.
3560         masm.orq   (tmpReg, 1) ;
3561         masm.movq  (Address(boxReg, 0), tmpReg) ;
3562         if (os::is_MP()) { masm.lock(); }
3563         masm.cmpxchgq(boxReg, Address(objReg, 0)); // Updates tmpReg
3564         if (_counters != NULL) {
3565            masm.cond_inc32(Assembler::equal,
3566                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3567         }
3568         masm.jcc   (Assembler::equal, DONE_LABEL);
3569 
3570         // Recursive locking
3571         masm.subq  (tmpReg, rsp);
3572         masm.andq  (tmpReg, 7 - os::vm_page_size());
3573         masm.movq  (Address(boxReg, 0), tmpReg);
3574         if (_counters != NULL) {
3575            masm.cond_inc32(Assembler::equal,
3576                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3577         }
3578         masm.jmp   (DONE_LABEL) ;
3579 
3580         masm.bind  (IsInflated) ;
3581         // It's inflated
3582 
3583         // TODO: someday avoid the ST-before-CAS penalty by
3584         // relocating (deferring) the following ST.
3585         // We should also think about trying a CAS without having
3586         // fetched _owner.  If the CAS is successful we may
3587         // avoid an RTO->RTS upgrade on the $line.
3588         masm.movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())) ;
3589 
             // From here on boxReg holds the mark word just read; the "-2" in
             // the offsets presumably cancels the 0x02 bit tested above.
             // A non-zero owner field exits with ZF=0 from the testq.
3590         masm.movq  (boxReg, tmpReg) ;
3591         masm.movq  (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3592         masm.testq (tmpReg, tmpReg) ;
3593         masm.jcc   (Assembler::notZero, DONE_LABEL) ;
3594 
3595         // It's inflated and appears unlocked
             // tmpReg is zero at this point; try to install r15_thread as the owner.
3596         if (os::is_MP()) { masm.lock(); }
3597         masm.cmpxchgq(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3598         // Intentional fall-through into DONE_LABEL ...
3599 
3600         masm.bind  (DONE_LABEL) ;
3601         masm.nop   () ;                 // avoid jmp to jmp
3602     }
3603   %}
3604 
3605   // obj: object to unlock
3606   // box: box address (displaced header location), killed
3607   // RBX: killed tmp; cannot be obj nor box
3608   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3609   %{
         // Emits the inline monitor-exit sequence for $obj.  $box (RAX) holds the
         // address of the on-stack lock record and is overwritten; $tmp is scratch.
         // EmitSync picks one of three code shapes when the code is generated:
         //   EmitSync & 4 : no unlock attempt; only compares rsp with 0
         //   EmitSync & 8 : stack-unlock only (after biased-locking exit, if enabled)
         //   otherwise    : stack-unlock plus an inline exit from an inflated monitor
         // The comments at LGoSlowPath/LSuccess below give the flag convention:
         // ZF=1 means success, ZF=0 means failure.
3610 
3611     Register objReg = as_Register($obj$$reg);
3612     Register boxReg = as_Register($box$$reg);
3613     Register tmpReg = as_Register($tmp$$reg);
3614     MacroAssembler masm(&cbuf);
3615 
3616     if (EmitSync & 4) {
3617        masm.cmpq  (rsp, 0) ;
3618     } else
3619     if (EmitSync & 8) {
3620        Label DONE_LABEL;
3621        if (UseBiasedLocking) {
3622          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3623        }
3624 
3625        // Check whether the displaced header is 0
3626        //(=> recursive unlock)
3627        masm.movq(tmpReg, Address(boxReg, 0));
3628        masm.testq(tmpReg, tmpReg);
3629        masm.jcc(Assembler::zero, DONE_LABEL);
3630 
3631        // If not recursive lock, reset the header to displaced header
3632        if (os::is_MP()) {
3633          masm.lock();
3634        }
3635        masm.cmpxchgq(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3636        masm.bind(DONE_LABEL);
3637        masm.nop(); // avoid branch to branch
3638     } else {
3639        Label DONE_LABEL, Stacked, CheckSucc ;
3640 
3641        if (UseBiasedLocking) {
3642          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3643        }
3644 
            // A zero word in the box means a recursive unlock: leave with ZF=1.
            // Otherwise the 0x02 bit of the mark word separates the inflated
            // case from the stack-locked one.
3645        masm.movq  (tmpReg, Address(objReg, 0)) ;
3646        masm.cmpq  (Address(boxReg, 0), (int)NULL_WORD) ;
3647        masm.jcc   (Assembler::zero, DONE_LABEL) ;
3648        masm.testq (tmpReg, 0x02) ;
3649        masm.jcc   (Assembler::zero, Stacked) ;
3650 
3651        // It's inflated
            // box = (owner ^ current thread) | recursions; any non-zero bit
            // leaves with ZF=0.  Then cxq | EntryList decides whether the
            // successor check is needed; if both are zero the owner field is
            // cleared and ZF is still 1 from the orq.
3652        masm.movq  (boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3653        masm.xorq  (boxReg, r15_thread) ;
3654        masm.orq   (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3655        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
3656        masm.movq  (boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3657        masm.orq   (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3658        masm.jcc   (Assembler::notZero, CheckSucc) ;
3659        masm.mov64 (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int)NULL_WORD) ;
3660        masm.jmp   (DONE_LABEL) ;
3661 
            // With EmitSync & 65536 set this block is omitted and CheckSucc is
            // bound at DONE_LABEL instead (see the end of this encoding).
3662        if ((EmitSync & 65536) == 0) {
3663          Label LSuccess, LGoSlowPath ;
3664          masm.bind  (CheckSucc) ;
3665          masm.cmpq  (Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int)NULL_WORD) ;
3666          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3667 
3668          // I'd much rather use lock:andl m->_owner, 0 as it's faster than
3669          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3670          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3671          // are all faster when the write buffer is populated.
3672          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int)NULL_WORD) ;
3673          if (os::is_MP()) {
3674             masm.lock () ; masm.addq (Address(rsp, 0), 0) ;
3675          }
              // Re-check succ after clearing the owner field.
3676          masm.cmpq  (Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int)NULL_WORD) ;
3677          masm.jcc   (Assembler::notZero, LSuccess) ;
3678 
              // succ is zero: try to take ownership back (expected owner 0 in
              // RAX).  A failed CAS branches to LSuccess; a successful one
              // falls into the slow path.
3679          masm.movptr (boxReg, (int)NULL_WORD) ;                   // box is really RAX
3680          if (os::is_MP()) { masm.lock(); }
3681          masm.cmpxchgq (r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3682          masm.jcc   (Assembler::notEqual, LSuccess) ;
3683          // Intentional fall-through into slow-path
3684 
3685          masm.bind  (LGoSlowPath) ;
3686          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3687          masm.jmp   (DONE_LABEL) ;
3688 
3689          masm.bind  (LSuccess) ;
3690          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3691          masm.jmp   (DONE_LABEL) ;
3692        }
3693 
            // Stack-locked: put the word saved in the box back into the object's
            // mark word, expecting the mark to equal the box address (RAX).
3694        masm.bind  (Stacked) ;
3695        masm.movq  (tmpReg, Address (boxReg, 0)) ;      // re-fetch
3696        if (os::is_MP()) { masm.lock(); }
3697        masm.cmpxchgq(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3698 
3699        if (EmitSync & 65536) {
3700           masm.bind (CheckSucc) ;
3701        }
3702        masm.bind(DONE_LABEL);
3703        if (EmitSync & 32768) {
3704           masm.nop();                      // avoid branch to branch
3705        }
3706     }
3707   %}
3708 
3709   enc_class enc_String_Compare()
3710   %{
         // Emits an inline comparison of two java.lang.String objects whose
         // references are in rsi and rdi.  The registers are hard-wired: rax, rbx,
         // rcx, rdi and rsi are all overwritten, and one word is pushed on the
         // stack temporarily.  The result is left in rcx:
         //   - at the first differing position, (char of the rdi string) minus
         //     (char of the rsi string);
         //   - otherwise (count of the rdi string) minus (count of the rsi string).
         // CONT_LABEL is declared but not used, and RCX_GOOD_LABEL is bound but
         // never branched to within this block.
3711     Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3712           POP_LABEL, DONE_LABEL, CONT_LABEL,
3713           WHILE_HEAD_LABEL;
3714     MacroAssembler masm(&cbuf);
3715 
3716     // Get the first character position in both strings
3717     //         [8] char array, [12] offset, [16] count
3718     int value_offset  = java_lang_String::value_offset_in_bytes();
3719     int offset_offset = java_lang_String::offset_offset_in_bytes();
3720     int count_offset  = java_lang_String::count_offset_in_bytes();
3721     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3722 
         // rax = address of the first char of the rsi string,
         // rbx = address of the first char of the rdi string.
3723     masm.load_heap_oop(rax, Address(rsi, value_offset));
3724     masm.movl(rcx, Address(rsi, offset_offset));
3725     masm.leaq(rax, Address(rax, rcx, Address::times_2, base_offset));
3726     masm.load_heap_oop(rbx, Address(rdi, value_offset));
3727     masm.movl(rcx, Address(rdi, offset_offset));
3728     masm.leaq(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3729 
3730     // Compute the minimum of the string lengths(rsi) and the
3731     // difference of the string lengths (stack)
3732 
         // The cmovl consumes the flags of the subl; the pushq in between
         // leaves them untouched.
3733     masm.movl(rdi, Address(rdi, count_offset));
3734     masm.movl(rsi, Address(rsi, count_offset));
3735     masm.movl(rcx, rdi);
3736     masm.subl(rdi, rsi);
3737     masm.pushq(rdi);
3738     masm.cmovl(Assembler::lessEqual, rsi, rcx);
3739 
3740     // Is the minimum length zero?
3741     masm.bind(RCX_GOOD_LABEL);
3742     masm.testl(rsi, rsi);
3743     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3744 
3745     // Load first characters
3746     masm.load_unsigned_word(rcx, Address(rbx, 0));
3747     masm.load_unsigned_word(rdi, Address(rax, 0));
3748 
3749     // Compare first characters
3750     masm.subl(rcx, rdi);
3751     masm.jcc(Assembler::notZero,  POP_LABEL);
3752     masm.decrementl(rsi);
3753     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3754 
3755     {
3756       // Check after comparing first character to see if strings are equivalent
3757       Label LSkip2;
3758       // Check if the strings start at same location
3759       masm.cmpq(rbx, rax);
3760       masm.jcc(Assembler::notEqual, LSkip2);
3761 
3762       // Check if the length difference is zero (from stack)
3763       masm.cmpl(Address(rsp, 0), 0x0);
3764       masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
3765 
3766       // Strings might not be equivalent
3767       masm.bind(LSkip2);
3768     }
3769 
3770     // Shift RAX and RBX to the end of the arrays, negate min
         // The extra 2 bytes skip the first character, which was compared above;
         // rsi then counts up from -(remaining chars) to 0.
3771     masm.leaq(rax, Address(rax, rsi, Address::times_2, 2));
3772     masm.leaq(rbx, Address(rbx, rsi, Address::times_2, 2));
3773     masm.negq(rsi);
3774 
3775     // Compare the rest of the characters
3776     masm.bind(WHILE_HEAD_LABEL);
3777     masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
3778     masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
3779     masm.subl(rcx, rdi);
3780     masm.jcc(Assembler::notZero, POP_LABEL);
3781     masm.incrementq(rsi);
3782     masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3783 
3784     // Strings are equal up to min length.  Return the length difference.
3785     masm.bind(LENGTH_DIFF_LABEL);
3786     masm.popq(rcx);
3787     masm.jmp(DONE_LABEL);
3788 
3789     // Discard the stored length difference
3790     masm.bind(POP_LABEL);
3791     masm.addq(rsp, 8);
3792 
3793     // That's it
3794     masm.bind(DONE_LABEL);
3795   %}
3796 
3797   enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, rbx_RegI tmp2, rcx_RegI result) %{
         // Emits an inline equality test for two arrays; element offsets are
         // computed for T_CHAR (2-byte elements).  $result ends up 1 when the
         // arrays are the same reference or have equal length and contents, and 0
         // otherwise (including when exactly one reference is null).
         // $ary1 and $ary2 are overwritten by the compare loop; $tmp1 and $tmp2
         // are scratch.
3798     Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP;
3799     MacroAssembler masm(&cbuf);
3800 
3801     Register ary1Reg   = as_Register($ary1$$reg);
3802     Register ary2Reg   = as_Register($ary2$$reg);
3803     Register tmp1Reg   = as_Register($tmp1$$reg);
3804     Register tmp2Reg   = as_Register($tmp2$$reg);
3805     Register resultReg = as_Register($result$$reg);
3806 
3807     int length_offset  = arrayOopDesc::length_offset_in_bytes();
3808     int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3809 
3810     // Check the input args
3811     masm.cmpq(ary1Reg, ary2Reg);                        
3812     masm.jcc(Assembler::equal, TRUE_LABEL);
3813     masm.testq(ary1Reg, ary1Reg);                       
3814     masm.jcc(Assembler::zero, FALSE_LABEL);
3815     masm.testq(ary2Reg, ary2Reg);                       
3816     masm.jcc(Assembler::zero, FALSE_LABEL);
3817 
3818     // Check the lengths
3819     masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
3820     masm.movl(resultReg, Address(ary2Reg, length_offset));
3821     masm.cmpl(tmp2Reg, resultReg);
3822     masm.jcc(Assembler::notEqual, FALSE_LABEL);
3823     masm.testl(resultReg, resultReg);
3824     masm.jcc(Assembler::zero, TRUE_LABEL);
3825 
3826     // Get the number of 4 byte vectors to compare
3827     masm.shrl(resultReg, 1);
3828 
3829     // Check for odd-length arrays
3830     masm.andl(tmp2Reg, 1);
3831     masm.testl(tmp2Reg, tmp2Reg);
3832     masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
3833 
3834     // Compare 2-byte "tail" at end of arrays
         // With an odd length, the element at byte offset resultReg*4 is the last one.
3835     masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
3836     masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
3837     masm.cmpl(tmp1Reg, tmp2Reg);
3838     masm.jcc(Assembler::notEqual, FALSE_LABEL);
         // A single-element array has no 4-byte vectors left to compare.
3839     masm.testl(resultReg, resultReg);
3840     masm.jcc(Assembler::zero, TRUE_LABEL);
3841 
3842     // Setup compare loop
3843     masm.bind(COMPARE_LOOP_HDR);
3844     // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays
3845     masm.leaq(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
3846     masm.leaq(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
3847     masm.negq(resultReg);
3848 
3849     // 4-byte-wide compare loop
         // resultReg counts up from -(vector count) to 0; ary1Reg/ary2Reg are
         // reused here to hold the loaded data.
3850     masm.bind(COMPARE_LOOP);
3851     masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
3852     masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
3853     masm.cmpl(ary1Reg, ary2Reg);
3854     masm.jcc(Assembler::notEqual, FALSE_LABEL);
3855     masm.incrementq(resultReg);
3856     masm.jcc(Assembler::notZero, COMPARE_LOOP);
3857 
3858     masm.bind(TRUE_LABEL);
3859     masm.movl(resultReg, 1);   // return true
3860     masm.jmp(DONE_LABEL);
3861 
3862     masm.bind(FALSE_LABEL);
3863     masm.xorl(resultReg, resultReg); // return false
3864 
3865     // That's it
3866     masm.bind(DONE_LABEL);
3867   %}
3868 
3869   enc_class enc_rethrow()
3870   %{
         // Emits a 5-byte "jmp rel32" (opcode 0xE9) to OptoRuntime::rethrow_stub(),
         // tagged with a runtime-call relocation.  The displacement is taken
         // relative to the end of the instruction: code_end() already includes
         // the opcode byte, and the extra 4 accounts for the displacement itself.
3871     cbuf.set_inst_mark();
3872     emit_opcode(cbuf, 0xE9); // jmp entry
3873     emit_d32_reloc(cbuf,
3874                    (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
3875                    runtime_call_Relocation::spec(),
3876                    RELOC_DISP32);
3877   %}
3878 
3879   enc_class absF_encoding(regF dst)
3880   %{
         // Hand-encodes "andps $dst, [rip + disp32]" (0F 54 /r) with the memory
         // operand pointing at StubRoutines::amd64::float_sign_mask().
         // A REX.R prefix is emitted for register encodings 8 and above.
3881     int dstenc = $dst$$reg;
3882     address signmask_address = (address) StubRoutines::amd64::float_sign_mask();
3883 
3884     cbuf.set_inst_mark();
3885     if (dstenc >= 8) {
3886       emit_opcode(cbuf, Assembler::REX_R);
3887       dstenc -= 8;
3888     }
3889     // XXX reg_mem doesn't support RIP-relative addressing yet
3890     emit_opcode(cbuf, 0x0F);
3891     emit_opcode(cbuf, 0x54);
3892     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3893     emit_d32_reloc(cbuf, signmask_address);
3894   %}
3895 
3896   enc_class absD_encoding(regD dst)
3897   %{
         // Hand-encodes "andpd $dst, [rip + disp32]" (66 0F 54 /r) with the memory
         // operand pointing at StubRoutines::amd64::double_sign_mask().
         // The 0x66 prefix goes first; a REX.R prefix follows it for register
         // encodings 8 and above.
3898     int dstenc = $dst$$reg;
3899     address signmask_address = (address) StubRoutines::amd64::double_sign_mask();
3900 
3901     cbuf.set_inst_mark();
3902     emit_opcode(cbuf, 0x66);
3903     if (dstenc >= 8) {
3904       emit_opcode(cbuf, Assembler::REX_R);
3905       dstenc -= 8;
3906     }
3907     // XXX reg_mem doesn't support RIP-relative addressing yet
3908     emit_opcode(cbuf, 0x0F);
3909     emit_opcode(cbuf, 0x54);
3910     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3911     emit_d32_reloc(cbuf, signmask_address);
3912   %}
3913 
3914   enc_class negF_encoding(regF dst)
3915   %{
         // Hand-encodes "xorps $dst, [rip + disp32]" (0F 57 /r) with the memory
         // operand pointing at StubRoutines::amd64::float_sign_flip().
         // A REX.R prefix is emitted for register encodings 8 and above.
3916     int dstenc = $dst$$reg;
3917     address signflip_address = (address) StubRoutines::amd64::float_sign_flip();
3918 
3919     cbuf.set_inst_mark();
3920     if (dstenc >= 8) {
3921       emit_opcode(cbuf, Assembler::REX_R);
3922       dstenc -= 8;
3923     }
3924     // XXX reg_mem doesn't support RIP-relative addressing yet
3925     emit_opcode(cbuf, 0x0F);
3926     emit_opcode(cbuf, 0x57);
3927     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3928     emit_d32_reloc(cbuf, signflip_address);
3929   %}
3930 
3931   enc_class negD_encoding(regD dst)
3932   %{
         // Hand-encodes "xorpd $dst, [rip + disp32]" (66 0F 57 /r) with the memory
         // operand pointing at StubRoutines::amd64::double_sign_flip().
         // The 0x66 prefix goes first; a REX.R prefix follows it for register
         // encodings 8 and above.
3933     int dstenc = $dst$$reg;
3934     address signflip_address = (address) StubRoutines::amd64::double_sign_flip();
3935 
3936     cbuf.set_inst_mark();
3937     emit_opcode(cbuf, 0x66);
3938     if (dstenc >= 8) {
3939       emit_opcode(cbuf, Assembler::REX_R);
3940       dstenc -= 8;
3941     }
3942     // XXX reg_mem doesn't support RIP-relative addressing yet
3943     emit_opcode(cbuf, 0x0F);
3944     emit_opcode(cbuf, 0x57);
3945     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3946     emit_d32_reloc(cbuf, signflip_address);
3947   %}
3948 
3949   enc_class f2i_fixup(rRegI dst, regF src)
3950   %{
         // Emits the fix-up that follows a float-to-int conversion: when $dst
         // equals 0x80000000, $src is spilled to a fresh 8-byte stack slot,
         // StubRoutines::amd64::f2i_fixup() is called, and $dst is popped from that
         // slot.  Any other value of $dst jumps over the whole sequence.
         // NOTE(review): 0x80000000 is presumably the value the preceding
         // conversion produces for out-of-range or NaN inputs -- confirm against
         // the instruct that uses this encoding.
3951     int dstenc = $dst$$reg;
3952     int srcenc = $src$$reg;
3953 
3954     // cmpl $dst, #0x80000000
3955     if (dstenc >= 8) {
3956       emit_opcode(cbuf, Assembler::REX_B);
3957     }
3958     emit_opcode(cbuf, 0x81);
3959     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3960     emit_d32(cbuf, 0x80000000);
3961 
3962     // jne,s done
         // The displacement is the byte size of everything emitted below:
         // subq (4) + movss (5, or 6 with REX) + call (5) + popq (1, or 2 with REX),
         // i.e. 15, 16 or 17 bytes.
3963     emit_opcode(cbuf, 0x75);
3964     if (srcenc < 8 && dstenc < 8) {
3965       emit_d8(cbuf, 0xF);
3966     } else if (srcenc >= 8 && dstenc >= 8) {
3967       emit_d8(cbuf, 0x11);
3968     } else {
3969       emit_d8(cbuf, 0x10);
3970     }
3971 
3972     // subq rsp, #8
3973     emit_opcode(cbuf, Assembler::REX_W);
3974     emit_opcode(cbuf, 0x83);
3975     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3976     emit_d8(cbuf, 8);
3977 
3978     // movss [rsp], $src
3979     emit_opcode(cbuf, 0xF3);
3980     if (srcenc >= 8) {
3981       emit_opcode(cbuf, Assembler::REX_R);
3982     }
3983     emit_opcode(cbuf, 0x0F);
3984     emit_opcode(cbuf, 0x11);
3985     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3986 
3987     // call f2i_fixup
         // rel32 is relative to the end of the call instruction (hence the "- 4").
3988     cbuf.set_inst_mark();
3989     emit_opcode(cbuf, 0xE8);
3990     emit_d32_reloc(cbuf,
3991                    (int)
3992                    (StubRoutines::amd64::f2i_fixup() - cbuf.code_end() - 4),
3993                    runtime_call_Relocation::spec(),
3994                    RELOC_DISP32);
3995 
3996     // popq $dst
3997     if (dstenc >= 8) {
3998       emit_opcode(cbuf, Assembler::REX_B);
3999     }
4000     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4001 
4002     // done:
4003   %}
4004 
4005   enc_class f2l_fixup(rRegL dst, regF src)
4006   %{
         // Emits the fix-up that follows a float-to-long conversion: $dst is
         // compared with the 64-bit constant stored at
         // StubRoutines::amd64::double_sign_flip(); on equality $src is spilled to
         // a fresh 8-byte stack slot, StubRoutines::amd64::f2l_fixup() is called,
         // and $dst is popped from that slot.  Otherwise the sequence is skipped.
4007     int dstenc = $dst$$reg;
4008     int srcenc = $src$$reg;
4009     address const_address = (address) StubRoutines::amd64::double_sign_flip();
4010 
4011     // cmpq $dst, [0x8000000000000000]
4012     cbuf.set_inst_mark();
4013     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4014     emit_opcode(cbuf, 0x39);
4015     // XXX reg_mem doesn't support RIP-relative addressing yet
4016     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4017     emit_d32_reloc(cbuf, const_address);
4018 
4019 
4020     // jne,s done
         // The displacement is the byte size of everything emitted below:
         // subq (4) + movss (5, or 6 with REX) + call (5) + popq (1, or 2 with REX),
         // i.e. 15, 16 or 17 bytes.
4021     emit_opcode(cbuf, 0x75);
4022     if (srcenc < 8 && dstenc < 8) {
4023       emit_d8(cbuf, 0xF);
4024     } else if (srcenc >= 8 && dstenc >= 8) {
4025       emit_d8(cbuf, 0x11);
4026     } else {
4027       emit_d8(cbuf, 0x10);
4028     }
4029 
4030     // subq rsp, #8
4031     emit_opcode(cbuf, Assembler::REX_W);
4032     emit_opcode(cbuf, 0x83);
4033     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4034     emit_d8(cbuf, 8);
4035 
4036     // movss [rsp], $src
4037     emit_opcode(cbuf, 0xF3);
4038     if (srcenc >= 8) {
4039       emit_opcode(cbuf, Assembler::REX_R);
4040     }
4041     emit_opcode(cbuf, 0x0F);
4042     emit_opcode(cbuf, 0x11);
4043     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4044 
4045     // call f2l_fixup
         // rel32 is relative to the end of the call instruction (hence the "- 4").
4046     cbuf.set_inst_mark();
4047     emit_opcode(cbuf, 0xE8);
4048     emit_d32_reloc(cbuf,
4049                    (int)
4050                    (StubRoutines::amd64::f2l_fixup() - cbuf.code_end() - 4),
4051                    runtime_call_Relocation::spec(),
4052                    RELOC_DISP32);
4053 
4054     // popq $dst
4055     if (dstenc >= 8) {
4056       emit_opcode(cbuf, Assembler::REX_B);
4057     }
4058     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4059 
4060     // done:
4061   %}
4062 
4063   enc_class d2i_fixup(rRegI dst, regD src)
4064   %{
         // Emits the fix-up that follows a double-to-int conversion: when $dst
         // equals 0x80000000, $src is spilled to a fresh 8-byte stack slot,
         // StubRoutines::amd64::d2i_fixup() is called, and $dst is popped from that
         // slot.  Any other value of $dst jumps over the whole sequence.
         // NOTE(review): 0x80000000 is presumably the value the preceding
         // conversion produces for out-of-range or NaN inputs -- confirm against
         // the instruct that uses this encoding.
4065     int dstenc = $dst$$reg;
4066     int srcenc = $src$$reg;
4067 
4068     // cmpl $dst, #0x80000000
4069     if (dstenc >= 8) {
4070       emit_opcode(cbuf, Assembler::REX_B);
4071     }
4072     emit_opcode(cbuf, 0x81);
4073     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4074     emit_d32(cbuf, 0x80000000);
4075 
4076     // jne,s done
         // The displacement is the byte size of everything emitted below:
         // subq (4) + movsd (5, or 6 with REX) + call (5) + popq (1, or 2 with REX),
         // i.e. 15, 16 or 17 bytes.
4077     emit_opcode(cbuf, 0x75);
4078     if (srcenc < 8 && dstenc < 8) {
4079       emit_d8(cbuf, 0xF);
4080     } else if (srcenc >= 8 && dstenc >= 8) {
4081       emit_d8(cbuf, 0x11);
4082     } else {
4083       emit_d8(cbuf, 0x10);
4084     }
4085 
4086     // subq rsp, #8
4087     emit_opcode(cbuf, Assembler::REX_W);
4088     emit_opcode(cbuf, 0x83);
4089     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4090     emit_d8(cbuf, 8);
4091 
4092     // movsd [rsp], $src
4093     emit_opcode(cbuf, 0xF2);
4094     if (srcenc >= 8) {
4095       emit_opcode(cbuf, Assembler::REX_R);
4096     }
4097     emit_opcode(cbuf, 0x0F);
4098     emit_opcode(cbuf, 0x11);
4099     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4100 
4101     // call d2i_fixup
         // rel32 is relative to the end of the call instruction (hence the "- 4").
4102     cbuf.set_inst_mark();
4103     emit_opcode(cbuf, 0xE8);
4104     emit_d32_reloc(cbuf,
4105                    (int)
4106                    (StubRoutines::amd64::d2i_fixup() - cbuf.code_end() - 4),
4107                    runtime_call_Relocation::spec(),
4108                    RELOC_DISP32);
4109 
4110     // popq $dst
4111     if (dstenc >= 8) {
4112       emit_opcode(cbuf, Assembler::REX_B);
4113     }
4114     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4115 
4116     // done:
4117   %}
4118 
4119   enc_class d2l_fixup(rRegL dst, regD src)
4120   %{
         // Emits the fix-up that follows a double-to-long conversion: $dst is
         // compared with the 64-bit constant stored at
         // StubRoutines::amd64::double_sign_flip(); on equality $src is spilled to
         // a fresh 8-byte stack slot, StubRoutines::amd64::d2l_fixup() is called,
         // and $dst is popped from that slot.  Otherwise the sequence is skipped.
4121     int dstenc = $dst$$reg;
4122     int srcenc = $src$$reg;
4123     address const_address = (address) StubRoutines::amd64::double_sign_flip();
4124 
4125     // cmpq $dst, [0x8000000000000000]
4126     cbuf.set_inst_mark();
4127     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4128     emit_opcode(cbuf, 0x39);
4129     // XXX reg_mem doesn't support RIP-relative addressing yet
4130     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4131     emit_d32_reloc(cbuf, const_address);
4132 
4133 
4134     // jne,s done
         // The displacement is the byte size of everything emitted below:
         // subq (4) + movsd (5, or 6 with REX) + call (5) + popq (1, or 2 with REX),
         // i.e. 15, 16 or 17 bytes.
4135     emit_opcode(cbuf, 0x75);
4136     if (srcenc < 8 && dstenc < 8) {
4137       emit_d8(cbuf, 0xF);
4138     } else if (srcenc >= 8 && dstenc >= 8) {
4139       emit_d8(cbuf, 0x11);
4140     } else {
4141       emit_d8(cbuf, 0x10);
4142     }
4143 
4144     // subq rsp, #8
4145     emit_opcode(cbuf, Assembler::REX_W);
4146     emit_opcode(cbuf, 0x83);
4147     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4148     emit_d8(cbuf, 8);
4149 
4150     // movsd [rsp], $src
4151     emit_opcode(cbuf, 0xF2);
4152     if (srcenc >= 8) {
4153       emit_opcode(cbuf, Assembler::REX_R);
4154     }
4155     emit_opcode(cbuf, 0x0F);
4156     emit_opcode(cbuf, 0x11);
4157     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4158 
4159     // call d2l_fixup
         // rel32 is relative to the end of the call instruction (hence the "- 4").
4160     cbuf.set_inst_mark();
4161     emit_opcode(cbuf, 0xE8);
4162     emit_d32_reloc(cbuf,
4163                    (int)
4164                    (StubRoutines::amd64::d2l_fixup() - cbuf.code_end() - 4),
4165                    runtime_call_Relocation::spec(),
4166                    RELOC_DISP32);
4167 
4168     // popq $dst
4169     if (dstenc >= 8) {
4170       emit_opcode(cbuf, Assembler::REX_B);
4171     }
4172     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4173 
4174     // done:
4175   %}
4176 
4177   enc_class enc_membar_acquire
4178   %{
         // Emits nothing: the barrier code below is commented out.
4179     // [jk] not needed currently; if you enable this and it really
4180     // emits code, don't forget to remove the "size(0)" line in
4181     // membar_acquire()
4182     // MacroAssembler masm(&cbuf);
4183     // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
4184     //                                         Assembler::LoadLoad));
4185   %}
4186 
4187   enc_class enc_membar_release
4188   %{
         // Emits nothing: the barrier code below is commented out.
4189     // [jk] not needed currently; if you enable this and it really
4190     // emits code, don't forget to remove the "size(0)" line in
4191     // membar_release()
4192     // MacroAssembler masm(&cbuf);
4193     // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
4194     //                                         Assembler::StoreStore));
4195   %}
4196 
4197   enc_class enc_membar_volatile
4198   %{
4199     // Ask the macro assembler for a barrier covering StoreStore and StoreLoad ordering.
4200     MacroAssembler fence(&cbuf);
4201     fence.membar(Assembler::Membar_mask_bits(Assembler::StoreStore | Assembler::StoreLoad));
4202   %}
4203 
4204   // Safepoint Poll.  This polls the safepoint page, and causes an
4205   // exception if it is not readable. Unfortunately, it kills
4206   // RFLAGS in the process.
4207   enc_class enc_safepoint_poll
4208   %{
         // Hand-encodes a 6-byte RIP-relative "testl" against the address returned
         // by os::get_polling_page().  A poll_type relocation is recorded at the
         // instruction mark, i.e. at the first byte of the instruction.
4209     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4210     // XXX reg_mem doesn't support RIP-relative addressing yet
4211     cbuf.set_inst_mark();
4212     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
4213     emit_opcode(cbuf, 0x85); // testl
4214     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4215     // cbuf.inst_mark() is beginning of instruction
4216     emit_d32_reloc(cbuf, os::get_polling_page());
4217 //                    relocInfo::poll_type,
4218   %}
4219 %}
4220 
4221 
4222 
4223 //----------FRAME--------------------------------------------------------------
4224 // Definition of frame structure and management information.
4225 //
4226 //  S T A C K   L A Y O U T    Allocators stack-slot number
4227 //                             |   (to get allocators register number
4228 //  G  Owned by    |        |  v    add OptoReg::stack0())
4229 //  r   CALLER     |        |
4230 //  o     |        +--------+      pad to even-align allocators stack-slot
4231 //  w     V        |  pad0  |        numbers; owned by CALLER
4232 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4233 //  h     ^        |   in   |  5
4234 //        |        |  args  |  4   Holes in incoming args owned by SELF
4235 //  |     |        |        |  3
4236 //  |     |        +--------+
4237 //  V     |        | old out|      Empty on Intel, window on Sparc
4238 //        |    old |preserve|      Must be even aligned.
4239 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4240 //        |        |   in   |  3   area for Intel ret address
4241 //     Owned by    |preserve|      Empty on Sparc.
4242 //       SELF      +--------+
4243 //        |        |  pad2  |  2   pad to align old SP
4244 //        |        +--------+  1
4245 //        |        | locks  |  0
4246 //        |        +--------+----> OptoReg::stack0(), even aligned
4247 //        |        |  pad1  | 11   pad to align new SP
4248 //        |        +--------+
4249 //        |        |        | 10
4250 //        |        | spills |  9   spills
4251 //        V        |        |  8   (pad0 slot for callee)
4252 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4253 //        ^        |  out   |  7
4254 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4255 //     Owned by    +--------+
4256 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4257 //        |    new |preserve|      Must be even-aligned.
4258 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4259 //        |        |        |
4260 //
4261 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4262 //         known from SELF's arguments and the Java calling convention.
4263 //         Region 6-7 is determined per call site.
4264 // Note 2: If the calling convention leaves holes in the incoming argument
4265 //         area, those holes are owned by SELF.  Holes in the outgoing area
4266 //         are owned by the CALLEE.  Holes should not be necessary in the
4267 //         incoming area, as the Java calling convention is completely under
4268 //         the control of the AD file.  Doubles can be sorted and packed to
4269 //         avoid holes.  Holes in the outgoing arguments may be necessary for
4270 //         varargs C calling conventions.
4271 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4272 //         even aligned with pad0 as needed.
4273 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4274 //         region 6-11 is even aligned; it may be padded out more so that
4275 //         the region from SP to FP meets the minimum stack alignment.
4276 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4277 //         alignment.  Region 11, pad1, may be dynamically extended so that
4278 //         SP meets the minimum alignment.
4279 
4280 frame
4281 %{
4282   // What direction does stack grow in (assumed to be same for C & Java)
4283   stack_direction(TOWARDS_LOW);
4284
4285   // These two registers define part of the calling convention
4286   // between compiled code and the interpreter.
4287   inline_cache_reg(RAX);                // Inline Cache Register
4288   interpreter_method_oop_reg(RBX);      // Method Oop Register when
4289                                         // calling interpreter
4290
4291   // Optional: name the operand used by cisc-spilling to access
4292   // [stack_pointer + offset]
4293   cisc_spilling_operand_name(indOffset32);
4294
4295   // Number of stack slots consumed by locking an object
4296   sync_stack_slots(2);
4297
4298   // Compiled code's Frame Pointer
4299   frame_pointer(RSP);
4300
4301   // Interpreter stores its frame pointer in a register which is
4302   // stored to the stack by I2CAdaptors.
4303   // I2CAdaptors convert from interpreted java to compiled java.
4304   interpreter_frame_pointer(RBP);
4305
4306   // Stack alignment requirement
4307   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4308
4309   // Number of stack slots between incoming argument block and the start of
4310   // a new frame.  The PROLOG must add this many slots to the stack.  The
4311   // EPILOG must remove this many slots.  amd64 needs two slots for
4312   // return address.  NOTE(review): the value below is 4 slots (+2 when VerifyStackAtCalls is set); presumably the other 2 hold the saved RBP -- confirm.
4313   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4314
4315   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4316   // for calls to C.  Supports the var-args backing area for register parms.
4317   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4318
4319   // The after-PROLOG location of the return address.  Location of
4320   // return address specifies a type (REG or STACK) and a number
4321   // representing the register number (i.e. - use a register name) or
4322   // stack slot.
4323   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4324   // Otherwise, it is above the locks and verification slot and alignment word
4325   return_addr(STACK - 2 +
4326               round_to(2 + 2 * VerifyStackAtCalls +
4327                        Compile::current()->fixed_slots(),
4328                        WordsPerLong * 2));
4329
4330   // Body of function which returns an integer array locating
4331   // arguments either in registers or in stack slots.  Passed an array
4332   // of ideal registers called "sig" and a "length" count.  Stack-slot
4333   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4334   // arguments for a CALLEE.  Incoming stack arguments are
4335   // automatically biased by the preserve_stack_slots field above.
4336
4337   calling_convention
4338   %{
4339     // Incoming and outgoing conventions do not differ, so just pass false
4340     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4341   %}
4342
4343   c_calling_convention
4344   %{
4345     // This is obviously always outgoing; the call's return value is discarded
4346     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4347   %}
4348
4349   // Location of compiled Java return values.  Same as C for now.
4350   return_value
4351   %{
4352     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4353            "only return normal values");
4354     // lo[] and hi[] are both indexed by ideal_reg; the two leading 0 entries are filler.
4355     static const int lo[Op_RegL + 1] = {
4356       0,
4357       0,
4358       RAX_num,  // Op_RegN
4359       RAX_num,  // Op_RegI
4360       RAX_num,  // Op_RegP
4361       XMM0_num, // Op_RegF
4362       XMM0_num, // Op_RegD
4363       RAX_num   // Op_RegL
4364     };
4365     static const int hi[Op_RegL + 1] = {
4366       0,
4367       0,
4368       OptoReg::Bad, // Op_RegN
4369       OptoReg::Bad, // Op_RegI
4370       RAX_H_num,    // Op_RegP
4371       OptoReg::Bad, // Op_RegF
4372       XMM0_H_num,   // Op_RegD
4373       RAX_H_num     // Op_RegL
4374     };
4375     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4376     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4377   %}
4378 %}
4379 
4380 //----------ATTRIBUTES---------------------------------------------------------
4381 //----------Operand Attributes-------------------------------------------------
4382 op_attrib op_cost(0);        // Required cost attribute (declared with value 0)
4383
4384 //----------Instruction Attributes---------------------------------------------
4385 ins_attrib ins_cost(100);       // Required cost attribute (declared with value 100)
4386 ins_attrib ins_size(8);         // Required size attribute (in bits)
4387 ins_attrib ins_pc_relative(0);  // Required PC Relative flag (declared with value 0)
4388 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4389                                 // a non-matching short branch variant
4390                                 // of some long branch?
4391 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4392                                 // be a power of 2) specifies the
4393                                 // alignment that some part of the
4394                                 // instruction (not necessarily the
4395                                 // start) requires.  If > 1, a
4396                                 // compute_padding() function must be
4397                                 // provided for the instruction
4398 
4399 //----------OPERANDS-----------------------------------------------------------
4400 // Operand definitions must precede instruction definitions for correct parsing
4401 // in the ADLC because operands constitute user defined types which are used in
4402 // instruction definitions.
4403 
4404 //----------Simple Operands----------------------------------------------------
4405 // Immediate Operands
4406 // Integer Immediate: matches every ConI node (there is no predicate).
4407 operand immI()
4408 %{
4409   match(ConI);
4410
4411   op_cost(10);
4412   format %{ %}
4413   interface(CONST_INTER);
4414 %}
4415 
4416 // Constant for test vs zero: a ConI whose value is exactly 0.
4417 operand immI0()
4418 %{
4419   predicate(n->get_int() == 0);
4420   match(ConI);
4421
4422   op_cost(0);
4423   format %{ %}
4424   interface(CONST_INTER);
4425 %}
4426 
4427 // Constant for increment: a ConI whose value is exactly 1.
4428 operand immI1()
4429 %{
4430   predicate(n->get_int() == 1);
4431   match(ConI);
4432
4433   op_cost(0);
4434   format %{ %}
4435   interface(CONST_INTER);
4436 %}
4437 
4438 // Constant for decrement: a ConI whose value is exactly -1.
4439 operand immI_M1()
4440 %{
4441   predicate(n->get_int() == -1);
4442   match(ConI);
4443
4444   op_cost(0);
4445   format %{ %}
4446   interface(CONST_INTER);
4447 %}
4448 
4449 // Valid scale values for addressing modes: a ConI in the range 0..3.
4450 operand immI2()
4451 %{
4452   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4453   match(ConI);
4454
4455   format %{ %}
4456   interface(CONST_INTER);
4457 %}
4458 
4459 operand immI8()
4460 %{
4461   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4462   match(ConI);
4463   // Accepts only ConI values in the signed 8-bit range [-0x80, 0x7F].
4464   op_cost(5);
4465   format %{ %}
4466   interface(CONST_INTER);
4467 %}
4468 
4469 operand immI16()
4470 %{
4471   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4472   match(ConI);
4473   // Accepts only ConI values in the signed 16-bit range [-32768, 32767].
4474   op_cost(10);
4475   format %{ %}
4476   interface(CONST_INTER);
4477 %}
4478 
4479 // Constant for long shifts: a ConI whose value is exactly 32.
4480 operand immI_32()
4481 %{
4482   predicate( n->get_int() == 32 );
4483   match(ConI);
4484
4485   op_cost(0);
4486   format %{ %}
4487   interface(CONST_INTER);
4488 %}
4489 
4490 // Constant for long shifts: a ConI whose value is exactly 64.
4491 operand immI_64()
4492 %{
4493   predicate( n->get_int() == 64 );
4494   match(ConI);
4495
4496   op_cost(0);
4497   format %{ %}
4498   interface(CONST_INTER);
4499 %}
4500 
4501 // Pointer Immediate: matches every ConP node (there is no predicate).
4502 operand immP()
4503 %{
4504   match(ConP);
4505
4506   op_cost(10);
4507   format %{ %}
4508   interface(CONST_INTER);
4509 %}
4510 
4511 // NULL Pointer Immediate: a ConP whose get_ptr() value is 0.
4512 operand immP0()
4513 %{
4514   predicate(n->get_ptr() == 0);
4515   match(ConP);
4516
4517   op_cost(5);
4518   format %{ %}
4519   interface(CONST_INTER);
4520 %}
4521 
4522 // Narrow Pointer Immediate: matches every ConN node (there is no predicate).
4523 operand immN() %{
4524   match(ConN);
4525
4526   op_cost(10);
4527   format %{ %}
4528   interface(CONST_INTER);
4529 %}
4530 
4531 // NULL Narrow Pointer Immediate: a ConN whose get_narrowcon() value is 0.
4532 operand immN0() %{
4533   predicate(n->get_narrowcon() == 0);
4534   match(ConN);
4535
4536   op_cost(5);
4537   format %{ %}
4538   interface(CONST_INTER);
4539 %}
4540 
4541 operand immP31()
4542 %{
4543   predicate(!n->as_Type()->type()->isa_oopptr()
4544             && (n->get_ptr() >> 31) == 0);
4545   match(ConP);
4546   // Accepts only non-oop ConP values for which get_ptr() >> 31 is 0.
4547   op_cost(5);
4548   format %{ %}
4549   interface(CONST_INTER);
4550 %}
4551 
4552 
4553 // Long Immediate: matches every ConL node (there is no predicate).
4554 operand immL()
4555 %{
4556   match(ConL);
4557
4558   op_cost(20);
4559   format %{ %}
4560   interface(CONST_INTER);
4561 %}
4562 
4563 // Long Immediate 8-bit: a ConL in the signed 8-bit range [-0x80, 0x7F].
4564 operand immL8()
4565 %{
4566   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4567   match(ConL);
4568
4569   op_cost(5);
4570   format %{ %}
4571   interface(CONST_INTER);
4572 %}
4573 
4574 // Long Immediate 32-bit unsigned: a ConL that is unchanged by a cast to (unsigned int).
4575 operand immUL32()
4576 %{
4577   predicate(n->get_long() == (unsigned int) (n->get_long()));
4578   match(ConL);
4579
4580   op_cost(10);
4581   format %{ %}
4582   interface(CONST_INTER);
4583 %}
4584 
4585 // Long Immediate 32-bit signed: a ConL that is unchanged by a cast to (int).
4586 operand immL32()
4587 %{
4588   predicate(n->get_long() == (int) (n->get_long()));
4589   match(ConL);
4590
4591   op_cost(15);
4592   format %{ %}
4593   interface(CONST_INTER);
4594 %}
4595 
4596 // Long Immediate zero: a ConL whose value is exactly 0.
4597 operand immL0()
4598 %{
4599   predicate(n->get_long() == 0L);
4600   match(ConL);
4601
4602   op_cost(10);
4603   format %{ %}
4604   interface(CONST_INTER);
4605 %}
4606 
4607 // Constant for increment: a ConL whose value is exactly 1.
4608 operand immL1()
4609 %{
4610   predicate(n->get_long() == 1);
4611   match(ConL);
4612
4613   format %{ %}
4614   interface(CONST_INTER);
4615 %}
4616 
4617 // Constant for decrement: a ConL whose value is exactly -1.
4618 operand immL_M1()
4619 %{
4620   predicate(n->get_long() == -1);
4621   match(ConL);
4622
4623   format %{ %}
4624   interface(CONST_INTER);
4625 %}
4626 
4627 // Long Immediate: the value 10 (a ConL whose value is exactly 10).
4628 operand immL10()
4629 %{
4630   predicate(n->get_long() == 10);
4631   match(ConL);
4632
4633   format %{ %}
4634   interface(CONST_INTER);
4635 %}
4636 
4637 // Long immediate from 0 to 127 (the predicate requires 0 <= value < 0x80).
4638 // Used for a shorter form of long mul by 10.
4639 operand immL_127()
4640 %{
4641   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4642   match(ConL);
4643
4644   op_cost(10);
4645   format %{ %}
4646   interface(CONST_INTER);
4647 %}
4648 
4649 // Long Immediate: low 32-bit mask (a ConL whose value is exactly 0xFFFFFFFF).
4650 operand immL_32bits()
4651 %{
4652   predicate(n->get_long() == 0xFFFFFFFFL);
4653   match(ConL);
4654   op_cost(20);
4655
4656   format %{ %}
4657   interface(CONST_INTER);
4658 %}
4659 
4660 // Float Immediate zero: a ConF for which jint_cast(getf()) equals 0.
4661 operand immF0()
4662 %{
4663   predicate(jint_cast(n->getf()) == 0);
4664   match(ConF);
4665   // NOTE(review): presumably a bit-pattern test, so -0.0f would not match -- confirm jint_cast.
4666   op_cost(5);
4667   format %{ %}
4668   interface(CONST_INTER);
4669 %}
4670 
4671 // Float Immediate: matches every ConF node (there is no predicate).
4672 operand immF()
4673 %{
4674   match(ConF);
4675
4676   op_cost(15);
4677   format %{ %}
4678   interface(CONST_INTER);
4679 %}
4680 
4681 // Double Immediate zero: a ConD for which jlong_cast(getd()) equals 0.
4682 operand immD0()
4683 %{
4684   predicate(jlong_cast(n->getd()) == 0);
4685   match(ConD);
4686   // NOTE(review): presumably a bit-pattern test, so -0.0 would not match -- confirm jlong_cast.
4687   op_cost(5);
4688   format %{ %}
4689   interface(CONST_INTER);
4690 %}
4691 
4692 // Double Immediate: matches every ConD node (there is no predicate).
4693 operand immD()
4694 %{
4695   match(ConD);
4696
4697   op_cost(15);
4698   format %{ %}
4699   interface(CONST_INTER);
4700 %}
4701 
4702 // Immediates for special shifts (sign extend)
4703 
4704 // Constant 16: a ConI whose value is exactly 16.
4705 operand immI_16()
4706 %{
4707   predicate(n->get_int() == 16);
4708   match(ConI);
4709
4710   format %{ %}
4711   interface(CONST_INTER);
4712 %}
4713 
4714 operand immI_24()
4715 %{
4716   predicate(n->get_int() == 24);
4717   match(ConI);
4718   // Accepts only the ConI value 24.
4719   format %{ %}
4720   interface(CONST_INTER);
4721 %}
4722 
4723 // Constant for byte-wide masking: a ConI whose value is exactly 255.
4724 operand immI_255()
4725 %{
4726   predicate(n->get_int() == 255);
4727   match(ConI);
4728
4729   format %{ %}
4730   interface(CONST_INTER);
4731 %}
4732 
4733 // Constant for short-wide masking: a ConI whose value is exactly 65535.
4734 operand immI_65535()
4735 %{
4736   predicate(n->get_int() == 65535);
4737   match(ConI);
4738
4739   format %{ %}
4740   interface(CONST_INTER);
4741 %}
4742 
4743 // Constant for byte-wide masking: a ConL whose value is exactly 255.
4744 operand immL_255()
4745 %{
4746   predicate(n->get_long() == 255);
4747   match(ConL);
4748
4749   format %{ %}
4750   interface(CONST_INTER);
4751 %}
4752 
4753 // Constant for short-wide masking: a ConL whose value is exactly 65535.
4754 operand immL_65535()
4755 %{
4756   predicate(n->get_long() == 65535);
4757   match(ConL);
4758
4759   format %{ %}
4760   interface(CONST_INTER);
4761 %}
4762 
4763 // Register Operands
4764 // Integer Register: allocated from int_reg; also matches the special-register operands below.
4765 operand rRegI()
4766 %{
4767   constraint(ALLOC_IN_RC(int_reg));
4768   match(RegI);
4769
4770   match(rax_RegI);
4771   match(rbx_RegI);
4772   match(rcx_RegI);
4773   match(rdx_RegI);
4774   match(rdi_RegI);
4775
4776   format %{ %}
4777   interface(REG_INTER);
4778 %}
4779 
4780 // Special Registers: integer operand allocated from int_rax_reg, printed as "RAX".
4781 operand rax_RegI()
4782 %{
4783   constraint(ALLOC_IN_RC(int_rax_reg));
4784   match(RegI);
4785   match(rRegI);
4786
4787   format %{ "RAX" %}
4788   interface(REG_INTER);
4789 %}
4790 
4791 // Special Registers: integer operand allocated from int_rbx_reg, printed as "RBX".
4792 operand rbx_RegI()
4793 %{
4794   constraint(ALLOC_IN_RC(int_rbx_reg));
4795   match(RegI);
4796   match(rRegI);
4797
4798   format %{ "RBX" %}
4799   interface(REG_INTER);
4800 %}
4801 
4802 operand rcx_RegI()
4803 %{
4804   constraint(ALLOC_IN_RC(int_rcx_reg));
4805   match(RegI);
4806   match(rRegI);
4807   // Integer operand allocated from int_rcx_reg, printed as "RCX".
4808   format %{ "RCX" %}
4809   interface(REG_INTER);
4810 %}
4811 
4812 operand rdx_RegI()
4813 %{
4814   constraint(ALLOC_IN_RC(int_rdx_reg));
4815   match(RegI);
4816   match(rRegI);
4817   // Integer operand allocated from int_rdx_reg, printed as "RDX".
4818   format %{ "RDX" %}
4819   interface(REG_INTER);
4820 %}
4821 
4822 operand rdi_RegI()
4823 %{
4824   constraint(ALLOC_IN_RC(int_rdi_reg));
4825   match(RegI);
4826   match(rRegI);
4827   // Integer operand allocated from int_rdi_reg, printed as "RDI".
4828   format %{ "RDI" %}
4829   interface(REG_INTER);
4830 %}
4831 
4832 operand no_rcx_RegI()
4833 %{
4834   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4835   match(RegI);
4836   match(rax_RegI);
4837   match(rbx_RegI);
4838   match(rdx_RegI);
4839   match(rdi_RegI);
4840   // Integer operand allocated from int_no_rcx_reg; rcx_RegI is not in the match list.
4841   format %{ %}
4842   interface(REG_INTER);
4843 %}
4844 
4845 operand no_rax_rdx_RegI()
4846 %{
4847   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4848   match(RegI);
4849   match(rbx_RegI);
4850   match(rcx_RegI);
4851   match(rdi_RegI);
4852   // Integer operand allocated from int_no_rax_rdx_reg; rax_RegI and rdx_RegI are not in the match list.
4853   format %{ %}
4854   interface(REG_INTER);
4855 %}
4856 
4857 // Pointer Register: allocated from any_reg; also matches rRegP and the special pointer operands listed.
4858 operand any_RegP()
4859 %{
4860   constraint(ALLOC_IN_RC(any_reg));
4861   match(RegP);
4862   match(rax_RegP);
4863   match(rbx_RegP);
4864   match(rdi_RegP);
4865   match(rsi_RegP);
4866   match(rbp_RegP);
4867   match(r15_RegP);
4868   match(rRegP);
4869
4870   format %{ %}
4871   interface(REG_INTER);
4872 %}
4873 
4874 operand rRegP()
4875 %{
4876   constraint(ALLOC_IN_RC(ptr_reg));
4877   match(RegP);
4878   match(rax_RegP);
4879   match(rbx_RegP);
4880   match(rdi_RegP);
4881   match(rsi_RegP);
4882   match(rbp_RegP);
4883   match(r15_RegP);  // See Q&A below about r15_RegP.
4884   // General pointer operand allocated from ptr_reg.
4885   format %{ %}
4886   interface(REG_INTER);
4887 %}
4888 
4889 
4890 operand r12RegL() %{
4891   constraint(ALLOC_IN_RC(long_r12_reg));
4892   match(RegL);
4893   // Long operand allocated from long_r12_reg.
4894   format %{ %}
4895   interface(REG_INTER);
4896 %}
4897 
4898 operand rRegN() %{
4899   constraint(ALLOC_IN_RC(int_reg));
4900   match(RegN);
4901   // RegN operand; it is allocated from the same int_reg class that rRegI uses.
4902   format %{ %}
4903   interface(REG_INTER);
4904 %}
4905 
4906 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4907 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4908 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4909 // The output of an instruction is controlled by the allocator, which respects
4910 // register class masks, not match rules.  Unless an instruction mentions
4911 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4912 // by the allocator as an output.
4913 
4914 operand no_rax_RegP()
4915 %{
4916   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4917   match(RegP);
4918   match(rbx_RegP);
4919   match(rsi_RegP);
4920   match(rdi_RegP);
4921   // Pointer operand allocated from ptr_no_rax_reg; rax_RegP is not in the match list.
4922   format %{ %}
4923   interface(REG_INTER);
4924 %}
4925 
4926 operand no_rbp_RegP()
4927 %{
4928   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4929   match(RegP);
4930   match(rbx_RegP);
4931   match(rsi_RegP);
4932   match(rdi_RegP);
4933   // Pointer operand allocated from ptr_no_rbp_reg; rbp_RegP is not in the match list.
4934   format %{ %}
4935   interface(REG_INTER);
4936 %}
4937 
4938 operand no_rax_rbx_RegP()
4939 %{
4940   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4941   match(RegP);
4942   match(rsi_RegP);
4943   match(rdi_RegP);
4944   // Pointer operand allocated from ptr_no_rax_rbx_reg; rax_RegP and rbx_RegP are not in the match list.
4945   format %{ %}
4946   interface(REG_INTER);
4947 %}
4948 
4949 // Special Registers
4950 // Return a pointer value: pointer operand allocated from ptr_rax_reg.
4951 operand rax_RegP()
4952 %{
4953   constraint(ALLOC_IN_RC(ptr_rax_reg));
4954   match(RegP);
4955   match(rRegP);
4956
4957   format %{ %}
4958   interface(REG_INTER);
4959 %}
4960 
4961 // Special Registers
4962 // Return a compressed pointer value: RegN operand allocated from int_rax_reg.
4963 operand rax_RegN()
4964 %{
4965   constraint(ALLOC_IN_RC(int_rax_reg));
4966   match(RegN);
4967   match(rRegN);
4968
4969   format %{ %}
4970   interface(REG_INTER);
4971 %}
4972 
4973 // Used in AtomicAdd: pointer operand allocated from ptr_rbx_reg.
4974 operand rbx_RegP()
4975 %{
4976   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4977   match(RegP);
4978   match(rRegP);
4979
4980   format %{ %}
4981   interface(REG_INTER);
4982 %}
4983 
4984 operand rsi_RegP()
4985 %{
4986   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4987   match(RegP);
4988   match(rRegP);
4989   // Pointer operand allocated from ptr_rsi_reg.
4990   format %{ %}
4991   interface(REG_INTER);
4992 %}
4993 
4994 // Used in rep stosq: pointer operand allocated from ptr_rdi_reg.
4995 operand rdi_RegP()
4996 %{
4997   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4998   match(RegP);
4999   match(rRegP);
5000
5001   format %{ %}
5002   interface(REG_INTER);
5003 %}
5004 
5005 operand rbp_RegP()
5006 %{
5007   constraint(ALLOC_IN_RC(ptr_rbp_reg));
5008   match(RegP);
5009   match(rRegP);
5010   // Pointer operand allocated from ptr_rbp_reg.
5011   format %{ %}
5012   interface(REG_INTER);
5013 %}
5014 
5015 operand r15_RegP()
5016 %{
5017   constraint(ALLOC_IN_RC(ptr_r15_reg));
5018   match(RegP);
5019   match(rRegP);
5020   // Pointer operand allocated from ptr_r15_reg (r15 is described as the read-only TLS register in the Q&A comment in this file).
5021   format %{ %}
5022   interface(REG_INTER);
5023 %}
5024 
5025 operand rRegL()
5026 %{
5027   constraint(ALLOC_IN_RC(long_reg));
5028   match(RegL);
5029   match(rax_RegL);
5030   match(rdx_RegL);
5031   // General long operand allocated from long_reg.
5032   format %{ %}
5033   interface(REG_INTER);
5034 %}
5035 
5036 // Special Registers: long operand allocated from long_no_rax_rdx_reg.
5037 operand no_rax_rdx_RegL()
5038 %{
5039   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5040   match(RegL);
5041   match(rRegL);
5042
5043   format %{ %}
5044   interface(REG_INTER);
5045 %}
5046 
5047 operand no_rax_RegL()
5048 %{
5049   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5050   match(RegL);
5051   match(rRegL);
5052   match(rdx_RegL);
5053   // NOTE(review): despite the name, this allocates from long_no_rax_rdx_reg (the class no_rax_rdx_RegL uses) while still matching rdx_RegL -- confirm this is intended.
5054   format %{ %}
5055   interface(REG_INTER);
5056 %}
5057 
5058 operand no_rcx_RegL()
5059 %{
5060   constraint(ALLOC_IN_RC(long_no_rcx_reg));
5061   match(RegL);
5062   match(rRegL);
5063   // Long operand allocated from long_no_rcx_reg.
5064   format %{ %}
5065   interface(REG_INTER);
5066 %}
5067 
5068 operand rax_RegL()
5069 %{
5070   constraint(ALLOC_IN_RC(long_rax_reg));
5071   match(RegL);
5072   match(rRegL);
5073   // Long operand allocated from long_rax_reg, printed as "RAX".
5074   format %{ "RAX" %}
5075   interface(REG_INTER);
5076 %}
5077 
5078 operand rcx_RegL()
5079 %{
5080   constraint(ALLOC_IN_RC(long_rcx_reg));
5081   match(RegL);
5082   match(rRegL);
5083   // Long operand allocated from long_rcx_reg.
5084   format %{ %}
5085   interface(REG_INTER);
5086 %}
5087 
5088 operand rdx_RegL()
5089 %{
5090   constraint(ALLOC_IN_RC(long_rdx_reg));
5091   match(RegL);
5092   match(rRegL);
5093   // Long operand allocated from long_rdx_reg.
5094   format %{ %}
5095   interface(REG_INTER);
5096 %}
5097 
5098 // Flags register, used as output of compare instructions; allocated from int_flags and printed as "RFLAGS".
5099 operand rFlagsReg()
5100 %{
5101   constraint(ALLOC_IN_RC(int_flags));
5102   match(RegFlags);
5103
5104   format %{ "RFLAGS" %}
5105   interface(REG_INTER);
5106 %}
5107 
5108 // Flags register, used as output of FLOATING POINT compare instructions; same int_flags class as rFlagsReg, printed as "RFLAGS_U".
5109 operand rFlagsRegU()
5110 %{
5111   constraint(ALLOC_IN_RC(int_flags));
5112   match(RegFlags);
5113
5114   format %{ "RFLAGS_U" %}
5115   interface(REG_INTER);
5116 %}
5117 
5118 // Float register operands: RegF values allocated from float_reg.
5119 operand regF()
5120 %{
5121   constraint(ALLOC_IN_RC(float_reg));
5122   match(RegF);
5123
5124   format %{ %}
5125   interface(REG_INTER);
5126 %}
5127 
5128 // Double register operands: RegD values allocated from double_reg.
5129 operand regD()
5130 %{
5131   constraint(ALLOC_IN_RC(double_reg));
5132   match(RegD);
5133
5134   format %{ %}
5135   interface(REG_INTER);
5136 %}
5137 
5138 
5139 //----------Memory Operands----------------------------------------------------
5140 // Direct Memory Operand
5141 // operand direct(immP addr)
5142 // %{
5143 //   match(addr);
5144 
5145 //   format %{ "[$addr]" %}
5146 //   interface(MEMORY_INTER) %{
5147 //     base(0xFFFFFFFF);
5148 //     index(0x4);
5149 //     scale(0x0);
5150 //     disp($addr);
5151 //   %}
5152 // %}
5153 
5154 // Indirect Memory Operand: [reg] with index 0x4, scale 0 and displacement 0.
5155 operand indirect(any_RegP reg)
5156 %{
5157   constraint(ALLOC_IN_RC(ptr_reg));
5158   match(reg);
5159
5160   format %{ "[$reg]" %}
5161   interface(MEMORY_INTER) %{
5162     base($reg);
5163     index(0x4);  // same encoding the stackSlot operands annotate as "No Index"
5164     scale(0x0);
5165     disp(0x0);
5166   %}
5167 %}
5168 
5169 // Indirect Memory Plus Short Offset Operand: [reg + off] where off is an immL8.
5170 operand indOffset8(any_RegP reg, immL8 off)
5171 %{
5172   constraint(ALLOC_IN_RC(ptr_reg));
5173   match(AddP reg off);
5174
5175   format %{ "[$reg + $off (8-bit)]" %}
5176   interface(MEMORY_INTER) %{
5177     base($reg);
5178     index(0x4);
5179     scale(0x0);
5180     disp($off);
5181   %}
5182 %}
5183 
5184 // Indirect Memory Plus Long Offset Operand: [reg + off] where off is an immL32; the frame block names this operand for cisc-spilling.
5185 operand indOffset32(any_RegP reg, immL32 off)
5186 %{
5187   constraint(ALLOC_IN_RC(ptr_reg));
5188   match(AddP reg off);
5189
5190   format %{ "[$reg + $off (32-bit)]" %}
5191   interface(MEMORY_INTER) %{
5192     base($reg);
5193     index(0x4);
5194     scale(0x0);
5195     disp($off);
5196   %}
5197 %}
5198 
5199 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off], index unscaled.
5200 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5201 %{
5202   constraint(ALLOC_IN_RC(ptr_reg));
5203   match(AddP (AddP reg lreg) off);
5204
5205   op_cost(10);
5206   format %{"[$reg + $off + $lreg]" %}
5207   interface(MEMORY_INTER) %{
5208     base($reg);
5209     index($lreg);
5210     scale(0x0);
5211     disp($off);
5212   %}
5213 %}
5214 
5215 // Indirect Memory Plus Index Register Operand: [reg + lreg], no scale and no offset.
5216 operand indIndex(any_RegP reg, rRegL lreg)
5217 %{
5218   constraint(ALLOC_IN_RC(ptr_reg));
5219   match(AddP reg lreg);
5220
5221   op_cost(10);
5222   format %{"[$reg + $lreg]" %}
5223   interface(MEMORY_INTER) %{
5224     base($reg);
5225     index($lreg);
5226     scale(0x0);
5227     disp(0x0);
5228   %}
5229 %}
5230 
5231 // Indirect Memory Times Scale Plus Index Register: [reg + (lreg << scale)], scale is an immI2.
5232 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5233 %{
5234   constraint(ALLOC_IN_RC(ptr_reg));
5235   match(AddP reg (LShiftL lreg scale));
5236
5237   op_cost(10);
5238   format %{"[$reg + $lreg << $scale]" %}
5239   interface(MEMORY_INTER) %{
5240     base($reg);
5241     index($lreg);
5242     scale($scale);
5243     disp(0x0);
5244   %}
5245 %}
5246 
5247 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [reg + (lreg << scale) + off].
5248 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5249 %{
5250   constraint(ALLOC_IN_RC(ptr_reg));
5251   match(AddP (AddP reg (LShiftL lreg scale)) off);
5252
5253   op_cost(10);
5254   format %{"[$reg + $off + $lreg << $scale]" %}
5255   interface(MEMORY_INTER) %{
5256     base($reg);
5257     index($lreg);
5258     scale($scale);
5259     disp($off);
5260   %}
5261 %}
5262 
5263 // Indirect Narrow Oop Plus Offset Operand: folds (DecodeN src) + off into [R12 + src << 3 + off].
5264 operand indNarrowOopOffset(rRegN src, immL32 off) %{
5265   constraint(ALLOC_IN_RC(ptr_reg));
5266   match(AddP (DecodeN src) off);
5267   // NOTE(review): presumably R12 holds the compressed-oop heap base here -- confirm.
5268   op_cost(10);
5269   format %{"[R12 + $src << 3 + $off] (compressed oop addressing)" %}
5270   interface(MEMORY_INTER) %{
5271     base(0xc); // R12
5272     index($src);
5273     scale(0x3);
5274     disp($off);
5275   %}
5276 %}
5277 
5278 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: [reg + (idx << scale) + off] with a 32-bit index.
5279 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5280 %{
5281   constraint(ALLOC_IN_RC(ptr_reg));
5282   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5283   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5284   // The predicate requires the long type reached via in(2)->in(3)->in(1) to have _lo >= 0; presumably that node is the ConvI2L of the match tree -- confirm.
5285   op_cost(10);
5286   format %{"[$reg + $off + $idx << $scale]" %}
5287   interface(MEMORY_INTER) %{
5288     base($reg);
5289     index($idx);
5290     scale($scale);
5291     disp($off);
5292   %}
5293 %}
5294 
5295 //----------Special Memory Operands--------------------------------------------
5296 // Stack Slot Operand - This operand is used for loading and storing temporary
5297 //                      values on the stack where a match requires a value to
5298 //                      flow through memory.  This variant takes an sRegP.
5299 operand stackSlotP(sRegP reg)
5300 %{
5301   constraint(ALLOC_IN_RC(stack_slots));
5302   // No match rule because this operand is only generated in matching
5303
5304   format %{ "[$reg]" %}
5305   interface(MEMORY_INTER) %{
5306     base(0x4);   // RSP
5307     index(0x4);  // No Index
5308     scale(0x0);  // No Scale
5309     disp($reg);  // Stack Offset
5310   %}
5311 %}
5312 
5313 operand stackSlotI(sRegI reg)
5314 %{
5315   constraint(ALLOC_IN_RC(stack_slots));
5316   // No match rule because this operand is only generated in matching
5317   // Stack slot for an sRegI, addressed as RSP plus the slot's stack offset.
5318   format %{ "[$reg]" %}
5319   interface(MEMORY_INTER) %{
5320     base(0x4);   // RSP
5321     index(0x4);  // No Index
5322     scale(0x0);  // No Scale
5323     disp($reg);  // Stack Offset
5324   %}
5325 %}
5326 
5327 operand stackSlotF(sRegF reg)
5328 %{
5329   constraint(ALLOC_IN_RC(stack_slots));
5330   // No match rule because this operand is only generated in matching
5331   // Stack slot for an sRegF, addressed as RSP plus the slot's stack offset.
5332   format %{ "[$reg]" %}
5333   interface(MEMORY_INTER) %{
5334     base(0x4);   // RSP
5335     index(0x4);  // No Index
5336     scale(0x0);  // No Scale
5337     disp($reg);  // Stack Offset
5338   %}
5339 %}
5340 
5341 operand stackSlotD(sRegD reg)
5342 %{
5343   constraint(ALLOC_IN_RC(stack_slots));
5344   // No match rule because this operand is only generated in matching
5345   // Stack slot for an sRegD, addressed as RSP plus the slot's stack offset.
5346   format %{ "[$reg]" %}
5347   interface(MEMORY_INTER) %{
5348     base(0x4);   // RSP
5349     index(0x4);  // No Index
5350     scale(0x0);  // No Scale
5351     disp($reg);  // Stack Offset
5352   %}
5353 %}
5354 operand stackSlotL(sRegL reg)
5355 %{
5356   constraint(ALLOC_IN_RC(stack_slots));
5357   // No match rule because this operand is only generated in matching
5358   // Stack slot for an sRegL, addressed as RSP plus the slot's stack offset.
5359   format %{ "[$reg]" %}
5360   interface(MEMORY_INTER) %{
5361     base(0x4);   // RSP
5362     index(0x4);  // No Index
5363     scale(0x0);  // No Scale
5364     disp($reg);  // Stack Offset
5365   %}
5366 %}
5367 
5368 //----------Conditional Branch Operands----------------------------------------
5369 // Comparison Op  - This is the operation of the comparison, and is limited to
5370 //                  the following set of codes:
5371 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5372 //
5373 // Other attributes of the comparison, such as unsignedness, are specified
5374 // by the comparison instruction that sets a condition code flags register.
5375 // That result is represented by a flags operand whose subtype is appropriate
5376 // to the unsignedness (etc.) of the comparison.
5377 //
5378 // Later, the instruction which matches both the Comparison Op (a Bool) and
5379 // the flags (produced by the Cmp) specifies the coding of the comparison op
5380 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5381 
5382 // Comparison Code: the values below are the x86 condition-code nibbles for E, NE, L, GE, LE and G (signed tests).
5383 operand cmpOp()
5384 %{
5385   match(Bool);
5386
5387   format %{ "" %}
5388   interface(COND_INTER) %{
5389     equal(0x4);
5390     not_equal(0x5);
5391     less(0xC);
5392     greater_equal(0xD);
5393     less_equal(0xE);
5394     greater(0xF);
5395   %}
5396 %}
5397 
5398 // Comparison Code, unsigned compare.  Used by FP also, with
5399 // C2 (unordered) turned into GT or LT already.  The other bits
5400 // C0 and C3 are turned into Carry & Zero flags.  The values are the x86 condition-code nibbles for E, NE, B, AE, BE and A.
5401 operand cmpOpU()
5402 %{
5403   match(Bool);
5404
5405   format %{ "" %}
5406   interface(COND_INTER) %{
5407     equal(0x4);
5408     not_equal(0x5);
5409     less(0x2);
5410     greater_equal(0x3);
5411     less_equal(0x6);
5412     greater(0x7);
5413   %}
5414 %}
5415 
5416 
5417 //----------OPERAND CLASSES----------------------------------------------------
5418 // Operand Classes are groups of operands that are used to simplify
5419 // instruction definitions by not requiring the AD writer to specify separate
5420 // instructions for every form of operand when the instruction accepts
5421 // multiple operand types with the same basic encoding and format.  The classic
5422 // case of this is memory operands.
5423
5424 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5425                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5426                indNarrowOopOffset);
5427 
5428 //----------PIPELINE-----------------------------------------------------------
5429 // Rules which define the behavior of the target architectures pipeline.
5430 pipeline %{
5431 
5432 //----------ATTRIBUTES---------------------------------------------------------
5433 attributes %{
5434   variable_size_instructions;        // Variable size instructions
5435   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5436   instruction_unit_size = 1;         // Instruction size unit is 1 byte
5437   instruction_fetch_unit_size = 16;  // The processor fetches one line
5438   instruction_fetch_units = 1;       // of 16 bytes
5439
5440   // List of nop instructions
5441   nops( MachNop );
5442 %}
5443 
5444 //----------RESOURCES----------------------------------------------------------
5445 // Resources are the functional units available to the machine
5446 
5447 // Generic P2/P3 pipeline
5448 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5449 // 3 instructions decoded per cycle.
5450 // 2 load/store ops per cycle, 1 branch, 1 FPU,  (NOTE(review): three units MS0-MS2 are declared below -- confirm)
5451 // 3 ALU op, only ALU0 handles mul instructions.
5452 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5453            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5454            BR, FPU,
5455            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5456 
5457 //----------PIPELINE DESCRIPTION-----------------------------------------------
5458 // Pipeline Description specifies the stages in the machine's pipeline
5459 
5460 // Generic P2/P3 pipeline: six stages named S0 through S5
5461 pipe_desc(S0, S1, S2, S3, S4, S5);
5462 
5463 //----------PIPELINE CLASSES---------------------------------------------------
5464 // Pipeline Classes describe the stages in which input and output are
5465 // referenced by the hardware pipeline.
5466 
5467 // Naming convention: ialu or fpu
5468 // Then: _reg
5469 // Then: _reg if there is a 2nd register
5470 // Then: _long if it's a pair of instructions implementing a long
5471 // Then: _fat if it requires the big decoder
5472 //   Or: _mem if it requires the big decoder and a memory unit.
5473 
5474 // Integer ALU reg operation: dst is read in S3 and written in S4.
5475 pipe_class ialu_reg(rRegI dst)
5476 %{
5477     single_instruction;
5478     dst    : S4(write);
5479     dst    : S3(read);
5480     DECODE : S0;        // any decoder
5481     ALU    : S3;        // any alu
5482 %}
5483 
5484 // Long ALU reg operation: counted as 2 instructions; dst is read in S3 and written in S4.
5485 pipe_class ialu_reg_long(rRegL dst)
5486 %{
5487     instruction_count(2);
5488     dst    : S4(write);
5489     dst    : S3(read);
5490     DECODE : S0(2);     // any 2 decoders
5491     ALU    : S3(2);     // any 2 alus
5492 %}
5493 
5494 // Integer ALU reg operation using big decoder (D0 instead of any DECODE unit).
5495 pipe_class ialu_reg_fat(rRegI dst)
5496 %{
5497     single_instruction;
5498     dst    : S4(write);
5499     dst    : S3(read);
5500     D0     : S0;        // big decoder only
5501     ALU    : S3;        // any alu
5502 %}
5503 
5504 // Long ALU reg operation using big decoder: counted as 2 instructions, both decoded on D0.
5505 pipe_class ialu_reg_long_fat(rRegL dst)
5506 %{
5507     instruction_count(2);
5508     dst    : S4(write);
5509     dst    : S3(read);
5510     D0     : S0(2);     // big decoder only; twice
5511     ALU    : S3(2);     // any 2 alus
5512 %}
5513 
5514 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4.
5515 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5516 %{
5517     single_instruction;
5518     dst    : S4(write);
5519     src    : S3(read);
5520     DECODE : S0;        // any decoder
5521     ALU    : S3;        // any alu
5522 %}
5523 
5524 // Long ALU reg-reg operation: counted as 2 instructions; src is read in S3, dst is written in S4.
5525 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5526 %{
5527     instruction_count(2);
5528     dst    : S4(write);
5529     src    : S3(read);
5530     DECODE : S0(2);     // any 2 decoders
5531     ALU    : S3(2);     // any 2 alus
5532 %}
5533 
5534 // Integer ALU operation using big decoder; despite the "reg_reg" name, src is declared as a memory operand
5535 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5536 %{
5537     single_instruction;
5538     dst    : S4(write);
5539     src    : S3(read);
5540     D0     : S0;        // big decoder only
5541     ALU    : S3;        // any alu
5542 %}
5543 
5544 // Long ALU reg-reg operation using big decoder: 2 instructions, both decoded by D0
5545 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5546 %{
5547     instruction_count(2);
5548     dst    : S4(write);
5549     src    : S3(read);
5550     D0     : S0(2);     // big decoder only; twice
5551     ALU    : S3(2);     // both alus
5552 %}
5553 
5554 // Integer ALU reg-mem operation: mem read in S3, ALU used in S4, dst written in S5
5555 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5556 %{
5557     single_instruction;
5558     dst    : S5(write);
5559     mem    : S3(read);
5560     D0     : S0;        // big decoder only
5561     ALU    : S4;        // any alu
5562     MEM    : S3;        // any mem
5563 %}
5564 
5565 // Integer mem operation (prefetch): only a memory operand, no register operand and no ALU use
5566 pipe_class ialu_mem(memory mem)
5567 %{
5568     single_instruction;
5569     mem    : S3(read);
5570     D0     : S0;        // big decoder only
5571     MEM    : S3;        // any mem
5572 %}
5573 
5574 // Integer Store to Memory from a register: mem operand read in S3, src register read in S5
5575 pipe_class ialu_mem_reg(memory mem, rRegI src)
5576 %{
5577     single_instruction;
5578     mem    : S3(read);
5579     src    : S5(read);
5580     D0     : S0;        // big decoder only
5581     ALU    : S4;        // any alu
5582     MEM    : S3;
5583 %}
5584 
5585 // // Long Store to Memory
5586 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5587 // %{
5588 //     instruction_count(2);
5589 //     mem    : S3(read);
5590 //     src    : S5(read);
5591 //     D0     : S0(2);          // big decoder only; twice
5592 //     ALU    : S4(2);     // any 2 alus
5593 //     MEM    : S3(2);  // Both mems
5594 // %}
5595 
5596 // Integer Store to Memory with no register source (the immediate form, per the class name)
5597 pipe_class ialu_mem_imm(memory mem)
5598 %{
5599     single_instruction;
5600     mem    : S3(read);
5601     D0     : S0;        // big decoder only
5602     ALU    : S4;        // any alu
5603     MEM    : S3;
5604 %}
5605 
5606 // Integer ALU0 reg-reg operation: restricted to ALU0 and to the big decoder
5607 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5608 %{
5609     single_instruction;
5610     dst    : S4(write);
5611     src    : S3(read);
5612     D0     : S0;        // Big decoder only
5613     ALU0   : S3;        // only alu0
5614 %}
5615 
5616 // Integer ALU0 reg-mem operation: mem read in S3, ALU0 used in S4, dst written in S5
5617 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5618 %{
5619     single_instruction;
5620     dst    : S5(write);
5621     mem    : S3(read);
5622     D0     : S0;        // big decoder only
5623     ALU0   : S4;        // ALU0 only
5624     MEM    : S3;        // any mem
5625 %}
5626 
5627 // Integer ALU reg-reg operation whose only written operand is the flags register (cr)
5628 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5629 %{
5630     single_instruction;
5631     cr     : S4(write);
5632     src1   : S3(read);
5633     src2   : S3(read);
5634     DECODE : S0;        // any decoder
5635     ALU    : S3;        // any alu
5636 %}
5637 
5638 // Integer ALU reg-imm operation whose only written operand is the flags register (cr)
5639 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5640 %{
5641     single_instruction;
5642     cr     : S4(write);
5643     src1   : S3(read);
5644     DECODE : S0;        // any decoder
5645     ALU    : S3;        // any alu
5646 %}
5647 
5648 // Integer ALU reg-mem operation whose only written operand is the flags register (cr)
5649 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5650 %{
5651     single_instruction;
5652     cr     : S4(write);
5653     src1   : S3(read);
5654     src2   : S3(read);
5655     D0     : S0;        // big decoder only
5656     ALU    : S4;        // any alu
5657     MEM    : S3;
5658 %}
5659 
5660 // cmplt idiom (name suggests compare-less-than): 4 instructions; p, q and y are all only read
5661 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5662 %{
5663     instruction_count(4);
5664     y      : S4(read);
5665     q      : S3(read);
5666     p      : S3(read);
5667     DECODE : S0(4);     // any decoder
5668 %}
5669 
5670 // Conditional move reg-reg: src and flags (cr) read in S3, dst written in S4
5671 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5672 %{
5673     single_instruction;
5674     dst    : S4(write);
5675     src    : S3(read);
5676     cr     : S3(read);
5677     DECODE : S0;        // any decoder
5678 %}
5679 
5680 // Conditional move reg-mem: src is a memory operand, so MEM is also used in S3
5681 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5682 %{
5683     single_instruction;
5684     dst    : S4(write);
5685     src    : S3(read);
5686     cr     : S3(read);
5687     DECODE : S0;        // any decoder
5688     MEM    : S3;
5689 %}
5690 
5691 // Conditional move reg-reg long: src and flags (cr) read in S3, dst written in S4
5692 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5693 %{
5694     single_instruction; // NOTE(review): single instruction, yet DECODE below is S0(2) - confirm intent
5695     dst    : S4(write);
5696     src    : S3(read);
5697     cr     : S3(read);
5698     DECODE : S0(2);     // any 2 decoders
5699 %}
5700 
5701 // XXX
5702 // // Conditional move double reg-reg
5703 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5704 // %{
5705 //     single_instruction;
5706 //     dst    : S4(write);
5707 //     src    : S3(read);
5708 //     cr     : S3(read);
5709 //     DECODE : S0;     // any decoder
5710 // %}
5711 
5712 // Float reg operation: 2 instructions; the single operand dst is only read (S3)
5713 pipe_class fpu_reg(regD dst)
5714 %{
5715     instruction_count(2);
5716     dst    : S3(read);
5717     DECODE : S0(2);     // any 2 decoders
5718     FPU    : S3;
5719 %}
5720 
5721 // Float reg-reg operation: 2 instructions; src read in S3, dst written in S4
5722 pipe_class fpu_reg_reg(regD dst, regD src)
5723 %{
5724     instruction_count(2);
5725     dst    : S4(write);
5726     src    : S3(read);
5727     DECODE : S0(2);     // any 2 decoders
5728     FPU    : S3;
5729 %}
5730 
5731 // Float reg-reg-reg operation: 3 instructions; both sources read in S3, dst written in S4
5732 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5733 %{
5734     instruction_count(3);
5735     dst    : S4(write);
5736     src1   : S3(read);
5737     src2   : S3(read);
5738     DECODE : S0(3);     // any 3 decoders
5739     FPU    : S3(2);
5740 %}
5741 
5742 // Float reg-reg-reg-reg operation: 4 instructions; three sources read in S3, dst written in S4
5743 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5744 %{
5745     instruction_count(4);
5746     dst    : S4(write);
5747     src1   : S3(read);
5748     src2   : S3(read);
5749     src3   : S3(read);
5750     DECODE : S0(4);     // any 4 decoders
5751     FPU    : S3(2);
5752 %}
5753 
5754 // Float reg-mem-reg-reg operation: 4 instructions; src1 is a memory operand, so D0 and MEM are used
5755 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5756 %{
5757     instruction_count(4);
5758     dst    : S4(write);
5759     src1   : S3(read);
5760     src2   : S3(read);
5761     src3   : S3(read);
5762     DECODE : S1(3);     // any 3 decoders
5763     D0     : S0;        // Big decoder only
5764     FPU    : S3(2);
5765     MEM    : S3;
5766 %}
5767 
5768 // Float reg-mem operation: 2 instructions; mem read in S3, FPU used in S4, dst written in S5
5769 pipe_class fpu_reg_mem(regD dst, memory mem)
5770 %{
5771     instruction_count(2);
5772     dst    : S5(write);
5773     mem    : S3(read);
5774     D0     : S0;        // big decoder only
5775     DECODE : S1;        // any decoder for FPU POP
5776     FPU    : S4;
5777     MEM    : S3;        // any mem
5778 %}
5779 
5780 // Float reg-reg-mem operation: 3 instructions; src1 and mem read in S3, dst written in S5
5781 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5782 %{
5783     instruction_count(3);
5784     dst    : S5(write);
5785     src1   : S3(read);
5786     mem    : S3(read);
5787     D0     : S0;        // big decoder only
5788     DECODE : S1(2);     // any decoder for FPU POP
5789     FPU    : S4;
5790     MEM    : S3;        // any mem
5791 %}
5792 
5793 // Float mem-reg operation: 2 instructions; mem read in S3, src read in S5, no operand written
5794 pipe_class fpu_mem_reg(memory mem, regD src)
5795 %{
5796     instruction_count(2);
5797     src    : S5(read);
5798     mem    : S3(read);
5799     DECODE : S0;        // any decoder for FPU PUSH
5800     D0     : S1;        // big decoder only
5801     FPU    : S4;
5802     MEM    : S3;        // any mem
5803 %}
5804 
5805 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5806 %{
5807     instruction_count(3); // Float mem-reg-reg operation; all three operands are read in S3
5808     src1   : S3(read);
5809     src2   : S3(read);
5810     mem    : S3(read);
5811     DECODE : S0(2);     // any decoder for FPU PUSH
5812     D0     : S1;        // big decoder only
5813     FPU    : S4;
5814     MEM    : S3;        // any mem
5815 %}
5816 
5817 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5818 %{
5819     instruction_count(3); // Float mem-reg-mem operation; two memory operands, hence MEM : S3(2)
5820     src1   : S3(read);
5821     src2   : S3(read);
5822     mem    : S4(read);
5823     DECODE : S0;        // any decoder for FPU PUSH
5824     D0     : S0(2);     // big decoder only
5825     FPU    : S4;
5826     MEM    : S3(2);     // any mem
5827 %}
5828 
5829 pipe_class fpu_mem_mem(memory dst, memory src1)
5830 %{
5831     instruction_count(2); // Float mem-mem operation; no FPU resource is reserved
5832     src1   : S3(read);
5833     dst    : S4(read);  // dst is a memory operand and is declared as read here
5834     D0     : S0(2);     // big decoder only
5835     MEM    : S3(2);     // any mem
5836 %}
5837 
5838 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5839 %{
5840     instruction_count(3); // Float mem-mem-mem operation; three memory operands, hence MEM : S3(3)
5841     src1   : S3(read);
5842     src2   : S3(read);
5843     dst    : S4(read);  // dst is a memory operand and is declared as read here
5844     D0     : S0(3);     // big decoder only
5845     FPU    : S4;
5846     MEM    : S3(3);     // any mem
5847 %}
5848 
5849 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5850 %{
5851     instruction_count(3); // Float mem-reg operation with a constant (per the class name); the constant is not an operand here
5852     src1   : S4(read);
5853     mem    : S4(read);
5854     DECODE : S0;        // any decoder for FPU PUSH
5855     D0     : S0(2);     // big decoder only
5856     FPU    : S4;
5857     MEM    : S3(2);     // any mem
5858 %}
5859 
5860 // Float load constant: 2 instructions; no source operand, dst written in S5
5861 pipe_class fpu_reg_con(regD dst)
5862 %{
5863     instruction_count(2);
5864     dst    : S5(write);
5865     D0     : S0;        // big decoder only for the load
5866     DECODE : S1;        // any decoder for FPU POP
5867     FPU    : S4;
5868     MEM    : S3;        // any mem
5869 %}
5870 
5871 // Float reg-reg operation with a loaded constant: 3 instructions; src read in S3, dst written in S5
5872 pipe_class fpu_reg_reg_con(regD dst, regD src)
5873 %{
5874     instruction_count(3);
5875     dst    : S5(write);
5876     src    : S3(read);
5877     D0     : S0;        // big decoder only for the load
5878     DECODE : S1(2);     // any decoder for FPU POP
5879     FPU    : S4;
5880     MEM    : S3;        // any mem
5881 %}
5882 
5883 // UnConditional branch: single instruction using the BR resource in S3; the label is not scheduled
5884 pipe_class pipe_jmp(label labl)
5885 %{
5886     single_instruction;
5887     BR   : S3;
5888 %}
5889 
5890 // Conditional branch: flags (cr) read in S1, BR resource used in S3
5891 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5892 %{
5893     single_instruction;
5894     cr    : S1(read);
5895     BR    : S3;
5896 %}
5897 
5898 // Allocation idiom (cmpxchg): serializing, fixed latency of 6; heap_ptr read in S3, dst written in S5
5899 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5900 %{
5901     instruction_count(1); force_serialization;
5902     fixed_latency(6);
5903     heap_ptr : S3(read);
5904     DECODE   : S0(3);
5905     D0       : S2;
5906     MEM      : S3;
5907     ALU      : S3(2);
5908     dst      : S5(write);
5909     BR       : S5;
5910 %}
5911 
5912 // Generic big/slow expanded idiom: 10 instructions over multiple bundles, serializing, fixed latency 100
5913 pipe_class pipe_slow()
5914 %{
5915     instruction_count(10); multiple_bundles; force_serialization;
5916     fixed_latency(100);
5917     D0  : S0(2);
5918     MEM : S3(2);
5919 %}
5920 
5921 // The real do-nothing guy: zero instructions and no resources reserved
5922 pipe_class empty()
5923 %{
5924     instruction_count(0);
5925 %}
5926 
5927 // Define the class for the Nop node: MachNop is bound to the 'empty' pipe class
5928 define
5929 %{
5930    MachNop = empty;
5931 %}
5932 
5933 %}
5934 
5935 //----------INSTRUCTIONS-------------------------------------------------------
5936 //
5937 // match      -- States which machine-independent subtree may be replaced
5938 //               by this instruction.
5939 // ins_cost   -- The estimated cost of this instruction is used by instruction
5940 //               selection to identify a minimum cost tree of machine
5941 //               instructions that matches a tree of machine-independent
5942 //               instructions.
5943 // format     -- A string providing the disassembly for this instruction.
5944 //               The value of an instruction's operand may be inserted
5945 //               by referring to it with a '$' prefix.
5946 // opcode     -- Three instruction opcodes may be provided.  These are referred
5947 //               to within an encode class as $primary, $secondary, and $tertiary
5948 //               respectively.  The primary opcode is commonly used to
5949 //               indicate the type of machine instruction, while secondary
5950 //               and tertiary are often used for prefix options or addressing
5951 //               modes.
5952 // ins_encode -- A list of encode classes with parameters. The encode class
5953 //               name must have been defined in an 'enc_class' specification
5954 //               in the encode section of the architecture description.
5955 
5956 
5957 //----------Load/Store/Move Instructions---------------------------------------
5958 //----------Load Instructions--------------------------------------------------
5959 
5960 // Load Byte (8 bit signed): matches LoadB and emits movsbl (0F BE) into an int register
5961 instruct loadB(rRegI dst, memory mem)
5962 %{
5963   match(Set dst (LoadB mem));
5964 
5965   ins_cost(125);
5966   format %{ "movsbl  $dst, $mem\t# byte" %}
5967   opcode(0x0F, 0xBE);
5968   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5969   ins_pipe(ialu_reg_mem);
5970 %}
5971 
5972 // Load Byte (8 bit signed) into long
5973 // instruct loadB2L(rRegL dst, memory mem)
5974 // %{
5975 //   match(Set dst (ConvI2L (LoadB mem)));
5976 
5977 //   ins_cost(125);
5978 //   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5979 //   opcode(0x0F, 0xBE);
5980 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5981 //   ins_pipe(ialu_reg_mem);
5982 // %}
5983 
5984 // Load Byte (8 bit UNsigned): folds (LoadB & 255) into a single movzbl (0F B6)
5985 instruct loadUB(rRegI dst, memory mem, immI_255 bytemask)
5986 %{
5987   match(Set dst (AndI (LoadB mem) bytemask));
5988 
5989   ins_cost(125);
5990   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5991   opcode(0x0F, 0xB6);
5992   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5993   ins_pipe(ialu_reg_mem);
5994 %}
5995 
5996 // Load Byte (8 bit UNsigned) into long
5997 // instruct loadUB2L(rRegL dst, memory mem, immI_255 bytemask)
5998 // %{
5999 //   match(Set dst (ConvI2L (AndI (LoadB mem) bytemask)));
6000 
6001 //   ins_cost(125);
6002 //   format %{ "movzbl  $dst, $mem\t# ubyte -> long" %}
6003 //   opcode(0x0F, 0xB6);
6004 //   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
6005 //   ins_pipe(ialu_reg_mem);
6006 // %}
6007 
6008 // Load Short (16 bit signed): matches LoadS and emits movswl (0F BF) into an int register
6009 instruct loadS(rRegI dst, memory mem)
6010 %{
6011   match(Set dst (LoadS mem));
6012 
6013   ins_cost(125); // XXX
6014   format %{ "movswl $dst, $mem\t# short" %}
6015   opcode(0x0F, 0xBF);
6016   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
6017   ins_pipe(ialu_reg_mem);
6018 %}
6019 
6020 // Load Short (16 bit signed) into long
6021 // instruct loadS2L(rRegL dst, memory mem)
6022 // %{
6023 //   match(Set dst (ConvI2L (LoadS mem)));
6024 
6025 //   ins_cost(125); // XXX
6026 //   format %{ "movswq $dst, $mem\t# short -> long" %}
6027 //   opcode(0x0F, 0xBF);
6028 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
6029 //   ins_pipe(ialu_reg_mem);
6030 // %}
6031 
6032 // Load Char (16 bit UNsigned): matches LoadC and emits movzwl (0F B7) into an int register
6033 instruct loadC(rRegI dst, memory mem)
6034 %{
6035   match(Set dst (LoadC mem));
6036 
6037   ins_cost(125);
6038   format %{ "movzwl  $dst, $mem\t# char" %}
6039   opcode(0x0F, 0xB7);
6040   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
6041   ins_pipe(ialu_reg_mem);
6042 %}
6043 
6044 // Load Char (16 bit UNsigned) into long
6045 // instruct loadC2L(rRegL dst, memory mem)
6046 // %{
6047 //   match(Set dst (ConvI2L (LoadC mem)));
6048 
6049 //   ins_cost(125);
6050 //   format %{ "movzwl  $dst, $mem\t# char -> long" %}
6051 //   opcode(0x0F, 0xB7);
6052 //   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
6053 //   ins_pipe(ialu_reg_mem);
6054 // %}
6055 
6056 // Load Integer: matches LoadI; movl (opcode 8B) with the non-wide REX helper
6057 instruct loadI(rRegI dst, memory mem)
6058 %{
6059   match(Set dst (LoadI mem));
6060 
6061   ins_cost(125); // XXX
6062   format %{ "movl    $dst, $mem\t# int" %}
6063   opcode(0x8B);
6064   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6065   ins_pipe(ialu_reg_mem);
6066 %}
6067 
6068 // Load Long: matches LoadL; same opcode 8B as loadI but with the wide REX helper (movq)
6069 instruct loadL(rRegL dst, memory mem)
6070 %{
6071   match(Set dst (LoadL mem));
6072 
6073   ins_cost(125); // XXX
6074   format %{ "movq    $dst, $mem\t# long" %}
6075   opcode(0x8B);
6076   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6077   ins_pipe(ialu_reg_mem); // XXX
6078 %}
6079 
6080 // Load Range: matches LoadRange; encoded exactly like loadI (movl, opcode 8B)
6081 instruct loadRange(rRegI dst, memory mem)
6082 %{
6083   match(Set dst (LoadRange mem));
6084 
6085   ins_cost(125); // XXX
6086   format %{ "movl    $dst, $mem\t# range" %}
6087   opcode(0x8B);
6088   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6089   ins_pipe(ialu_reg_mem);
6090 %}
6091 
6092 // Load Pointer: matches LoadP; movq (opcode 8B with the wide REX helper)
6093 instruct loadP(rRegP dst, memory mem)
6094 %{
6095   match(Set dst (LoadP mem));
6096 
6097   ins_cost(125); // XXX
6098   format %{ "movq    $dst, $mem\t# ptr" %}
6099   opcode(0x8B);
6100   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6101   ins_pipe(ialu_reg_mem); // XXX
6102 %}
6103 
6104 // Load Compressed Pointer: matches LoadN; emits a 32-bit movl through the assembler
6105 instruct loadN(rRegN dst, memory mem)
6106 %{
6107    match(Set dst (LoadN mem));
6108 
6109    ins_cost(125); // XXX
6110    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6111    ins_encode %{
6112      Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);  // rebuild the address from the operand's components
6113      Register dst = as_Register($dst$$reg);
6114      __ movl(dst, addr);
6115    %}
6116    ins_pipe(ialu_reg_mem); // XXX
6117 %}
6118 
6119 
6120 // Load Klass Pointer: matches LoadKlass; encoded like loadP (movq, opcode 8B, wide REX helper)
6121 instruct loadKlass(rRegP dst, memory mem)
6122 %{
6123   match(Set dst (LoadKlass mem));
6124 
6125   ins_cost(125); // XXX
6126   format %{ "movq    $dst, $mem\t# class" %}
6127   opcode(0x8B);
6128   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6129   ins_pipe(ialu_reg_mem); // XXX
6130 %}
6131 
6132 // Load narrow Klass Pointer: matches LoadNKlass; emits a 32-bit movl, same encoding body as loadN
6133 instruct loadNKlass(rRegN dst, memory mem)
6134 %{
6135   match(Set dst (LoadNKlass mem));
6136 
6137   ins_cost(125); // XXX
6138   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6139   ins_encode %{
6140     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);  // rebuild the address from the operand's components
6141     Register dst = as_Register($dst$$reg);
6142     __ movl(dst, addr);
6143   %}
6144   ins_pipe(ialu_reg_mem); // XXX
6145 %}
6146 
6147 // Load Float: matches LoadF; movss (F3 0F 10), with the F3 prefix emitted before the REX prefix
6148 instruct loadF(regF dst, memory mem)
6149 %{
6150   match(Set dst (LoadF mem));
6151 
6152   ins_cost(145); // XXX
6153   format %{ "movss   $dst, $mem\t# float" %}
6154   opcode(0xF3, 0x0F, 0x10);
6155   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6156   ins_pipe(pipe_slow); // XXX
6157 %}
6158 
6159 // Load Double, movlpd form (66 0F 12): selected only when UseXmmLoadAndClearUpper is false
6160 instruct loadD_partial(regD dst, memory mem)
6161 %{
6162   predicate(!UseXmmLoadAndClearUpper);
6163   match(Set dst (LoadD mem));
6164 
6165   ins_cost(145); // XXX
6166   format %{ "movlpd  $dst, $mem\t# double" %}
6167   opcode(0x66, 0x0F, 0x12);
6168   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6169   ins_pipe(pipe_slow); // XXX
6170 %}
6171 
6172 instruct loadD(regD dst, memory mem)
6173 %{
6174   predicate(UseXmmLoadAndClearUpper); // Load Double, movsd form (F2 0F 10); complement of loadD_partial
6175   match(Set dst (LoadD mem));
6176 
6177   ins_cost(145); // XXX
6178   format %{ "movsd   $dst, $mem\t# double" %}
6179   opcode(0xF2, 0x0F, 0x10);
6180   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6181   ins_pipe(pipe_slow); // XXX
6182 %}
6183 
6184 // Load Aligned Packed Byte to XMM register: matches Load8B, encoded by the movq_ld encode class
6185 instruct loadA8B(regD dst, memory mem) %{
6186   match(Set dst (Load8B mem));
6187   ins_cost(125);
6188   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6189   ins_encode( movq_ld(dst, mem));
6190   ins_pipe( pipe_slow );
6191 %}
6192 
6193 // Load Aligned Packed Short to XMM register: matches Load4S, encoded by the movq_ld encode class
6194 instruct loadA4S(regD dst, memory mem) %{
6195   match(Set dst (Load4S mem));
6196   ins_cost(125);
6197   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6198   ins_encode( movq_ld(dst, mem));
6199   ins_pipe( pipe_slow );
6200 %}
6201 
6202 // Load Aligned Packed Char to XMM register: matches Load4C, encoded by the movq_ld encode class
6203 instruct loadA4C(regD dst, memory mem) %{
6204   match(Set dst (Load4C mem));
6205   ins_cost(125);
6206   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6207   ins_encode( movq_ld(dst, mem));
6208   ins_pipe( pipe_slow );
6209 %}
6210 
6211 // Load Aligned Packed Integer to XMM register: matches Load2I, encoded by the movq_ld encode class
6212 instruct load2IU(regD dst, memory mem) %{
6213   match(Set dst (Load2I mem));
6214   ins_cost(125);
6215   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6216   ins_encode( movq_ld(dst, mem));
6217   ins_pipe( pipe_slow );
6218 %}
6219 
6220 // Load Aligned Packed Single to XMM: matches Load2F, encoded by the movq_ld encode class
6221 instruct loadA2F(regD dst, memory mem) %{
6222   match(Set dst (Load2F mem));
6223   ins_cost(145);
6224   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6225   ins_encode( movq_ld(dst, mem));
6226   ins_pipe( pipe_slow );
6227 %}
6228 
6229 // Load Effective Address for an indOffset8 address operand: leaq (opcode 8D, wide REX helper)
6230 instruct leaP8(rRegP dst, indOffset8 mem)
6231 %{
6232   match(Set dst mem);
6233 
6234   ins_cost(110); // XXX
6235   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6236   opcode(0x8D);
6237   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6238   ins_pipe(ialu_reg_reg_fat);
6239 %}
6240 
6241 instruct leaP32(rRegP dst, indOffset32 mem)
6242 %{
6243   match(Set dst mem); // Load Effective Address for an indOffset32 address operand
6244 
6245   ins_cost(110);
6246   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6247   opcode(0x8D);
6248   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6249   ins_pipe(ialu_reg_reg_fat);
6250 %}
6251 
6252 // instruct leaPIdx(rRegP dst, indIndex mem)
6253 // %{
6254 //   match(Set dst mem);
6255 
6256 //   ins_cost(110);
6257 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6258 //   opcode(0x8D);
6259 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6260 //   ins_pipe(ialu_reg_reg_fat);
6261 // %}
6262 
6263 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6264 %{
6265   match(Set dst mem); // Load Effective Address for an indIndexOffset address operand
6266 
6267   ins_cost(110);
6268   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6269   opcode(0x8D);
6270   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6271   ins_pipe(ialu_reg_reg_fat);
6272 %}
6273 
6274 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6275 %{
6276   match(Set dst mem); // Load Effective Address for an indIndexScale address operand
6277 
6278   ins_cost(110);
6279   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6280   opcode(0x8D);
6281   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6282   ins_pipe(ialu_reg_reg_fat);
6283 %}
6284 
6285 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6286 %{
6287   match(Set dst mem); // Load Effective Address for an indIndexScaleOffset address operand
6288 
6289   ins_cost(110);
6290   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6291   opcode(0x8D);
6292   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6293   ins_pipe(ialu_reg_reg_fat);
6294 %}
6295 
6296 instruct loadConI(rRegI dst, immI src)
6297 %{
6298   match(Set dst src); // Load an arbitrary int constant; no explicit ins_cost, no flags effect
6299 
6300   format %{ "movl    $dst, $src\t# int" %}
6301   ins_encode(load_immI(dst, src));
6302   ins_pipe(ialu_reg_fat); // XXX
6303 %}
6304 
6305 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6306 %{
6307   match(Set dst src); // Load int constant zero by xor-ing dst with itself
6308   effect(KILL cr); // xor modifies the flags
6309 
6310   ins_cost(50);
6311   format %{ "xorl    $dst, $dst\t# int" %}
6312   opcode(0x33); /* + rd */
6313   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6314   ins_pipe(ialu_reg);
6315 %}
6316 
6317 instruct loadConL(rRegL dst, immL src)
6318 %{
6319   match(Set dst src); // Load an arbitrary long constant
6320 
6321   ins_cost(150); // highest cost of the long-constant forms in this section (50/60/70/150)
6322   format %{ "movq    $dst, $src\t# long" %}
6323   ins_encode(load_immL(dst, src));
6324   ins_pipe(ialu_reg);
6325 %}
6326 
6327 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6328 %{
6329   match(Set dst src); // Load long constant zero by xor-ing dst with itself
6330   effect(KILL cr); // xor modifies the flags
6331 
6332   ins_cost(50);
6333   format %{ "xorl    $dst, $dst\t# long" %}
6334   opcode(0x33); /* + rd */
6335   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // non-wide REX: a 32-bit xor also clears the upper 32 bits on x86-64
6336   ins_pipe(ialu_reg); // XXX
6337 %}
6338 
6339 instruct loadConUL32(rRegL dst, immUL32 src)
6340 %{
6341   match(Set dst src); // Load a long constant matching immUL32 with a 32-bit movl
6342 
6343   ins_cost(60);
6344   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6345   ins_encode(load_immUL32(dst, src));
6346   ins_pipe(ialu_reg);
6347 %}
6348 
6349 instruct loadConL32(rRegL dst, immL32 src)
6350 %{
6351   match(Set dst src); // Load a long constant matching immL32 (32-bit immediate form of movq)
6352 
6353   ins_cost(70);
6354   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6355   ins_encode(load_immL32(dst, src));
6356   ins_pipe(ialu_reg);
6357 %}
6358 
6359 instruct loadConP(rRegP dst, immP src)
6360 %{
6361   match(Set dst src); // Load an arbitrary pointer constant; no explicit ins_cost, no flags effect
6362 
6363   format %{ "movq    $dst, $src\t# ptr" %}
6364   ins_encode(load_immP(dst, src));
6365   ins_pipe(ialu_reg_fat); // XXX
6366 %}
6367 
6368 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6369 %{
6370   match(Set dst src); // Load the null pointer constant by xor-ing dst with itself
6371   effect(KILL cr); // xor modifies the flags
6372 
6373   ins_cost(50);
6374   format %{ "xorl    $dst, $dst\t# ptr" %}
6375   opcode(0x33); /* + rd */
6376   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // non-wide REX: a 32-bit xor also clears the upper 32 bits on x86-64
6377   ins_pipe(ialu_reg);
6378 %}
6379 
6380 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6381 %{
6382   match(Set dst src); // Load a pointer constant matching immP31 with a 32-bit movl
6383   effect(KILL cr); // NOTE(review): format shows a plain movl; confirm load_immP31 really needs the flags killed
6384 
6385   ins_cost(60);
6386   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6387   ins_encode(load_immP31(dst, src));
6388   ins_pipe(ialu_reg);
6389 %}
6390 
6391 instruct loadConF(regF dst, immF src)
6392 %{
6393   match(Set dst src); // Load a float constant; the format shows it as a memory load ([$src])
6394   ins_cost(125);
6395 
6396   format %{ "movss   $dst, [$src]" %}
6397   ins_encode(load_conF(dst, src));
6398   ins_pipe(pipe_slow);
6399 %}
6400 
6401 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
6402   match(Set dst src); // Load the compressed NULL pointer constant by xor-ing dst with itself
6403   effect(KILL cr); // xor modifies the flags
6404   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6405   ins_encode %{
6406     Register dst = $dst$$Register;
6407     __ xorq(dst, dst); // both operands are dst, so the format above prints $dst twice as well
6408   %}
6409   ins_pipe(ialu_reg);
6410 %}
6411 
6412 instruct loadConN(rRegN dst, immN src) %{
6413   match(Set dst src); // Load a non-NULL compressed pointer constant
6414 
6415   ins_cost(125);
6416   format %{ "movl    $dst, $src\t# compressed ptr" %}
6417   ins_encode %{
6418     address con = (address)$src$$constant;
6419     Register dst = $dst$$Register;
6420     if (con == NULL) {
6421       ShouldNotReachHere(); // a NULL constant is not expected to reach this encoding
6422     } else {
6423       __ set_narrow_oop(dst, (jobject)$src$$constant);
6424     }
6425   %}
6426   ins_pipe(ialu_reg_fat); // XXX
6427 %}
6428 
6429 instruct loadConF0(regF dst, immF0 src)
6430 %{
6431   match(Set dst src); // Load float 0.0 by xorps dst,dst (0F 57); no flags effect declared
6432   ins_cost(100);
6433 
6434   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6435   opcode(0x0F, 0x57);
6436   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6437   ins_pipe(pipe_slow);
6438 %}
6439 
6440 // Load a double constant. Use the same format since predicate() can not be used here.
6441 instruct loadConD(regD dst, immD src)
6442 %{
6443   match(Set dst src);
6444   ins_cost(125);
6445 
6446   format %{ "movsd   $dst, [$src]" %}
6447   ins_encode(load_conD(dst, src));
6448   ins_pipe(pipe_slow);
6449 %}
6450 
6451 instruct loadConD0(regD dst, immD0 src)
6452 %{
6453   match(Set dst src); // Load double 0.0 by xorpd dst,dst (66 0F 57); the 66 prefix is emitted before REX
6454   ins_cost(100);
6455 
6456   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6457   opcode(0x66, 0x0F, 0x57);
6458   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6459   ins_pipe(pipe_slow);
6460 %}
6461 
6462 instruct loadSSI(rRegI dst, stackSlotI src)
6463 %{
6464   match(Set dst src); // Load an int from a stack slot into a register (movl, opcode 8B)
6465 
6466   ins_cost(125);
6467   format %{ "movl    $dst, $src\t# int stk" %}
6468   opcode(0x8B);
6469   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6470   ins_pipe(ialu_reg_mem);
6471 %}
6472 
6473 instruct loadSSL(rRegL dst, stackSlotL src)
6474 %{
6475   match(Set dst src); // Load a long from a stack slot into a register (movq, wide REX helper)
6476 
6477   ins_cost(125);
6478   format %{ "movq    $dst, $src\t# long stk" %}
6479   opcode(0x8B);
6480   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6481   ins_pipe(ialu_reg_mem);
6482 %}
6483 
6484 instruct loadSSP(rRegP dst, stackSlotP src)
6485 %{
6486   match(Set dst src); // Load a pointer from a stack slot into a register (movq, wide REX helper)
6487 
6488   ins_cost(125);
6489   format %{ "movq    $dst, $src\t# ptr stk" %}
6490   opcode(0x8B);
6491   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6492   ins_pipe(ialu_reg_mem);
6493 %}
6494 
6495 instruct loadSSF(regF dst, stackSlotF src)
6496 %{
6497   match(Set dst src); // Load a float from a stack slot with movss (F3 0F 10), same opcode bytes as loadF
6498 
6499   ins_cost(125);
6500   format %{ "movss   $dst, $src\t# float stk" %}
6501   opcode(0xF3, 0x0F, 0x10);
6502   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6503   ins_pipe(pipe_slow); // XXX
6504 %}
6505 
6506 // Load a double from a stack slot. Use the same format since predicate() can not be used here.
6507 instruct loadSSD(regD dst, stackSlotD src)
6508 %{
6509   match(Set dst src);
6510 
6511   ins_cost(125);
6512   format %{ "movsd   $dst, $src\t# double stk" %}
6513   ins_encode  %{
6514     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // slot addressed as rsp + displacement; movdbl picks the actual instruction
6515   %}
6516   ins_pipe(pipe_slow); // XXX
6517 %}
6518 
6519 // Prefetch instructions.
6520 // Must be safe to execute with invalid address (cannot fault).
6521 
6522 instruct prefetchr( memory mem ) %{
6523   predicate(ReadPrefetchInstr==3); // read prefetch variant chosen when ReadPrefetchInstr is 3
6524   match(PrefetchRead mem);
6525   ins_cost(125);
6526 
6527   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6528   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6529   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6530   ins_pipe(ialu_mem);
6531 %}
6532 
6533 instruct prefetchrNTA( memory mem ) %{
6534   predicate(ReadPrefetchInstr==0); // read prefetch variant chosen when ReadPrefetchInstr is 0
6535   match(PrefetchRead mem);
6536   ins_cost(125);
6537 
6538   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6539   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6540   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6541   ins_pipe(ialu_mem);
6542 %}
6543 
6544 instruct prefetchrT0( memory mem ) %{
6545   predicate(ReadPrefetchInstr==1); // read prefetch variant chosen when ReadPrefetchInstr is 1
6546   match(PrefetchRead mem);
6547   ins_cost(125);
6548 
6549   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6550   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6551   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6552   ins_pipe(ialu_mem);
6553 %}
6554 
6555 instruct prefetchrT2( memory mem ) %{
6556   predicate(ReadPrefetchInstr==2); // read prefetch variant chosen when ReadPrefetchInstr is 2
6557   match(PrefetchRead mem);
6558   ins_cost(125);
6559 
6560   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6561   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6562   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6563   ins_pipe(ialu_mem);
6564 %}
6565 
6566 instruct prefetchw( memory mem ) %{
6567   predicate(AllocatePrefetchInstr==3); // write prefetch variant chosen when AllocatePrefetchInstr is 3
6568   match(PrefetchWrite mem);
6569   ins_cost(125);
6570 
6571   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6572   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6573   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6574   ins_pipe(ialu_mem);
6575 %}
6576 
6577 instruct prefetchwNTA( memory mem ) %{
6578   predicate(AllocatePrefetchInstr==0); // write prefetch variant chosen when AllocatePrefetchInstr is 0
6579   match(PrefetchWrite mem);
6580   ins_cost(125);
6581 
6582   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6583   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6584   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6585   ins_pipe(ialu_mem);
6586 %}
6587 
6588 instruct prefetchwT0( memory mem ) %{
6589   predicate(AllocatePrefetchInstr==1); // write prefetch variant chosen when AllocatePrefetchInstr is 1
6590   match(PrefetchWrite mem);
6591   ins_cost(125);
6592 
6593   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6594   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6595   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6596   ins_pipe(ialu_mem);
6597 %}
6598 
6599 instruct prefetchwT2( memory mem ) %{
6600   predicate(AllocatePrefetchInstr==2); // write prefetch variant chosen when AllocatePrefetchInstr is 2
6601   match(PrefetchWrite mem);
6602   ins_cost(125);
6603 
6604   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6605   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6606   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6607   ins_pipe(ialu_mem);
6608 %}
6609 
6610 //----------Store Instructions-------------------------------------------------
6611 
6612 // Store Byte
// Matches StoreB of an int register into a memory operand (movb, opcode 88).
// Uses the REX_breg_mem prefix class rather than REX_reg_mem used by the
// wider stores; presumably because byte-register access needs special REX
// handling -- see the encoding class definition to confirm.
6613 instruct storeB(memory mem, rRegI src)
6614 %{
6615   match(Set mem (StoreB mem src));
6616 
6617   ins_cost(125); // XXX
6618   format %{ "movb    $mem, $src\t# byte" %}
6619   opcode(0x88);
6620   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6621   ins_pipe(ialu_mem_reg);
6622 %}
6623 
6624 // Store Char/Short
// Matches StoreC of an int register into memory (movw). Shares opcode 89
// with storeI; the SizePrefix encoding emitted first distinguishes the
// 16-bit form.
6625 instruct storeC(memory mem, rRegI src)
6626 %{
6627   match(Set mem (StoreC mem src));
6628 
6629   ins_cost(125); // XXX
6630   format %{ "movw    $mem, $src\t# char/short" %}
6631   opcode(0x89);
6632   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6633   ins_pipe(ialu_mem_reg);
6634 %}
6635 
6636 // Store Integer
// Matches StoreI of an int register into memory (movl, opcode 89, non-wide
// REX prefix class).
6637 instruct storeI(memory mem, rRegI src)
6638 %{
6639   match(Set mem (StoreI mem src));
6640 
6641   ins_cost(125); // XXX
6642   format %{ "movl    $mem, $src\t# int" %}
6643   opcode(0x89);
6644   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6645   ins_pipe(ialu_mem_reg);
6646 %}
6647 
6648 // Store Long
// Matches StoreL of a long register into memory (movq). Same opcode 89 as
// storeI, but with the REX_reg_mem_wide prefix class.
6649 instruct storeL(memory mem, rRegL src)
6650 %{
6651   match(Set mem (StoreL mem src));
6652 
6653   ins_cost(125); // XXX
6654   format %{ "movq    $mem, $src\t# long" %}
6655   opcode(0x89);
6656   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6657   ins_pipe(ialu_mem_reg); // XXX
6658 %}
6659 
6660 // Store Pointer
// Matches StoreP of a pointer register (any_RegP class) into memory; encoded
// identically to storeL (movq, opcode 89 with the wide REX prefix class).
6661 instruct storeP(memory mem, any_RegP src)
6662 %{
6663   match(Set mem (StoreP mem src));
6664 
6665   ins_cost(125); // XXX
6666   format %{ "movq    $mem, $src\t# ptr" %}
6667   opcode(0x89);
6668   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6669   ins_pipe(ialu_mem_reg);
6670 %}
6671 
6672 // Store NULL Pointer, mark word, or other simple pointer constant.
// Matches StoreP whose source is an immP31 constant operand; encoded as
// C7 /0 with the wide REX prefix class and a 32-bit immediate (Con32).
6673 instruct storeImmP(memory mem, immP31 src)
6674 %{
6675   match(Set mem (StoreP mem src));
6676 
6677   ins_cost(125); // XXX
6678   format %{ "movq    $mem, $src\t# ptr" %}
6679   opcode(0xC7); /* C7 /0 */
6680   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6681   ins_pipe(ialu_mem_imm);
6682 %}
6683 
6684 // Store Compressed Pointer
// Matches StoreN of a narrow-oop register into memory. Unlike the
// neighbouring stores this rule has no opcode() clause: the encoding block
// builds an Address from the memory operand's base/index/scale/disp and
// calls the assembler's movl(Address, Register) directly.
6685 instruct storeN(memory mem, rRegN src)
6686 %{
6687   match(Set mem (StoreN mem src));
6688 
6689   ins_cost(125); // XXX
6690   format %{ "movl    $mem, $src\t# compressed ptr" %}
6691   ins_encode %{
6692     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
6693     Register src = as_Register($src$$reg);
6694     __ movl(addr, src);
6695   %}
6696   ins_pipe(ialu_mem_reg);
6697 %}
6698 
6699 // Store Integer Immediate
// Matches StoreI whose source is an immI constant; encoded as C7 /0 followed
// by a 32-bit immediate (Con32). Cost 150, higher than the register form.
6700 instruct storeImmI(memory mem, immI src)
6701 %{
6702   match(Set mem (StoreI mem src));
6703 
6704   ins_cost(150);
6705   format %{ "movl    $mem, $src\t# int" %}
6706   opcode(0xC7); /* C7 /0 */
6707   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6708   ins_pipe(ialu_mem_imm);
6709 %}
6710 
6711 // Store Long Immediate
// Matches StoreL whose source is an immL32 constant; encoded as C7 /0 with
// the wide REX prefix class and a 32-bit immediate (Con32).
6712 instruct storeImmL(memory mem, immL32 src)
6713 %{
6714   match(Set mem (StoreL mem src));
6715 
6716   ins_cost(150);
6717   format %{ "movq    $mem, $src\t# long" %}
6718   opcode(0xC7); /* C7 /0 */
6719   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6720   ins_pipe(ialu_mem_imm);
6721 %}
6722 
6723 // Store Short/Char Immediate
// Matches StoreC whose source is an immI16 constant, only when the
// UseStoreImmI16 flag is set. Encoded as SizePrefix + C7 /0 with a 16-bit
// immediate (Con16).
6724 instruct storeImmI16(memory mem, immI16 src)
6725 %{
6726   predicate(UseStoreImmI16);
6727   match(Set mem (StoreC mem src));
6728 
6729   ins_cost(150);
6730   format %{ "movw    $mem, $src\t# short/char" %}
6731   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6732   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6733   ins_pipe(ialu_mem_imm);
6734 %}
6735 
6736 // Store Byte Immediate
// Matches StoreB whose source is an immI8 constant; encoded as C6 /0 with
// the immediate emitted through the Con8or32 encoding class.
6737 instruct storeImmB(memory mem, immI8 src)
6738 %{
6739   match(Set mem (StoreB mem src));
6740 
6741   ins_cost(150); // XXX
6742   format %{ "movb    $mem, $src\t# byte" %}
6743   opcode(0xC6); /* C6 /0 */
6744   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6745   ins_pipe(ialu_mem_imm);
6746 %}
6747 
6748 // Store Aligned Packed Byte XMM register to memory
// Matches Store8B from a regD operand; all four packed-store rules
// (8B/4C/2I/2F) share the movq_st encoding class and cost 145.
6749 instruct storeA8B(memory mem, regD src) %{
6750   match(Set mem (Store8B mem src));
6751   ins_cost(145);
6752   format %{ "MOVQ  $mem,$src\t! packed8B" %}
6753   ins_encode( movq_st(mem, src));
6754   ins_pipe( pipe_slow );
6755 %}
6756 
6757 // Store Aligned Packed Char/Short XMM register to memory
// Matches Store4C from a regD operand; encoded via the shared movq_st class.
6758 instruct storeA4C(memory mem, regD src) %{
6759   match(Set mem (Store4C mem src));
6760   ins_cost(145);
6761   format %{ "MOVQ  $mem,$src\t! packed4C" %}
6762   ins_encode( movq_st(mem, src));
6763   ins_pipe( pipe_slow );
6764 %}
6765 
6766 // Store Aligned Packed Integer XMM register to memory
// Matches Store2I from a regD operand; encoded via the shared movq_st class.
6767 instruct storeA2I(memory mem, regD src) %{
6768   match(Set mem (Store2I mem src));
6769   ins_cost(145);
6770   format %{ "MOVQ  $mem,$src\t! packed2I" %}
6771   ins_encode( movq_st(mem, src));
6772   ins_pipe( pipe_slow );
6773 %}
6774 
6775 // Store CMS card-mark Immediate
// Matches StoreCM whose source is the immI0 constant operand. The encoding
// is the same C6 /0 byte-immediate store used by storeImmB.
6776 instruct storeImmCM0(memory mem, immI0 src)
6777 %{
6778   match(Set mem (StoreCM mem src));
6779 
6780   ins_cost(150); // XXX
6781   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6782   opcode(0xC6); /* C6 /0 */
6783   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6784   ins_pipe(ialu_mem_imm);
6785 %}
6786 
6787 // Store Aligned Packed Single Float XMM register to memory
// Matches Store2F from a regD operand; encoded via the shared movq_st class.
6788 instruct storeA2F(memory mem, regD src) %{
6789   match(Set mem (Store2F mem src));
6790   ins_cost(145);
6791   format %{ "MOVQ  $mem,$src\t! packed2F" %}
6792   ins_encode( movq_st(mem, src));
6793   ins_pipe( pipe_slow );
6794 %}
6795 
6796 // Store Float
// Matches StoreF from a regF operand (movss, F3 0F 11). Note the encode
// order: the F3 byte (OpcP) is emitted before the REX prefix class, then the
// remaining two opcode bytes (OpcS, OpcT).
6797 instruct storeF(memory mem, regF src)
6798 %{
6799   match(Set mem (StoreF mem src));
6800 
6801   ins_cost(95); // XXX
6802   format %{ "movss   $mem, $src\t# float" %}
6803   opcode(0xF3, 0x0F, 0x11);
6804   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
6805   ins_pipe(pipe_slow); // XXX
6806 %}
6807 
6808 // Store immediate Float value (it is faster than store from XMM register)
// Matches StoreF whose source is an immF constant. Encoded as an integer
// store (movl, C7 /0) with the immediate produced by Con32F_as_bits. Cost 50
// makes it cheaper than storeF (95).
6809 instruct storeF_imm(memory mem, immF src)
6810 %{
6811   match(Set mem (StoreF mem src));
6812 
6813   ins_cost(50);
6814   format %{ "movl    $mem, $src\t# float" %}
6815   opcode(0xC7); /* C7 /0 */
6816   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6817   ins_pipe(ialu_mem_imm);
6818 %}
6819 
6820 // Store Double
// Matches StoreD from a regD operand (movsd, F2 0F 11); same encode layout
// as storeF with F2 in place of F3.
6821 instruct storeD(memory mem, regD src)
6822 %{
6823   match(Set mem (StoreD mem src));
6824 
6825   ins_cost(95); // XXX
6826   format %{ "movsd   $mem, $src\t# double" %}
6827   opcode(0xF2, 0x0F, 0x11);
6828   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
6829   ins_pipe(pipe_slow); // XXX
6830 %}
6831 
6832 // Store immediate double 0.0 (it is faster than store from XMM register)
// Matches StoreD only for the immD0 constant operand. Encoded as an integer
// movq (C7 /0, wide REX prefix class) with a 32-bit immediate.
// NOTE(review): the immediate goes through Con32F_as_bits, the float helper
// also used by storeF_imm; presumably safe only because the operand is
// restricted to 0.0 -- confirm against the enc_class definition.
6833 instruct storeD0_imm(memory mem, immD0 src)
6834 %{
6835   match(Set mem (StoreD mem src));
6836 
6837   ins_cost(50);
6838   format %{ "movq    $mem, $src\t# double 0." %}
6839   opcode(0xC7); /* C7 /0 */
6840   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6841   ins_pipe(ialu_mem_imm);
6842 %}
6843 
// Move an int register into an int stack slot. The match rule is a bare
// "Set dst src" (no ideal Store node); encoding is the same movl (opcode 89)
// used by storeI, with the stack slot as the memory operand.
6844 instruct storeSSI(stackSlotI dst, rRegI src)
6845 %{
6846   match(Set dst src);
6847 
6848   ins_cost(100);
6849   format %{ "movl    $dst, $src\t# int stk" %}
6850   opcode(0x89);
6851   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6852   ins_pipe( ialu_mem_reg );
6853 %}
6854 
// Move a long register into a long stack slot (bare "Set dst src" match);
// movq, opcode 89 with the wide REX prefix class.
6855 instruct storeSSL(stackSlotL dst, rRegL src)
6856 %{
6857   match(Set dst src);
6858 
6859   ins_cost(100);
6860   format %{ "movq    $dst, $src\t# long stk" %}
6861   opcode(0x89);
6862   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6863   ins_pipe(ialu_mem_reg);
6864 %}
6865 
// Move a pointer register into a pointer stack slot (bare "Set dst src"
// match); encoded identically to storeSSL.
6866 instruct storeSSP(stackSlotP dst, rRegP src)
6867 %{
6868   match(Set dst src);
6869 
6870   ins_cost(100);
6871   format %{ "movq    $dst, $src\t# ptr stk" %}
6872   opcode(0x89);
6873   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6874   ins_pipe(ialu_mem_reg);
6875 %}
6876 
// Move a float register into a float stack slot (bare "Set dst src" match);
// movss, F3 0F 11, same encode layout as storeF.
6877 instruct storeSSF(stackSlotF dst, regF src)
6878 %{
6879   match(Set dst src);
6880 
6881   ins_cost(95); // XXX
6882   format %{ "movss   $dst, $src\t# float stk" %}
6883   opcode(0xF3, 0x0F, 0x11);
6884   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
6885   ins_pipe(pipe_slow); // XXX
6886 %}
6887 
// Move a double register into a double stack slot (bare "Set dst src"
// match); movsd, F2 0F 11, same encode layout as storeD.
6888 instruct storeSSD(stackSlotD dst, regD src)
6889 %{
6890   match(Set dst src);
6891 
6892   ins_cost(95); // XXX
6893   format %{ "movsd   $dst, $src\t# double stk" %}
6894   opcode(0xF2, 0x0F, 0x11);
6895   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
6896   ins_pipe(pipe_slow); // XXX
6897 %}
6898 
6899 //----------BSWAP Instructions-------------------------------------------------
// In-place byte reversal of an int register: matches ReverseBytesI with the
// same register as input and result (bswapl, opcode 0F C8 via opc2_reg).
6900 instruct bytes_reverse_int(rRegI dst) %{
6901   match(Set dst (ReverseBytesI dst));
6902 
6903   format %{ "bswapl  $dst" %}
6904   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6905   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6906   ins_pipe( ialu_reg );
6907 %}
6908 
// In-place byte reversal of a long register: matches ReverseBytesL with the
// same register as input and result (bswapq). Same opcode bytes as the int
// form, with the REX_reg_wide prefix class.
6909 instruct bytes_reverse_long(rRegL dst) %{
6910   match(Set dst (ReverseBytesL dst));
6911 
6912   format %{ "bswapq  $dst" %}
6913 
6914   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6915   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6916   ins_pipe( ialu_reg);
6917 %}
6918 
// Fused load + byte reversal: matches ReverseBytesI applied to a LoadI.
// The encode list is two instructions back to back: a load into dst
// (opcode 8B, reg_mem) followed by a byte swap of dst (0F C8 via opc3_reg).
6919 instruct loadI_reversed(rRegI dst, memory src) %{
6920   match(Set dst (ReverseBytesI (LoadI src)));
6921 
6922   format %{ "bswap_movl $dst, $src" %}
6923   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
6924   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
6925   ins_pipe( ialu_reg_mem );
6926 %}
6927 
// Fused load + byte reversal for longs: matches ReverseBytesL applied to a
// LoadL. Same two-instruction encode list as loadI_reversed, using the wide
// REX prefix classes for both the load and the swap.
6928 instruct loadL_reversed(rRegL dst, memory src) %{
6929   match(Set dst (ReverseBytesL (LoadL src)));
6930 
6931   format %{ "bswap_movq $dst, $src" %}
6932   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
6933   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
6934   ins_pipe( ialu_reg_mem );
6935 %}
6936 
// Fused byte reversal + store: matches StoreI of ReverseBytesI(src).
// The encode list is two instructions back to back: a byte swap of src
// (0F C8 via opc2_reg, as in bytes_reverse_int) followed by a store of src
// to dst (opcode 89, reg_mem).
// NOTE(review): the swap is applied to $src itself, yet the rule declares no
// effect() on src, so src appears to be left byte-swapped while being
// described as a plain input. Confirm that no later use of the same value
// can observe the clobbered register.
6937 instruct storeI_reversed(memory dst, rRegI src) %{
6938   match(Set dst (StoreI dst (ReverseBytesI  src)));
6939 
6940   format %{ "movl_bswap $dst, $src" %}
6941   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
6942   ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) );
6943   ins_pipe( ialu_mem_reg );
6944 %}
6945 
// Fused byte reversal + store for longs: matches StoreL of
// ReverseBytesL(src). Encode list: byte swap of src (0F C8, wide REX prefix
// class) followed by a store of src to dst (opcode 89, wide REX prefix class).
// NOTE(review): as with storeI_reversed, the swap is applied to $src itself
// and no effect() on src is declared -- confirm the register is not reused
// with its original value afterwards.
6946 instruct storeL_reversed(memory dst, rRegL src) %{
6947   match(Set dst (StoreL dst (ReverseBytesL  src)));
6948 
6949   format %{ "movq_bswap $dst, $src" %}
6950   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
6951   ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) );
6952   ins_pipe( ialu_mem_reg );
6953 %}
6954 
6955 //----------MemBar Instructions-----------------------------------------------
6956 // Memory barrier flavors
6957 
// Acquire barrier: matches MemBarAcquire and emits nothing (empty
// ins_encode, size(0), cost 0).
6958 instruct membar_acquire()
6959 %{
6960   match(MemBarAcquire);
6961   ins_cost(0);
6962 
6963   size(0);
6964   format %{ "MEMBAR-acquire" %}
6965   ins_encode();
6966   ins_pipe(empty);
6967 %}
6968 
// Acquire barrier variant selected when Matcher::prior_fast_lock(n) holds.
// Also emits nothing; it differs from membar_acquire only in the predicate
// and in the format text, which records why the barrier is empty.
6969 instruct membar_acquire_lock()
6970 %{
6971   match(MemBarAcquire);
6972   predicate(Matcher::prior_fast_lock(n));
6973   ins_cost(0);
6974 
6975   size(0);
6976   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6977   ins_encode();
6978   ins_pipe(empty);
6979 %}
6980 
// Release barrier: matches MemBarRelease and emits nothing (empty
// ins_encode, size(0), cost 0).
6981 instruct membar_release()
6982 %{
6983   match(MemBarRelease);
6984   ins_cost(0);
6985 
6986   size(0);
6987   format %{ "MEMBAR-release" %}
6988   ins_encode();
6989   ins_pipe(empty);
6990 %}
6991 
// Release barrier variant selected when Matcher::post_fast_unlock(n) holds.
// Also emits nothing; it differs from membar_release only in the predicate
// and in the explanatory format text.
6992 instruct membar_release_lock()
6993 %{
6994   match(MemBarRelease);
6995   predicate(Matcher::post_fast_unlock(n));
6996   ins_cost(0);
6997 
6998   size(0);
6999   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7000   ins_encode();
7001   ins_pipe(empty);
7002 %}
7003 
// Volatile barrier: matches MemBarVolatile. This is the only barrier rule in
// this group with a real encoding (enc_membar_volatile, defined elsewhere);
// its cost of 400 lets the zero-cost unnecessary_membar_volatile win
// whenever that rule's predicate holds.
7004 instruct membar_volatile()
7005 %{
7006   match(MemBarVolatile);
7007   ins_cost(400);
7008 
7009   format %{ "MEMBAR-volatile" %}
7010   ins_encode(enc_membar_volatile);
7011   ins_pipe(pipe_slow);
7012 %}
7013 
// Volatile barrier elided: matches MemBarVolatile when
// Matcher::post_store_load_barrier(n) holds, and emits nothing (empty
// ins_encode, size(0), cost 0).
7014 instruct unnecessary_membar_volatile()
7015 %{
7016   match(MemBarVolatile);
7017   predicate(Matcher::post_store_load_barrier(n));
7018   ins_cost(0);
7019 
7020   size(0);
7021   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7022   ins_encode();
7023   ins_pipe(empty);
7024 %}
7025 
7026 //----------Move Instructions--------------------------------------------------
7027 
// Reinterpret a long as a pointer: matches CastX2P and is implemented as a
// register-to-register movq through the enc_copy_wide encoding class.
7028 instruct castX2P(rRegP dst, rRegL src)
7029 %{
7030   match(Set dst (CastX2P src));
7031 
7032   format %{ "movq    $dst, $src\t# long->ptr" %}
7033   ins_encode(enc_copy_wide(dst, src));
7034   ins_pipe(ialu_reg_reg); // XXX
7035 %}
7036 
// Reinterpret a pointer as a long: matches CastP2X; mirror image of castX2P,
// using the same enc_copy_wide encoding class.
7037 instruct castP2X(rRegL dst, rRegP src)
7038 %{
7039   match(Set dst (CastP2X src));
7040 
7041   format %{ "movq    $dst, $src\t# ptr -> long" %}
7042   ins_encode(enc_copy_wide(dst, src));
7043   ins_pipe(ialu_reg_reg); // XXX
7044 %}
7045 
7046 
7047 // Convert oop pointer into compressed form
// Matches EncodeP when the node's type is not known to be NotNull (the
// NotNull case is handled by encodeHeapOop_not_null). The value is first
// copied into dst if the two operands were assigned different registers,
// then compressed in place by the assembler's encode_heap_oop(d).
// Declares the flags register as killed.
7048 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7049   predicate(n->bottom_type()->is_narrowoop()->make_oopptr()->ptr() != TypePtr::NotNull);
7050   match(Set dst (EncodeP src));
7051   effect(KILL cr);
7052   format %{ "encode_heap_oop $dst,$src" %}
7053   ins_encode %{
7054     Register s = $src$$Register;
7055     Register d = $dst$$Register;
7056     if (s != d) {
7057       __ movq(d, s);
7058     }
7059     __ encode_heap_oop(d);
7060   %}
7061   ins_pipe(ialu_reg_long);
7062 %}
7063 
// Compress an oop that is statically known to be non-null: matches EncodeP
// when the node's type is NotNull. Delegates to the two-register form
// encode_heap_oop_not_null(d, s), so no explicit copy is needed here.
// Declares the flags register as killed.
7064 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7065   predicate(n->bottom_type()->is_narrowoop()->make_oopptr()->ptr() == TypePtr::NotNull);
7066   match(Set dst (EncodeP src));
7067   effect(KILL cr);
7068   format %{ "encode_heap_oop_not_null $dst,$src" %}
7069   ins_encode %{
7070     Register s = $src$$Register;
7071     Register d = $dst$$Register;
7072     __ encode_heap_oop_not_null(d, s);
7073   %}
7074   ins_pipe(ialu_reg_long);
7075 %}
7076 
// Expand a compressed oop into a full pointer: matches DecodeN when the
// node's type is not known to be NotNull. The narrow value is copied into
// dst if the registers differ, then expanded in place by decode_heap_oop(d).
// Declares the flags register as killed.
7077 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7078   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull);
7079   match(Set dst (DecodeN src));
7080   effect(KILL cr);
7081   format %{ "decode_heap_oop $dst,$src" %}
7082   ins_encode %{
7083     Register s = $src$$Register;
7084     Register d = $dst$$Register;
7085     if (s != d) {
7086       __ movq(d, s);
7087     }
7088     __ decode_heap_oop(d);
7089   %}
7090   ins_pipe(ialu_reg_long);
7091 %}
7092 
// Expand a compressed oop that is statically known to be non-null: matches
// DecodeN when the node's type is NotNull and delegates to the two-register
// form decode_heap_oop_not_null(d, s).
// NOTE(review): unlike encodeHeapOop, encodeHeapOop_not_null and
// decodeHeapOop, this rule takes no rFlagsReg operand and declares no
// KILL cr. That is only correct if decode_heap_oop_not_null(d, s) leaves the
// condition flags untouched -- confirm against the assembler implementation.
7093 instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{
7094   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull);
7095   match(Set dst (DecodeN src));
7096   format %{ "decode_heap_oop_not_null $dst,$src" %}
7097   ins_encode %{
7098     Register s = $src$$Register;
7099     Register d = $dst$$Register;
7100     __ decode_heap_oop_not_null(d, s);
7101   %}
7102   ins_pipe(ialu_reg_long);
7103 %}
7104 
7105 
7106 //----------Conditional Move---------------------------------------------------
7107 // Jump
7108 // dummy instruction for generating temp registers
// (The three jumpXtnd* rules below are table jumps, not conditional moves,
// even though they sit under the "Conditional Move" banner.)
// Matches a Jump whose index is (switch_val << shift). predicate(false)
// means this rule is never selected as written. dest is a TEMP register that
// receives the table base (leaq) before the indirect jmp.
7109 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7110   match(Jump (LShiftL switch_val shift));
7111   ins_cost(350);
7112   predicate(false);
7113   effect(TEMP dest);
7114 
7115   format %{ "leaq    $dest, table_base\n\t"
7116             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7117   ins_encode(jump_enc_offset(switch_val, shift, dest));
7118   ins_pipe(pipe_jmp);
7119   ins_pc_relative(1);
7120 %}
7121 
// Table jump whose index is (switch_val << shift) + offset, with offset an
// immL32 constant. dest is a TEMP register that receives the table base
// (leaq) before the indirect jmp; the encoding itself lives in jump_enc_addr.
7122 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
7123   match(Jump (AddL (LShiftL switch_val shift) offset));
7124   ins_cost(350);
7125   effect(TEMP dest);
7126 
7127   format %{ "leaq    $dest, table_base\n\t"
7128             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7129   ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
7130   ins_pipe(pipe_jmp);
7131   ins_pc_relative(1);
7132 %}
7133 
// Table jump with a plain index: matches Jump on switch_val with no shift or
// offset folded in. dest is a TEMP register that receives the table base
// (leaq) before the indirect jmp; the encoding itself lives in jump_enc.
7134 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
7135   match(Jump switch_val);
7136   ins_cost(350);
7137   effect(TEMP dest);
7138 
7139   format %{ "leaq    $dest, table_base\n\t"
7140             "jmp     [$dest + $switch_val]\n\t" %}
7141   ins_encode(jump_enc(switch_val, dest));
7142   ins_pipe(pipe_jmp);
7143   ins_pc_relative(1);
7144 %}
7145 
7146 // Conditional move
// Int register-to-register cmov on a signed comparison (cmpOp / rFlagsReg).
// dst appears on both sides of the match, so it keeps its value when the
// condition does not hold. opcode(0x0F, 0x40) is the base that
// enc_cmov(cop) presumably combines with the condition code -- see the
// enc_class definition.
7147 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7148 %{
7149   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7150 
7151   ins_cost(200); // XXX
7152   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7153   opcode(0x0F, 0x40);
7154   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7155   ins_pipe(pipe_cmov_reg);
7156 %}
7157 
// Unsigned counterpart of cmovI_reg: same match shape and encoding, but
// taking the unsigned operand classes cmpOpU / rFlagsRegU.
7158 instruct cmovI_regU(rRegI dst, rRegI src, rFlagsRegU cr, cmpOpU cop)
7159 %{
7160   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7161 
7162   ins_cost(200); // XXX
7163   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7164   opcode(0x0F, 0x40);
7165   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7166   ins_pipe(pipe_cmov_reg);
7167 %}
7168 
7169 // Conditional move
// Int cmov with the LoadI of the source folded into the instruction
// (memory operand), signed comparison. Cost 250 versus 200 for the
// register form.
7170 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src)
7171 %{
7172   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7173 
7174   ins_cost(250); // XXX
7175   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7176   opcode(0x0F, 0x40);
7177   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7178   ins_pipe(pipe_cmov_mem);
7179 %}
7180 
7181 // Conditional move
// Unsigned counterpart of cmovI_mem: LoadI folded into the cmov, taking the
// unsigned operand classes cmpOpU / rFlagsRegU.
7182 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7183 %{
7184   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7185 
7186   ins_cost(250); // XXX
7187   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7188   opcode(0x0F, 0x40);
7189   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7190   ins_pipe(pipe_cmov_mem);
7191 %}
7192 
7193 // Conditional move
// Compressed-pointer (rRegN) cmov on a signed comparison: matches CMoveN and
// uses the same 32-bit cmovl encoding as cmovI_reg.
7194 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7195 %{
7196   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7197 
7198   ins_cost(200); // XXX
7199   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7200   opcode(0x0F, 0x40);
7201   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7202   ins_pipe(pipe_cmov_reg);
7203 %}
7204 
7205 // Conditional move
// Unsigned counterpart of cmovN_reg (cmpOpU / rFlagsRegU operand classes).
7206 instruct cmovN_regU(rRegN dst, rRegN src, rFlagsRegU cr, cmpOpU cop)
7207 %{
7208   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7209 
7210   ins_cost(200); // XXX
7211   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7212   opcode(0x0F, 0x40);
7213   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7214   ins_pipe(pipe_cmov_reg);
7215 %}
7216 
7217 // Conditional move
// Pointer cmov on a signed comparison: matches CMoveP and uses the 64-bit
// form (cmovq, REX_reg_reg_wide prefix class).
7218 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7219 %{
7220   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7221 
7222   ins_cost(200); // XXX
7223   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7224   opcode(0x0F, 0x40);
7225   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7226   ins_pipe(pipe_cmov_reg);  // XXX
7227 %}
7228 
7229 // Conditional move
// Unsigned counterpart of cmovP_reg (cmpOpU / rFlagsRegU operand classes).
7230 instruct cmovP_regU(rRegP dst, rRegP src, rFlagsRegU cr, cmpOpU cop)
7231 %{
7232   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7233 
7234   ins_cost(200); // XXX
7235   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7236   opcode(0x0F, 0x40);
7237   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7238   ins_pipe(pipe_cmov_reg); // XXX
7239 %}
7240 
7241 // DISABLED: Requires the ADLC to emit a bottom_type call that
7242 // correctly meets the two pointer arguments; one is an incoming
7243 // register but the other is a memory operand.  ALSO appears to
7244 // be buggy with implicit null checks.
7245 //
7246 //// Conditional move
7247 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7248 //%{
7249 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7250 //  ins_cost(250);
7251 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7252 //  opcode(0x0F,0x40);
7253 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7254 //  ins_pipe( pipe_cmov_mem );
7255 //%}
7256 //
7257 //// Conditional move
7258 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7259 //%{
7260 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7261 //  ins_cost(250);
7262 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7263 //  opcode(0x0F,0x40);
7264 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7265 //  ins_pipe( pipe_cmov_mem );
7266 //%}
7267 
// Long register-to-register cmov on a signed comparison: matches CMoveL and
// uses the 64-bit form (cmovq, REX_reg_reg_wide prefix class).
7268 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7269 %{
7270   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7271 
7272   ins_cost(200); // XXX
7273   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7274   opcode(0x0F, 0x40);
7275   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7276   ins_pipe(pipe_cmov_reg);  // XXX
7277 %}
7278 
// Long cmov with the LoadL of the source folded in (memory operand), signed
// comparison.
// NOTE(review): cost is 200, the same as the register form, whereas the int
// memory forms (cmovI_mem / cmovI_memU) use 250; the "XXX" marker suggests
// the cost was never tuned.
7279 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7280 %{
7281   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7282 
7283   ins_cost(200); // XXX
7284   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7285   opcode(0x0F, 0x40);
7286   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7287   ins_pipe(pipe_cmov_mem);  // XXX
7288 %}
7289 
// Unsigned counterpart of cmovL_reg (cmpOpU / rFlagsRegU operand classes).
7290 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7291 %{
7292   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7293 
7294   ins_cost(200); // XXX
7295   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7296   opcode(0x0F, 0x40);
7297   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7298   ins_pipe(pipe_cmov_reg); // XXX
7299 %}
7300 
// Unsigned counterpart of cmovL_mem: LoadL folded into the cmov, taking the
// unsigned operand classes cmpOpU / rFlagsRegU.
// NOTE(review): cost 200 matches the register form rather than the 250 used
// by the int memory forms; see the "XXX" marker.
7301 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7302 %{
7303   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7304 
7305   ins_cost(200); // XXX
7306   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7307   opcode(0x0F, 0x40);
7308   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7309   ins_pipe(pipe_cmov_mem); // XXX
7310 %}
7311 
// Float conditional move on a signed comparison: matches CMoveF. As the
// format string shows, this is not a single cmov but a branch on the
// inverted condition around a movss; the sequence is produced by the
// enc_cmovf_branch encoding class.
7312 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7313 %{
7314   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7315 
7316   ins_cost(200); // XXX
7317   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7318             "movss     $dst, $src\n"
7319     "skip:" %}
7320   ins_encode(enc_cmovf_branch(cop, dst, src));
7321   ins_pipe(pipe_slow);
7322 %}
7323 
7324 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7325 // %{
7326 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7327 
7328 //   ins_cost(200); // XXX
7329 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7330 //             "movss     $dst, $src\n"
7331 //     "skip:" %}
7332 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7333 //   ins_pipe(pipe_slow);
7334 // %}
7335 
// Unsigned counterpart of cmovF_reg: same branch-around-movss sequence via
// enc_cmovf_branch, taking the cmpOpU / rFlagsRegU operand classes.
7336 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7337 %{
7338   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7339 
7340   ins_cost(200); // XXX
7341   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7342             "movss     $dst, $src\n"
7343     "skip:" %}
7344   ins_encode(enc_cmovf_branch(cop, dst, src));
7345   ins_pipe(pipe_slow);
7346 %}
7347 
// Double conditional move on a signed comparison: matches CMoveD. Like the
// float form this is a branch on the inverted condition around a movsd,
// produced by the enc_cmovd_branch encoding class.
7348 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7349 %{
7350   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7351 
7352   ins_cost(200); // XXX
7353   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7354             "movsd     $dst, $src\n"
7355     "skip:" %}
7356   ins_encode(enc_cmovd_branch(cop, dst, src));
7357   ins_pipe(pipe_slow);
7358 %}
7359 
// Unsigned counterpart of cmovD_reg: same branch-around-movsd sequence via
// enc_cmovd_branch, taking the cmpOpU / rFlagsRegU operand classes.
7360 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7361 %{
7362   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7363 
7364   ins_cost(200); // XXX
7365   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7366             "movsd     $dst, $src\n"
7367     "skip:" %}
7368   ins_encode(enc_cmovd_branch(cop, dst, src));
7369   ins_pipe(pipe_slow);
7370 %}
7371 
7372 //----------Arithmetic Instructions--------------------------------------------
7373 //----------Addition Instructions----------------------------------------------
7374 
// Int add, register += register: matches AddI with dst as both input and
// result (addl, opcode 03). Declares the flags register as killed.
7375 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7376 %{
7377   match(Set dst (AddI dst src));
7378   effect(KILL cr);
7379 
7380   format %{ "addl    $dst, $src\t# int" %}
7381   opcode(0x03);
7382   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7383   ins_pipe(ialu_reg_reg);
7384 %}
7385 
// Int add, register += immediate: matches AddI with an immI constant
// (opcode 81 /0). The OpcSErm / Con8or32 encoding classes presumably pick
// the short sign-extended-imm8 form when the constant fits -- see their
// definitions. Declares the flags register as killed.
7386 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7387 %{
7388   match(Set dst (AddI dst src));
7389   effect(KILL cr);
7390 
7391   format %{ "addl    $dst, $src\t# int" %}
7392   opcode(0x81, 0x00); /* /0 id */
7393   ins_encode(OpcSErm(dst, src), Con8or32(src));
7394   ins_pipe( ialu_reg );
7395 %}
7396 
// Int add, register += memory: matches AddI whose second input is a LoadI,
// folding the load into the add (addl, opcode 03 with a memory operand).
// Declares the flags register as killed.
7397 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7398 %{
7399   match(Set dst (AddI dst (LoadI src)));
7400   effect(KILL cr);
7401 
7402   ins_cost(125); // XXX
7403   format %{ "addl    $dst, $src\t# int" %}
7404   opcode(0x03);
7405   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7406   ins_pipe(ialu_reg_mem);
7407 %}
7408 
// Int read-modify-write add, memory += register: matches a StoreI to dst of
// (LoadI dst) + src, collapsing load/add/store into one addl with a memory
// destination (opcode 01 /r). Declares the flags register as killed.
7409 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7410 %{
7411   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7412   effect(KILL cr);
7413 
7414   ins_cost(150); // XXX
7415   format %{ "addl    $dst, $src\t# int" %}
7416   opcode(0x01); /* Opcode 01 /r */
7417   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7418   ins_pipe(ialu_mem_reg);
7419 %}
7420 
// Int read-modify-write add, memory += immediate: matches a StoreI to dst of
// (LoadI dst) + immI constant (opcode 81 /0, with OpcSE / Con8or32 handling
// the immediate). Declares the flags register as killed.
7421 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7422 %{
7423   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7424   effect(KILL cr);
7425 
7426   ins_cost(125); // XXX
7427   format %{ "addl    $dst, $src\t# int" %}
7428   opcode(0x81); /* Opcode 81 /0 id */
7429   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7430   ins_pipe(ialu_mem_imm);
7431 %}
7432 
// Int increment of a register: matches AddI with the immI1 constant operand,
// only when UseIncDec is set (incl, FF /0). The src operand takes part in
// matching but is not referenced by the encoding.
// Declares the flags register as killed.
7433 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7434 %{
7435   predicate(UseIncDec);
7436   match(Set dst (AddI dst src));
7437   effect(KILL cr);
7438 
7439   format %{ "incl    $dst\t# int" %}
7440   opcode(0xFF, 0x00); // FF /0
7441   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7442   ins_pipe(ialu_reg);
7443 %}
7444 
// Int increment of a memory location: matches a StoreI to dst of
// (LoadI dst) + immI1, only when UseIncDec is set (incl, FF /0 with a memory
// operand). Declares the flags register as killed.
7445 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7446 %{
7447   predicate(UseIncDec);
7448   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7449   effect(KILL cr);
7450 
7451   ins_cost(125); // XXX
7452   format %{ "incl    $dst\t# int" %}
7453   opcode(0xFF); /* Opcode FF /0 */
7454   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7455   ins_pipe(ialu_mem_imm);
7456 %}
7457 
7458 // XXX why does that use AddI
// Int decrement of a register (decl, FF /1), only when UseIncDec is set.
// The rule matches AddI with the immI_M1 constant operand (presumably the
// constant -1, i.e. the decrement reaches the matcher as an add of minus
// one -- confirm against the immI_M1 operand definition).
// Declares the flags register as killed.
7459 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7460 %{
7461   predicate(UseIncDec);
7462   match(Set dst (AddI dst src));
7463   effect(KILL cr);
7464 
7465   format %{ "decl    $dst\t# int" %}
7466   opcode(0xFF, 0x01); // FF /1
7467   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7468   ins_pipe(ialu_reg);
7469 %}
7470 
7471 // XXX why does that use AddI
// Int decrement of a memory location (decl, FF /1 with a memory operand),
// only when UseIncDec is set. Matches a StoreI to dst of (LoadI dst) plus
// the immI_M1 constant operand (presumably -1; confirm against the operand
// definition). Declares the flags register as killed.
7472 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7473 %{
7474   predicate(UseIncDec);
7475   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7476   effect(KILL cr);
7477 
7478   ins_cost(125); // XXX
7479   format %{ "decl    $dst\t# int" %}
7480   opcode(0xFF); /* Opcode FF /1 */
7481   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7482   ins_pipe(ialu_mem_imm);
7483 %}
7484 
// Three-operand int add via lea: matches AddI of a register and an immI
// constant into a separate dst register. Unlike the addl rules it declares
// no effect on the flags register. A literal 0x67 byte is emitted first
// (shown as "addr32" in the format) ahead of the 8D /r lea. Cost 110.
7485 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7486 %{
7487   match(Set dst (AddI src0 src1));
7488 
7489   ins_cost(110);
7490   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7491   opcode(0x8D); /* 0x8D /r */
7492   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7493   ins_pipe(ialu_reg_reg);
7494 %}
7495 
// Long add, register += register: matches AddL with dst as both input and
// result (addq, opcode 03 with the wide REX prefix class).
// Declares the flags register as killed.
7496 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7497 %{
7498   match(Set dst (AddL dst src));
7499   effect(KILL cr);
7500 
7501   format %{ "addq    $dst, $src\t# long" %}
7502   opcode(0x03);
7503   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7504   ins_pipe(ialu_reg_reg);
7505 %}
7506 
// Long add, register += immediate: matches AddL with an immL32 constant
// (opcode 81 /0 through the OpcSErm_wide / Con8or32 encoding classes).
// Declares the flags register as killed.
7507 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7508 %{
7509   match(Set dst (AddL dst src));
7510   effect(KILL cr);
7511 
7512   format %{ "addq    $dst, $src\t# long" %}
7513   opcode(0x81, 0x00); /* /0 id */
7514   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7515   ins_pipe( ialu_reg );
7516 %}
7517 
// Long add, register += memory: matches AddL whose second input is a LoadL,
// folding the load into the add (addq, opcode 03 with a memory operand).
// Declares the flags register as killed.
7518 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7519 %{
7520   match(Set dst (AddL dst (LoadL src)));
7521   effect(KILL cr);
7522 
7523   ins_cost(125); // XXX
7524   format %{ "addq    $dst, $src\t# long" %}
7525   opcode(0x03);
7526   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7527   ins_pipe(ialu_reg_mem);
7528 %}
7529 
// Long read-modify-write add, memory += register: matches a StoreL to dst of
// (LoadL dst) + src (addq with a memory destination, opcode 01 /r).
// Declares the flags register as killed.
7530 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7531 %{
7532   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7533   effect(KILL cr);
7534 
7535   ins_cost(150); // XXX
7536   format %{ "addq    $dst, $src\t# long" %}
7537   opcode(0x01); /* Opcode 01 /r */
7538   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7539   ins_pipe(ialu_mem_reg);
7540 %}
7541 
// Long read-modify-write add, memory += immediate: matches a StoreL to dst
// of (LoadL dst) + immL32 constant (opcode 81 /0 with the wide REX prefix
// class). Declares the flags register as killed.
7542 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7543 %{
7544   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7545   effect(KILL cr);
7546 
7547   ins_cost(125); // XXX
7548   format %{ "addq    $dst, $src\t# long" %}
7549   opcode(0x81); /* Opcode 81 /0 id */
7550   ins_encode(REX_mem_wide(dst),
7551              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7552   ins_pipe(ialu_mem_imm);
7553 %}
7554 
// Long increment of a register: matches AddL with the immL1 constant
// operand, only when UseIncDec is set (incq, FF /0 with the wide REX prefix
// class). The src operand takes part in matching but is not referenced by
// the encoding. Declares the flags register as killed.
// dst is a long register (rRegL), consistent with the AddL match rule, the
// 64-bit incq encoding and the sibling decL_rReg rule; it was previously
// declared as rRegI.
7555 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
7556 %{
7557   predicate(UseIncDec);
7558   match(Set dst (AddL dst src));
7559   effect(KILL cr);
7560 
7561   format %{ "incq    $dst\t# long" %}
7562   opcode(0xFF, 0x00); // FF /0
7563   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7564   ins_pipe(ialu_reg);
7565 %}
7566 
// Long increment of a memory location: matches a StoreL to dst of
// (LoadL dst) + immL1, only when UseIncDec is set (incq, FF /0 with a memory
// operand and the wide REX prefix class).
// Declares the flags register as killed.
7567 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
7568 %{
7569   predicate(UseIncDec);
7570   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7571   effect(KILL cr);
7572 
7573   ins_cost(125); // XXX
7574   format %{ "incq    $dst\t# long" %}
7575   opcode(0xFF); /* Opcode FF /0 */
7576   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
7577   ins_pipe(ialu_mem_imm);
7578 %}
7579 
7580 // XXX why does that use AddL
// Long decrement of a register (decq, FF /1 with the wide REX prefix class),
// only when UseIncDec is set. The rule matches AddL with the immL_M1
// constant operand (presumably -1, i.e. the decrement reaches the matcher as
// an add of minus one -- confirm against the immL_M1 operand definition).
// Declares the flags register as killed.
7581 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
7582 %{
7583   predicate(UseIncDec);
7584   match(Set dst (AddL dst src));
7585   effect(KILL cr);
7586 
7587   format %{ "decq    $dst\t# long" %}
7588   opcode(0xFF, 0x01); // FF /1
7589   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7590   ins_pipe(ialu_reg);
7591 %}
7592 
7593 // XXX why does that use AddL
// Long decrement of a memory operand.  Matches a StoreL back to $dst of
// (LoadL $dst) + immL_M1 (AddL of what is presumably the constant -1 --
// confirm against the operand definition) and prints as "decq $dst"
// (FF /1, with /1 supplied through RM_opc_mem).  Only selected when
// UseIncDec is set; flags are declared killed.
7594 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
7595 %{
7596   predicate(UseIncDec);
7597   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7598   effect(KILL cr);
7599 
7600   ins_cost(125); // XXX
7601   format %{ "decq    $dst\t# long" %}
7602   opcode(0xFF); /* Opcode FF /1 */
7603   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
7604   ins_pipe(ialu_mem_imm);
7605 %}
7606 
// Three-operand long add via LEA.  Matches dst = AddL(src0, immL32 src1)
// where dst is a separate operand from src0, and prints as
// "leaq $dst, [$src0 + $src1]" (opcode 8D /r).  Unlike the addq rules, no
// flags effect is declared.  Cost is 110.
7607 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
7608 %{
7609   match(Set dst (AddL src0 src1));
7610 
7611   ins_cost(110);
7612   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7613   opcode(0x8D); /* 0x8D /r */
7614   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7615   ins_pipe(ialu_reg_reg);
7616 %}
7617 
// Pointer plus long register offset.  Matches dst = AddP(dst, src) with a
// pointer register dst and a long register src; prints as "addq $dst, $src"
// (opcode 0x03).  Flags are declared killed.
7618 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
7619 %{
7620   match(Set dst (AddP dst src));
7621   effect(KILL cr);
7622 
7623   format %{ "addq    $dst, $src\t# ptr" %}
7624   opcode(0x03);
7625   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7626   ins_pipe(ialu_reg_reg);
7627 %}
7628 
// Pointer plus immL32 constant offset.  Matches dst = AddP(dst, src) and
// prints as "addq $dst, $src" (opcode 81 /0 id).  Flags are declared killed.
7629 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
7630 %{
7631   match(Set dst (AddP dst src));
7632   effect(KILL cr);
7633 
7634   format %{ "addq    $dst, $src\t# ptr" %}
7635   opcode(0x81, 0x00); /* /0 id */
7636   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7637   ins_pipe( ialu_reg );
7638 %}
7639 
7640 // XXX addP mem ops ????
7641 
// Three-operand pointer add via LEA.  Matches dst = AddP(src0, immL32 src1)
// with dst separate from src0 and prints as "leaq $dst, [$src0 + $src1]"
// (opcode 8D /r).  No flags effect is declared.  Cost is 110.
7642 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
7643 %{
7644   match(Set dst (AddP src0 src1));
7645 
7646   ins_cost(110);
7647   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7648   opcode(0x8D); /* 0x8D /r */
7649   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
7650   ins_pipe(ialu_reg_reg);
7651 %}
7652 
// CheckCastPP on a pointer register, in place (dst is both input and
// result).  Emits no code: size is 0 and the encoding is empty; the format
// string is only an annotation.
7653 instruct checkCastPP(rRegP dst)
7654 %{
7655   match(Set dst (CheckCastPP dst));
7656 
7657   size(0);
7658   format %{ "# checkcastPP of $dst" %}
7659   ins_encode(/* empty encoding */);
7660   ins_pipe(empty);
7661 %}
7662 
// CastPP on a pointer register, in place.  Emits no code: size is 0 and the
// encoding is empty; the format string is only an annotation.
7663 instruct castPP(rRegP dst)
7664 %{
7665   match(Set dst (CastPP dst));
7666 
7667   size(0);
7668   format %{ "# castPP of $dst" %}
7669   ins_encode(/* empty encoding */);
7670   ins_pipe(empty);
7671 %}
7672 
// CastII on an int register, in place.  Emits no code: size is 0 and the
// encoding is empty.  Unlike the pointer casts, this rule also declares an
// explicit cost of 0.
7673 instruct castII(rRegI dst)
7674 %{
7675   match(Set dst (CastII dst));
7676 
7677   size(0);
7678   format %{ "# castII of $dst" %}
7679   ins_encode(/* empty encoding */);
7680   ins_cost(0);
7681   ins_pipe(empty);
7682 %}
7683 
7684 // LoadP-locked same as a regular LoadP when used with compare-swap
// Matches LoadPLocked from a memory operand into a pointer register and
// prints as a plain "movq $dst, $mem" (opcode 0x8B); no lock prefix appears
// in the encoding list.
7685 instruct loadPLocked(rRegP dst, memory mem)
7686 %{
7687   match(Set dst (LoadPLocked mem));
7688 
7689   ins_cost(125); // XXX
7690   format %{ "movq    $dst, $mem\t# ptr locked" %}
7691   opcode(0x8B);
7692   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7693   ins_pipe(ialu_reg_mem); // XXX
7694 %}
7695 
7696 // LoadL-locked - same as a regular LoadL when used with compare-swap
// Matches LoadLLocked from a memory operand into a long register and prints
// as a plain "movq $dst, $mem" (opcode 0x8B); no lock prefix appears in the
// encoding list.
7697 instruct loadLLocked(rRegL dst, memory mem)
7698 %{
7699   match(Set dst (LoadLLocked mem));
7700 
7701   ins_cost(125); // XXX
7702   format %{ "movq    $dst, $mem\t# long locked" %}
7703   opcode(0x8B);
7704   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7705   ins_pipe(ialu_reg_mem); // XXX
7706 %}
7707 
7708 // Conditional-store of the updated heap-top.
7709 // Used during allocation of the shared heap.
7710 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7711 
// Matches StorePConditional and produces its result in the flags register
// (Set cr ...).  oldval is constrained to the rax_RegP class, newval may be
// any pointer register.  The encoding is lock_prefix followed by opcode bytes
// 0F B1 with $newval as the register operand and $heap_top_ptr as the memory
// operand; the format prints it as cmpxchgq.
7712 instruct storePConditional(memory heap_top_ptr,
7713                            rax_RegP oldval, rRegP newval,
7714                            rFlagsReg cr)
7715 %{
7716   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7717 
7718   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7719             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7720   opcode(0x0F, 0xB1);
7721   ins_encode(lock_prefix,
7722              REX_reg_mem_wide(newval, heap_top_ptr),
7723              OpcP, OpcS,
7724              reg_mem(newval, heap_top_ptr));
7725   ins_pipe(pipe_cmpxchg);
7726 %}
7727 
7728 // Conditional-store of a long value
7729 // Returns a boolean value: 1 on success, 0 on failure.  Implemented with a
7730 // CMPXCHGQ on AMD64; mem_ptr is an ordinary memory operand.
7731 
// Matches StoreLConditional and materializes the outcome as an int in $res.
// The encoding is a lock_prefix + 0F B1 (printed as cmpxchgq) with oldval
// constrained to rax_RegL, followed by "sete $res" (0F 94) and
// "movzbl $res, $res" (0F B6) to widen the flag into a 0/1 value.  Flags are
// declared killed.
// NOTE(review): the compareAndSwap* rules in this file additionally declare
// KILL oldval; this rule does not -- confirm whether that is intentional.
7732 instruct storeLConditional(rRegI res,
7733                            memory mem_ptr,
7734                            rax_RegL oldval, rRegL newval,
7735                            rFlagsReg cr)
7736 %{
7737   match(Set res (StoreLConditional mem_ptr (Binary oldval newval)));
7738   effect(KILL cr);
7739 
7740   format %{ "cmpxchgq $mem_ptr, $newval\t# (long) "
7741             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7742             "sete    $res\n\t"
7743             "movzbl  $res, $res" %}
7744   opcode(0x0F, 0xB1);
7745   ins_encode(lock_prefix,
7746              REX_reg_mem_wide(newval, mem_ptr),
7747              OpcP, OpcS,
7748              reg_mem(newval, mem_ptr),
7749              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7750              REX_reg_breg(res, res), // movzbl
7751              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7752   ins_pipe(pipe_cmpxchg);
7753 %}
7754 
7755 // Conditional-store of a long value
7756 // ZF flag is set on success, reset otherwise. Implemented with a
7757 // CMPXCHGQ on AMD64; mem_ptr is an ordinary memory operand.
// Flags-only variant: matches a CmpI of a StoreLConditional result against
// the immI0 operand and defines the flags register directly, so the
// sete/movzbl tail of storeLConditional is not emitted.  Encoding is
// lock_prefix + 0F B1 (printed as cmpxchgq) with oldval constrained to
// rax_RegL.
7758 instruct storeLConditional_flags(memory mem_ptr,
7759                                  rax_RegL oldval, rRegL newval,
7760                                  rFlagsReg cr,
7761                                  immI0 zero)
7762 %{
7763   match(Set cr (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero));
7764 
7765   format %{ "cmpxchgq $mem_ptr, $newval\t# (long) "
7766             "If rax == $mem_ptr then store $newval into $mem_ptr" %}
7767   opcode(0x0F, 0xB1);
7768   ins_encode(lock_prefix,
7769              REX_reg_mem_wide(newval, mem_ptr),
7770              OpcP, OpcS,
7771              reg_mem(newval, mem_ptr));
7772   ins_pipe(pipe_cmpxchg);
7773 %}
7774 
// Pointer compare-and-swap producing an int result in $res.  Matches
// CompareAndSwapP with oldval constrained to rax_RegP.  Encoding is
// lock_prefix + 0F B1 (printed as cmpxchgq), then "sete $res" (0F 94) and
// "movzbl $res, $res" (0F B6).  Both the flags and oldval are declared
// killed.
7775 instruct compareAndSwapP(rRegI res,
7776                          memory mem_ptr,
7777                          rax_RegP oldval, rRegP newval,
7778                          rFlagsReg cr)
7779 %{
7780   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7781   effect(KILL cr, KILL oldval);
7782 
7783   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7784             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7785             "sete    $res\n\t"
7786             "movzbl  $res, $res" %}
7787   opcode(0x0F, 0xB1);
7788   ins_encode(lock_prefix,
7789              REX_reg_mem_wide(newval, mem_ptr),
7790              OpcP, OpcS,
7791              reg_mem(newval, mem_ptr),
7792              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7793              REX_reg_breg(res, res), // movzbl
7794              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7795   ins_pipe( pipe_cmpxchg );
7796 %}
7797 
7798 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Long compare-and-swap producing an int result in $res.  Matches
// CompareAndSwapL with oldval constrained to rax_RegL.  Encoding is
// lock_prefix + 0F B1 (printed as cmpxchgq), then "sete $res" (0F 94) and
// "movzbl $res, $res" (0F B6).  Both the flags and oldval are declared
// killed.
7799 instruct compareAndSwapL(rRegI res,
7800                          memory mem_ptr,
7801                          rax_RegL oldval, rRegL newval,
7802                          rFlagsReg cr)
7803 %{
7804   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7805   effect(KILL cr, KILL oldval);
7806 
7807   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7808             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7809             "sete    $res\n\t"
7810             "movzbl  $res, $res" %}
7811   opcode(0x0F, 0xB1);
7812   ins_encode(lock_prefix,
7813              REX_reg_mem_wide(newval, mem_ptr),
7814              OpcP, OpcS,
7815              reg_mem(newval, mem_ptr),
7816              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7817              REX_reg_breg(res, res), // movzbl
7818              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7819   ins_pipe( pipe_cmpxchg );
7820 %}
7821 
// Int compare-and-swap producing an int result in $res.  Matches
// CompareAndSwapI with oldval constrained to rax_RegI.  Uses the non-wide
// REX_reg_mem prefix class and prints as cmpxchgl (lock_prefix + 0F B1),
// then "sete $res" (0F 94) and "movzbl $res, $res" (0F B6).  Both the flags
// and oldval are declared killed.
7822 instruct compareAndSwapI(rRegI res,
7823                          memory mem_ptr,
7824                          rax_RegI oldval, rRegI newval,
7825                          rFlagsReg cr)
7826 %{
7827   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7828   effect(KILL cr, KILL oldval);
7829 
7830   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7831             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7832             "sete    $res\n\t"
7833             "movzbl  $res, $res" %}
7834   opcode(0x0F, 0xB1);
7835   ins_encode(lock_prefix,
7836              REX_reg_mem(newval, mem_ptr),
7837              OpcP, OpcS,
7838              reg_mem(newval, mem_ptr),
7839              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7840              REX_reg_breg(res, res), // movzbl
7841              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7842   ins_pipe( pipe_cmpxchg );
7843 %}
7844 
7845 
// Compare-and-swap on rRegN operands, producing an int result in $res.
// Matches CompareAndSwapN with oldval constrained to rax_RegN.  Uses the
// non-wide REX_reg_mem prefix class and prints as cmpxchgl (lock_prefix +
// 0F B1), then "sete $res" (0F 94) and "movzbl $res, $res" (0F B6).  Both
// the flags and oldval are declared killed.
7846 instruct compareAndSwapN(rRegI res,
7847                           memory mem_ptr,
7848                           rax_RegN oldval, rRegN newval,
7849                           rFlagsReg cr) %{
7850   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
7851   effect(KILL cr, KILL oldval);
7852 
7853   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7854             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7855             "sete    $res\n\t"
7856             "movzbl  $res, $res" %}
7857   opcode(0x0F, 0xB1);
7858   ins_encode(lock_prefix,
7859              REX_reg_mem(newval, mem_ptr),
7860              OpcP, OpcS,
7861              reg_mem(newval, mem_ptr),
7862              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7863              REX_reg_breg(res, res), // movzbl
7864              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7865   ins_pipe( pipe_cmpxchg );
7866 %}
7867 
7868 //----------Subtraction Instructions-------------------------------------------
7869 
7870 // Integer Subtraction Instructions
// Int subtract, register from register.  Matches dst = SubI(dst, src) and
// prints as "subl $dst, $src" (opcode 0x2B).  Flags are declared killed.
7871 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7872 %{
7873   match(Set dst (SubI dst src));
7874   effect(KILL cr);
7875 
7876   format %{ "subl    $dst, $src\t# int" %}
7877   opcode(0x2B);
7878   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7879   ins_pipe(ialu_reg_reg);
7880 %}
7881 
// Int subtract of an immI constant from a register.  Matches
// dst = SubI(dst, src) and prints as "subl $dst, $src" (opcode 81 /5).
// Flags are declared killed.
7882 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7883 %{
7884   match(Set dst (SubI dst src));
7885   effect(KILL cr);
7886 
7887   format %{ "subl    $dst, $src\t# int" %}
7888   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7889   ins_encode(OpcSErm(dst, src), Con8or32(src));
7890   ins_pipe(ialu_reg);
7891 %}
7892 
// Int subtract of a memory value from a register.  Matches
// dst = SubI(dst, LoadI src), folding the load into the instruction, and
// prints as "subl $dst, $src" (opcode 0x2B).  Flags are declared killed.
7893 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7894 %{
7895   match(Set dst (SubI dst (LoadI src)));
7896   effect(KILL cr);
7897 
7898   ins_cost(125);
7899   format %{ "subl    $dst, $src\t# int" %}
7900   opcode(0x2B);
7901   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7902   ins_pipe(ialu_reg_mem);
7903 %}
7904 
// Int subtract of a register from memory.  Matches a StoreI back to $dst of
// (LoadI $dst) - $src, i.e. a read-modify-write of a single memory operand,
// and prints as "subl $dst, $src" (opcode 29 /r).  Flags are declared killed.
7905 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7906 %{
7907   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7908   effect(KILL cr);
7909 
7910   ins_cost(150);
7911   format %{ "subl    $dst, $src\t# int" %}
7912   opcode(0x29); /* Opcode 29 /r */
7913   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7914   ins_pipe(ialu_mem_reg);
7915 %}
7916 
// Int subtract of an immI constant from memory.  Matches a StoreI back to
// $dst of (LoadI $dst) - $src and prints as "subl $dst, $src" (opcode
// 81 /5 id, with /5 supplied through RM_opc_mem).  Flags are declared killed.
7917 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
7918 %{
7919   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7920   effect(KILL cr);
7921 
7922   ins_cost(125); // XXX
7923   format %{ "subl    $dst, $src\t# int" %}
7924   opcode(0x81); /* Opcode 81 /5 id */
7925   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7926   ins_pipe(ialu_mem_imm);
7927 %}
7928 
// Long subtract, register from register.  Matches dst = SubL(dst, src) and
// prints as "subq $dst, $src" (opcode 0x2B with the wide REX encoding class).
// Flags are declared killed.  Also used as the final step of the divL_10
// expansion in this file.
7929 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7930 %{
7931   match(Set dst (SubL dst src));
7932   effect(KILL cr);
7933 
7934   format %{ "subq    $dst, $src\t# long" %}
7935   opcode(0x2B);
7936   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7937   ins_pipe(ialu_reg_reg);
7938 %}
7939 
// Long subtract of an immL32 constant from a register.  Matches
// dst = SubL(dst, src) and prints as "subq $dst, $src" (opcode 81 /5).
// Flags are declared killed.
// dst must be a long register (rRegL): the rule matches SubL and emits the
// 64-bit form, like subL_rReg and subL_rReg_mem.  It was previously declared
// rRegI, which does not agree with the SubL match rule.
7940 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7941 %{
7942   match(Set dst (SubL dst src));
7943   effect(KILL cr);
7944 
7945   format %{ "subq    $dst, $src\t# long" %}
7946   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7947   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7948   ins_pipe(ialu_reg);
7949 %}
7950 
// Long subtract of a memory value from a register.  Matches
// dst = SubL(dst, LoadL src), folding the load into the instruction, and
// prints as "subq $dst, $src" (opcode 0x2B).  Flags are declared killed.
7951 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7952 %{
7953   match(Set dst (SubL dst (LoadL src)));
7954   effect(KILL cr);
7955 
7956   ins_cost(125);
7957   format %{ "subq    $dst, $src\t# long" %}
7958   opcode(0x2B);
7959   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7960   ins_pipe(ialu_reg_mem);
7961 %}
7962 
// Long subtract of a register from memory.  Matches a StoreL back to $dst of
// (LoadL $dst) - $src, i.e. a read-modify-write of a single memory operand,
// and prints as "subq $dst, $src" (opcode 29 /r).  Flags are declared killed.
7963 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7964 %{
7965   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7966   effect(KILL cr);
7967 
7968   ins_cost(150);
7969   format %{ "subq    $dst, $src\t# long" %}
7970   opcode(0x29); /* Opcode 29 /r */
7971   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7972   ins_pipe(ialu_mem_reg);
7973 %}
7974 
// Long subtract of an immL32 constant from memory.  Matches a StoreL back to
// $dst of (LoadL $dst) - $src and prints as "subq $dst, $src" (opcode
// 81 /5 id, with /5 supplied through RM_opc_mem).  Flags are declared killed.
7975 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7976 %{
7977   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7978   effect(KILL cr);
7979 
7980   ins_cost(125); // XXX
7981   format %{ "subq    $dst, $src\t# long" %}
7982   opcode(0x81); /* Opcode 81 /5 id */
7983   ins_encode(REX_mem_wide(dst),
7984              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7985   ins_pipe(ialu_mem_imm);
7986 %}
7987 
7988 // Subtract from a pointer
7989 // XXX hmpf???
// Matches dst = AddP(dst, SubI(zero, src)) -- a pointer plus a negated int --
// and prints it as a single "subq $dst, $src" (opcode 0x2B with the wide REX
// encoding class).  Flags are declared killed.
// NOTE(review): src is an int register (rRegI) but is consumed by a 64-bit
// subtract; this presumably depends on what the upper 32 bits of src hold --
// confirm this is safe for negative or large int values.
7990 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
7991 %{
7992   match(Set dst (AddP dst (SubI zero src)));
7993   effect(KILL cr);
7994 
7995   format %{ "subq    $dst, $src\t# ptr - int" %}
7996   opcode(0x2B);
7997   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7998   ins_pipe(ialu_reg_reg);
7999 %}
8000 
// Int negate of a register.  Matches dst = SubI(zero, dst) with the immI0
// operand as the left-hand side and prints as "negl $dst" (F7 /3).  Flags
// are declared killed.
8001 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8002 %{
8003   match(Set dst (SubI zero dst));
8004   effect(KILL cr);
8005 
8006   format %{ "negl    $dst\t# int" %}
8007   opcode(0xF7, 0x03);  // Opcode F7 /3
8008   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8009   ins_pipe(ialu_reg);
8010 %}
8011 
// Int negate of a memory operand.  Matches a StoreI back to $dst of
// SubI(zero, LoadI $dst) and prints as "negl $dst" (F7 /3, with the
// secondary opcode passed to RM_opc_mem).  Flags are declared killed.
// NOTE(review): other memory read-modify-write rules in this file use a
// memory pipe class (e.g. ialu_mem_imm); this one uses ialu_reg -- confirm.
8012 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8013 %{
8014   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8015   effect(KILL cr);
8016 
8017   format %{ "negl    $dst\t# int" %}
8018   opcode(0xF7, 0x03);  // Opcode F7 /3
8019   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8020   ins_pipe(ialu_reg);
8021 %}
8022 
// Long negate of a register.  Matches dst = SubL(zero, dst) with the immL0
// operand as the left-hand side and prints as "negq $dst" (F7 /3 with the
// wide REX encoding class).  Flags are declared killed.
8023 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8024 %{
8025   match(Set dst (SubL zero dst));
8026   effect(KILL cr);
8027 
8028   format %{ "negq    $dst\t# long" %}
8029   opcode(0xF7, 0x03);  // Opcode F7 /3
8030   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8031   ins_pipe(ialu_reg);
8032 %}
8033 
// Long negate of a memory operand.  Matches a StoreL back to $dst of
// SubL(zero, LoadL $dst) and prints as "negq $dst" (F7 /3, with the
// secondary opcode passed to RM_opc_mem).  Flags are declared killed.
// NOTE(review): other memory read-modify-write rules in this file use a
// memory pipe class (e.g. ialu_mem_imm); this one uses ialu_reg -- confirm.
8034 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8035 %{
8036   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8037   effect(KILL cr);
8038 
8039   format %{ "negq    $dst\t# long" %}
8040   opcode(0xF7, 0x03);  // Opcode F7 /3
8041   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8042   ins_pipe(ialu_reg);
8043 %}
8044 
8045 
8046 //----------Multiplication/Division Instructions-------------------------------
8047 // Integer Multiplication Instructions
8048 // Multiply Register
8049 
// Int multiply, register by register.  Matches dst = MulI(dst, src) and
// prints as two-operand "imull $dst, $src" (opcode bytes 0F AF).  Flags are
// declared killed; cost is 300.
8050 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8051 %{
8052   match(Set dst (MulI dst src));
8053   effect(KILL cr);
8054 
8055   ins_cost(300);
8056   format %{ "imull   $dst, $src\t# int" %}
8057   opcode(0x0F, 0xAF);
8058   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8059   ins_pipe(ialu_reg_reg_alu0);
8060 %}
8061 
// Int multiply of a register by an immI constant into a separate destination.
// Matches dst = MulI(src, imm) and prints as three-operand
// "imull $dst, $src, $imm" (opcode 69 /r id).  Flags are declared killed.
8062 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8063 %{
8064   match(Set dst (MulI src imm));
8065   effect(KILL cr);
8066 
8067   ins_cost(300);
8068   format %{ "imull   $dst, $src, $imm\t# int" %}
8069   opcode(0x69); /* 69 /r id */
8070   ins_encode(REX_reg_reg(dst, src),
8071              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8072   ins_pipe(ialu_reg_reg_alu0);
8073 %}
8074 
// Int multiply of a register by a memory value.  Matches
// dst = MulI(dst, LoadI src), folding the load into the instruction, and
// prints as "imull $dst, $src" (opcode bytes 0F AF).  Flags are declared
// killed; cost is 350.
8075 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8076 %{
8077   match(Set dst (MulI dst (LoadI src)));
8078   effect(KILL cr);
8079 
8080   ins_cost(350);
8081   format %{ "imull   $dst, $src\t# int" %}
8082   opcode(0x0F, 0xAF);
8083   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8084   ins_pipe(ialu_reg_mem_alu0);
8085 %}
8086 
// Int multiply of a memory value by an immI constant into a register.
// Matches dst = MulI(LoadI src, imm) and prints as three-operand
// "imull $dst, $src, $imm" (opcode 69 /r id).  Flags are declared killed.
8087 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8088 %{
8089   match(Set dst (MulI (LoadI src) imm));
8090   effect(KILL cr);
8091 
8092   ins_cost(300);
8093   format %{ "imull   $dst, $src, $imm\t# int" %}
8094   opcode(0x69); /* 69 /r id */
8095   ins_encode(REX_reg_mem(dst, src),
8096              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8097   ins_pipe(ialu_reg_mem_alu0);
8098 %}
8099 
// Long multiply, register by register.  Matches dst = MulL(dst, src) and
// prints as two-operand "imulq $dst, $src" (opcode bytes 0F AF with the wide
// REX encoding class).  Flags are declared killed; cost is 300.
8100 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8101 %{
8102   match(Set dst (MulL dst src));
8103   effect(KILL cr);
8104 
8105   ins_cost(300);
8106   format %{ "imulq   $dst, $src\t# long" %}
8107   opcode(0x0F, 0xAF);
8108   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8109   ins_pipe(ialu_reg_reg_alu0);
8110 %}
8111 
// Long multiply of a register by an immL32 constant into a separate
// destination.  Matches dst = MulL(src, imm) and prints as three-operand
// "imulq $dst, $src, $imm" (opcode 69 /r id).  Flags are declared killed.
8112 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8113 %{
8114   match(Set dst (MulL src imm));
8115   effect(KILL cr);
8116 
8117   ins_cost(300);
8118   format %{ "imulq   $dst, $src, $imm\t# long" %}
8119   opcode(0x69); /* 69 /r id */
8120   ins_encode(REX_reg_reg_wide(dst, src),
8121              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8122   ins_pipe(ialu_reg_reg_alu0);
8123 %}
8124 
// Long multiply of a register by a memory value.  Matches
// dst = MulL(dst, LoadL src), folding the load into the instruction, and
// prints as "imulq $dst, $src" (opcode bytes 0F AF).  Flags are declared
// killed; cost is 350.
8125 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8126 %{
8127   match(Set dst (MulL dst (LoadL src)));
8128   effect(KILL cr);
8129 
8130   ins_cost(350);
8131   format %{ "imulq   $dst, $src\t# long" %}
8132   opcode(0x0F, 0xAF);
8133   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8134   ins_pipe(ialu_reg_mem_alu0);
8135 %}
8136 
// Long multiply of a memory value by an immL32 constant into a register.
// Matches dst = MulL(LoadL src, imm) and prints as three-operand
// "imulq $dst, $src, $imm" (opcode 69 /r id).  Flags are declared killed.
8137 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8138 %{
8139   match(Set dst (MulL (LoadL src) imm));
8140   effect(KILL cr);
8141 
8142   ins_cost(300);
8143   format %{ "imulq   $dst, $src, $imm\t# long" %}
8144   opcode(0x69); /* 69 /r id */
8145   ins_encode(REX_reg_mem_wide(dst, src),
8146              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8147   ins_pipe(ialu_reg_mem_alu0);
8148 %}
8149 
// High half of a long multiply.  Matches dst = MulHiL(src, rax) with dst
// constrained to rdx_RegL, one input constrained to rax_RegL and the other
// excluded from rax (no_rax_RegL).  Prints as one-operand
// "imulq RDX:RAX, RAX, $src" (F7 /5); only $src is encoded explicitly.
// rax is both used and killed, and flags are declared killed.
8150 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8151 %{
8152   match(Set dst (MulHiL src rax));
8153   effect(USE_KILL rax, KILL cr);
8154 
8155   ins_cost(300);
8156   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8157   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8158   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8159   ins_pipe(ialu_reg_reg_alu0);
8160 %}
8161 
// Signed int division.  Matches rax = DivI(rax, div): the dividend and the
// quotient share rax, the divisor is kept out of rax/rdx (no_rax_rdx_RegI),
// and rdx plus the flags are declared killed.
// Per the format string the emitted sequence guards the idivl: when rax is
// 0x80000000 and $div is -1 it zeroes rdx and skips the divide; otherwise it
// runs cdql followed by idivl.  The guard is produced by cdql_enc (defined
// elsewhere); the F7 /7 opcode here covers the idivl itself.
8162 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8163                    rFlagsReg cr)
8164 %{
8165   match(Set rax (DivI rax div));
8166   effect(KILL rdx, KILL cr);
8167 
8168   ins_cost(30*100+10*100); // XXX
8169   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8170             "jne,s   normal\n\t"
8171             "xorl    rdx, rdx\n\t"
8172             "cmpl    $div, -1\n\t"
8173             "je,s    done\n"
8174     "normal: cdql\n\t"
8175             "idivl   $div\n"
8176     "done:"        %}
8177   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8178   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8179   ins_pipe(ialu_reg_reg_alu0);
8180 %}
8181 
// Signed long division.  Matches rax = DivL(rax, div): the dividend and the
// quotient share rax, the divisor is kept out of rax/rdx (no_rax_rdx_RegL),
// and rdx plus the flags are declared killed.
// Per the format string the emitted sequence guards the idivq: when rax is
// 0x8000000000000000 and $div is -1 it zeroes rdx and skips the divide;
// otherwise it runs cdqq followed by idivq.  The guard is produced by
// cdqq_enc (defined elsewhere); the F7 /7 opcode here covers the idivq.
8182 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8183                    rFlagsReg cr)
8184 %{
8185   match(Set rax (DivL rax div));
8186   effect(KILL rdx, KILL cr);
8187 
8188   ins_cost(30*100+10*100); // XXX
8189   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8190             "cmpq    rax, rdx\n\t"
8191             "jne,s   normal\n\t"
8192             "xorl    rdx, rdx\n\t"
8193             "cmpq    $div, -1\n\t"
8194             "je,s    done\n"
8195     "normal: cdqq\n\t"
8196             "idivq   $div\n"
8197     "done:"        %}
8198   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8199   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8200   ins_pipe(ialu_reg_reg_alu0);
8201 %}
8202 
8203 // Integer DIVMOD with Register, both quotient and mod results
// Combined int divide/modulo.  Matches a bare DivModI(rax, div) node (no
// "Set": neither rax nor rdx is named as the single result), so only the
// flags are declared killed.  Emits the same guarded cdql/idivl sequence as
// divI_rReg (see the format string) and uses the pipe_slow pipeline class.
8204 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8205                              rFlagsReg cr)
8206 %{
8207   match(DivModI rax div);
8208   effect(KILL cr);
8209 
8210   ins_cost(30*100+10*100); // XXX
8211   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8212             "jne,s   normal\n\t"
8213             "xorl    rdx, rdx\n\t"
8214             "cmpl    $div, -1\n\t"
8215             "je,s    done\n"
8216     "normal: cdql\n\t"
8217             "idivl   $div\n"
8218     "done:"        %}
8219   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8220   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8221   ins_pipe(pipe_slow);
8222 %}
8223 
8224 // Long DIVMOD with Register, both quotient and mod results
// Combined long divide/modulo.  Matches a bare DivModL(rax, div) node (no
// "Set": neither rax nor rdx is named as the single result), so only the
// flags are declared killed.  Emits the same guarded cdqq/idivq sequence as
// divL_rReg (see the format string) and uses the pipe_slow pipeline class.
8225 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8226                              rFlagsReg cr)
8227 %{
8228   match(DivModL rax div);
8229   effect(KILL cr);
8230 
8231   ins_cost(30*100+10*100); // XXX
8232   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8233             "cmpq    rax, rdx\n\t"
8234             "jne,s   normal\n\t"
8235             "xorl    rdx, rdx\n\t"
8236             "cmpq    $div, -1\n\t"
8237             "je,s    done\n"
8238     "normal: cdqq\n\t"
8239             "idivq   $div\n"
8240     "done:"        %}
8241   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8242   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8243   ins_pipe(pipe_slow);
8244 %}
8245 
8246 //----------- DivL-By-Constant-Expansions--------------------------------------
8247 // DivI cases are handled by the compiler
8248 
8249 // Magic constant, reciprocal of 10
// Helper with no match rule: defines $dst by loading the 64-bit constant
// 0x6666666666666667.  It is instantiated only from the divL_10 expansion.
// The format string now prints all 16 hex digits so the disassembly
// annotation agrees with the value actually passed to load_immL.
8250 instruct loadConL_0x6666666666666667(rRegL dst)
8251 %{
8252   effect(DEF dst);
8253 
8254   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8255   ins_encode(load_immL(dst, 0x6666666666666667));
8256   ins_pipe(ialu_reg);
8257 %}
8258 
// Helper with no match rule, instantiated from the divL_10 expansion.
// Effects: defines $dst (constrained to rdx_RegL), uses $src, uses and kills
// rax, and kills the flags.  Prints as one-operand
// "imulq rdx:rax, rax, $src" (F7 /5); only $src is encoded explicitly.
8259 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8260 %{
8261   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8262 
8263   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8264   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8265   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8266   ins_pipe(ialu_reg_reg_alu0);
8267 %}
8268 
// Helper with no match rule, instantiated from the divL_10 expansion.
// Arithmetic right shift of $dst by the fixed count 63 (0x3F), in place
// (USE_DEF dst); prints as "sarq $dst, #63" (C1 /7 ib).  Flags are killed.
8269 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8270 %{
8271   effect(USE_DEF dst, KILL cr);
8272 
8273   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8274   opcode(0xC1, 0x7); /* C1 /7 ib */
8275   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8276   ins_pipe(ialu_reg);
8277 %}
8278 
// Helper with no match rule, instantiated from the divL_10 expansion.
// Arithmetic right shift of $dst by the fixed count 2, in place
// (USE_DEF dst); prints as "sarq $dst, #2" (C1 /7 ib).  Flags are killed.
8279 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8280 %{
8281   effect(USE_DEF dst, KILL cr);
8282 
8283   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8284   opcode(0xC1, 0x7); /* C1 /7 ib */
8285   ins_encode(reg_opc_imm_wide(dst, 0x2));
8286   ins_pipe(ialu_reg);
8287 %}
8288 
// Long division by the immL10 constant operand, expanded into a
// multiply-and-shift sequence instead of an idivq.  Matches
// dst = DivL(src, div) with dst constrained to rdx_RegL and src kept out of
// rax.  The expansion computes
//   dst = (high 64 bits of src * 0x6666666666666667) >> 2, minus (src >> 63)
// using the helper instructs defined just above plus subL_rReg.
// Note that sarL_rReg_63 shifts $src in place, so src is overwritten by the
// expansion.
8289 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8290 %{
8291   match(Set dst (DivL src div));
8292 
8293   ins_cost((5+8)*100);
8294   expand %{
8295     rax_RegL rax;                     // Killed temp
8296     rFlagsReg cr;                     // Killed
8297     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8298     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8299     sarL_rReg_63(src, cr);            // sarq  src, 63
8300     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8301     subL_rReg(dst, src, cr);          // subq  rdx, src
8302   %}
8303 %}
8304 
8305 //-----------------------------------------------------------------------------
8306 
// Signed int remainder.  Matches rdx = ModI(rax, div): the dividend is in
// rax, the result is taken from rdx, the divisor is kept out of rax/rdx, and
// rax plus the flags are declared killed.
// Per the format string the emitted sequence is the same guarded cdql/idivl
// as for division: when rax is 0x80000000 and $div is -1 the divide is
// skipped with rdx zeroed (remainder 0).  The guard comes from cdql_enc
// (defined elsewhere).
8307 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8308                    rFlagsReg cr)
8309 %{
8310   match(Set rdx (ModI rax div));
8311   effect(KILL rax, KILL cr);
8312 
8313   ins_cost(300); // XXX
8314   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8315             "jne,s   normal\n\t"
8316             "xorl    rdx, rdx\n\t"
8317             "cmpl    $div, -1\n\t"
8318             "je,s    done\n"
8319     "normal: cdql\n\t"
8320             "idivl   $div\n"
8321     "done:"        %}
8322   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8323   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8324   ins_pipe(ialu_reg_reg_alu0);
8325 %}
8326 
// Signed long remainder.  Matches rdx = ModL(rax, div): the dividend is in
// rax, the result is taken from rdx, the divisor is kept out of rax/rdx, and
// rax plus the flags are declared killed.
// Per the format string the emitted sequence is the same guarded cdqq/idivq
// as for division: when rax is 0x8000000000000000 and $div is -1 the divide
// is skipped with rdx zeroed (remainder 0).  The guard comes from cdqq_enc
// (defined elsewhere).
8327 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8328                    rFlagsReg cr)
8329 %{
8330   match(Set rdx (ModL rax div));
8331   effect(KILL rax, KILL cr);
8332 
8333   ins_cost(300); // XXX
8334   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8335             "cmpq    rax, rdx\n\t"
8336             "jne,s   normal\n\t"
8337             "xorl    rdx, rdx\n\t"
8338             "cmpq    $div, -1\n\t"
8339             "je,s    done\n"
8340     "normal: cdqq\n\t"
8341             "idivq   $div\n"
8342     "done:"        %}
8343   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8344   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8345   ins_pipe(ialu_reg_reg_alu0);
8346 %}
8347 
8348 // Integer Shift Instructions
8349 // Shift Left by one
// Int left shift of a register by the immI1 operand.  Matches
// dst = LShiftI(dst, shift) and uses the D1 /4 form, which carries no
// immediate byte in the encoding.  Flags are declared killed.
8350 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8351 %{
8352   match(Set dst (LShiftI dst shift));
8353   effect(KILL cr);
8354 
8355   format %{ "sall    $dst, $shift" %}
8356   opcode(0xD1, 0x4); /* D1 /4 */
8357   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8358   ins_pipe(ialu_reg);
8359 %}
8360 
8361 // Shift Left by one
// Int left shift of a memory operand by the immI1 operand.  Matches a StoreI
// back to $dst of LShiftI(LoadI $dst, shift) and uses the D1 /4 form with
// the secondary opcode passed to RM_opc_mem.  Flags are declared killed.
8362 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8363 %{
8364   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8365   effect(KILL cr);
8366 
8367   format %{ "sall    $dst, $shift\t" %}
8368   opcode(0xD1, 0x4); /* D1 /4 */
8369   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8370   ins_pipe(ialu_mem_imm);
8371 %}
8372 
8373 // Shift Left by 8-bit immediate
// Int left shift of a register by an immI8 constant.  Matches
// dst = LShiftI(dst, shift) and prints as "sall $dst, $shift" (C1 /4 ib).
// Flags are declared killed.
8374 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8375 %{
8376   match(Set dst (LShiftI dst shift));
8377   effect(KILL cr);
8378 
8379   format %{ "sall    $dst, $shift" %}
8380   opcode(0xC1, 0x4); /* C1 /4 ib */
8381   ins_encode(reg_opc_imm(dst, shift));
8382   ins_pipe(ialu_reg);
8383 %}
8384 
8385 // Shift Left by 8-bit immediate
// Int left shift of a memory operand by an immI8 constant.  Matches a StoreI
// back to $dst of LShiftI(LoadI $dst, shift) and prints as
// "sall $dst, $shift" (C1 /4 ib).  Flags are declared killed.
8386 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8387 %{
8388   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8389   effect(KILL cr);
8390 
8391   format %{ "sall    $dst, $shift" %}
8392   opcode(0xC1, 0x4); /* C1 /4 ib */
8393   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8394   ins_pipe(ialu_mem_imm);
8395 %}
8396 
8397 // Shift Left by variable
// Int left shift of a register by a variable count.  Matches
// dst = LShiftI(dst, shift) with the count constrained to rcx_RegI; the
// D3 /4 form encodes only $dst, the count register being implicit.
// Flags are declared killed.
8398 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8399 %{
8400   match(Set dst (LShiftI dst shift));
8401   effect(KILL cr);
8402 
8403   format %{ "sall    $dst, $shift" %}
8404   opcode(0xD3, 0x4); /* D3 /4 */
8405   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8406   ins_pipe(ialu_reg_reg);
8407 %}
8408 
8409 // Shift Left by variable
// Int left shift of a memory operand by a variable count.  Matches a StoreI
// back to $dst of LShiftI(LoadI $dst, shift) with the count constrained to
// rcx_RegI; the D3 /4 form encodes only the memory operand.  Flags are
// declared killed.
8410 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8411 %{
8412   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8413   effect(KILL cr);
8414 
8415   format %{ "sall    $dst, $shift" %}
8416   opcode(0xD3, 0x4); /* D3 /4 */
8417   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8418   ins_pipe(ialu_mem_reg);
8419 %}
8420 
8421 // Arithmetic shift right by one
// Int arithmetic right shift of a register by the immI1 operand.  Matches
// dst = RShiftI(dst, shift) and uses the D1 /7 form, which carries no
// immediate byte in the encoding.  Flags are declared killed.
8422 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8423 %{
8424   match(Set dst (RShiftI dst shift));
8425   effect(KILL cr);
8426 
8427   format %{ "sarl    $dst, $shift" %}
8428   opcode(0xD1, 0x7); /* D1 /7 */
8429   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8430   ins_pipe(ialu_reg);
8431 %}
8432 
8433 // Arithmetic shift right by one
// Int arithmetic right shift of a memory operand by the immI1 operand.
// Matches a StoreI back to $dst of RShiftI(LoadI $dst, shift) and uses the
// D1 /7 form with the secondary opcode passed to RM_opc_mem.  Flags are
// declared killed.
8434 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8435 %{
8436   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8437   effect(KILL cr);
8438 
8439   format %{ "sarl    $dst, $shift" %}
8440   opcode(0xD1, 0x7); /* D1 /7 */
8441   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8442   ins_pipe(ialu_mem_imm);
8443 %}
8444 
8445 // Arithmetic Shift Right by 8-bit immediate
// Int arithmetic right shift of a register by an immI8 constant.  Matches
// dst = RShiftI(dst, shift) and prints as "sarl $dst, $shift" (C1 /7 ib).
// Flags are declared killed.
// Uses the register pipeline class ialu_reg, consistent with the other
// register/immediate shift rules (salI_rReg_imm, shrI_rReg_imm); this rule
// has no memory operand, so the memory class ialu_mem_imm did not describe it.
8446 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8447 %{
8448   match(Set dst (RShiftI dst shift));
8449   effect(KILL cr);
8450 
8451   format %{ "sarl    $dst, $shift" %}
8452   opcode(0xC1, 0x7); /* C1 /7 ib */
8453   ins_encode(reg_opc_imm(dst, shift));
8454   ins_pipe(ialu_reg);
8455 %}
8456 
8457 // Arithmetic Shift Right by 8-bit immediate
// Int arithmetic right shift of a memory operand by an immI8 constant.
// Matches a StoreI back to $dst of RShiftI(LoadI $dst, shift) and prints as
// "sarl $dst, $shift" (C1 /7 ib).  Flags are declared killed.
8458 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8459 %{
8460   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8461   effect(KILL cr);
8462 
8463   format %{ "sarl    $dst, $shift" %}
8464   opcode(0xC1, 0x7); /* C1 /7 ib */
8465   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8466   ins_pipe(ialu_mem_imm);
8467 %}
8468 
8469 // Arithmetic Shift Right by variable
// Int arithmetic right shift of a register by a variable count.  Matches
// dst = RShiftI(dst, shift) with the count constrained to rcx_RegI; the
// D3 /7 form encodes only $dst, the count register being implicit.
// Flags are declared killed.
8470 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8471 %{
8472   match(Set dst (RShiftI dst shift));
8473   effect(KILL cr);
8474 
8475   format %{ "sarl    $dst, $shift" %}
8476   opcode(0xD3, 0x7); /* D3 /7 */
8477   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8478   ins_pipe(ialu_reg_reg);
8479 %}
8480 
8481 // Arithmetic Shift Right by variable
// Int arithmetic right shift of a memory operand by a variable count.
// Matches a StoreI back to $dst of RShiftI(LoadI $dst, shift) with the count
// constrained to rcx_RegI; the D3 /7 form encodes only the memory operand.
// Flags are declared killed.
8482 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8483 %{
8484   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8485   effect(KILL cr);
8486 
8487   format %{ "sarl    $dst, $shift" %}
8488   opcode(0xD3, 0x7); /* D3 /7 */
8489   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8490   ins_pipe(ialu_mem_reg);
8491 %}
8492 
8493 // Logical shift right by one
// Int logical (unsigned) right shift of a register by the immI1 operand.
// Matches dst = URShiftI(dst, shift) and uses the D1 /5 form, which carries
// no immediate byte in the encoding.  Flags are declared killed.
8494 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8495 %{
8496   match(Set dst (URShiftI dst shift));
8497   effect(KILL cr);
8498 
8499   format %{ "shrl    $dst, $shift" %}
8500   opcode(0xD1, 0x5); /* D1 /5 */
8501   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8502   ins_pipe(ialu_reg);
8503 %}
8504 
8505 // Logical shift right by one
// Int logical right shift of a memory operand by the immI1 operand.  Matches
// a StoreI back to $dst of URShiftI(LoadI $dst, shift) and uses the D1 /5
// form with the secondary opcode passed to RM_opc_mem.  Flags are declared
// killed.
8506 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8507 %{
8508   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8509   effect(KILL cr);
8510 
8511   format %{ "shrl    $dst, $shift" %}
8512   opcode(0xD1, 0x5); /* D1 /5 */
8513   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8514   ins_pipe(ialu_mem_imm);
8515 %}
8516 
8517 // Logical Shift Right by 8-bit immediate
// Int logical right shift of a register by an immI8 constant.  Matches
// dst = URShiftI(dst, shift) and prints as "shrl $dst, $shift" (C1 /5 ib).
// Flags are declared killed.
8518 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8519 %{
8520   match(Set dst (URShiftI dst shift));
8521   effect(KILL cr);
8522 
8523   format %{ "shrl    $dst, $shift" %}
8524   opcode(0xC1, 0x5); /* C1 /5 ib */
8525   ins_encode(reg_opc_imm(dst, shift));
8526   ins_pipe(ialu_reg);
8527 %}
8528 
8529 // Logical Shift Right by 8-bit immediate
// Int logical right shift of a memory operand by an immI8 constant.  Matches
// a StoreI back to $dst of URShiftI(LoadI $dst, shift) and prints as
// "shrl $dst, $shift" (C1 /5 ib).  Flags are declared killed.
8530 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8531 %{
8532   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8533   effect(KILL cr);
8534 
8535   format %{ "shrl    $dst, $shift" %}
8536   opcode(0xC1, 0x5); /* C1 /5 ib */
8537   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8538   ins_pipe(ialu_mem_imm);
8539 %}
8540 
// Logical Shift Right by variable (int, register)
// Matches URShiftI of dst by a count held in the rcx_RegI operand and
// updates dst in place (opcode D3 /5).  The flags operand cr is declared
// KILLed.
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8552 
// Logical Shift Right by variable (int, in memory)
// Matches a StoreI to dst of (LoadI dst) >>> shift, with the count held in
// the rcx_RegI operand, i.e. a read-modify-write of one memory operand
// (opcode D3 /5).  The flags operand cr is declared KILLed.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
8564 
8565 // Long Shift Instructions
// Shift Left by one (long, register)
// Matches LShiftL of dst by an immI1 constant and updates dst in place with
// the shift-by-one form (D1 /4); no immediate byte is emitted.  The prefix
// comes from REX_reg_wide rather than the int rules' REX_reg (presumably to
// select 64-bit operand size -- confirm against the enc_class).
// The flags operand cr is declared KILLed.
instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8577 
// Shift Left by one (long, in memory)
// Matches a StoreL to dst of (LoadL dst) << immI1, i.e. a read-modify-write
// of one memory operand, using the shift-by-one form (D1 /4) with the
// REX_mem_wide prefix helper.  The flags operand cr is declared KILLed.
instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
8589 
// Shift Left by 8-bit immediate (long, register)
// Matches LShiftL of dst by an immI8 constant and updates dst in place
// (opcode C1 /4 ib).  The whole byte sequence is produced by the
// reg_opc_imm_wide encoding class, which is defined outside this section.
// The flags operand cr is declared KILLed.
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
8601 
// Shift Left by 8-bit immediate (long, in memory)
// Matches a StoreL to dst of (LoadL dst) << immI8, i.e. a read-modify-write
// of one memory operand (opcode C1 /4 ib).  The shift count is appended to
// the encoding through Con8or32(shift).  The flags operand cr is declared
// KILLed.
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode(REX_mem_wide(dst), OpcP,
             RM_opc_mem(secondary, dst), Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
8614 
// Shift Left by variable (long, register)
// Matches LShiftL of dst by a count held in the rcx_RegI operand (the count
// is an int even though the value is a long) and updates dst in place
// (opcode D3 /4).  The flags operand cr is declared KILLed.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8626 
// Shift Left by variable (long, in memory)
// Matches a StoreL to dst of (LoadL dst) << shift, with the count held in
// the rcx_RegI operand, i.e. a read-modify-write of one memory operand
// (opcode D3 /4).  The flags operand cr is declared KILLed.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
8638 
// Arithmetic shift right by one (long, register)
// Matches RShiftL of dst by an immI1 constant and updates dst in place with
// the shift-by-one form (D1 /7); no immediate byte is emitted.
// The flags operand cr is declared KILLed.
instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8650 
// Arithmetic shift right by one (long, in memory)
// Matches a StoreL to dst of (LoadL dst) >> immI1, i.e. a read-modify-write
// of one memory operand, using the shift-by-one form (D1 /7).
// The flags operand cr is declared KILLed.
instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
8662 
// Arithmetic Shift Right by 8-bit immediate (long, register)
// Matches RShiftL of dst by an immI8 constant and updates dst in place
// (opcode C1 /7 ib).  The whole byte sequence is produced by the
// reg_opc_imm_wide encoding class, which is defined outside this section.
// The flags operand cr is declared KILLed.
// The pipeline class is ialu_reg, the same one used by the other
// register-destination immediate shifts (salL_rReg_imm, shrL_rReg_imm);
// this rule has no memory operand, so a memory pipeline class does not fit.
instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
8674 
// Arithmetic Shift Right by 8-bit immediate (long, in memory)
// Matches a StoreL to dst of (LoadL dst) >> immI8, i.e. a read-modify-write
// of one memory operand (opcode C1 /7 ib).  The shift count is appended to
// the encoding through Con8or32(shift).  The flags operand cr is declared
// KILLed.
instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode(REX_mem_wide(dst), OpcP,
             RM_opc_mem(secondary, dst), Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
8687 
// Arithmetic Shift Right by variable (long, register)
// Matches RShiftL of dst by a count held in the rcx_RegI operand and
// updates dst in place (opcode D3 /7).  The flags operand cr is declared
// KILLed.
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8699 
// Arithmetic Shift Right by variable (long, in memory)
// Matches a StoreL to dst of (LoadL dst) >> shift, with the count held in
// the rcx_RegI operand, i.e. a read-modify-write of one memory operand
// (opcode D3 /7).  The flags operand cr is declared KILLed.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
8711 
// Logical shift right by one (long, register)
// Matches URShiftL of dst by an immI1 constant and updates dst in place with
// the shift-by-one form (D1 /5); no immediate byte is emitted.
// The flags operand cr is declared KILLed.
instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
  ins_pipe(ialu_reg);
%}
8723 
// Logical shift right by one (long, in memory)
// Matches a StoreL to dst of (LoadL dst) >>> immI1, i.e. a read-modify-write
// of one memory operand, using the shift-by-one form (D1 /5).
// The flags operand cr is declared KILLed.
instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
8735 
// Logical Shift Right by 8-bit immediate (long, register)
// Matches URShiftL of dst by an immI8 constant and updates dst in place
// (opcode C1 /5 ib).  The whole byte sequence is produced by the
// reg_opc_imm_wide encoding class, which is defined outside this section.
// The flags operand cr is declared KILLed.
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
8747 
// Logical Shift Right by 8-bit immediate (long, in memory)
// Matches a StoreL to dst of (LoadL dst) >>> immI8, i.e. a read-modify-write
// of one memory operand (opcode C1 /5 ib).  The shift count is appended to
// the encoding through Con8or32(shift).  The flags operand cr is declared
// KILLed.
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode(REX_mem_wide(dst), OpcP,
             RM_opc_mem(secondary, dst), Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
8760 
// Logical Shift Right by variable (long, register)
// Matches URShiftL of dst by a count held in the rcx_RegI operand and
// updates dst in place (opcode D3 /5).  The flags operand cr is declared
// KILLed.
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8772 
// Logical Shift Right by variable (long, in memory)
// Matches a StoreL to dst of (LoadL dst) >>> shift, with the count held in
// the rcx_RegI operand, i.e. a read-modify-write of one memory operand
// (opcode D3 /5).  The flags operand cr is declared KILLed.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
8784 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
// The matched pair of shifts is replaced by one sign-extending byte move
// (movsbl, opcode 0F BE) from src into dst.  No flags operand is declared,
// so the rule kills nothing.  REX_reg_breg is used for the prefix because
// src is read as a byte register (presumably forcing a REX prefix where one
// is needed to address the low byte -- confirm against the enc_class).
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl  $dst, $src\t# i2b" %}
  opcode(0x0F, 0xBE);
  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8796 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
// The matched pair of shifts is replaced by one sign-extending 16-bit move
// (movswl, opcode 0F BF) from src into dst.  No flags operand is declared,
// so the rule kills nothing.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl  $dst, $src\t# i2s" %}
  opcode(0x0F, 0xBF);
  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8808 
8809 // ROL/ROR instructions
8810 
8811 // ROL expand
// Int rotate-left-by-one building block (opcode D1 /0).  It has no match
// rule of its own; rolI_rReg_i1 instantiates it through an expand.
// dst is both read and written (USE_DEF) and the flags operand is KILLed.
instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
  effect(KILL cr, USE_DEF dst);

  format %{ "roll    $dst" %}
  opcode(0xD1, 0x0); /* Opcode  D1 /0 */
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8820 
// Int rotate-left-by-immediate building block (opcode C1 /0 ib).  It has no
// match rule of its own; rolI_rReg_i8 instantiates it through an expand.
// dst is USE_DEF, the immI8 count is only USEd, and the flags are KILLed.
instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "roll    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
  ins_encode( reg_opc_imm(dst, shift) );
  ins_pipe(ialu_reg);
%}
8829 
// Int rotate-left-by-variable building block (opcode D3 /0).  It has no
// match rule of its own; the rolI_rReg_Var_* rules instantiate it through an
// expand.  The count lives in the rcx_RegI operand and dst is restricted to
// no_rcx_RegI (presumably so dst can never share the count register --
// confirm against the register class definition).
instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "roll    $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8839 // end of ROL expand
8840 
// Rotate Left by one
// Recognizes the shift/or idiom (dst << immI1) | (dst >>> immI_M1) on an int
// and expands it to rolI_rReg_imm1.  (immI_M1 is presumably the constant -1,
// which as an int shift count is equivalent to 31 -- confirm against the
// operand definition.)  Only dst and cr are forwarded to the expansion.
instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_rReg_imm1(dst, cr);
  %}
%}
8850 
// Rotate Left by 8-bit immediate
// Recognizes (dst << lshift) | (dst >>> rshift) on an int with two immI8
// constants.  The predicate reads both constants back from the matched
// shift nodes and accepts the pattern only when their sum is a multiple of
// 32 (low five bits zero), i.e. when the two shifts together form a rotate.
// The left-shift count is the one forwarded to rolI_rReg_imm8.
instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
%{
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_rReg_imm8(dst, lshift, cr);
  %}
%}
8861 
// Rotate Left by variable
// Recognizes (dst << shift) | (dst >>> (0 - shift)) on an int, where the
// right-shift count is written as a SubI from the immI0 constant, and
// expands it to rolI_rReg_CL with the variable count.
instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_rReg_CL(dst, shift, cr);
  %}
%}
8871 
// Rotate Left by variable
// Recognizes (dst << shift) | (dst >>> (32 - shift)) on an int, where the
// right-shift count is written as a SubI from the immI_32 constant, and
// expands it to rolI_rReg_CL with the variable count.
instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_rReg_CL(dst, shift, cr);
  %}
%}
8881 
8882 // ROR expand
// Int rotate-right-by-one building block (opcode D1 /1).  It has no match
// rule of its own; rorI_rReg_i1 instantiates it through an expand.
// dst is both read and written (USE_DEF) and the flags operand is KILLed.
instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
%{
  effect(USE_DEF dst, KILL cr);

  format %{ "rorl    $dst" %}
  opcode(0xD1, 0x1); /* D1 /1 */
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8892 
// Int rotate-right-by-immediate building block (opcode C1 /1 ib).  It has no
// match rule of its own; rorI_rReg_i8 instantiates it through an expand.
// dst is USE_DEF, the immI8 count is only USEd, and the flags are KILLed.
instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorl    $dst, $shift" %}
  opcode(0xC1, 0x1); /* C1 /1 ib */
  ins_encode(reg_opc_imm(dst, shift));
  ins_pipe(ialu_reg);
%}
8902 
// Int rotate-right-by-variable building block (opcode D3 /1).  It has no
// match rule of its own; the rorI_rReg_Var_* rules instantiate it through an
// expand.  The count lives in the rcx_RegI operand and dst is restricted to
// no_rcx_RegI (presumably so dst can never share the count register --
// confirm against the register class definition).
instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorl    $dst, $shift" %}
  opcode(0xD3, 0x1); /* D3 /1 */
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8912 // end of ROR expand
8913 
// Rotate Right by one
// Recognizes the shift/or idiom (dst >>> immI1) | (dst << immI_M1) on an int
// and expands it to rorI_rReg_imm1.  (immI_M1 is presumably the constant -1,
// which as an int shift count is equivalent to 31 -- confirm against the
// operand definition.)  Only dst and cr are forwarded to the expansion.
instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_rReg_imm1(dst, cr);
  %}
%}
8923 
// Rotate Right by 8-bit immediate
// Recognizes (dst >>> rshift) | (dst << lshift) on an int with two immI8
// constants.  The predicate reads both constants back from the matched
// shift nodes and accepts the pattern only when their sum is a multiple of
// 32 (low five bits zero), i.e. when the two shifts together form a rotate.
// The right-shift count is the one forwarded to rorI_rReg_imm8.
instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
%{
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_rReg_imm8(dst, rshift, cr);
  %}
%}
8934 
// Rotate Right by variable
// Recognizes (dst >>> shift) | (dst << (0 - shift)) on an int, where the
// left-shift count is written as a SubI from the immI0 constant, and
// expands it to rorI_rReg_CL with the variable count.
instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_rReg_CL(dst, shift, cr);
  %}
%}
8944 
// Rotate Right by variable
// Recognizes (dst >>> shift) | (dst << (32 - shift)) on an int, where the
// left-shift count is written as a SubI from the immI_32 constant, and
// expands it to rorI_rReg_CL with the variable count.
instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_rReg_CL(dst, shift, cr);
  %}
%}
8954 
8955 // for long rotate
8956 // ROL expand
// Long rotate-left-by-one building block (opcode D1 /0 with the
// REX_reg_wide prefix helper).  It has no match rule of its own;
// rolL_rReg_i1 instantiates it through an expand.
// dst is both read and written (USE_DEF) and the flags operand is KILLed.
instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
  effect(USE_DEF dst, KILL cr);

  format %{ "rolq    $dst" %}
  opcode(0xD1, 0x0); /* Opcode  D1 /0 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8965 
// Long rotate-left-by-immediate building block (opcode C1 /0 ib, encoded by
// reg_opc_imm_wide).  It has no match rule of its own; rolL_rReg_i8
// instantiates it through an expand.
// dst is USE_DEF, the immI8 count is only USEd, and the flags are KILLed.
instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rolq    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
  ins_encode( reg_opc_imm_wide(dst, shift) );
  ins_pipe(ialu_reg);
%}
8974 
// Long rotate-left-by-variable building block (opcode D3 /0).  It has no
// match rule of its own; the rolL_rReg_Var_* rules instantiate it through an
// expand.  The int count lives in the rcx_RegI operand and dst is restricted
// to no_rcx_RegL (presumably so dst can never share the count register --
// confirm against the register class definition).
instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rolq    $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8984 // end of ROL expand
8985 
// Rotate Left by one
// Recognizes the shift/or idiom (dst << immI1) | (dst >>> immI_M1) on a long
// and expands it to rolL_rReg_imm1.  (immI_M1 is presumably the constant -1,
// which as a long shift count is equivalent to 63 -- confirm against the
// operand definition.)  Only dst and cr are forwarded to the expansion.
instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));

  expand %{
    rolL_rReg_imm1(dst, cr);
  %}
%}
8995 
// Rotate Left by 8-bit immediate
// Recognizes (dst << lshift) | (dst >>> rshift) on a long with two immI8
// constants.  The predicate reads both constants back from the matched
// shift nodes and accepts the pattern only when their sum is a multiple of
// 64 (low six bits zero), i.e. when the two shifts together form a rotate.
// The left-shift count is the one forwarded to rolL_rReg_imm8.
instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
%{
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));

  expand %{
    rolL_rReg_imm8(dst, lshift, cr);
  %}
%}
9006 
// Rotate Left by variable
// Recognizes (dst << shift) | (dst >>> (0 - shift)) on a long, where the
// right-shift count is written as an int SubI from the immI0 constant, and
// expands it to rolL_rReg_CL with the variable count.
instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));

  expand %{
    rolL_rReg_CL(dst, shift, cr);
  %}
%}
9016 
// Rotate Left by variable
// Recognizes (dst << shift) | (dst >>> (64 - shift)) on a long, where the
// right-shift count is written as an int SubI from the immI_64 constant, and
// expands it to rolL_rReg_CL with the variable count.
instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));

  expand %{
    rolL_rReg_CL(dst, shift, cr);
  %}
%}
9026 
9027 // ROR expand
// Long rotate-right-by-one building block (opcode D1 /1 with the
// REX_reg_wide prefix helper).  It has no match rule of its own;
// rorL_rReg_i1 instantiates it through an expand.
// dst is both read and written (USE_DEF) and the flags operand is KILLed.
instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
%{
  effect(USE_DEF dst, KILL cr);

  format %{ "rorq    $dst" %}
  opcode(0xD1, 0x1); /* D1 /1 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9037 
// Long rotate-right-by-immediate building block (opcode C1 /1 ib, encoded by
// reg_opc_imm_wide).  It has no match rule of its own; rorL_rReg_i8
// instantiates it through an expand.
// dst is USE_DEF, the immI8 count is only USEd, and the flags are KILLed.
instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorq    $dst, $shift" %}
  opcode(0xC1, 0x1); /* C1 /1 ib */
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
9047 
// Long rotate-right-by-variable building block (opcode D3 /1).  It has no
// match rule of its own; the rorL_rReg_Var_* rules instantiate it through an
// expand.  The int count lives in the rcx_RegI operand and dst is restricted
// to no_rcx_RegL (presumably so dst can never share the count register --
// confirm against the register class definition).
instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorq    $dst, $shift" %}
  opcode(0xD3, 0x1); /* D3 /1 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9057 // end of ROR expand
9058 
// Rotate Right by one
// Recognizes the shift/or idiom (dst >>> immI1) | (dst << immI_M1) on a long
// and expands it to rorL_rReg_imm1.  (immI_M1 is presumably the constant -1,
// which as a long shift count is equivalent to 63 -- confirm against the
// operand definition.)  Only dst and cr are forwarded to the expansion.
instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));

  expand %{
    rorL_rReg_imm1(dst, cr);
  %}
%}
9068 
// Rotate Right by 8-bit immediate
// Recognizes (dst >>> rshift) | (dst << lshift) on a long with two immI8
// constants.  The predicate reads both constants back from the matched
// shift nodes and accepts the pattern only when their sum is a multiple of
// 64 (low six bits zero), i.e. when the two shifts together form a rotate.
// The right-shift count is the one forwarded to rorL_rReg_imm8.
instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
%{
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));

  expand %{
    rorL_rReg_imm8(dst, rshift, cr);
  %}
%}
9079 
// Rotate Right by variable
// Recognizes (dst >>> shift) | (dst << (0 - shift)) on a long, where the
// left-shift count is written as an int SubI from the immI0 constant, and
// expands it to rorL_rReg_CL with the variable count.
instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));

  expand %{
    rorL_rReg_CL(dst, shift, cr);
  %}
%}
9089 
// Rotate Right by variable
// Recognizes (dst >>> shift) | (dst << (64 - shift)) on a long, where the
// left-shift count is written as an int SubI from the immI_64 constant, and
// expands it to rorL_rReg_CL with the variable count.
instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));

  expand %{
    rorL_rReg_CL(dst, shift, cr);
  %}
%}
9099 
9100 // Logical Instructions
9101 
9102 // Integer Logical Instructions
9103 
9104 // And Instructions
// And Register with Register
// Matches AndI of dst with src and writes the result back into dst
// (two-operand form, opcode 23 with dst in the register field).
// The flags operand cr is declared KILLed.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x23);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9116 
// And Register with Immediate 255
// Special case of AndI with the immI_255 constant: instead of an and, a
// zero-extending byte move of dst onto itself (movzbl, opcode 0F B6) is
// emitted.  No flags operand is declared, so unlike andI_rReg_imm this rule
// kills nothing.  The immediate operand is not used by the encoding.
instruct andI_rReg_imm255(rRegI dst, immI_255 src)
%{
  match(Set dst (AndI dst src));

  format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9127 
// And Register with Immediate 255 and promote to long
// Matches ConvI2L of (src & immI_255) and implements both steps with one
// zero-extending byte move from the int src into the long dst (movzbl,
// opcode 0F B6).  No flags operand is declared, so the rule kills nothing.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9138 
// And Register with Immediate 65535
// Special case of AndI with the immI_65535 constant: instead of an and, a
// zero-extending 16-bit move of dst onto itself (movzwl, opcode 0F B7) is
// emitted.  No flags operand is declared, so the rule kills nothing.
// The immediate operand is not used by the encoding.
instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
%{
  match(Set dst (AndI dst src));

  format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9149 
// And Register with Immediate 65535 and promote to long
// Matches ConvI2L of (src & immI_65535) and implements both steps with one
// zero-extending 16-bit move from the int src into the long dst (movzwl,
// opcode 0F B7).  No flags operand is declared, so the rule kills nothing.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9160 
// And Register with Immediate
// General AndI of dst with an immI constant, result written back to dst
// (opcode 81 /4).  OpcSErm and Con8or32 are defined outside this section;
// presumably they select between the sign-extended 8-bit and the full
// 32-bit immediate form -- confirm against the enc_class definitions.
// The flags operand cr is declared KILLed.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x81, 0x04); /* Opcode 81 /4 */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9172 
// And Register with Memory
// Matches AndI of dst with a LoadI from src, folding the load into the and
// (opcode 23 with a memory source).  Declared with ins_cost(125).
// The flags operand cr is declared KILLed.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x23);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9185 
// And Memory with Register
// Matches a StoreI to dst of ((LoadI dst) & src), i.e. a read-modify-write
// of one memory operand.  Opcode 21 /r has the memory operand as its
// destination, so the encoding helpers receive (src, dst): register first,
// memory second.  Declared with ins_cost(150); flags (cr) are KILLed.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9198 
// And Memory with Immediate
// Matches a StoreI to dst of ((LoadI dst) & immI), i.e. a read-modify-write
// of one memory operand (opcode 81 /4 id).  OpcSE(src) and Con8or32(src)
// are defined outside this section; presumably they pick the short
// sign-extended immediate form when the constant fits -- confirm.
// Declared with ins_cost(125); flags (cr) are KILLed.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  ins_encode(REX_mem(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9212 
9213 // Or Instructions
// Or Register with Register
// Matches OrI of dst with src and writes the result back into dst
// (two-operand form, opcode 0B with dst in the register field).
// The flags operand cr is declared KILLed.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9225 
// Or Register with Immediate
// Matches OrI of dst with an immI constant, result written back to dst
// (opcode 81 /1 id).  Encoded through the OpcSErm / Con8or32 helpers, which
// are defined outside this section.  The flags operand cr is declared
// KILLed.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9237 
// Or Register with Memory
// Matches OrI of dst with a LoadI from src, folding the load into the or
// (opcode 0B with a memory source).  Declared with ins_cost(125).
// The flags operand cr is declared KILLed.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x0B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9250 
// Or Memory with Register
// Matches a StoreI to dst of ((LoadI dst) | src), i.e. a read-modify-write
// of one memory operand.  Opcode 09 /r has the memory operand as its
// destination, so the encoding helpers receive (src, dst): register first,
// memory second.  Declared with ins_cost(150); flags (cr) are KILLed.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9263 
// Or Memory with Immediate
// Matches a StoreI to dst of ((LoadI dst) | immI), i.e. a read-modify-write
// of one memory operand (opcode 81 /1 id).  Encoded through the OpcSE /
// Con8or32 helpers, which are defined outside this section.
// Declared with ins_cost(125); flags (cr) are KILLed.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  ins_encode(REX_mem(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9277 
9278 // Xor Instructions
// Xor Register with Register
// Matches XorI of dst with src and writes the result back into dst
// (two-operand form, opcode 33 with dst in the register field).
// The flags operand cr is declared KILLed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x33);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9290 
// Xor Register with Immediate
// Matches XorI of dst with an immI constant, result written back to dst
// (opcode 81 /6 id).  Encoded through the OpcSErm / Con8or32 helpers, which
// are defined outside this section.  The flags operand cr is declared
// KILLed.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9302 
// Xor Register with Memory
// Matches XorI of dst with a LoadI from src, folding the load into the xor
// (opcode 33 with a memory source).  Declared with ins_cost(125).
// The flags operand cr is declared KILLed.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x33);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9315 
// Xor Memory with Register
// Matches a StoreI to dst of ((LoadI dst) ^ src), i.e. a read-modify-write
// of one memory operand.  Opcode 31 /r has the memory operand as its
// destination, so the encoding helpers receive (src, dst): register first,
// memory second.  Declared with ins_cost(150); flags (cr) are KILLed.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9328 
// Xor Memory with Immediate
// Matches a StoreI to dst of ((LoadI dst) ^ immI), i.e. a read-modify-write
// of one memory operand (opcode 81 /6 id).  Encoded through the OpcSE /
// Con8or32 helpers, which are defined outside this section.
// Declared with ins_cost(125); flags (cr) are KILLed.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  ins_encode(REX_mem(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9342 
9343 
9344 // Long Logical Instructions
9345 
9346 // And Instructions
// And Register with Register
// Matches AndL of dst with src and writes the result back into dst
// (opcode 23, prefixed through REX_reg_reg_wide for the long form).
// The flags operand cr is declared KILLed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x23);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9358 
// And Register with Immediate 255
// Special case of AndL with the immL_255 constant: instead of an and, a
// zero-extending byte move of dst onto itself (movzbq, opcode 0F B6 with a
// wide REX prefix) is emitted.  No flags operand is declared, so the rule
// kills nothing.  The immediate operand is not used by the encoding, so the
// format prints dst as both operands, matching what is actually encoded.
instruct andL_rReg_imm255(rRegL dst, immL_255 src)
%{
  match(Set dst (AndL dst src));

  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9369 
// And Register with Immediate 65535
// Special case of AndL with the immL_65535 constant: instead of an and, a
// zero-extending 16-bit move of dst onto itself (movzwq, opcode 0F B7 with a
// wide REX prefix) is emitted.  No flags operand is declared, so the rule
// kills nothing.  The immediate operand is not used by the encoding.
// dst is a long register operand (rRegL) because the matched AndL produces
// a long value, as in every other AndL rule in this section.
instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
%{
  match(Set dst (AndL dst src));

  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9380 
// And Register with Immediate
// General AndL of dst with an immL32 constant, result written back to dst
// (opcode 81 /4).  Encoded through the OpcSErm_wide / Con8or32 helpers,
// which are defined outside this section.  The flags operand cr is declared
// KILLed.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x81, 0x04); /* Opcode 81 /4 */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9392 
// And Register with Memory
// Matches AndL of dst with a LoadL from src, folding the load into the and
// (opcode 23 with a memory source, wide REX prefix).  Declared with
// ins_cost(125).  The flags operand cr is declared KILLed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x23);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9405 
// And Memory with Register
// Matches a StoreL to dst of ((LoadL dst) & src), i.e. a read-modify-write
// of one memory operand.  Opcode 21 /r has the memory operand as its
// destination, so the encoding helpers receive (src, dst): register first,
// memory second.  Declared with ins_cost(150); flags (cr) are KILLed.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9418 
// And Memory with Immediate
// Matches a StoreL to dst of ((LoadL dst) & immL32), i.e. a
// read-modify-write of one memory operand (opcode 81 /4 id, wide REX
// prefix).  Encoded through the OpcSE / Con8or32 helpers, which are defined
// outside this section.  Declared with ins_cost(125); flags (cr) are KILLed.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  ins_encode(REX_mem_wide(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9432 
9433 // Or Instructions
// Or Register with Register
// Matches OrL of dst with src and writes the result back into dst
// (opcode 0B, prefixed through REX_reg_reg_wide for the long form).
// The flags operand cr is declared KILLed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9445 
// Or Register with Immediate
// Matches OrL of dst with an immL32 constant, result written back to dst
// (opcode 81 /1 id).  Encoded through the OpcSErm_wide / Con8or32 helpers,
// which are defined outside this section.  The flags operand cr is declared
// KILLed.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9457 
// Or Register with Memory
// Matches OrL of dst with a LoadL from src, folding the load into the or
// (opcode 0B with a memory source, wide REX prefix).  Declared with
// ins_cost(125).  The flags operand cr is declared KILLed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9470 
// Or Memory with Register
// Matches a StoreL to dst of ((LoadL dst) | src), i.e. a read-modify-write
// of one memory operand.  Opcode 09 /r has the memory operand as its
// destination, so the encoding helpers receive (src, dst): register first,
// memory second.  Declared with ins_cost(150); flags (cr) are KILLed.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9483 
// Or Memory with Immediate
// Matches a StoreL to dst of ((LoadL dst) | immL32), i.e. a
// read-modify-write of one memory operand (opcode 81 /1 id, wide REX
// prefix).  Encoded through the OpcSE / Con8or32 helpers, which are defined
// outside this section.  Declared with ins_cost(125); flags (cr) are KILLed.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  ins_encode(REX_mem_wide(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9497 
9498 // Xor Instructions
// Xor Register with Register
// Matches XorL of dst with src and writes the result back into dst
// (opcode 33, prefixed through REX_reg_reg_wide for the long form).
// The flags operand cr is declared KILLed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x33);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9510 
// Xor Register with Immediate
// Matches XorL of dst with an immL32 constant, result written back to dst
// (opcode 81 /6 id).  Encoded through the OpcSErm_wide / Con8or32 helpers,
// which are defined outside this section.  The flags operand cr is declared
// KILLed.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9522 
// Xor Register with Memory
// Matches XorL of dst with a LoadL from src, folding the load into the xor
// (opcode 33 with a memory source, wide REX prefix).  Declared with
// ins_cost(125).  The flags operand cr is declared KILLed.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x33);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9535 
// Xor Memory with Register
// Matches a StoreL to dst of ((LoadL dst) ^ src), i.e. a read-modify-write
// of one memory operand.  Opcode 31 /r has the memory operand as its
// destination, so the encoding helpers receive (src, dst): register first,
// memory second.  Declared with ins_cost(150); flags (cr) are KILLed.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9548 
// Xor Memory with Immediate
// Matches a StoreL to dst of ((LoadL dst) ^ immL32), i.e. a
// read-modify-write of one memory operand (opcode 81 /6 id, wide REX
// prefix).  Encoded through the OpcSE / Con8or32 helpers, which are defined
// outside this section.  Declared with ins_cost(125); flags (cr) are KILLed.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  ins_encode(REX_mem_wide(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9562 
9563 // Convert Int to Boolean
// Matches (Conv2B src) for an int register src. Per the format the sequence
// is: testl src,src (sets flags), setnz dst, movzbl dst,dst -- leaving dst
// as 0 when src is zero and 1 otherwise. cr is declared killed.
9564 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
9565 %{
9566   match(Set dst (Conv2B src));
9567   effect(KILL cr);
9568 
9569   format %{ "testl   $src, $src\t# ci2b\n\t"
9570             "setnz   $dst\n\t"
9571             "movzbl  $dst, $dst" %}
9572   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
9573              setNZ_reg(dst),
9574              REX_reg_breg(dst, dst), // movzbl
9575              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9576   ins_pipe(pipe_slow); // XXX
9577 %}
9578 
9579 // Convert Pointer to Boolean
// Matches (Conv2B src) for a pointer register src. Same sequence as the int
// form except the test is 64-bit (testq, encoded with REX_reg_reg_wide):
// dst becomes 0 for a zero pointer and 1 otherwise. cr is declared killed.
9580 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
9581 %{
9582   match(Set dst (Conv2B src));
9583   effect(KILL cr);
9584 
9585   format %{ "testq   $src, $src\t# cp2b\n\t"
9586             "setnz   $dst\n\t"
9587             "movzbl  $dst, $dst" %}
9588   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
9589              setNZ_reg(dst),
9590              REX_reg_breg(dst, dst), // movzbl
9591              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9592   ins_pipe(pipe_slow); // XXX
9593 %}
9594 
// Matches (CmpLTMask p q) into an int register. Per the format the sequence
// is: cmpl p,q; setlt dst; movzbl dst,dst; negl dst -- so dst is the negation
// of a 0/1 "p < q" result, i.e. 0 or -1 (all ones). cr is declared killed.
9595 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
9596 %{
9597   match(Set dst (CmpLTMask p q));
9598   effect(KILL cr);
9599 
9600   ins_cost(400); // XXX
9601   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
9602             "setlt   $dst\n\t"
9603             "movzbl  $dst, $dst\n\t"
9604             "negl    $dst" %}
9605   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
9606              setLT_reg(dst),
9607              REX_reg_breg(dst, dst), // movzbl
9608              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
9609              neg_reg(dst));
9610   ins_pipe(pipe_slow);
9611 %}
9612 
// Cheaper special case of CmpLTMask when the right operand is the constant
// zero (immI0): (CmpLTMask dst zero) is done in place with a single
// "sarl dst, #31" (shift count 0x1F passed to reg_opc_imm), which replicates
// the sign bit of dst across the register. cr is declared killed.
9613 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
9614 %{
9615   match(Set dst (CmpLTMask dst zero));
9616   effect(KILL cr);
9617 
9618   ins_cost(100); // XXX
9619   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
9620   opcode(0xC1, 0x7);  /* C1 /7 ib */
9621   ins_encode(reg_opc_imm(dst, 0x1F));
9622   ins_pipe(ialu_reg);
9623 %}
9624 
9625 
// Fused form of p = ((CmpLTMask p q) & y) + (p - q), matched as one tree.
// Per the format: subl p,q computes the difference and sets the borrow,
// sbbl tmp,tmp turns that borrow into a mask in the TEMP register, andl
// selects y under the mask, and addl adds it back into p.
// tmp is a scratch register (TEMP); cr is declared killed.
9626 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
9627                          rRegI tmp,
9628                          rFlagsReg cr)
9629 %{
9630   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9631   effect(TEMP tmp, KILL cr);
9632 
9633   ins_cost(400); // XXX
9634   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
9635             "sbbl    $tmp, $tmp\n\t"
9636             "andl    $tmp, $y\n\t"
9637             "addl    $p, $tmp" %}
9638   ins_encode(enc_cmpLTP(p, q, y, tmp));
9639   ins_pipe(pipe_cmplt);
9640 %}
9641 
9642 /* If I enable this, I encourage spilling in the inner loop of compress.
9643 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
9644 %{
9645   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9646   effect( TEMP tmp, KILL cr );
9647   ins_cost(400);
9648 
9649   format %{ "SUB    $p,$q\n\t"
9650             "SBB    RCX,RCX\n\t"
9651             "AND    RCX,$y\n\t"
9652             "ADD    $p,RCX" %}
9653   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9654 %}
9655 */
9656 
9657 //---------- FP Instructions------------------------------------------------
9658 
// Float compare into the unsigned flags register: cr = (CmpF src1 src2),
// both operands in float registers. Per the format, ucomiss is followed by a
// fixup that is skipped when the parity flag is clear (jnp); otherwise
// ("saw NaN, set CF") the saved flags are masked with 0xffffff2b via
// pushfq/andq/popfq. The fixup bytes come from the cmpfp_fixup encoding.
9659 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
9660 %{
9661   match(Set cr (CmpF src1 src2));
9662 
9663   ins_cost(145);
9664   format %{ "ucomiss $src1, $src2\n\t"
9665             "jnp,s   exit\n\t"
9666             "pushfq\t# saw NaN, set CF\n\t"
9667             "andq    [rsp], #0xffffff2b\n\t"
9668             "popfq\n"
9669     "exit:   nop\t# avoid branch to branch" %}
9670   opcode(0x0F, 0x2E);
9671   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
9672              cmpfp_fixup);
9673   ins_pipe(pipe_slow);
9674 %}
9675 
// Float compare into the unsigned flags register with the second operand
// loaded from memory: cr = (CmpF src1 (LoadF src2)). Same ucomiss + NaN
// fixup sequence as the register form (see format); fixup bytes come from
// the cmpfp_fixup encoding.
9676 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
9677 %{
9678   match(Set cr (CmpF src1 (LoadF src2)));
9679 
9680   ins_cost(145);
9681   format %{ "ucomiss $src1, $src2\n\t"
9682             "jnp,s   exit\n\t"
9683             "pushfq\t# saw NaN, set CF\n\t"
9684             "andq    [rsp], #0xffffff2b\n\t"
9685             "popfq\n"
9686     "exit:   nop\t# avoid branch to branch" %}
9687   opcode(0x0F, 0x2E);
9688   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
9689              cmpfp_fixup);
9690   ins_pipe(pipe_slow);
9691 %}
9692 
// Float compare into the unsigned flags register against a float constant:
// cr = (CmpF src1 src2) with src2 an immF. The constant is not an inline
// immediate: it is encoded through load_immF as a memory operand, so the
// format prints it as [$src2], matching the other *_imm FP instructions.
// The ucomiss is followed by the same NaN fixup sequence as the register
// form (see format); fixup bytes come from the cmpfp_fixup encoding.
9693 instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
9694 %{
9695   match(Set cr (CmpF src1 src2));
9696 
9697   ins_cost(145);
9698   format %{ "ucomiss $src1, [$src2]\n\t"
9699             "jnp,s   exit\n\t"
9700             "pushfq\t# saw NaN, set CF\n\t"
9701             "andq    [rsp], #0xffffff2b\n\t"
9702             "popfq\n"
9703     "exit:   nop\t# avoid branch to branch" %}
9704   opcode(0x0F, 0x2E);
9705   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
9706              cmpfp_fixup);
9707   ins_pipe(pipe_slow);
9708 %}
9709 
// Double compare into the unsigned flags register: cr = (CmpD src1 src2),
// both operands in double registers. Per the format, ucomisd is followed by
// the NaN fixup (jnp / pushfq / andq 0xffffff2b / popfq) emitted by
// cmpfp_fixup. The first opcode byte (0x66, OpcP) is emitted before the REX
// encoder in the ins_encode list.
9710 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
9711 %{
9712   match(Set cr (CmpD src1 src2));
9713 
9714   ins_cost(145);
9715   format %{ "ucomisd $src1, $src2\n\t"
9716             "jnp,s   exit\n\t"
9717             "pushfq\t# saw NaN, set CF\n\t"
9718             "andq    [rsp], #0xffffff2b\n\t"
9719             "popfq\n"
9720     "exit:   nop\t# avoid branch to branch" %}
9721   opcode(0x66, 0x0F, 0x2E);
9722   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
9723              cmpfp_fixup);
9724   ins_pipe(pipe_slow);
9725 %}
9726 
// Double compare into the unsigned flags register with the second operand
// loaded from memory: cr = (CmpD src1 (LoadD src2)). Same ucomisd + NaN
// fixup sequence as the register form (see format).
9727 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
9728 %{
9729   match(Set cr (CmpD src1 (LoadD src2)));
9730 
9731   ins_cost(145);
9732   format %{ "ucomisd $src1, $src2\n\t"
9733             "jnp,s   exit\n\t"
9734             "pushfq\t# saw NaN, set CF\n\t"
9735             "andq    [rsp], #0xffffff2b\n\t"
9736             "popfq\n"
9737     "exit:   nop\t# avoid branch to branch" %}
9738   opcode(0x66, 0x0F, 0x2E);
9739   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
9740              cmpfp_fixup);
9741   ins_pipe(pipe_slow);
9742 %}
9743 
// Double compare into the unsigned flags register against a double constant:
// cr = (CmpD src1 src2) with src2 an immD. The constant is encoded through
// load_immD and printed as a memory operand ([$src2]). Same ucomisd + NaN
// fixup sequence as the register form (see format).
9744 instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
9745 %{
9746   match(Set cr (CmpD src1 src2));
9747 
9748   ins_cost(145);
9749   format %{ "ucomisd $src1, [$src2]\n\t"
9750             "jnp,s   exit\n\t"
9751             "pushfq\t# saw NaN, set CF\n\t"
9752             "andq    [rsp], #0xffffff2b\n\t"
9753             "popfq\n"
9754     "exit:   nop\t# avoid branch to branch" %}
9755   opcode(0x66, 0x0F, 0x2E);
9756   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
9757              cmpfp_fixup);
9758   ins_pipe(pipe_slow);
9759 %}
9760 
9761 // Compare into -1,0,1
// Three-way float compare: dst = (CmpF3 src1 src2), both operands in float
// registers. Per the format: after ucomiss, dst is preset to -1 and kept when
// the compare is unordered (jp) or below (jb); otherwise setne/movzbl leave
// 0 (equal) or 1. The tail is emitted by cmpfp3(dst). cr is declared killed.
9762 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
9763 %{
9764   match(Set dst (CmpF3 src1 src2));
9765   effect(KILL cr);
9766 
9767   ins_cost(275);
9768   format %{ "ucomiss $src1, $src2\n\t"
9769             "movl    $dst, #-1\n\t"
9770             "jp,s    done\n\t"
9771             "jb,s    done\n\t"
9772             "setne   $dst\n\t"
9773             "movzbl  $dst, $dst\n"
9774     "done:" %}
9775 
9776   opcode(0x0F, 0x2E);
9777   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
9778              cmpfp3(dst));
9779   ins_pipe(pipe_slow);
9780 %}
9781 
9782 // Compare into -1,0,1
// Three-way float compare with the second operand loaded from memory:
// dst = (CmpF3 src1 (LoadF src2)). Per the format, dst stays -1 on
// unordered (jp) or below (jb), else becomes 0 or 1 via setne/movzbl.
// cr is declared killed.
9783 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
9784 %{
9785   match(Set dst (CmpF3 src1 (LoadF src2)));
9786   effect(KILL cr);
9787 
9788   ins_cost(275);
9789   format %{ "ucomiss $src1, $src2\n\t"
9790             "movl    $dst, #-1\n\t"
9791             "jp,s    done\n\t"
9792             "jb,s    done\n\t"
9793             "setne   $dst\n\t"
9794             "movzbl  $dst, $dst\n"
9795     "done:" %}
9796 
9797   opcode(0x0F, 0x2E);
9798   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
9799              cmpfp3(dst));
9800   ins_pipe(pipe_slow);
9801 %}
9802 
9803 // Compare into -1,0,1
// Three-way float compare against a float constant (immF), which is encoded
// through load_immF and printed as a memory operand ([$src2]). Per the
// format, dst stays -1 on unordered (jp) or below (jb), else becomes 0 or 1
// via setne/movzbl. cr is declared killed.
9804 instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
9805 %{
9806   match(Set dst (CmpF3 src1 src2));
9807   effect(KILL cr);
9808 
9809   ins_cost(275);
9810   format %{ "ucomiss $src1, [$src2]\n\t"
9811             "movl    $dst, #-1\n\t"
9812             "jp,s    done\n\t"
9813             "jb,s    done\n\t"
9814             "setne   $dst\n\t"
9815             "movzbl  $dst, $dst\n"
9816     "done:" %}
9817 
9818   opcode(0x0F, 0x2E);
9819   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
9820              cmpfp3(dst));
9821   ins_pipe(pipe_slow);
9822 %}
9823 
9824 // Compare into -1,0,1
// Three-way double compare: dst = (CmpD3 src1 src2), both operands in double
// registers. Per the format: after ucomisd, dst is preset to -1 and kept on
// unordered (jp) or below (jb); otherwise setne/movzbl leave 0 or 1.
// The tail is emitted by cmpfp3(dst). cr is declared killed.
9825 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
9826 %{
9827   match(Set dst (CmpD3 src1 src2));
9828   effect(KILL cr);
9829 
9830   ins_cost(275);
9831   format %{ "ucomisd $src1, $src2\n\t"
9832             "movl    $dst, #-1\n\t"
9833             "jp,s    done\n\t"
9834             "jb,s    done\n\t"
9835             "setne   $dst\n\t"
9836             "movzbl  $dst, $dst\n"
9837     "done:" %}
9838 
9839   opcode(0x66, 0x0F, 0x2E);
9840   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
9841              cmpfp3(dst));
9842   ins_pipe(pipe_slow);
9843 %}
9844 
9845 // Compare into -1,0,1
// Three-way double compare with the second operand loaded from memory:
// dst = (CmpD3 src1 (LoadD src2)). Per the format, dst stays -1 on
// unordered (jp) or below (jb), else becomes 0 or 1 via setne/movzbl.
// cr is declared killed.
9846 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
9847 %{
9848   match(Set dst (CmpD3 src1 (LoadD src2)));
9849   effect(KILL cr);
9850 
9851   ins_cost(275);
9852   format %{ "ucomisd $src1, $src2\n\t"
9853             "movl    $dst, #-1\n\t"
9854             "jp,s    done\n\t"
9855             "jb,s    done\n\t"
9856             "setne   $dst\n\t"
9857             "movzbl  $dst, $dst\n"
9858     "done:" %}
9859 
9860   opcode(0x66, 0x0F, 0x2E);
9861   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
9862              cmpfp3(dst));
9863   ins_pipe(pipe_slow);
9864 %}
9865 
9866 // Compare into -1,0,1
// Three-way double compare against a double constant (immD), which is
// encoded through load_immD and printed as a memory operand ([$src2]).
// Per the format, dst stays -1 on unordered (jp) or below (jb), else becomes
// 0 or 1 via setne/movzbl. cr is declared killed.
9867 instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
9868 %{
9869   match(Set dst (CmpD3 src1 src2));
9870   effect(KILL cr);
9871 
9872   ins_cost(275);
9873   format %{ "ucomisd $src1, [$src2]\n\t"
9874             "movl    $dst, #-1\n\t"
9875             "jp,s    done\n\t"
9876             "jb,s    done\n\t"
9877             "setne   $dst\n\t"
9878             "movzbl  $dst, $dst\n"
9879     "done:" %}
9880 
9881   opcode(0x66, 0x0F, 0x2E);
9882   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
9883              cmpfp3(dst));
9884   ins_pipe(pipe_slow);
9885 %}
9886 
// Float add, register-register: dst = (AddF dst src), updated in place
// (format: addss). No flags operand is declared.
9887 instruct addF_reg(regF dst, regF src)
9888 %{
9889   match(Set dst (AddF dst src));
9890 
9891   format %{ "addss   $dst, $src" %}
9892   ins_cost(150); // XXX
9893   opcode(0xF3, 0x0F, 0x58);
9894   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9895   ins_pipe(pipe_slow);
9896 %}
9897 
// Float add with the right operand loaded from memory:
// dst = (AddF dst (LoadF src)); the load is folded into the addss.
9898 instruct addF_mem(regF dst, memory src)
9899 %{
9900   match(Set dst (AddF dst (LoadF src)));
9901 
9902   format %{ "addss   $dst, $src" %}
9903   ins_cost(150); // XXX
9904   opcode(0xF3, 0x0F, 0x58);
9905   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9906   ins_pipe(pipe_slow);
9907 %}
9908 
// Float add of a float constant (immF): dst = (AddF dst src). The constant
// is encoded through load_immF and printed as a memory operand ([$src]).
9909 instruct addF_imm(regF dst, immF src)
9910 %{
9911   match(Set dst (AddF dst src));
9912 
9913   format %{ "addss   $dst, [$src]" %}
9914   ins_cost(150); // XXX
9915   opcode(0xF3, 0x0F, 0x58);
9916   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
9917   ins_pipe(pipe_slow);
9918 %}
9919 
// Double add, register-register: dst = (AddD dst src), updated in place
// (format: addsd). No flags operand is declared.
9920 instruct addD_reg(regD dst, regD src)
9921 %{
9922   match(Set dst (AddD dst src));
9923 
9924   format %{ "addsd   $dst, $src" %}
9925   ins_cost(150); // XXX
9926   opcode(0xF2, 0x0F, 0x58);
9927   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9928   ins_pipe(pipe_slow);
9929 %}
9930 
// Double add with the right operand loaded from memory:
// dst = (AddD dst (LoadD src)); the load is folded into the addsd.
9931 instruct addD_mem(regD dst, memory src)
9932 %{
9933   match(Set dst (AddD dst (LoadD src)));
9934 
9935   format %{ "addsd   $dst, $src" %}
9936   ins_cost(150); // XXX
9937   opcode(0xF2, 0x0F, 0x58);
9938   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9939   ins_pipe(pipe_slow);
9940 %}
9941 
// Double add of a double constant (immD): dst = (AddD dst src). The constant
// is encoded through load_immD and printed as a memory operand ([$src]).
9942 instruct addD_imm(regD dst, immD src)
9943 %{
9944   match(Set dst (AddD dst src));
9945 
9946   format %{ "addsd   $dst, [$src]" %}
9947   ins_cost(150); // XXX
9948   opcode(0xF2, 0x0F, 0x58);
9949   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
9950   ins_pipe(pipe_slow);
9951 %}
9952 
// Float subtract, register-register: dst = (SubF dst src), updated in place
// (format: subss).
9953 instruct subF_reg(regF dst, regF src)
9954 %{
9955   match(Set dst (SubF dst src));
9956 
9957   format %{ "subss   $dst, $src" %}
9958   ins_cost(150); // XXX
9959   opcode(0xF3, 0x0F, 0x5C);
9960   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9961   ins_pipe(pipe_slow);
9962 %}
9963 
// Float subtract with the right operand loaded from memory:
// dst = (SubF dst (LoadF src)); the load is folded into the subss.
9964 instruct subF_mem(regF dst, memory src)
9965 %{
9966   match(Set dst (SubF dst (LoadF src)));
9967 
9968   format %{ "subss   $dst, $src" %}
9969   ins_cost(150); // XXX
9970   opcode(0xF3, 0x0F, 0x5C);
9971   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9972   ins_pipe(pipe_slow);
9973 %}
9974 
// Float subtract of a float constant (immF): dst = (SubF dst src). The
// constant is encoded through load_immF and printed as [$src].
9975 instruct subF_imm(regF dst, immF src)
9976 %{
9977   match(Set dst (SubF dst src));
9978 
9979   format %{ "subss   $dst, [$src]" %}
9980   ins_cost(150); // XXX
9981   opcode(0xF3, 0x0F, 0x5C);
9982   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
9983   ins_pipe(pipe_slow);
9984 %}
9985 
// Double subtract, register-register: dst = (SubD dst src), updated in place
// (format: subsd).
9986 instruct subD_reg(regD dst, regD src)
9987 %{
9988   match(Set dst (SubD dst src));
9989 
9990   format %{ "subsd   $dst, $src" %}
9991   ins_cost(150); // XXX
9992   opcode(0xF2, 0x0F, 0x5C);
9993   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9994   ins_pipe(pipe_slow);
9995 %}
9996 
// Double subtract with the right operand loaded from memory:
// dst = (SubD dst (LoadD src)); the load is folded into the subsd.
9997 instruct subD_mem(regD dst, memory src)
9998 %{
9999   match(Set dst (SubD dst (LoadD src)));
10000 
10001   format %{ "subsd   $dst, $src" %}
10002   ins_cost(150); // XXX
10003   opcode(0xF2, 0x0F, 0x5C);
10004   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10005   ins_pipe(pipe_slow);
10006 %}
10007 
// Double subtract of a double constant (immD): dst = (SubD dst src). The
// constant is encoded through load_immD and printed as [$src].
10008 instruct subD_imm(regD dst, immD src)
10009 %{
10010   match(Set dst (SubD dst src));
10011 
10012   format %{ "subsd   $dst, [$src]" %}
10013   ins_cost(150); // XXX
10014   opcode(0xF2, 0x0F, 0x5C);
10015   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10016   ins_pipe(pipe_slow);
10017 %}
10018 
// Float multiply, register-register: dst = (MulF dst src), updated in place
// (format: mulss).
10019 instruct mulF_reg(regF dst, regF src)
10020 %{
10021   match(Set dst (MulF dst src));
10022 
10023   format %{ "mulss   $dst, $src" %}
10024   ins_cost(150); // XXX
10025   opcode(0xF3, 0x0F, 0x59);
10026   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10027   ins_pipe(pipe_slow);
10028 %}
10029 
// Float multiply with the right operand loaded from memory:
// dst = (MulF dst (LoadF src)); the load is folded into the mulss.
10030 instruct mulF_mem(regF dst, memory src)
10031 %{
10032   match(Set dst (MulF dst (LoadF src)));
10033 
10034   format %{ "mulss   $dst, $src" %}
10035   ins_cost(150); // XXX
10036   opcode(0xF3, 0x0F, 0x59);
10037   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10038   ins_pipe(pipe_slow);
10039 %}
10040 
// Float multiply by a float constant (immF): dst = (MulF dst src). The
// constant is encoded through load_immF and printed as [$src].
10041 instruct mulF_imm(regF dst, immF src)
10042 %{
10043   match(Set dst (MulF dst src));
10044 
10045   format %{ "mulss   $dst, [$src]" %}
10046   ins_cost(150); // XXX
10047   opcode(0xF3, 0x0F, 0x59);
10048   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10049   ins_pipe(pipe_slow);
10050 %}
10051 
// Double multiply, register-register: dst = (MulD dst src), updated in place
// (format: mulsd).
10052 instruct mulD_reg(regD dst, regD src)
10053 %{
10054   match(Set dst (MulD dst src));
10055 
10056   format %{ "mulsd   $dst, $src" %}
10057   ins_cost(150); // XXX
10058   opcode(0xF2, 0x0F, 0x59);
10059   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10060   ins_pipe(pipe_slow);
10061 %}
10062 
// Double multiply with the right operand loaded from memory:
// dst = (MulD dst (LoadD src)); the load is folded into the mulsd.
10063 instruct mulD_mem(regD dst, memory src)
10064 %{
10065   match(Set dst (MulD dst (LoadD src)));
10066 
10067   format %{ "mulsd   $dst, $src" %}
10068   ins_cost(150); // XXX
10069   opcode(0xF2, 0x0F, 0x59);
10070   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10071   ins_pipe(pipe_slow);
10072 %}
10073 
// Double multiply by a double constant (immD): dst = (MulD dst src). The
// constant is encoded through load_immD and printed as [$src].
10074 instruct mulD_imm(regD dst, immD src)
10075 %{
10076   match(Set dst (MulD dst src));
10077 
10078   format %{ "mulsd   $dst, [$src]" %}
10079   ins_cost(150); // XXX
10080   opcode(0xF2, 0x0F, 0x59);
10081   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10082   ins_pipe(pipe_slow);
10083 %}
10084 
// Float divide, register-register: dst = (DivF dst src), updated in place
// (format: divss).
10085 instruct divF_reg(regF dst, regF src)
10086 %{
10087   match(Set dst (DivF dst src));
10088 
10089   format %{ "divss   $dst, $src" %}
10090   ins_cost(150); // XXX
10091   opcode(0xF3, 0x0F, 0x5E);
10092   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10093   ins_pipe(pipe_slow);
10094 %}
10095 
// Float divide with the divisor loaded from memory:
// dst = (DivF dst (LoadF src)); the load is folded into the divss.
10096 instruct divF_mem(regF dst, memory src)
10097 %{
10098   match(Set dst (DivF dst (LoadF src)));
10099 
10100   format %{ "divss   $dst, $src" %}
10101   ins_cost(150); // XXX
10102   opcode(0xF3, 0x0F, 0x5E);
10103   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10104   ins_pipe(pipe_slow);
10105 %}
10106 
// Float divide by a float constant (immF): dst = (DivF dst src). The
// constant is encoded through load_immF and printed as [$src].
10107 instruct divF_imm(regF dst, immF src)
10108 %{
10109   match(Set dst (DivF dst src));
10110 
10111   format %{ "divss   $dst, [$src]" %}
10112   ins_cost(150); // XXX
10113   opcode(0xF3, 0x0F, 0x5E);
10114   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10115   ins_pipe(pipe_slow);
10116 %}
10117 
// Double divide, register-register: dst = (DivD dst src), updated in place
// (format: divsd).
10118 instruct divD_reg(regD dst, regD src)
10119 %{
10120   match(Set dst (DivD dst src));
10121 
10122   format %{ "divsd   $dst, $src" %}
10123   ins_cost(150); // XXX
10124   opcode(0xF2, 0x0F, 0x5E);
10125   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10126   ins_pipe(pipe_slow);
10127 %}
10128 
// Double divide with the divisor loaded from memory:
// dst = (DivD dst (LoadD src)); the load is folded into the divsd.
10129 instruct divD_mem(regD dst, memory src)
10130 %{
10131   match(Set dst (DivD dst (LoadD src)));
10132 
10133   format %{ "divsd   $dst, $src" %}
10134   ins_cost(150); // XXX
10135   opcode(0xF2, 0x0F, 0x5E);
10136   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10137   ins_pipe(pipe_slow);
10138 %}
10139 
// Double divide by a double constant (immD): dst = (DivD dst src). The
// constant is encoded through load_immD and printed as [$src].
10140 instruct divD_imm(regD dst, immD src)
10141 %{
10142   match(Set dst (DivD dst src));
10143 
10144   format %{ "divsd   $dst, [$src]" %}
10145   ins_cost(150); // XXX
10146   opcode(0xF2, 0x0F, 0x5E);
10147   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10148   ins_pipe(pipe_slow);
10149 %}
10150 
// Float square root. There is no direct float sqrt node in the match rule:
// it recognizes the tree ConvD2F(SqrtD(ConvF2D src)) -- widen, double sqrt,
// narrow -- and replaces the whole tree with a single sqrtss. Unlike the
// arithmetic forms, dst is not also a source operand.
10151 instruct sqrtF_reg(regF dst, regF src)
10152 %{
10153   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10154 
10155   format %{ "sqrtss  $dst, $src" %}
10156   ins_cost(150); // XXX
10157   opcode(0xF3, 0x0F, 0x51);
10158   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10159   ins_pipe(pipe_slow);
10160 %}
10161 
// Float square root of a value loaded from memory: matches
// ConvD2F(SqrtD(ConvF2D(LoadF src))) and emits one sqrtss with a memory
// source operand.
10162 instruct sqrtF_mem(regF dst, memory src)
10163 %{
10164   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10165 
10166   format %{ "sqrtss  $dst, $src" %}
10167   ins_cost(150); // XXX
10168   opcode(0xF3, 0x0F, 0x51);
10169   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10170   ins_pipe(pipe_slow);
10171 %}
10172 
// Float square root of a float constant (immF): matches
// ConvD2F(SqrtD(ConvF2D src)) and emits one sqrtss. The constant is encoded
// through load_immF and printed as [$src].
10173 instruct sqrtF_imm(regF dst, immF src)
10174 %{
10175   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10176 
10177   format %{ "sqrtss  $dst, [$src]" %}
10178   ins_cost(150); // XXX
10179   opcode(0xF3, 0x0F, 0x51);
10180   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10181   ins_pipe(pipe_slow);
10182 %}
10183 
// Double square root, register-register: dst = (SqrtD src) (format: sqrtsd).
// dst is a pure result operand; src is not overwritten.
10184 instruct sqrtD_reg(regD dst, regD src)
10185 %{
10186   match(Set dst (SqrtD src));
10187 
10188   format %{ "sqrtsd  $dst, $src" %}
10189   ins_cost(150); // XXX
10190   opcode(0xF2, 0x0F, 0x51);
10191   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10192   ins_pipe(pipe_slow);
10193 %}
10194 
// Double square root of a value loaded from memory:
// dst = (SqrtD (LoadD src)); the load is folded into the sqrtsd.
10195 instruct sqrtD_mem(regD dst, memory src)
10196 %{
10197   match(Set dst (SqrtD (LoadD src)));
10198 
10199   format %{ "sqrtsd  $dst, $src" %}
10200   ins_cost(150); // XXX
10201   opcode(0xF2, 0x0F, 0x51);
10202   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10203   ins_pipe(pipe_slow);
10204 %}
10205 
// Double square root of a double constant (immD): dst = (SqrtD src). The
// constant is encoded through load_immD and printed as [$src].
10206 instruct sqrtD_imm(regD dst, immD src)
10207 %{
10208   match(Set dst (SqrtD src));
10209 
10210   format %{ "sqrtsd  $dst, [$src]" %}
10211   ins_cost(150); // XXX
10212   opcode(0xF2, 0x0F, 0x51);
10213   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10214   ins_pipe(pipe_slow);
10215 %}
10216 
// Float absolute value in place: dst = (AbsF dst). Per the format this is an
// andps of dst with a 0x7fffffff mask (sign masking); the bytes are produced
// by the absF_encoding encoding class.
10217 instruct absF_reg(regF dst)
10218 %{
10219   match(Set dst (AbsF dst));
10220 
10221   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10222   ins_encode(absF_encoding(dst));
10223   ins_pipe(pipe_slow);
10224 %}
10225 
// Double absolute value in place: dst = (AbsD dst). Per the format this is
// an andpd of dst with a 0x7fffffffffffffff mask (sign masking); the bytes
// are produced by the absD_encoding encoding class.
10226 instruct absD_reg(regD dst)
10227 %{
10228   match(Set dst (AbsD dst));
10229 
10230   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10231             "# abs double by sign masking" %}
10232   ins_encode(absD_encoding(dst));
10233   ins_pipe(pipe_slow);
10234 %}
10235 
// Float negate in place: dst = (NegF dst). Per the format this is an xorps
// of dst with a 0x80000000 mask (sign flipping); the bytes are produced by
// the negF_encoding encoding class.
10236 instruct negF_reg(regF dst)
10237 %{
10238   match(Set dst (NegF dst));
10239 
10240   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10241   ins_encode(negF_encoding(dst));
10242   ins_pipe(pipe_slow);
10243 %}
10244 
// Double negate in place: dst = (NegD dst). Per the format this is an xorpd
// of dst with a 0x8000000000000000 mask (sign flipping); the bytes are
// produced by the negD_encoding encoding class.
10245 instruct negD_reg(regD dst)
10246 %{
10247   match(Set dst (NegD dst));
10248 
10249   format %{ "xorpd   $dst, [0x8000000000000000]\t"
10250             "# neg double by sign flipping" %}
10251   ins_encode(negD_encoding(dst));
10252   ins_pipe(pipe_slow);
10253 %}
10254 
10255 // -----------Trig and Transcendental Instructions-----------------------------
// Double cosine in place: dst = (CosD dst). The encoding brackets the two
// opcode bytes 0xD9 0xFF (the x87 fcos instruction) between Push_SrcXD(dst)
// and Push_ResultXD(dst).
// NOTE(review): presumably those helpers move dst onto the x87 stack and the
// result back -- confirm against their enc_class definitions.
10256 instruct cosD_reg(regD dst) %{
10257   match(Set dst (CosD dst));
10258 
10259   format %{ "dcos   $dst\n\t" %}
10260   opcode(0xD9, 0xFF);
10261   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10262   ins_pipe( pipe_slow );
10263 %}
10264 
// Double sine in place: dst = (SinD dst). The encoding brackets the two
// opcode bytes 0xD9 0xFE (the x87 fsin instruction) between Push_SrcXD(dst)
// and Push_ResultXD(dst).
// NOTE(review): presumably those helpers move dst onto the x87 stack and the
// result back -- confirm against their enc_class definitions.
10265 instruct sinD_reg(regD dst) %{
10266   match(Set dst (SinD dst));
10267 
10268   format %{ "dsin   $dst\n\t" %}
10269   opcode(0xD9, 0xFE);
10270   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10271   ins_pipe( pipe_slow );
10272 %}
10273 
// Double tangent in place: dst = (TanD dst). No opcode() clause is used; the
// bytes are listed explicitly: fptan (D9 F2) followed by "fstp st" (DD D8),
// bracketed by Push_SrcXD(dst) and Push_ResultXD(dst).
// NOTE(review): the fstp presumably discards the extra value fptan pushes on
// the x87 stack -- confirm against the x87 instruction reference.
10274 instruct tanD_reg(regD dst) %{
10275   match(Set dst (TanD dst));
10276 
10277   format %{ "dtan   $dst\n\t" %}
10278   ins_encode( Push_SrcXD(dst),
10279               Opcode(0xD9), Opcode(0xF2),   //fptan
10280               Opcode(0xDD), Opcode(0xD8),   //fstp st
10281               Push_ResultXD(dst) );
10282   ins_pipe( pipe_slow );
10283 %}
10284 
// Base-10 logarithm in place: dst = (Log10D dst). The constant log_10(2) is
// pushed first (fldlg2), then the operand (Push_SrcXD), and fyl2x forms
// their product with log_2(x); Push_ResultXD(dst) stores the result.
10285 instruct log10D_reg(regD dst) %{
10286   // The source and result Double operands in XMM registers
10287   match(Set dst (Log10D dst));
10288   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10289   // fyl2x        ; compute log_10(2) * log_2(x)
10290   format %{ "fldlg2\t\t\t#Log10\n\t"
10291             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10292          %}
10293    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
10294               Push_SrcXD(dst),
10295               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10296               Push_ResultXD(dst));
10297 
10298   ins_pipe( pipe_slow );
10299 %}
10300 
// Natural logarithm in place: dst = (LogD dst). The constant log_e(2) is
// pushed first (fldln2), then the operand (Push_SrcXD), and fyl2x forms
// their product with log_2(x); Push_ResultXD(dst) stores the result.
10301 instruct logD_reg(regD dst) %{
10302   // The source and result Double operands in XMM registers
10303   match(Set dst (LogD dst));
10304   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10305   // fyl2x        ; compute log_e(2) * log_2(x)
10306   format %{ "fldln2\t\t\t#Log_e\n\t"
10307             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10308          %}
10309   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10310               Push_SrcXD(dst),
10311               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10312               Push_ResultXD(dst));
10313   ins_pipe( pipe_slow );
10314 %}
10315 
10316 
10317 
10318 //----------Arithmetic Conversion Instructions---------------------------------
10319 
// RoundFloat is a no-op here: the rule matches (RoundFloat dst) in place
// with zero cost, an empty encoding (no bytes emitted) and the empty pipe.
10320 instruct roundFloat_nop(regF dst)
10321 %{
10322   match(Set dst (RoundFloat dst));
10323 
10324   ins_cost(0);
10325   ins_encode();
10326   ins_pipe(empty);
10327 %}
10328 
// RoundDouble is a no-op here: the rule matches (RoundDouble dst) in place
// with zero cost, an empty encoding (no bytes emitted) and the empty pipe.
10329 instruct roundDouble_nop(regD dst)
10330 %{
10331   match(Set dst (RoundDouble dst));
10332 
10333   ins_cost(0);
10334   ins_encode();
10335   ins_pipe(empty);
10336 %}
10337 
// Float to double conversion, register-register: dst = (ConvF2D src)
// (format: cvtss2sd).
10338 instruct convF2D_reg_reg(regD dst, regF src)
10339 %{
10340   match(Set dst (ConvF2D src));
10341 
10342   format %{ "cvtss2sd $dst, $src" %}
10343   opcode(0xF3, 0x0F, 0x5A);
10344   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10345   ins_pipe(pipe_slow); // XXX
10346 %}
10347 
// Float to double conversion of a value loaded from memory:
// dst = (ConvF2D (LoadF src)); the load is folded into the cvtss2sd.
10348 instruct convF2D_reg_mem(regD dst, memory src)
10349 %{
10350   match(Set dst (ConvF2D (LoadF src)));
10351 
10352   format %{ "cvtss2sd $dst, $src" %}
10353   opcode(0xF3, 0x0F, 0x5A);
10354   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10355   ins_pipe(pipe_slow); // XXX
10356 %}
10357 
// Double to float conversion, register-register: dst = (ConvD2F src)
// (format: cvtsd2ss).
10358 instruct convD2F_reg_reg(regF dst, regD src)
10359 %{
10360   match(Set dst (ConvD2F src));
10361 
10362   format %{ "cvtsd2ss $dst, $src" %}
10363   opcode(0xF2, 0x0F, 0x5A);
10364   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10365   ins_pipe(pipe_slow); // XXX
10366 %}
10367 
// Double to float conversion of a value loaded from memory:
// dst = (ConvD2F (LoadD src)); the load is folded into the cvtsd2ss.
10368 instruct convD2F_reg_mem(regF dst, memory src)
10369 %{
10370   match(Set dst (ConvD2F (LoadD src)));
10371 
10372   format %{ "cvtsd2ss $dst, $src" %}
10373   opcode(0xF2, 0x0F, 0x5A);
10374   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10375   ins_pipe(pipe_slow); // XXX
10376 %}
10377 
10378 // XXX do mem variants
// Float to int conversion: dst = (ConvF2I src). Per the format: cvttss2sil
// produces the result; if it equals 0x80000000 the slow path spills src to
// the stack, calls f2i_fixup and pops the corrected value into dst,
// otherwise the fixup is skipped (jne done). The slow path is emitted by the
// f2i_fixup(dst, src) encoding. cr is declared killed.
10379 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
10380 %{
10381   match(Set dst (ConvF2I src));
10382   effect(KILL cr);
10383 
10384   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
10385             "cmpl    $dst, #0x80000000\n\t"
10386             "jne,s   done\n\t"
10387             "subq    rsp, #8\n\t"
10388             "movss   [rsp], $src\n\t"
10389             "call    f2i_fixup\n\t"
10390             "popq    $dst\n"
10391     "done:   "%}
10392   opcode(0xF3, 0x0F, 0x2C);
10393   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
10394              f2i_fixup(dst, src));
10395   ins_pipe(pipe_slow);
10396 %}
10397 
// Float to long conversion: dst = (ConvF2L src). Per the format: cvttss2siq
// produces the result; if it equals 0x8000000000000000 the slow path spills
// src to the stack, calls f2l_fixup and pops the corrected value into dst.
// Uses the wide REX encoder for the 64-bit destination. cr is declared killed.
10398 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
10399 %{
10400   match(Set dst (ConvF2L src));
10401   effect(KILL cr);
10402 
10403   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
10404             "cmpq    $dst, [0x8000000000000000]\n\t"
10405             "jne,s   done\n\t"
10406             "subq    rsp, #8\n\t"
10407             "movss   [rsp], $src\n\t"
10408             "call    f2l_fixup\n\t"
10409             "popq    $dst\n"
10410     "done:   "%}
10411   opcode(0xF3, 0x0F, 0x2C);
10412   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
10413              f2l_fixup(dst, src));
10414   ins_pipe(pipe_slow);
10415 %}
10416 
// Double to int conversion: dst = (ConvD2I src). Per the format: cvttsd2sil
// produces the result; if it equals 0x80000000 the slow path spills src to
// the stack (movsd), calls d2i_fixup and pops the corrected value into dst.
// cr is declared killed.
10417 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
10418 %{
10419   match(Set dst (ConvD2I src));
10420   effect(KILL cr);
10421 
10422   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
10423             "cmpl    $dst, #0x80000000\n\t"
10424             "jne,s   done\n\t"
10425             "subq    rsp, #8\n\t"
10426             "movsd   [rsp], $src\n\t"
10427             "call    d2i_fixup\n\t"
10428             "popq    $dst\n"
10429     "done:   "%}
10430   opcode(0xF2, 0x0F, 0x2C);
10431   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
10432              d2i_fixup(dst, src));
10433   ins_pipe(pipe_slow);
10434 %}
10435 
// Double to long conversion: dst = (ConvD2L src). Per the format: cvttsd2siq
// produces the result; if it equals 0x8000000000000000 the slow path spills
// src to the stack (movsd), calls d2l_fixup and pops the corrected value into
// dst. Uses the wide REX encoder for the 64-bit destination. cr is killed.
10436 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
10437 %{
10438   match(Set dst (ConvD2L src));
10439   effect(KILL cr);
10440 
10441   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
10442             "cmpq    $dst, [0x8000000000000000]\n\t"
10443             "jne,s   done\n\t"
10444             "subq    rsp, #8\n\t"
10445             "movsd   [rsp], $src\n\t"
10446             "call    d2l_fixup\n\t"
10447             "popq    $dst\n"
10448     "done:   "%}
10449   opcode(0xF2, 0x0F, 0x2C);
10450   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
10451              d2l_fixup(dst, src));
10452   ins_pipe(pipe_slow);
10453 %}
10454 
// Int to float conversion, register-register: dst = (ConvI2F src)
// (format: cvtsi2ssl). Only selected when the UseXmmI2F flag is off; the
// convXI2F_reg rule covers the same ideal node when it is on.
10455 instruct convI2F_reg_reg(regF dst, rRegI src)
10456 %{
10457   predicate(!UseXmmI2F);
10458   match(Set dst (ConvI2F src));
10459 
10460   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10461   opcode(0xF3, 0x0F, 0x2A);
10462   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10463   ins_pipe(pipe_slow); // XXX
10464 %}
10465 
// Int to float conversion of a value loaded from memory:
// dst = (ConvI2F (LoadI src)); the load is folded into the cvtsi2ssl.
// Unlike the register form, this rule carries no UseXmmI2F predicate.
10466 instruct convI2F_reg_mem(regF dst, memory src)
10467 %{
10468   match(Set dst (ConvI2F (LoadI src)));
10469 
10470   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10471   opcode(0xF3, 0x0F, 0x2A);
10472   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10473   ins_pipe(pipe_slow); // XXX
10474 %}
10475 
// Int to double conversion, register-register: dst = (ConvI2D src)
// (format: cvtsi2sdl). Only selected when the UseXmmI2D flag is off; the
// convXI2D_reg rule covers the same ideal node when it is on.
10476 instruct convI2D_reg_reg(regD dst, rRegI src)
10477 %{
10478   predicate(!UseXmmI2D);
10479   match(Set dst (ConvI2D src));
10480 
10481   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10482   opcode(0xF2, 0x0F, 0x2A);
10483   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10484   ins_pipe(pipe_slow); // XXX
10485 %}
10486 
// Int to double conversion of a value loaded from memory:
// dst = (ConvI2D (LoadI src)); the load is folded into the cvtsi2sdl.
// Unlike the register form, this rule carries no UseXmmI2D predicate.
10487 instruct convI2D_reg_mem(regD dst, memory src)
10488 %{
10489   match(Set dst (ConvI2D (LoadI src)));
10490 
10491   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10492   opcode(0xF2, 0x0F, 0x2A);
10493   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10494   ins_pipe(pipe_slow); // XXX
10495 %}
10496 
// Alternative int to float conversion, selected when UseXmmI2F is on:
// dst = (ConvI2F src). The int is first moved into the float register
// (movdl) and then converted in place (cvtdq2ps). Encoded with inline
// assembler calls rather than opcode()/encoding classes.
10497 instruct convXI2F_reg(regF dst, rRegI src)
10498 %{
10499   predicate(UseXmmI2F);
10500   match(Set dst (ConvI2F src));
10501 
10502   format %{ "movdl $dst, $src\n\t"
10503             "cvtdq2psl $dst, $dst\t# i2f" %}
10504   ins_encode %{
10505     __ movdl($dst$$XMMRegister, $src$$Register);
10506     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
10507   %}
10508   ins_pipe(pipe_slow); // XXX
10509 %}
10510 
// Alternative int to double conversion, selected when UseXmmI2D is on:
// dst = (ConvI2D src). The int is first moved into the double register
// (movdl) and then converted in place (cvtdq2pd). Encoded with inline
// assembler calls rather than opcode()/encoding classes.
10511 instruct convXI2D_reg(regD dst, rRegI src)
10512 %{
10513   predicate(UseXmmI2D);
10514   match(Set dst (ConvI2D src));
10515 
10516   format %{ "movdl $dst, $src\n\t"
10517             "cvtdq2pdl $dst, $dst\t# i2d" %}
10518   ins_encode %{
10519     __ movdl($dst$$XMMRegister, $src$$Register);
10520     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10521   %}
10522   ins_pipe(pipe_slow); // XXX
10523 %}
10524 
// Long to float conversion, register-register: dst = (ConvL2F src)
// (format: cvtsi2ssq). Same opcode bytes as the int form but encoded with
// the wide REX encoder for the 64-bit source.
10525 instruct convL2F_reg_reg(regF dst, rRegL src)
10526 %{
10527   match(Set dst (ConvL2F src));
10528 
10529   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10530   opcode(0xF3, 0x0F, 0x2A);
10531   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
10532   ins_pipe(pipe_slow); // XXX
10533 %}
10534 
// Long to float conversion of a value loaded from memory:
// dst = (ConvL2F (LoadL src)); the load is folded into the cvtsi2ssq.
10535 instruct convL2F_reg_mem(regF dst, memory src)
10536 %{
10537   match(Set dst (ConvL2F (LoadL src)));
10538 
10539   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10540   opcode(0xF3, 0x0F, 0x2A);
10541   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
10542   ins_pipe(pipe_slow); // XXX
10543 %}
10544 
// Long to double conversion, register-register: dst = (ConvL2D src)
// (format: cvtsi2sdq). Same opcode bytes as the int form but encoded with
// the wide REX encoder for the 64-bit source.
10545 instruct convL2D_reg_reg(regD dst, rRegL src)
10546 %{
10547   match(Set dst (ConvL2D src));
10548 
10549   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10550   opcode(0xF2, 0x0F, 0x2A);
10551   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
10552   ins_pipe(pipe_slow); // XXX
10553 %}
10554 
// Long to double conversion of a value loaded from memory:
// dst = (ConvL2D (LoadL src)); the load is folded into the cvtsi2sdq.
10555 instruct convL2D_reg_mem(regD dst, memory src)
10556 %{
10557   match(Set dst (ConvL2D (LoadL src)));
10558 
10559   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10560   opcode(0xF2, 0x0F, 0x2A);
10561   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
10562   ins_pipe(pipe_slow); // XXX
10563 %}
10564 
// Sign-extending int to long conversion between registers (movslq).
10565 instruct convI2L_reg_reg(rRegL dst, rRegI src)
10566 %{
10567   match(Set dst (ConvI2L src));
10568 
10569   ins_cost(125);
10570   format %{ "movslq  $dst, $src\t# i2l" %}
10571   opcode(0x63); // needs REX.W
  // The wide REX prefix precedes the single opcode byte.
10572   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10573   ins_pipe(ialu_reg_reg);
10574 %}
10575 
10576 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10577 // %{
10578 //   match(Set dst (ConvI2L src));
10579 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10580 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10581 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10582 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10583 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
10584 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10585 
10586 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10587 //   ins_encode(enc_copy(dst, src));
10588 // //   opcode(0x63); // needs REX.W
10589 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10590 //   ins_pipe(ialu_reg_reg);
10591 // %}
10592 
// Sign-extending int to long conversion with the int loaded from memory:
// the LoadI is folded into the movslq memory operand.
10593 instruct convI2L_reg_mem(rRegL dst, memory src)
10594 %{
10595   match(Set dst (ConvI2L (LoadI src)));
10596 
10597   format %{ "movslq  $dst, $src\t# i2l" %}
10598   opcode(0x63); // needs REX.W
10599   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst,src));
10600   ins_pipe(ialu_reg_mem);
10601 %}
10602 
10603 // Zero-extend convert int to long.
// Matches an int-to-long conversion whose result is ANDed with the
// immL_32bits mask operand (defined elsewhere in this file; presumably
// 0xFFFFFFFF) and replaces the pair with a single 32-bit register copy.
// The format string carries no trailing "\n\t", so the disassembly listing
// does not get an empty continuation line after this instruction.
10604 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
10605 %{
10606   match(Set dst (AndL (ConvI2L src) mask));
10607 
10608   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
10609   ins_encode(enc_copy(dst, src));
10610   ins_pipe(ialu_reg_reg);
10611 %}
10612 
10613 // Zero-extend convert int to long, int loaded from memory.
// Matches (AndL (ConvI2L (LoadI mem)) mask) with the immL_32bits mask
// operand and emits one 32-bit load (opcode 0x8B, no wide REX prefix).
// The format string carries no trailing "\n\t", so the disassembly listing
// does not get an empty continuation line after this instruction.
10614 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
10615 %{
10616   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
10617 
10618   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
10619   opcode(0x8B);
10620   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10621   ins_pipe(ialu_reg_mem);
10622 %}
10623 
// Zero-extend the low half of a long: (AndL src mask) with the
// immL_32bits mask operand is emitted as a 32-bit register move.
// Uses enc_copy_always rather than enc_copy (NOTE(review): presumably so
// the move is emitted even when dst == src -- confirm in the encode block).
10624 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
10625 %{
10626   match(Set dst (AndL src mask));
10627 
10628   format %{ "movl    $dst, $src\t# zero-extend long" %}
10629   ins_encode(enc_copy_always(dst, src));
10630   ins_pipe(ialu_reg_reg);
10631 %}
10632 
// Narrow a long to an int with a 32-bit register move (movl).
10633 instruct convL2I_reg_reg(rRegI dst, rRegL src)
10634 %{
10635   match(Set dst (ConvL2I src));
10636 
10637   format %{ "movl    $dst, $src\t# l2i" %}
10638   ins_encode(enc_copy_always(dst, src));
10639   ins_pipe(ialu_reg_reg);
10640 %}
10641 
10642 
// Raw bit move float -> int: load the float's stack slot into an int
// register with a 32-bit movl (opcode 0x8B).
10643 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
10644   match(Set dst (MoveF2I src));
10645   effect(DEF dst, USE src);
10646 
10647   ins_cost(125);
10648   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
10649   opcode(0x8B);
10650   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10651   ins_pipe(ialu_reg_mem);
10652 %}
10653 
// Raw bit move int -> float: load the int's stack slot into an XMM
// register with movss (0xF3 0x0F 0x10).
10654 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
10655   match(Set dst (MoveI2F src));
10656   effect(DEF dst, USE src);
10657 
10658   ins_cost(125);
10659   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
10660   opcode(0xF3, 0x0F, 0x10);
10661   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10662   ins_pipe(pipe_slow);
10663 %}
10664 
// Raw bit move double -> long: load the double's stack slot into a long
// register with a 64-bit movq (opcode 0x8B with the wide REX prefix).
10665 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
10666   match(Set dst (MoveD2L src));
10667   effect(DEF dst, USE src);
10668 
10669   ins_cost(125);
10670   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
10671   opcode(0x8B);
10672   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10673   ins_pipe(ialu_reg_mem);
10674 %}
10675 
// Raw bit move long -> double from a stack slot, movlpd variant
// (0x66 0x0F 0x12).  Selected when UseXmmLoadAndClearUpper is off; the
// movsd variant below covers the other case.
// NOTE(review): the format label reads "MoveL2D_stack_reg", the same as the
// movsd variant, so the two are only distinguishable by mnemonic in listings.
10676 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
10677   predicate(!UseXmmLoadAndClearUpper);
10678   match(Set dst (MoveL2D src));
10679   effect(DEF dst, USE src);
10680 
10681   ins_cost(125);
10682   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
10683   opcode(0x66, 0x0F, 0x12);
10684   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10685   ins_pipe(pipe_slow);
10686 %}
10687 
// Raw bit move long -> double from a stack slot, movsd variant
// (0xF2 0x0F 0x10).  Selected when UseXmmLoadAndClearUpper is on.
10688 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
10689   predicate(UseXmmLoadAndClearUpper);
10690   match(Set dst (MoveL2D src));
10691   effect(DEF dst, USE src);
10692 
10693   ins_cost(125);
10694   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
10695   opcode(0xF2, 0x0F, 0x10);
10696   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10697   ins_pipe(pipe_slow);
10698 %}
10699 
10700 
// Raw bit move float -> int: store the XMM float into the int's stack slot
// with movss (0xF3 0x0F 0x11).  Note the encoding helpers take (src, dst):
// the register operand comes first, the memory operand second.
10701 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
10702   match(Set dst (MoveF2I src));
10703   effect(DEF dst, USE src);
10704 
10705   ins_cost(95); // XXX
10706   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
10707   opcode(0xF3, 0x0F, 0x11);
10708   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
10709   ins_pipe(pipe_slow);
10710 %}
10711 
// Raw bit move int -> float: store the int register into the float's stack
// slot with a 32-bit movl (opcode 0x89).  The encoding helpers take the
// register operand first, the memory operand second.
10712 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
10713   match(Set dst (MoveI2F src));
10714   effect(DEF dst, USE src);
10715 
10716   ins_cost(100);
10717   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
10718   opcode(0x89);
10719   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
10720   ins_pipe( ialu_mem_reg );
10721 %}
10722 
// Raw bit move double -> long: store the XMM double into the long's stack
// slot with movsd (0xF2 0x0F 0x11).  The encoding helpers take the register
// operand first, the memory operand second.  The format label names this
// rule (MoveD2L_reg_stack) so listings identify the instruction correctly.
10723 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
10724   match(Set dst (MoveD2L src));
10725   effect(DEF dst, USE src);
10726 
10727   ins_cost(95); // XXX
10728   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
10729   opcode(0xF2, 0x0F, 0x11);
10730   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
10731   ins_pipe(pipe_slow);
10732 %}
10733 
// Raw bit move long -> double: store the long register into the double's
// stack slot with a 64-bit movq (opcode 0x89 with the wide REX prefix).
10734 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
10735   match(Set dst (MoveL2D src));
10736   effect(DEF dst, USE src);
10737 
10738   ins_cost(100);
10739   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
10740   opcode(0x89);
10741   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10742   ins_pipe(ialu_mem_reg);
10743 %}
10744 
// Raw bit move float -> int directly between registers (XMM to general
// register) via the assembler's movdl.
10745 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
10746   match(Set dst (MoveF2I src));
10747   effect(DEF dst, USE src);
10748   ins_cost(85);
10749   format %{ "movd    $dst,$src\t# MoveF2I" %}
10750   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
10751   ins_pipe( pipe_slow );
10752 %}
10753 
// Raw bit move double -> long directly between registers (XMM to general
// register) via the assembler's movdq; the format string prints it as movd.
10754 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
10755   match(Set dst (MoveD2L src));
10756   effect(DEF dst, USE src);
10757   ins_cost(85);
10758   format %{ "movd    $dst,$src\t# MoveD2L" %}
10759   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
10760   ins_pipe( pipe_slow );
10761 %}
10762 
10763 // The next instructions have long latency and use Int unit. Set high cost.
// Raw bit move int -> float directly between registers (general register
// to XMM) via movdl; cost 300 versus 85 for the XMM-to-general direction.
10764 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
10765   match(Set dst (MoveI2F src));
10766   effect(DEF dst, USE src);
10767   ins_cost(300);
10768   format %{ "movd    $dst,$src\t# MoveI2F" %}
10769   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
10770   ins_pipe( pipe_slow );
10771 %}
10772 
// Raw bit move long -> double directly between registers (general register
// to XMM) via movdq.  Carries the same high cost (300) as the int -> float
// register move.
10773 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10774   match(Set dst (MoveL2D src));
10775   effect(DEF dst, USE src);
10776   ins_cost(300);
10777   format %{ "movd    $dst,$src\t# MoveL2D" %}
10778   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
10779   ins_pipe( pipe_slow );
10780 %}
10781 
10782 // Replicate scalar to packed byte (1 byte) values in xmm
// Source is already in an XMM register.  The format string lists the
// three-instruction sequence; the bytes come from the pshufd_8x8 encoding
// (defined elsewhere in this file).
10783 instruct Repl8B_reg(regD dst, regD src) %{
10784   match(Set dst (Replicate8B src));
10785   format %{ "MOVDQA  $dst,$src\n\t"
10786             "PUNPCKLBW $dst,$dst\n\t"
10787             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
10788   ins_encode( pshufd_8x8(dst, src));
10789   ins_pipe( pipe_slow );
10790 %}
10791 
10792 // Replicate scalar to packed byte (1 byte) values in xmm
// Source is in a general register: it is first moved into dst (mov_i2x)
// and then replicated in place (pshufd_8x8 with dst as both operands).
10793 instruct Repl8B_rRegI(regD dst, rRegI src) %{
10794   match(Set dst (Replicate8B src));
10795   format %{ "MOVD    $dst,$src\n\t"
10796             "PUNPCKLBW $dst,$dst\n\t"
10797             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
10798   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
10799   ins_pipe( pipe_slow );
10800 %}
10801 
10802 // Replicate scalar zero to packed byte (1 byte) values in xmm
// A replicated zero is produced by XORing dst with itself.
10803 instruct Repl8B_immI0(regD dst, immI0 zero) %{
10804   match(Set dst (Replicate8B zero));
10805   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
10806   ins_encode( pxor(dst, dst));
10807   ins_pipe( fpu_reg_reg );
10808 %}
10809 
10810 // Replicate scalar to packed short (2 byte) values in xmm
// Source is already in an XMM register; a single shuffle (pshufd_4x16
// encoding, printed as PSHUFLW with selector 0x00) does the replication.
10811 instruct Repl4S_reg(regD dst, regD src) %{
10812   match(Set dst (Replicate4S src));
10813   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
10814   ins_encode( pshufd_4x16(dst, src));
10815   ins_pipe( fpu_reg_reg );
10816 %}
10817 
10818 // Replicate scalar to packed short (2 byte) values in xmm
// Source is in a general register: move it into dst (mov_i2x), then
// shuffle dst in place.
10819 instruct Repl4S_rRegI(regD dst, rRegI src) %{
10820   match(Set dst (Replicate4S src));
10821   format %{ "MOVD    $dst,$src\n\t"
10822             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
10823   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
10824   ins_pipe( fpu_reg_reg );
10825 %}
10826 
10827 // Replicate scalar zero to packed short (2 byte) values in xmm
// A replicated zero is produced by XORing dst with itself.
10828 instruct Repl4S_immI0(regD dst, immI0 zero) %{
10829   match(Set dst (Replicate4S zero));
10830   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
10831   ins_encode( pxor(dst, dst));
10832   ins_pipe( fpu_reg_reg );
10833 %}
10834 
10835 // Replicate scalar to packed char (2 byte) values in xmm
// Uses the same pshufd_4x16 encoding as the packed-short rule; only the
// matched ideal node (Replicate4C) differs.
10836 instruct Repl4C_reg(regD dst, regD src) %{
10837   match(Set dst (Replicate4C src));
10838   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
10839   ins_encode( pshufd_4x16(dst, src));
10840   ins_pipe( fpu_reg_reg );
10841 %}
10842 
10843 // Replicate scalar to packed char (2 byte) values in xmm
// Source is in a general register: move it into dst (mov_i2x), then
// shuffle dst in place.
10844 instruct Repl4C_rRegI(regD dst, rRegI src) %{
10845   match(Set dst (Replicate4C src));
10846   format %{ "MOVD    $dst,$src\n\t"
10847             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
10848   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
10849   ins_pipe( fpu_reg_reg );
10850 %}
10851 
10852 // Replicate scalar zero to packed char (2 byte) values in xmm
// A replicated zero is produced by XORing dst with itself.
10853 instruct Repl4C_immI0(regD dst, immI0 zero) %{
10854   match(Set dst (Replicate4C zero));
10855   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
10856   ins_encode( pxor(dst, dst));
10857   ins_pipe( fpu_reg_reg );
10858 %}
10859 
10860 // Replicate scalar to packed integer (4 byte) values in xmm
// Source is already in an XMM register; one PSHUFD with selector 0x00.
10861 instruct Repl2I_reg(regD dst, regD src) %{
10862   match(Set dst (Replicate2I src));
10863   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
10864   ins_encode( pshufd(dst, src, 0x00));
10865   ins_pipe( fpu_reg_reg );
10866 %}
10867 
10868 // Replicate scalar to packed integer (4 byte) values in xmm
// Source is in a general register: move it into dst (mov_i2x), then
// PSHUFD dst in place with selector 0x00.
10869 instruct Repl2I_rRegI(regD dst, rRegI src) %{
10870   match(Set dst (Replicate2I src));
10871   format %{ "MOVD   $dst,$src\n\t"
10872             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
10873   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
10874   ins_pipe( fpu_reg_reg );
10875 %}
10876 
10877 // Replicate scalar zero to packed integer (4 byte) values in xmm
// A replicated zero is produced by XORing dst with itself.
10878 instruct Repl2I_immI0(regD dst, immI0 zero) %{
10879   match(Set dst (Replicate2I zero));
10880   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
10881   ins_encode( pxor(dst, dst));
10882   ins_pipe( fpu_reg_reg );
10883 %}
10884 
10885 // Replicate scalar to packed single precision floating point values in xmm
// Source operand is declared as regD; one PSHUFD with selector 0xe0.
10886 instruct Repl2F_reg(regD dst, regD src) %{
10887   match(Set dst (Replicate2F src));
10888   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
10889   ins_encode( pshufd(dst, src, 0xe0));
10890   ins_pipe( fpu_reg_reg );
10891 %}
10892 
10893 // Replicate scalar to packed single precision floating point values in xmm
// Identical to Repl2F_reg except that the source operand is declared as regF.
10894 instruct Repl2F_regF(regD dst, regF src) %{
10895   match(Set dst (Replicate2F src));
10896   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
10897   ins_encode( pshufd(dst, src, 0xe0));
10898   ins_pipe( fpu_reg_reg );
10899 %}
10900 
10901 // Replicate scalar zero to packed single precision floating point values in xmm
// A replicated zero (immF0 operand) is produced by XORing dst with itself.
10902 instruct Repl2F_immF0(regD dst, immF0 zero) %{
10903   match(Set dst (Replicate2F zero));
10904   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
10905   ins_encode( pxor(dst, dst));
10906   ins_pipe( fpu_reg_reg );
10907 %}
10908 
10909 
10910 // =======================================================================
10911 // fast clearing of an array
// Operands are pinned to fixed registers: count in rcx, base in rdi, and
// rax as the zero source.  cnt and base are consumed and destroyed
// (USE_KILL); rax and the flags are clobbered (KILL).
10912 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10913                   rFlagsReg cr)
10914 %{
10915   match(Set dummy (ClearArray cnt base));
10916   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
10917 
10918   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
10919             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
  // Two instructions are emitted: the xor that zeroes rax, then the
  // three raw bytes F3 48 AB (rep prefix, REX.W, stos).
10920   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
10921              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
10922   ins_pipe(pipe_slow);
10923 %}
10924 
// String comparison (StrComp).  All operands are pinned to fixed registers:
// str1 in rdi, str2 in rsi, result in rcx, with rax and rbx as scratch.
// Both string pointers are consumed and destroyed; the temporaries and the
// flags are clobbered.  The code itself comes from enc_String_Compare
// (defined elsewhere in this file).
10925 instruct string_compare(rdi_RegP str1, rsi_RegP str2, rax_RegI tmp1,
10926                         rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr)
10927 %{
10928   match(Set result (StrComp str1 str2));
10929   effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr);
10930   //ins_cost(300);
10931 
10932   format %{ "String Compare $str1, $str2 -> $result    // XXX KILL RAX, RBX" %}
10933   ins_encode( enc_String_Compare() );
10934   ins_pipe( pipe_slow );
10935 %}
10936 
10937 // fast array equals
// Operands are pinned to fixed registers: ary1 in rdi, ary2 in rsi, result
// in rcx, with rax and rbx as scratch.  Both array pointers are consumed and
// destroyed; the temporaries and the flags are clobbered.  Unlike
// string_compare, the encoding receives its operands explicitly.
10938 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, 
10939                       rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) %{
10940   match(Set result (AryEq ary1 ary2));
10941   effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr);
10942   //ins_cost(300);
10943 
10944   format %{ "Array Equals $ary1,$ary2 -> $result    // KILL RAX, RBX" %}
10945   ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) );
10946   ins_pipe( pipe_slow );
10947 %}
10948 
10949 //----------Control Flow Instructions------------------------------------------
10950 // Signed compare Instructions
10951 
10952 // XXX more variants!!
// Signed 32-bit compare of two registers; defines the flags register.
// This rule is also instantiated by the expand blocks of minI_rReg and
// maxI_rReg.
10953 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
10954 %{
10955   match(Set cr (CmpI op1 op2));
10956   effect(DEF cr, USE op1, USE op2);
10957 
10958   format %{ "cmpl    $op1, $op2" %}
10959   opcode(0x3B);  /* Opcode 3B /r */
10960   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10961   ins_pipe(ialu_cr_reg_reg);
10962 %}
10963 
// Signed 32-bit compare of a register against an immediate (opcode 81 /7).
// NOTE(review): OpcSErm / Con8or32 presumably pick the short sign-extended
// imm8 form when the constant fits -- confirm in the encode block.
10964 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
10965 %{
10966   match(Set cr (CmpI op1 op2));
10967 
10968   format %{ "cmpl    $op1, $op2" %}
10969   opcode(0x81, 0x07); /* Opcode 81 /7 */
10970   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10971   ins_pipe(ialu_cr_reg_imm);
10972 %}
10973 
// Signed 32-bit compare of a register against an int loaded from memory;
// the LoadI is folded into the cmpl memory operand.
10974 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
10975 %{
10976   match(Set cr (CmpI op1 (LoadI op2)));
10977 
10978   ins_cost(500); // XXX
10979   format %{ "cmpl    $op1, $op2" %}
10980   opcode(0x3B); /* Opcode 3B /r */
10981   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10982   ins_pipe(ialu_cr_reg_mem);
10983 %}
10984 
// Compare an int register against zero by testing the register with itself
// (testl src, src; opcode 0x85) instead of encoding an immediate.
10985 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
10986 %{
10987   match(Set cr (CmpI src zero));
10988 
10989   format %{ "testl   $src, $src" %}
10990   opcode(0x85);
10991   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10992   ins_pipe(ialu_cr_reg_imm);
10993 %}
10994 
// Fold (src & con) compared against zero into a single testl of the
// register with the immediate (opcode F7 /0 followed by a 32-bit constant).
10995 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
10996 %{
10997   match(Set cr (CmpI (AndI src con) zero));
10998 
10999   format %{ "testl   $src, $con" %}
11000   opcode(0xF7, 0x00);
11001   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11002   ins_pipe(ialu_cr_reg_imm);
11003 %}
11004 
// Fold (src & LoadI mem) compared against zero into a single testl with a
// memory operand (opcode 0x85).
11005 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11006 %{
11007   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11008 
11009   format %{ "testl   $src, $mem" %}
11010   opcode(0x85);
11011   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11012   ins_pipe(ialu_cr_reg_mem);
11013 %}
11014 
11015 // Unsigned compare Instructions; really, same as signed except they
11016 // produce an rFlagsRegU instead of rFlagsReg.
// Register-register form: same cmpl encoding (3B /r) as compI_rReg.
11017 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11018 %{
11019   match(Set cr (CmpU op1 op2));
11020 
11021   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11022   opcode(0x3B); /* Opcode 3B /r */
11023   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11024   ins_pipe(ialu_cr_reg_reg);
11025 %}
11026 
// Unsigned 32-bit compare of a register against an immediate; same
// encoding (81 /7) as compI_rReg_imm, but the result is an rFlagsRegU.
11027 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11028 %{
11029   match(Set cr (CmpU op1 op2));
11030 
11031   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11032   opcode(0x81,0x07); /* Opcode 81 /7 */
11033   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11034   ins_pipe(ialu_cr_reg_imm);
11035 %}
11036 
// Unsigned 32-bit compare of a register against an int loaded from memory;
// the LoadI is folded into the cmpl memory operand.
11037 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11038 %{
11039   match(Set cr (CmpU op1 (LoadI op2)));
11040 
11041   ins_cost(500); // XXX
11042   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11043   opcode(0x3B); /* Opcode 3B /r */
11044   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11045   ins_pipe(ialu_cr_reg_mem);
11046 %}
11047 
11048 // // // Cisc-spilled version of cmpU_rReg
11049 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11050 // //%{
11051 // //  match(Set cr (CmpU (LoadI op1) op2));
11052 // //
11053 // //  format %{ "CMPu   $op1,$op2" %}
11054 // //  ins_cost(500);
11055 // //  opcode(0x39);  /* Opcode 39 /r */
11056 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11057 // //%}
11058 
// Unsigned compare of an int register against zero, done by testing the
// register with itself (testl src, src); produces an rFlagsRegU.
11059 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11060 %{
11061   match(Set cr (CmpU src zero));
11062 
11063   format %{ "testl  $src, $src\t# unsigned" %}
11064   opcode(0x85);
11065   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11066   ins_pipe(ialu_cr_reg_imm);
11067 %}
11068 
// Pointer compare of two registers: 64-bit cmpq (3B /r with the wide REX
// prefix), producing unsigned flags (rFlagsRegU).
11069 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11070 %{
11071   match(Set cr (CmpP op1 op2));
11072 
11073   format %{ "cmpq    $op1, $op2\t# ptr" %}
11074   opcode(0x3B); /* Opcode 3B /r */
11075   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11076   ins_pipe(ialu_cr_reg_reg);
11077 %}
11078 
// Pointer compare of a register against a pointer loaded from memory; the
// LoadP is folded into the cmpq memory operand.  Has no predicate, so it
// applies to any CmpP-of-LoadP shape (compare with compP_mem_rReg, which
// matches the same tree under a predicate and without the cost of 500).
11079 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11080 %{
11081   match(Set cr (CmpP op1 (LoadP op2)));
11082 
11083   ins_cost(500); // XXX
11084   format %{ "cmpq    $op1, $op2\t# ptr" %}
11085   opcode(0x3B); /* Opcode 3B /r */
11086   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11087   ins_pipe(ialu_cr_reg_mem);
11088 %}
11089 
11090 // // // Cisc-spilled version of cmpP_rReg
11091 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11092 // //%{
11093 // //  match(Set cr (CmpP (LoadP op1) op2));
11094 // //
11095 // //  format %{ "CMPu   $op1,$op2" %}
11096 // //  ins_cost(500);
11097 // //  opcode(0x39);  /* Opcode 39 /r */
11098 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11099 // //%}
11100 
11101 // XXX this is generalized by compP_rReg_mem???
11102 // Compare raw pointer (used in out-of-heap check).
11103 // Only works because non-oop pointers must be raw pointers
11104 // and raw pointers have no anti-dependencies.
// Same match tree and encoding as compP_rReg_mem, but without an explicit
// ins_cost and guarded by a predicate on the matched node.
11105 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11106 %{
  // n->in(2) is the LoadP operand of the CmpP; the rule applies only when
  // the bottom type of that load's in(2) is not an oop pointer.
  // NOTE(review): in(2) of the load is presumably its address input -- confirm.
11107   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11108   match(Set cr (CmpP op1 (LoadP op2)));
11109 
11110   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11111   opcode(0x3B); /* Opcode 3B /r */
11112   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11113   ins_pipe(ialu_cr_reg_mem);
11114 %}
11115 
11116 // This will generate a signed flags result. This should be OK since
11117 // any compare to a zero should be eq/neq.
// Null check of a pointer register: testq of the register with itself
// (opcode 0x85 with the wide REX prefix).
11118 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11119 %{
11120   match(Set cr (CmpP src zero));
11121 
11122   format %{ "testq   $src, $src\t# ptr" %}
11123   opcode(0x85);
11124   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11125   ins_pipe(ialu_cr_reg_imm);
11126 %}
11127 
11128 // This will generate a signed flags result. This should be OK since
11129 // any compare to a zero should be eq/neq.
// Null check of a pointer held in memory: test the memory operand against
// an all-ones mask without loading it into a register first.
11130 instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero)
11131 %{
11132   match(Set cr (CmpP (LoadP op) zero));
11133 
11134   ins_cost(500); // XXX
11135   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11136   opcode(0xF7); /* Opcode F7 /0 */
  // Only a 32-bit immediate (0xFFFFFFFF) is emitted, while the format string
  // shows a 64-bit mask.  NOTE(review): this relies on the processor
  // sign-extending the imm32 for the wide form -- confirm.
11137   ins_encode(REX_mem_wide(op),
11138              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11139   ins_pipe(ialu_cr_reg_imm);
11140 %}
11141 
11142 
// Compare two compressed (narrow) oops held in registers with a 32-bit
// cmpl; produces unsigned flags (rFlagsRegU).
11143 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11144 %{
11145   match(Set cr (CmpN op1 op2));
11146 
11147   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11148   ins_encode %{ __ cmpl(as_Register($op1$$reg), as_Register($op2$$reg)); %}
11149   ins_pipe(ialu_cr_reg_reg);
11150 %}
11151 
// Compare a compressed (narrow) oop register against a narrow oop loaded
// from memory; the LoadN is folded into the cmpl memory operand.
// The format string references the operand as $mem so that listings show
// the actual address expression rather than the literal word "mem".
11152 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11153 %{
11154   match(Set cr (CmpN src (LoadN mem)));
11155 
11156   ins_cost(500); // XXX
11157   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11158   ins_encode %{
    // Rebuild the effective address from the memory operand's components.
11159     Address adr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
11160     __ cmpl(as_Register($src$$reg), adr);
11161   %}
11162   ins_pipe(ialu_cr_reg_mem);
11163 %}
11164 
// Null check of a compressed (narrow) oop register: testl of the register
// with itself.
11165 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
11166   match(Set cr (CmpN src zero));
11167 
11168   format %{ "testl   $src, $src\t# compressed ptr" %}
11169   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11170   ins_pipe(ialu_cr_reg_imm);
11171 %}
11172 
// Null check of a compressed (narrow) oop held in memory.
// The matched tree is (CmpN (LoadN mem) zero), so the flags must describe
// "mem == 0".  A 32-bit cmpl of the memory operand against the immediate 0
// sets ZF exactly when the stored narrow oop is zero; comparing against
// 0xFFFFFFFF would instead report equality for an all-ones value.
11173 instruct testN_reg_mem(rFlagsReg cr, memory mem, immN0 zero)
11174 %{
11175   match(Set cr (CmpN (LoadN mem) zero));
11176 
11177   ins_cost(500); // XXX
11178   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
11179   ins_encode %{
    // Rebuild the effective address from the memory operand's components.
11180     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
11181     __ cmpl(addr, (int32_t)0);
11182   %}
11183   ins_pipe(ialu_cr_reg_mem);
11184 %}
11185 
11186 // Yanked all unsigned pointer compare operations.
11187 // Pointer compares are done with CmpP which is already unsigned.
11188 
// Signed 64-bit compare of two long registers (cmpq, 3B /r with the wide
// REX prefix).
11189 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11190 %{
11191   match(Set cr (CmpL op1 op2));
11192 
11193   format %{ "cmpq    $op1, $op2" %}
11194   opcode(0x3B);  /* Opcode 3B /r */
11195   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11196   ins_pipe(ialu_cr_reg_reg);
11197 %}
11198 
// Signed 64-bit compare of a long register against an immediate (81 /7).
// The constant operand is restricted to immL32, since the instruction
// encodes at most a 32-bit immediate.
11199 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11200 %{
11201   match(Set cr (CmpL op1 op2));
11202 
11203   format %{ "cmpq    $op1, $op2" %}
11204   opcode(0x81, 0x07); /* Opcode 81 /7 */
11205   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11206   ins_pipe(ialu_cr_reg_imm);
11207 %}
11208 
// Signed 64-bit compare of a long register against a long loaded from
// memory; the LoadL is folded into the cmpq memory operand.
11209 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11210 %{
11211   match(Set cr (CmpL op1 (LoadL op2)));
11212 
11213   ins_cost(500); // XXX
11214   format %{ "cmpq    $op1, $op2" %}
11215   opcode(0x3B); /* Opcode 3B /r */
11216   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11217   ins_pipe(ialu_cr_reg_mem);
11218 %}
11219 
// Compare a long register against zero by testing the register with itself
// (testq src, src; opcode 0x85 with the wide REX prefix).
11220 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11221 %{
11222   match(Set cr (CmpL src zero));
11223 
11224   format %{ "testq   $src, $src" %}
11225   opcode(0x85);
11226   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11227   ins_pipe(ialu_cr_reg_imm);
11228 %}
11229 
// Fold (src & con) compared against zero into a single testq of the long
// register with an immediate (F7 /0 followed by a 32-bit constant); the
// constant operand is restricted to immL32.
11230 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11231 %{
11232   match(Set cr (CmpL (AndL src con) zero));
11233 
11234   format %{ "testq   $src, $con\t# long" %}
11235   opcode(0xF7, 0x00);
11236   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11237   ins_pipe(ialu_cr_reg_imm);
11238 %}
11239 
// Fold (src & LoadL mem) compared against zero into a single testq with a
// memory operand (opcode 0x85 with the wide REX prefix).
11240 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11241 %{
11242   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11243 
11244   format %{ "testq   $src, $mem" %}
11245   opcode(0x85);
11246   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11247   ins_pipe(ialu_cr_reg_mem);
11248 %}
11249 
11250 // Manifest a CmpL result in an integer register.  Very painful.
11251 // This is the test to avoid.
// Per the format string the sequence is: compare, preload dst with -1,
// branch out if less, otherwise set dst from the not-equal condition and
// zero-extend it.  The flags register is clobbered.
11252 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11253 %{
11254   match(Set dst (CmpL3 src1 src2));
11255   effect(KILL flags);
11256 
11257   ins_cost(275); // XXX
11258   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
11259             "movl    $dst, -1\n\t"
11260             "jl,s    done\n\t"
11261             "setne   $dst\n\t"
11262             "movzbl  $dst, $dst\n\t"
11263     "done:" %}
11264   ins_encode(cmpl3_flag(src1, src2, dst));
11265   ins_pipe(pipe_slow);
11266 %}
11267 
11268 //----------Max and Min--------------------------------------------------------
11269 // Min Instructions
11270 
// Helper for minI_rReg: conditional move of src into dst when the flags say
// "greater" (0F 4F).  It has no match rule of its own and is instantiated
// from the expand block of minI_rReg.
11271 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
11272 %{
11273   effect(USE_DEF dst, USE src, USE cr);
11274 
11275   format %{ "cmovlgt $dst, $src\t# min" %}
11276   opcode(0x0F, 0x4F);
11277   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11278   ins_pipe(pipe_cmov_reg);
11279 %}
11280 
11281 
// Integer minimum, computed in place in dst.  Expands into a compare of
// dst with src followed by a conditional move: when dst is greater than
// src, src replaces dst.
11282 instruct minI_rReg(rRegI dst, rRegI src)
11283 %{
11284   match(Set dst (MinI dst src));
11285 
11286   ins_cost(200);
11287   expand %{
    // Temporary flags register linking the compare to the conditional move.
11288     rFlagsReg cr;
11289     compI_rReg(cr, dst, src);
11290     cmovI_reg_g(dst, src, cr);
11291   %}
11292 %}
11293 
// Helper for maxI_rReg: conditional move of src into dst when the flags say
// "less" (0F 4C).  It has no match rule of its own and is instantiated from
// the expand block of maxI_rReg.
11294 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
11295 %{
11296   effect(USE_DEF dst, USE src, USE cr);
11297 
11298   format %{ "cmovllt $dst, $src\t# max" %}
11299   opcode(0x0F, 0x4C);
11300   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11301   ins_pipe(pipe_cmov_reg);
11302 %}
11303 
11304 
// Integer maximum, computed in place in dst.  Expands into a compare of
// dst with src followed by a conditional move: when dst is less than src,
// src replaces dst.
11305 instruct maxI_rReg(rRegI dst, rRegI src)
11306 %{
11307   match(Set dst (MaxI dst src));
11308 
11309   ins_cost(200);
11310   expand %{
    // Temporary flags register linking the compare to the conditional move.
11311     rFlagsReg cr;
11312     compI_rReg(cr, dst, src);
11313     cmovI_reg_l(dst, src, cr);
11314   %}
11315 %}
11316 
11317 // ============================================================================
11318 // Branch Instructions
11319 
11320 // Jump Direct - Label defines a relative address from JMP+1
// Long form of the unconditional jump: opcode 0xE9 followed by the label
// displacement, declared as 5 bytes in total.  jmpDir_short is the 2-byte
// replacement used when the target is close enough.
11321 instruct jmpDir(label labl)
11322 %{
11323   match(Goto);
11324   effect(USE labl);
11325 
11326   ins_cost(300);
11327   format %{ "jmp     $labl" %}
11328   size(5);
11329   opcode(0xE9);
11330   ins_encode(OpcP, Lbl(labl));
11331   ins_pipe(pipe_jmp);
11332   ins_pc_relative(1);
11333 %}
11334 
11335 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Long form of the conditional branch on signed flags: base opcode 0F 80
// combined with the condition by the Jcc encoding, declared as 6 bytes.
// jmpCon_short is the 2-byte replacement.
11336 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
11337 %{
11338   match(If cop cr);
11339   effect(USE labl);
11340 
11341   ins_cost(300);
11342   format %{ "j$cop     $labl" %}
11343   size(6);
11344   opcode(0x0F, 0x80);
11345   ins_encode(Jcc(cop, labl));
11346   ins_pipe(pipe_jcc);
11347   ins_pc_relative(1);
11348 %}
11349 
11350 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same encoding as jmpCon, but matched against the CountedLoopEnd node
// (the back-branch of a counted loop) with signed flags.
11351 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
11352 %{
11353   match(CountedLoopEnd cop cr);
11354   effect(USE labl);
11355 
11356   ins_cost(300);
11357   format %{ "j$cop     $labl\t# loop end" %}
11358   size(6);
11359   opcode(0x0F, 0x80);
11360   ins_encode(Jcc(cop, labl));
11361   ins_pipe(pipe_jcc);
11362   ins_pc_relative(1);
11363 %}
11364 
11365 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// CountedLoopEnd variant for unsigned comparisons: takes a cmpOpU condition
// and an rFlagsRegU, otherwise encoded exactly like jmpLoopEnd.
11366 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl)
11367 %{
11368   match(CountedLoopEnd cop cmp);
11369   effect(USE labl);
11370 
11371   ins_cost(300);
11372   format %{ "j$cop,u   $labl\t# loop end" %}
11373   size(6);
11374   opcode(0x0F, 0x80);
11375   ins_encode(Jcc(cop, labl));
11376   ins_pipe(pipe_jcc);
11377   ins_pc_relative(1);
11378 %}
11379 
11380 // Jump Direct Conditional - using unsigned comparison
// Takes a cmpOpU condition and an rFlagsRegU; otherwise encoded exactly
// like jmpCon (0F 80 base opcode, 6 bytes).
11381 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
11382 %{
11383   match(If cop cmp);
11384   effect(USE labl);
11385 
11386   ins_cost(300);
11387   format %{ "j$cop,u   $labl" %}
11388   size(6);
11389   opcode(0x0F, 0x80);
11390   ins_encode(Jcc(cop, labl));
11391   ins_pipe(pipe_jcc);
11392   ins_pc_relative(1);
11393 %}
11394 
11395 // ============================================================================
11396 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
11397 // superklass array for an instance of the superklass.  Set a hidden
11398 // internal cache on a hit (cache is checked with exposed code in
11399 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
11400 // encoding ALSO sets flags.
11401 
// Slow-path subtype check that materializes its answer in a register.
// Operands are pinned: result in rdi, sub in rsi, super in rax, with rcx as
// the scan counter.  rcx and the flags are clobbered.  opcode(0x1) tells the
// shared enc_PartialSubtypeCheck encoding to zero rdi on a hit.
// The format text names arrayOopDesc (not "arrayOopDex") and marks the
// trailing "Hit: rdi zero" remark as a comment, matching the other lines.
11402 instruct partialSubtypeCheck(rdi_RegP result,
11403                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11404                              rFlagsReg cr)
11405 %{
11406   match(Set result (PartialSubtypeCheck sub super));
11407   effect(KILL rcx, KILL cr);
11408 
11409   ins_cost(1100);  // slightly larger than the next version
11410   format %{ "cmpq    rax, rsi\n\t"
11411             "jeq,s   hit\n\t"
11412             "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
11413             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
11414             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
11415             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
11416             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
11417             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
11418     "hit:\n\t"
11419             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
11420     "miss:\t" %}
11421 
11422   opcode(0x1); // Force a XOR of RDI
11423   ins_encode(enc_PartialSubtypeCheck());
11424   ins_pipe(pipe_slow);
11425 %}
11426 
// Slow-path subtype check whose result is consumed only through the flags:
// matches the check compared against null and defines cr directly, so rdi
// is merely clobbered (KILL result) and needs no final XOR (opcode(0x0)).
// Disabled when compressed oops are in use, per the predicate's remark.
// The format text names arrayOopDesc (not "arrayOopDex").
11427 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
11428                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11429                                      immP0 zero,
11430                                      rdi_RegP result)
11431 %{
11432   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
11433   predicate(!UseCompressedOops); // decoding oop kills condition codes
11434   effect(KILL rcx, KILL result);
11435 
11436   ins_cost(1000);
11437   format %{ "cmpq    rax, rsi\n\t"
11438             "jeq,s   miss\t# Actually a hit; we are done.\n\t"
11439             "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
11440             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
11441             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
11442             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
11443             "jne,s   miss\t\t# Missed: flags nz\n\t"
11444             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
11445     "miss:\t" %}
11446 
11447   opcode(0x0); // No need to XOR RDI
11448   ins_encode(enc_PartialSubtypeCheck());
11449   ins_pipe(pipe_slow);
11450 %}
11451 
11452 // ============================================================================
11453 // Branch Instructions -- short offset versions
11454 //
11455 // These instructions are used to replace jumps of a long offset (the default
11456 // match) with jumps of a shorter offset.  These instructions are all tagged
11457 // with the ins_short_branch attribute, which causes the ADLC to suppress the
11458 // match rules in general matching.  Instead, the ADLC generates a conversion
11459 // method in the MachNode which can be used to do in-place replacement of the
11460 // long variant with the shorter variant.  The compiler will determine if a
11461 // branch can be taken by the is_short_branch_offset() predicate in the machine
11462 // specific code section of the file.
11463 
11464 // Jump Direct - Label defines a relative address from JMP+1
11465 instruct jmpDir_short(label labl)
11466 %{
        // Short-offset unconditional jump for the ideal Goto node.  Tagged
        // ins_short_branch, so (per the section comment above) it is used as
        // an in-place replacement for the long variant, not matched directly.
11467   match(Goto);
11468   effect(USE labl);
11469 
11470   ins_cost(300);
11471   format %{ "jmp,s   $labl" %}
11472   size(2);
11473   opcode(0xEB);
11474   ins_encode(OpcP, LblShort(labl));
11475   ins_pipe(pipe_jmp);
11476   ins_pc_relative(1);
11477   ins_short_branch(1);
11478 %}
11479 
11480 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11481 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
11482 %{
        // Short-offset conditional jump for the ideal If node: branches to
        // labl on condition cop evaluated against the flags in cr.
11483   match(If cop cr);
11484   effect(USE labl);
11485 
11486   ins_cost(300);
11487   format %{ "j$cop,s   $labl" %}
11488   size(2);
        // 0x70 is the opcode given to the encoding; JccShort receives the
        // condition operand and the label.
11489   opcode(0x70);
11490   ins_encode(JccShort(cop, labl));
11491   ins_pipe(pipe_jcc);
11492   ins_pc_relative(1);
11493   ins_short_branch(1);
11494 %}
11495 
11496 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11497 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl)
11498 %{
        // Short-offset conditional jump for the ideal CountedLoopEnd node.
        // Apart from the matched node, the clauses are the same as in
        // jmpCon_short.
11499   match(CountedLoopEnd cop cr);
11500   effect(USE labl);
11501 
11502   ins_cost(300);
11503   format %{ "j$cop,s   $labl" %}
11504   size(2);
11505   opcode(0x70);
11506   ins_encode(JccShort(cop, labl));
11507   ins_pipe(pipe_jcc);
11508   ins_pc_relative(1);
11509   ins_short_branch(1);
11510 %}
11511 
11512 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11513 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
11514 %{
        // Short-offset conditional jump for CountedLoopEnd, using the cmpOpU
        // condition operand and the rFlagsRegU flags operand (the "U"
        // variants; the format string tags the branch with ",us").
11515   match(CountedLoopEnd cop cmp);
11516   effect(USE labl);
11517 
11518   ins_cost(300);
11519   format %{ "j$cop,us  $labl" %}
11520   size(2);
11521   opcode(0x70);
11522   ins_encode(JccShort(cop, labl));
11523   ins_pipe(pipe_jcc);
11524   ins_pc_relative(1);
11525   ins_short_branch(1);
11526 %}
11527 
11528 // Jump Direct Conditional - using unsigned comparison
11529 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
11530 %{
        // Short-offset conditional jump for the ideal If node when the
        // condition is an unsigned comparison (cmpOpU / rFlagsRegU operands).
11531   match(If cop cmp);
11532   effect(USE labl);
11533 
11534   ins_cost(300);
11535   format %{ "j$cop,us  $labl" %}
11536   size(2);
11537   opcode(0x70);
11538   ins_encode(JccShort(cop, labl));
11539   ins_pipe(pipe_jcc);
11540   ins_pc_relative(1);
11541   ins_short_branch(1);
11542 %}
11543 
11544 // ============================================================================
11545 // inlined locking and unlocking
11546 
11547 instruct cmpFastLock(rFlagsReg cr,
11548                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
11549 %{
        // Inlined lock: matches the ideal FastLock node on (object, box) and
        // delivers its outcome in the condition codes cr.
        // tmp and scr are TEMPs handed to the Fast_Lock encoding; tmp uses the
        // rax_RegI operand class (presumably pinning it to rax -- confirm
        // against the operand definition).
11550   match(Set cr (FastLock object box));
11551   effect(TEMP tmp, TEMP scr);
11552 
11553   ins_cost(300);
11554   format %{ "fastlock $object,$box,$tmp,$scr" %}
11555   ins_encode(Fast_Lock(object, box, tmp, scr));
11556   ins_pipe(pipe_slow);
11557   ins_pc_relative(1);
11558 %}
11559 
11560 instruct cmpFastUnlock(rFlagsReg cr,
11561                        rRegP object, rax_RegP box, rRegP tmp)
11562 %{
        // Inlined unlock: matches the ideal FastUnlock node on (object, box)
        // and delivers its outcome in the condition codes cr.
        // box uses the rax_RegP operand class; tmp is a TEMP handed to the
        // Fast_Unlock encoding.
11563   match(Set cr (FastUnlock object box));
11564   effect(TEMP tmp);
11565 
11566   ins_cost(300);
11567   format %{ "fastunlock $object, $box, $tmp" %}
11568   ins_encode(Fast_Unlock(object, box, tmp));
11569   ins_pipe(pipe_slow);
11570   ins_pc_relative(1);
11571 %}
11572 
11573 
11574 // ============================================================================
11575 // Safepoint Instructions
11576 instruct safePoint_poll(rFlagsReg cr)
11577 %{
        // Safepoint poll for the ideal SafePoint node.  The format describes
        // it as a testl of rax against a rip-relative poll-page address; the
        // condition codes are clobbered (KILL cr) and no value is produced.
11578   match(SafePoint);
11579   effect(KILL cr);
11580 
11581   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
11582             "# Safepoint: poll for GC" %}
11583   size(6); // Opcode + ModRM + Disp32 == 6 bytes
11584   ins_cost(125);
11585   ins_encode(enc_safepoint_poll);
11586   ins_pipe(ialu_reg_mem);
11587 %}
11588 
11589 // ============================================================================
11590 // Procedure Call/Return Instructions
11591 // Call Java Static Instruction
11592 // Note: If this code changes, the corresponding ret_addr_offset() and
11593 //       compute_padding() functions will have to be adjusted.
11594 instruct CallStaticJavaDirect(method meth)
11595 %{
        // Direct call for the ideal CallStaticJava node.  Encoded by
        // Java_Static_Call(meth) followed by call_epilog, and declared with
        // ins_alignment(4).
11596   match(CallStaticJava);
11597   effect(USE meth);
11598 
11599   ins_cost(300);
11600   format %{ "call,static " %}
11601   opcode(0xE8); /* E8 cd */
11602   ins_encode(Java_Static_Call(meth), call_epilog);
11603   ins_pipe(pipe_slow);
11604   ins_pc_relative(1);
11605   ins_alignment(4);
11606 %}
11607 
11608 // Call Java Dynamic Instruction
11609 // Note: If this code changes, the corresponding ret_addr_offset() and
11610 //       compute_padding() functions will have to be adjusted.
11611 instruct CallDynamicJavaDirect(method meth)
11612 %{
        // Call for the ideal CallDynamicJava node.  Per the format string the
        // sequence first loads Universe::non_oop_word() into rax and then
        // performs the call; both come from Java_Dynamic_Call(meth), followed
        // by call_epilog.
11613   match(CallDynamicJava);
11614   effect(USE meth);
11615 
11616   ins_cost(300);
11617   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
11618             "call,dynamic " %}
11619   opcode(0xE8); /* E8 cd */
11620   ins_encode(Java_Dynamic_Call(meth), call_epilog);
11621   ins_pipe(pipe_slow);
11622   ins_pc_relative(1);
11623   ins_alignment(4);
11624 %}
11625 
11626 // Call Runtime Instruction
11627 instruct CallRuntimeDirect(method meth)
11628 %{
        // Call for the ideal CallRuntime node, encoded by Java_To_Runtime(meth).
        // Unlike the Java call instructs, no call_epilog encoding and no
        // ins_alignment are specified here.
11629   match(CallRuntime);
11630   effect(USE meth);
11631 
11632   ins_cost(300);
11633   format %{ "call,runtime " %}
11634   opcode(0xE8); /* E8 cd */
11635   ins_encode(Java_To_Runtime(meth));
11636   ins_pipe(pipe_slow);
11637   ins_pc_relative(1);
11638 %}
11639 
11640 // Call runtime without safepoint
11641 instruct CallLeafDirect(method meth)
11642 %{
        // Call for the ideal CallLeaf node.  Uses the same Java_To_Runtime
        // encoding as CallRuntimeDirect; only the matched node and the format
        // text differ.
11643   match(CallLeaf);
11644   effect(USE meth);
11645 
11646   ins_cost(300);
11647   format %{ "call_leaf,runtime " %}
11648   opcode(0xE8); /* E8 cd */
11649   ins_encode(Java_To_Runtime(meth));
11650   ins_pipe(pipe_slow);
11651   ins_pc_relative(1);
11652 %}
11653 
11654 // Call runtime without safepoint
11655 instruct CallLeafNoFPDirect(method meth)
11656 %{
        // Call for the ideal CallLeafNoFP node.  Uses the same Java_To_Runtime
        // encoding as CallLeafDirect; only the matched node and the format
        // text differ.
11657   match(CallLeafNoFP);
11658   effect(USE meth);
11659 
11660   ins_cost(300);
11661   format %{ "call_leaf_nofp,runtime " %}
11662   opcode(0xE8); /* E8 cd */
11663   ins_encode(Java_To_Runtime(meth));
11664   ins_pipe(pipe_slow);
11665   ins_pc_relative(1);
11666 %}
11667 
11668 // Return Instruction
11669 // Remove the return address & jump to it.
11670 // Notice: We always emit a nop after a ret to make sure there is room
11671 // for safepoint patching
      // NOTE(review): the encoding below lists only OpcP with opcode 0xC3, so
      // this instruct does not itself emit a nop -- confirm whether the notice
      // above is stale or the padding is produced elsewhere.
11672 instruct Ret()
11673 %{
        // Matches the ideal Return node; takes no operands.
11674   match(Return);
11675 
11676   format %{ "ret" %}
11677   opcode(0xC3);
11678   ins_encode(OpcP);
11679   ins_pipe(pipe_jmp);
11680 %}
11681 
11682 // Tail Call; Jump from runtime stub to Java code.
11683 // Also known as an 'interprocedural jump'.
11684 // Target of jump will eventually return to caller.
11685 // TailJump below removes the return address.
11686 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
11687 %{
        // Indirect jump for the ideal TailCall node.  jump_target comes from
        // the no_rbp_RegP operand class and method_oop from rbx_RegP; the
        // format notes that rbx holds the method oop across the jump.
11688   match(TailCall jump_target method_oop);
11689 
11690   ins_cost(300);
11691   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
11692   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // REX prefix for the target register, then the opcode, then the
        // register/opcode-extension byte.
11693   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
11694   ins_pipe(pipe_jmp);
11695 %}
11696 
11697 // Tail Jump; remove the return address; jump to target.
11698 // TailCall above leaves the return address around.
11699 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
11700 %{
        // Indirect jump for the ideal TailJump node.  The encoding first emits
        // byte 0x5a (popq rdx) to discard the return address, then the same
        // FF /4 indirect jump sequence used by TailCalljmpInd.
        // ex_oop uses the rax_RegP operand class and is not referenced by the
        // encoding.
11701   match(TailJump jump_target ex_oop);
11702 
11703   ins_cost(300);
11704   format %{ "popq    rdx\t# pop return address\n\t"
11705             "jmp     $jump_target" %}
11706   opcode(0xFF, 0x4); /* Opcode FF /4 */
11707   ins_encode(Opcode(0x5a), // popq rdx
11708              REX_reg(jump_target), OpcP, reg_opc(jump_target));
11709   ins_pipe(pipe_jmp);
11710 %}
11711 
11712 // Create exception oop: created by stack-crawling runtime code.
11713 // Created exception is now available to this handler, and is setup
11714 // just prior to jumping to this handler.  No code emitted.
11715 instruct CreateException(rax_RegP ex_oop)
11716 %{
        // Matches the ideal CreateEx node and defines ex_oop (rax_RegP operand
        // class).  It is a pure placeholder: size is 0 and the encoding list
        // is empty, so no bytes are emitted.
11717   match(Set ex_oop (CreateEx));
11718 
11719   size(0);
11720   // use the following format syntax
11721   format %{ "# exception oop is in rax; no code emitted" %}
11722   ins_encode();
11723   ins_pipe(empty);
11724 %}
11725 
11726 // Rethrow exception:
11727 // The exception oop will come in the first argument position.
11728 // Then JUMP (not call) to the rethrow stub code.
11729 instruct RethrowException()
11730 %{
        // Matches the ideal Rethrow node; takes no operands.  The format
        // describes the emitted code as a jump to the rethrow stub, produced
        // by the enc_rethrow encoding.
11731   match(Rethrow);
11732 
11733   // use the following format syntax
11734   format %{ "jmp     rethrow_stub" %}
11735   ins_encode(enc_rethrow);
11736   ins_pipe(pipe_jmp);
11737 %}
11738 
11739 
11740 //----------PEEPHOLE RULES-----------------------------------------------------
11741 // These must follow all instruction definitions as they use the names
11742 // defined in the instructions definitions.
11743 //
11744 // peepmatch ( root_instr_name [preceding_instruction]* );
11745 //
11746 // peepconstraint %{
11747 // (instruction_number.operand_name relational_op instruction_number.operand_name
11748 //  [, ...] );
11749 // // instruction numbers are zero-based using left to right order in peepmatch
11750 //
11751 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
11752 // // provide an instruction_number.operand_name for each operand that appears
11753 // // in the replacement instruction's match rule
11754 //
11755 // ---------VM FLAGS---------------------------------------------------------
11756 //
11757 // All peephole optimizations can be turned off using -XX:-OptoPeephole
11758 //
11759 // Each peephole rule is given an identifying number starting with zero and
11760 // increasing by one in the order seen by the parser.  An individual peephole
11761 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
11762 // on the command-line.
11763 //
11764 // ---------CURRENT LIMITATIONS----------------------------------------------
11765 //
11766 // Only match adjacent instructions in same basic block
11767 // Only equality constraints
11768 // Only constraints between operands, not (0.dest_reg == RAX_enc)
11769 // Only one replacement instruction
11770 //
11771 // ---------EXAMPLE----------------------------------------------------------
11772 //
11773 // // pertinent parts of existing instructions in architecture description
11774 // instruct movI(rRegI dst, rRegI src)
11775 // %{
11776 //   match(Set dst (CopyI src));
11777 // %}
11778 //
11779 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
11780 // %{
11781 //   match(Set dst (AddI dst src));
11782 //   effect(KILL cr);
11783 // %}
11784 //
11785 // // Change (inc mov) to lea
11786 // peephole %{
11787 //   // increment preceded by register-register move
11788 //   peepmatch ( incI_rReg movI );
11789 //   // require that the destination register of the increment
11790 //   // match the destination register of the move
11791 //   peepconstraint ( 0.dst == 1.dst );
11792 //   // construct a replacement instruction that sets
11793 //   // the destination to ( move's source register + one )
11794 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
11795 // %}
11796 //
11797 
11798 // Implementation no longer uses movX instructions since
11799 // machine-independent system no longer uses CopyX nodes.
11800 //
11801 // peephole
11802 // %{
11803 //   peepmatch (incI_rReg movI);
11804 //   peepconstraint (0.dst == 1.dst);
11805 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11806 // %}
11807 
11808 // peephole
11809 // %{
11810 //   peepmatch (decI_rReg movI);
11811 //   peepconstraint (0.dst == 1.dst);
11812 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11813 // %}
11814 
11815 // peephole
11816 // %{
11817 //   peepmatch (addI_rReg_imm movI);
11818 //   peepconstraint (0.dst == 1.dst);
11819 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11820 // %}
11821 
11822 // peephole
11823 // %{
11824 //   peepmatch (incL_rReg movL);
11825 //   peepconstraint (0.dst == 1.dst);
11826 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11827 // %}
11828 
11829 // peephole
11830 // %{
11831 //   peepmatch (decL_rReg movL);
11832 //   peepconstraint (0.dst == 1.dst);
11833 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11834 // %}
11835 
11836 // peephole
11837 // %{
11838 //   peepmatch (addL_rReg_imm movL);
11839 //   peepconstraint (0.dst == 1.dst);
11840 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11841 // %}
11842 
11843 // peephole
11844 // %{
11845 //   peepmatch (addP_rReg_imm movP);
11846 //   peepconstraint (0.dst == 1.dst);
11847 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
11848 // %}
11849 
11850 // // Change load of spilled value to only a spill
11851 // instruct storeI(memory mem, rRegI src)
11852 // %{
11853 //   match(Set mem (StoreI mem src));
11854 // %}
11855 //
11856 // instruct loadI(rRegI dst, memory mem)
11857 // %{
11858 //   match(Set dst (LoadI mem));
11859 // %}
11860 //
11861 
11862 peephole
11863 %{
        // Int load that re-reads a value just stored.  Instruction 0 is the
        // root loadI and instruction 1 is the storeI preceding it.
11864   peepmatch (loadI storeI);
        // Require that the store's source register is the load's destination
        // and that both access the same memory operand.
11865   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Replace the pair with the store alone.
11866   peepreplace (storeI(1.mem 1.mem 1.src));
11867 %}
11868 
11869 peephole
11870 %{
        // Long counterpart of the loadI/storeI rule.  Instruction 0 is the
        // root loadL and instruction 1 is the storeL preceding it.
11871   peepmatch (loadL storeL);
        // Require that the store's source register is the load's destination
        // and that both access the same memory operand.
11872   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Replace the pair with the store alone.
11873   peepreplace (storeL(1.mem 1.mem 1.src));
11874 %}
11875 
11876 //----------SMARTSPILL RULES---------------------------------------------------
11877 // These must follow all instruction definitions as they use the names
11878 // defined in the instructions definitions.