1 /*
   2  * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20  * CA 95054 USA or visit www.sun.com if you need additional information or
  21  * have any questions.
  22  *
  23  */
  24 
  25 #include "incls/_precompiled.incl"
  26 #include "incls/_assembler_x86.cpp.incl"
  27 
  28 // Implementation of AddressLiteral
  29 
  30 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  31   _is_lval = false;
  32   _target = target;
  33   switch (rtype) {
  34   case relocInfo::oop_type:
  35     // Oops are a special case. Normally they would be their own section
  36     // but in cases like icBuffer they are literals in the code stream that
  37     // we don't have a section for. We use none so that we get a literal address
  38     // which is always patchable.
  39     break;
  40   case relocInfo::external_word_type:
  41     _rspec = external_word_Relocation::spec(target);
  42     break;
  43   case relocInfo::internal_word_type:
  44     _rspec = internal_word_Relocation::spec(target);
  45     break;
  46   case relocInfo::opt_virtual_call_type:
  47     _rspec = opt_virtual_call_Relocation::spec();
  48     break;
  49   case relocInfo::static_call_type:
  50     _rspec = static_call_Relocation::spec();
  51     break;
  52   case relocInfo::runtime_call_type:
  53     _rspec = runtime_call_Relocation::spec();
  54     break;
  55   case relocInfo::poll_type:
  56   case relocInfo::poll_return_type:
  57     _rspec = Relocation::spec_simple(rtype);
  58     break;
  59   case relocInfo::none:
  60     break;
  61   default:
  62     ShouldNotReachHere();
  63     break;
  64   }
  65 }
  66 
  67 // Implementation of Address
  68 
  69 #ifdef _LP64
  70 
  71 Address Address::make_array(ArrayAddress adr) {
  72   // Not implementable on 64bit machines
  73   // Should have been handled higher up the call chain.
  74   ShouldNotReachHere();
  75   return Address();
  76 }
  77 
  78 // exceedingly dangerous constructor
  79 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  80   _base  = noreg;
  81   _index = noreg;
  82   _scale = no_scale;
  83   _disp  = disp;
  84   switch (rtype) {
  85     case relocInfo::external_word_type:
  86       _rspec = external_word_Relocation::spec(loc);
  87       break;
  88     case relocInfo::internal_word_type:
  89       _rspec = internal_word_Relocation::spec(loc);
  90       break;
  91     case relocInfo::runtime_call_type:
  92       // HMM
  93       _rspec = runtime_call_Relocation::spec();
  94       break;
  95     case relocInfo::poll_type:
  96     case relocInfo::poll_return_type:
  97       _rspec = Relocation::spec_simple(rtype);
  98       break;
  99     case relocInfo::none:
 100       break;
 101     default:
 102       ShouldNotReachHere();
 103   }
 104 }
 105 #else // LP64
 106 
 107 Address Address::make_array(ArrayAddress adr) {
 108   AddressLiteral base = adr.base();
 109   Address index = adr.index();
 110   assert(index._disp == 0, "must not have disp"); // maybe it can?
 111   Address array(index._base, index._index, index._scale, (intptr_t) base.target());
 112   array._rspec = base._rspec;
 113   return array;
 114 }
 115 
 116 // exceedingly dangerous constructor
 117 Address::Address(address loc, RelocationHolder spec) {
 118   _base  = noreg;
 119   _index = noreg;
 120   _scale = no_scale;
 121   _disp  = (intptr_t) loc;
 122   _rspec = spec;
 123 }
 124 
 125 #endif // _LP64
 126 
 127 
 128 
 129 // Convert the raw encoding form into the form expected by the constructor for
 130 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 131 // that to noreg for the Address constructor.
 132 Address Address::make_raw(int base, int index, int scale, int disp) {
 133   bool valid_index = index != rsp->encoding();
 134   if (valid_index) {
 135     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 136     return madr;
 137   } else {
 138     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 139     return madr;
 140   }
 141 }
 142 
 143 // Implementation of Assembler
 144 
 145 int AbstractAssembler::code_fill_byte() {
 146   return (u_char)'\xF4'; // hlt
 147 }
 148 
 149 // make this go away someday
 150 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
 151   if (rtype == relocInfo::none)
 152         emit_long(data);
 153   else  emit_data(data, Relocation::spec_simple(rtype), format);
 154 }
 155 
 156 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
 157   assert(imm_operand == 0, "default format must be immediate in this file");
 158   assert(inst_mark() != NULL, "must be inside InstructionMark");
 159   if (rspec.type() !=  relocInfo::none) {
 160     #ifdef ASSERT
 161       check_relocation(rspec, format);
 162     #endif
 163     // Do not use AbstractAssembler::relocate, which is not intended for
 164     // embedded words.  Instead, relocate to the enclosing instruction.
 165 
 166     // hack. call32 is too wide for mask so use disp32
 167     if (format == call32_operand)
 168       code_section()->relocate(inst_mark(), rspec, disp32_operand);
 169     else
 170       code_section()->relocate(inst_mark(), rspec, format);
 171   }
 172   emit_long(data);
 173 }
 174 
 175 static int encode(Register r) {
 176   int enc = r->encoding();
 177   if (enc >= 8) {
 178     enc -= 8;
 179   }
 180   return enc;
 181 }
 182 
 183 static int encode(XMMRegister r) {
 184   int enc = r->encoding();
 185   if (enc >= 8) {
 186     enc -= 8;
 187   }
 188   return enc;
 189 }
 190 
 191 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
 192   assert(dst->has_byte_register(), "must have byte register");
 193   assert(isByte(op1) && isByte(op2), "wrong opcode");
 194   assert(isByte(imm8), "not a byte");
 195   assert((op1 & 0x01) == 0, "should be 8bit operation");
 196   emit_byte(op1);
 197   emit_byte(op2 | encode(dst));
 198   emit_byte(imm8);
 199 }
 200 
 201 
 202 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
 203   assert(isByte(op1) && isByte(op2), "wrong opcode");
 204   assert((op1 & 0x01) == 1, "should be 32bit operation");
 205   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 206   if (is8bit(imm32)) {
 207     emit_byte(op1 | 0x02); // set sign bit
 208     emit_byte(op2 | encode(dst));
 209     emit_byte(imm32 & 0xFF);
 210   } else {
 211     emit_byte(op1);
 212     emit_byte(op2 | encode(dst));
 213     emit_long(imm32);
 214   }
 215 }
 216 
 217 // immediate-to-memory forms
 218 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
 219   assert((op1 & 0x01) == 1, "should be 32bit operation");
 220   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 221   if (is8bit(imm32)) {
 222     emit_byte(op1 | 0x02); // set sign bit
 223     emit_operand(rm, adr, 1);
 224     emit_byte(imm32 & 0xFF);
 225   } else {
 226     emit_byte(op1);
 227     emit_operand(rm, adr, 4);
 228     emit_long(imm32);
 229   }
 230 }
 231 
 232 void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
 233   LP64_ONLY(ShouldNotReachHere());
 234   assert(isByte(op1) && isByte(op2), "wrong opcode");
 235   assert((op1 & 0x01) == 1, "should be 32bit operation");
 236   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 237   InstructionMark im(this);
 238   emit_byte(op1);
 239   emit_byte(op2 | encode(dst));
 240   emit_data((intptr_t)obj, relocInfo::oop_type, 0);
 241 }
 242 
 243 
 244 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 245   assert(isByte(op1) && isByte(op2), "wrong opcode");
 246   emit_byte(op1);
 247   emit_byte(op2 | encode(dst) << 3 | encode(src));
 248 }
 249 
 250 
// Emit the addressing bytes of a memory operand: the ModRM byte, an
// optional SIB byte, and an optional 8- or 32-bit displacement.
//
//   reg    - goes into bits 3..5 of the ModRM byte (via encode(), so
//            encodings >= 8 are reduced by 8)
//   base, index, scale, disp
//          - components of the address; base and/or index may be invalid
//            to select the forms without them
//   rspec  - relocation for the displacement.  Whenever a relocation is
//            present the disp32 form is used and the displacement is
//            written through emit_data() so the relocation gets recorded
//   rip_relative_correction
//          - extra byte count added when computing the address of the next
//            instruction for the "[disp]" case with a relocation; it is
//            only used for the LP64 adjustment
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      // The displacement-free form is not used for rbp (and r13 on LP64);
      // those bases take the disp8/disp32 forms below instead.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      // rsp (and r12 on LP64) as base always gets the two-byte form with
      // the fixed second byte 0x24.
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 reg 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      // next_ip: current pc, plus the 4 displacement bytes about to be
      // written, plus the caller-supplied correction.
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
 368 
 369 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 370                              Address::ScaleFactor scale, int disp,
 371                              RelocationHolder const& rspec) {
 372   emit_operand((Register)reg, base, index, scale, disp, rspec);
 373 }
 374 
 375 // Secret local extension to Assembler::WhichOperand:
 376 #define end_pc_operand (_WhichOperand_limit)
 377 
 378 address Assembler::locate_operand(address inst, WhichOperand which) {
 379   // Decode the given instruction, and return the address of
 380   // an embedded 32-bit operand word.
 381 
 382   // If "which" is disp32_operand, selects the displacement portion
 383   // of an effective address specifier.
 384   // If "which" is imm64_operand, selects the trailing immediate constant.
 385   // If "which" is call32_operand, selects the displacement of a call or jump.
 386   // Caller is responsible for ensuring that there is such an operand,
 387   // and that it is 32/64 bits wide.
 388 
 389   // If "which" is end_pc_operand, find the end of the instruction.
 390 
 391   address ip = inst;
 392   bool is_64bit = false;
 393 
 394   debug_only(bool has_disp32 = false);
 395   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
 396 
 397   again_after_prefix:
 398   switch (0xFF & *ip++) {
 399 
 400   // These convenience macros generate groups of "case" labels for the switch.
 401 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
 402 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
 403              case (x)+4: case (x)+5: case (x)+6: case (x)+7
 404 #define REP16(x) REP8((x)+0): \
 405               case REP8((x)+8)
 406 
 407   case CS_segment:
 408   case SS_segment:
 409   case DS_segment:
 410   case ES_segment:
 411   case FS_segment:
 412   case GS_segment:
 413     // Seems dubious
 414     LP64_ONLY(assert(false, "shouldn't have that prefix"));
 415     assert(ip == inst+1, "only one prefix allowed");
 416     goto again_after_prefix;
 417 
 418   case 0x67:
 419   case REX:
 420   case REX_B:
 421   case REX_X:
 422   case REX_XB:
 423   case REX_R:
 424   case REX_RB:
 425   case REX_RX:
 426   case REX_RXB:
 427     NOT_LP64(assert(false, "64bit prefixes"));
 428     goto again_after_prefix;
 429 
 430   case REX_W:
 431   case REX_WB:
 432   case REX_WX:
 433   case REX_WXB:
 434   case REX_WR:
 435   case REX_WRB:
 436   case REX_WRX:
 437   case REX_WRXB:
 438     NOT_LP64(assert(false, "64bit prefixes"));
 439     is_64bit = true;
 440     goto again_after_prefix;
 441 
 442   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
 443   case 0x88: // movb a, r
 444   case 0x89: // movl a, r
 445   case 0x8A: // movb r, a
 446   case 0x8B: // movl r, a
 447   case 0x8F: // popl a
 448     debug_only(has_disp32 = true);
 449     break;
 450 
 451   case 0x68: // pushq #32
 452     if (which == end_pc_operand) {
 453       return ip + 4;
 454     }
 455     assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
 456     return ip;                  // not produced by emit_operand
 457 
 458   case 0x66: // movw ... (size prefix)
 459     again_after_size_prefix2:
 460     switch (0xFF & *ip++) {
 461     case REX:
 462     case REX_B:
 463     case REX_X:
 464     case REX_XB:
 465     case REX_R:
 466     case REX_RB:
 467     case REX_RX:
 468     case REX_RXB:
 469     case REX_W:
 470     case REX_WB:
 471     case REX_WX:
 472     case REX_WXB:
 473     case REX_WR:
 474     case REX_WRB:
 475     case REX_WRX:
 476     case REX_WRXB:
 477       NOT_LP64(assert(false, "64bit prefix found"));
 478       goto again_after_size_prefix2;
 479     case 0x8B: // movw r, a
 480     case 0x89: // movw a, r
 481       debug_only(has_disp32 = true);
 482       break;
 483     case 0xC7: // movw a, #16
 484       debug_only(has_disp32 = true);
 485       tail_size = 2;  // the imm16
 486       break;
 487     case 0x0F: // several SSE/SSE2 variants
 488       ip--;    // reparse the 0x0F
 489       goto again_after_prefix;
 490     default:
 491       ShouldNotReachHere();
 492     }
 493     break;
 494 
 495   case REP8(0xB8): // movl/q r, #32/#64(oop?)
 496     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
 497     // these asserts are somewhat nonsensical
 498 #ifndef _LP64
 499     assert(which == imm_operand || which == disp32_operand, "");
 500 #else
 501     assert((which == call32_operand || which == imm_operand) && is_64bit ||
 502            which == narrow_oop_operand && !is_64bit, "");
 503 #endif // _LP64
 504     return ip;
 505 
 506   case 0x69: // imul r, a, #32
 507   case 0xC7: // movl a, #32(oop?)
 508     tail_size = 4;
 509     debug_only(has_disp32 = true); // has both kinds of operands!
 510     break;
 511 
 512   case 0x0F: // movx..., etc.
 513     switch (0xFF & *ip++) {
 514     case 0x12: // movlps
 515     case 0x28: // movaps
 516     case 0x2E: // ucomiss
 517     case 0x2F: // comiss
 518     case 0x54: // andps
 519     case 0x55: // andnps
 520     case 0x56: // orps
 521     case 0x57: // xorps
 522     case 0x6E: // movd
 523     case 0x7E: // movd
 524     case 0xAE: // ldmxcsr   a
 525       // 64bit side says it these have both operands but that doesn't
 526       // appear to be true
 527       debug_only(has_disp32 = true);
 528       break;
 529 
 530     case 0xAD: // shrd r, a, %cl
 531     case 0xAF: // imul r, a
 532     case 0xBE: // movsbl r, a (movsxb)
 533     case 0xBF: // movswl r, a (movsxw)
 534     case 0xB6: // movzbl r, a (movzxb)
 535     case 0xB7: // movzwl r, a (movzxw)
 536     case REP16(0x40): // cmovl cc, r, a
 537     case 0xB0: // cmpxchgb
 538     case 0xB1: // cmpxchg
 539     case 0xC1: // xaddl
 540     case 0xC7: // cmpxchg8
 541     case REP16(0x90): // setcc a
 542       debug_only(has_disp32 = true);
 543       // fall out of the switch to decode the address
 544       break;
 545 
 546     case 0xAC: // shrd r, a, #8
 547       debug_only(has_disp32 = true);
 548       tail_size = 1;  // the imm8
 549       break;
 550 
 551     case REP16(0x80): // jcc rdisp32
 552       if (which == end_pc_operand)  return ip + 4;
 553       assert(which == call32_operand, "jcc has no disp32 or imm");
 554       return ip;
 555     default:
 556       ShouldNotReachHere();
 557     }
 558     break;
 559 
 560   case 0x81: // addl a, #32; addl r, #32
 561     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 562     // on 32bit in the case of cmpl, the imm might be an oop
 563     tail_size = 4;
 564     debug_only(has_disp32 = true); // has both kinds of operands!
 565     break;
 566 
 567   case 0x83: // addl a, #8; addl r, #8
 568     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 569     debug_only(has_disp32 = true); // has both kinds of operands!
 570     tail_size = 1;
 571     break;
 572 
 573   case 0x9B:
 574     switch (0xFF & *ip++) {
 575     case 0xD9: // fnstcw a
 576       debug_only(has_disp32 = true);
 577       break;
 578     default:
 579       ShouldNotReachHere();
 580     }
 581     break;
 582 
 583   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
 584   case REP4(0x10): // adc...
 585   case REP4(0x20): // and...
 586   case REP4(0x30): // xor...
 587   case REP4(0x08): // or...
 588   case REP4(0x18): // sbb...
 589   case REP4(0x28): // sub...
 590   case 0xF7: // mull a
 591   case 0x8D: // lea r, a
 592   case 0x87: // xchg r, a
 593   case REP4(0x38): // cmp...
 594   case 0x85: // test r, a
 595     debug_only(has_disp32 = true); // has both kinds of operands!
 596     break;
 597 
 598   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
 599   case 0xC6: // movb a, #8
 600   case 0x80: // cmpb a, #8
 601   case 0x6B: // imul r, a, #8
 602     debug_only(has_disp32 = true); // has both kinds of operands!
 603     tail_size = 1; // the imm8
 604     break;
 605 
 606   case 0xE8: // call rdisp32
 607   case 0xE9: // jmp  rdisp32
 608     if (which == end_pc_operand)  return ip + 4;
 609     assert(which == call32_operand, "call has no disp32 or imm");
 610     return ip;
 611 
 612   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 613   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 614   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 615   case 0xDD: // fld_d a; fst_d a; fstp_d a
 616   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 617   case 0xDF: // fild_d a; fistp_d a
 618   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 619   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 620   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 621     debug_only(has_disp32 = true);
 622     break;
 623 
 624   case 0xF0:                    // Lock
 625     assert(os::is_MP(), "only on MP");
 626     goto again_after_prefix;
 627 
 628   case 0xF3:                    // For SSE
 629   case 0xF2:                    // For SSE2
 630     switch (0xFF & *ip++) {
 631     case REX:
 632     case REX_B:
 633     case REX_X:
 634     case REX_XB:
 635     case REX_R:
 636     case REX_RB:
 637     case REX_RX:
 638     case REX_RXB:
 639     case REX_W:
 640     case REX_WB:
 641     case REX_WX:
 642     case REX_WXB:
 643     case REX_WR:
 644     case REX_WRB:
 645     case REX_WRX:
 646     case REX_WRXB:
 647       NOT_LP64(assert(false, "found 64bit prefix"));
 648       ip++;
 649     default:
 650       ip++;
 651     }
 652     debug_only(has_disp32 = true); // has both kinds of operands!
 653     break;
 654 
 655   default:
 656     ShouldNotReachHere();
 657 
 658 #undef REP8
 659 #undef REP16
 660   }
 661 
 662   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
 663 #ifdef _LP64
 664   assert(which != imm_operand, "instruction is not a movq reg, imm64");
 665 #else
 666   // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
 667   assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
 668 #endif // LP64
 669   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
 670 
 671   // parse the output of emit_operand
 672   int op2 = 0xFF & *ip++;
 673   int base = op2 & 0x07;
 674   int op3 = -1;
 675   const int b100 = 4;
 676   const int b101 = 5;
 677   if (base == b100 && (op2 >> 6) != 3) {
 678     op3 = 0xFF & *ip++;
 679     base = op3 & 0x07;   // refetch the base
 680   }
 681   // now ip points at the disp (if any)
 682 
 683   switch (op2 >> 6) {
 684   case 0:
 685     // [00 reg  100][ss index base]
 686     // [00 reg  100][00   100  esp]
 687     // [00 reg base]
 688     // [00 reg  100][ss index  101][disp32]
 689     // [00 reg  101]               [disp32]
 690 
 691     if (base == b101) {
 692       if (which == disp32_operand)
 693         return ip;              // caller wants the disp32
 694       ip += 4;                  // skip the disp32
 695     }
 696     break;
 697 
 698   case 1:
 699     // [01 reg  100][ss index base][disp8]
 700     // [01 reg  100][00   100  esp][disp8]
 701     // [01 reg base]               [disp8]
 702     ip += 1;                    // skip the disp8
 703     break;
 704 
 705   case 2:
 706     // [10 reg  100][ss index base][disp32]
 707     // [10 reg  100][00   100  esp][disp32]
 708     // [10 reg base]               [disp32]
 709     if (which == disp32_operand)
 710       return ip;                // caller wants the disp32
 711     ip += 4;                    // skip the disp32
 712     break;
 713 
 714   case 3:
 715     // [11 reg base]  (not a memory addressing mode)
 716     break;
 717   }
 718 
 719   if (which == end_pc_operand) {
 720     return ip + tail_size;
 721   }
 722 
 723 #ifdef _LP64
 724   assert(false, "fix locate_operand");
 725 #else
 726   assert(which == imm_operand, "instruction has only an imm field");
 727 #endif // LP64
 728   return ip;
 729 }
 730 
 731 address Assembler::locate_next_instruction(address inst) {
 732   // Secretly share code with locate_operand:
 733   return locate_operand(inst, end_pc_operand);
 734 }
 735 
 736 
 737 #ifdef ASSERT
 738 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
 739   address inst = inst_mark();
 740   assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
 741   address opnd;
 742 
 743   Relocation* r = rspec.reloc();
 744   if (r->type() == relocInfo::none) {
 745     return;
 746   } else if (r->is_call() || format == call32_operand) {
 747     // assert(format == imm32_operand, "cannot specify a nonzero format");
 748     opnd = locate_operand(inst, call32_operand);
 749   } else if (r->is_data()) {
 750     assert(format == imm_operand || format == disp32_operand
 751            LP64_ONLY(|| format == narrow_oop_operand), "format ok");
 752     opnd = locate_operand(inst, (WhichOperand)format);
 753   } else {
 754     assert(format == imm_operand, "cannot specify a format");
 755     return;
 756   }
 757   assert(opnd == pc(), "must put operand where relocs can find it");
 758 }
 759 #endif // ASSERT
 760 
 761 void Assembler::emit_operand32(Register reg, Address adr) {
 762   assert(reg->encoding() < 8, "no extended registers");
 763   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
 764   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
 765                adr._rspec);
 766 }
 767 
 768 void Assembler::emit_operand(Register reg, Address adr,
 769                              int rip_relative_correction) {
 770   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
 771                adr._rspec,
 772                rip_relative_correction);
 773 }
 774 
 775 void Assembler::emit_operand(XMMRegister reg, Address adr) {
 776   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
 777                adr._rspec);
 778 }
 779 
 780 // MMX operations
 781 void Assembler::emit_operand(MMXRegister reg, Address adr) {
 782   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
 783   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
 784 }
 785 
 786 // work around gcc (3.2.1-7a) bug
 787 void Assembler::emit_operand(Address adr, MMXRegister reg) {
 788   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
 789   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
 790 }
 791 
 792 
 793 void Assembler::emit_farith(int b1, int b2, int i) {
 794   assert(isByte(b1) && isByte(b2), "wrong opcode");
 795   assert(0 <= i &&  i < 8, "illegal stack offset");
 796   emit_byte(b1);
 797   emit_byte(b2 + i);
 798 }
 799 
 800 
// Now the Assembler instructions (identical for 32/64 bits)
 802 
 803 void Assembler::adcl(Register dst, int32_t imm32) {
 804   prefix(dst);
 805   emit_arith(0x81, 0xD0, dst, imm32);
 806 }
 807 
 808 void Assembler::adcl(Register dst, Address src) {
 809   InstructionMark im(this);
 810   prefix(src, dst);
 811   emit_byte(0x13);
 812   emit_operand(dst, src);
 813 }
 814 
 815 void Assembler::adcl(Register dst, Register src) {
 816   (void) prefix_and_encode(dst->encoding(), src->encoding());
 817   emit_arith(0x13, 0xC0, dst, src);
 818 }
 819 
 820 void Assembler::addl(Address dst, int32_t imm32) {
 821   InstructionMark im(this);
 822   prefix(dst);
 823   emit_arith_operand(0x81, rax, dst, imm32);
 824 }
 825 
 826 void Assembler::addl(Address dst, Register src) {
 827   InstructionMark im(this);
 828   prefix(dst, src);
 829   emit_byte(0x01);
 830   emit_operand(src, dst);
 831 }
 832 
 833 void Assembler::addl(Register dst, int32_t imm32) {
 834   prefix(dst);
 835   emit_arith(0x81, 0xC0, dst, imm32);
 836 }
 837 
 838 void Assembler::addl(Register dst, Address src) {
 839   InstructionMark im(this);
 840   prefix(src, dst);
 841   emit_byte(0x03);
 842   emit_operand(dst, src);
 843 }
 844 
 845 void Assembler::addl(Register dst, Register src) {
 846   (void) prefix_and_encode(dst->encoding(), src->encoding());
 847   emit_arith(0x03, 0xC0, dst, src);
 848 }
 849 
 850 void Assembler::addr_nop_4() {
 851   // 4 bytes: NOP DWORD PTR [EAX+0]
 852   emit_byte(0x0F);
 853   emit_byte(0x1F);
 854   emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
 855   emit_byte(0);    // 8-bits offset (1 byte)
 856 }
 857 
 858 void Assembler::addr_nop_5() {
 859   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
 860   emit_byte(0x0F);
 861   emit_byte(0x1F);
 862   emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
 863   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
 864   emit_byte(0);    // 8-bits offset (1 byte)
 865 }
 866 
 867 void Assembler::addr_nop_7() {
 868   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
 869   emit_byte(0x0F);
 870   emit_byte(0x1F);
 871   emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
 872   emit_long(0);    // 32-bits offset (4 bytes)
 873 }
 874 
 875 void Assembler::addr_nop_8() {
 876   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
 877   emit_byte(0x0F);
 878   emit_byte(0x1F);
 879   emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
 880   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
 881   emit_long(0);    // 32-bits offset (4 bytes)
 882 }
 883 
 884 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
 885   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
 886   emit_byte(0xF2);
 887   int encode = prefix_and_encode(dst->encoding(), src->encoding());
 888   emit_byte(0x0F);
 889   emit_byte(0x58);
 890   emit_byte(0xC0 | encode);
 891 }
 892 
 893 void Assembler::addsd(XMMRegister dst, Address src) {
 894   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
 895   InstructionMark im(this);
 896   emit_byte(0xF2);
 897   prefix(src, dst);
 898   emit_byte(0x0F);
 899   emit_byte(0x58);
 900   emit_operand(dst, src);
 901 }
 902 
 903 void Assembler::addss(XMMRegister dst, XMMRegister src) {
 904   NOT_LP64(assert(VM_Version::supports_sse(), ""));
 905   emit_byte(0xF3);
 906   int encode = prefix_and_encode(dst->encoding(), src->encoding());
 907   emit_byte(0x0F);
 908   emit_byte(0x58);
 909   emit_byte(0xC0 | encode);
 910 }
 911 
// ADDSS xmm, m32: emits F3, the prefix required by (src, dst), opcode
// 0F 58, and finally the memory operand via emit_operand().
void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}
 921 
// AND r32, imm32: emits the prefix for dst, then delegates to emit_arith()
// with opcode 0x81 and ModRM base 0xE0 (register-direct, reg field /4).
void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}
 926 
// AND r32, m32: prefix for (src, dst), opcode 0x23, then the memory operand.
void Assembler::andl(Register dst, Address src) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
 933 
// AND r32, r32.
// prefix_and_encode() is called only for whatever prefix it emits (its
// return value is discarded); emit_arith() is then handed opcode 0x23 and
// the ModRM base 0xC0 together with both registers.
void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
 938 
// ANDPD xmm, m128: emits 66, the prefix required by (src, dst), opcode
// 0F 54, and finally the memory operand via emit_operand().
void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x54);
  emit_operand(dst, src);
}
 948 
 949 void Assembler::bswapl(Register reg) { // bswap
 950   int encode = prefix_and_encode(reg->encoding());
 951   emit_byte(0x0F);
 952   emit_byte(0xC8 | encode);
 953 }
 954 
 955 void Assembler::call(Label& L, relocInfo::relocType rtype) {
 956   // suspect disp32 is always good
 957   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
 958 
 959   if (L.is_bound()) {
 960     const int long_size = 5;
 961     int offs = (int)( target(L) - pc() );
 962     assert(offs <= 0, "assembler error");
 963     InstructionMark im(this);
 964     // 1110 1000 #32-bit disp
 965     emit_byte(0xE8);
 966     emit_data(offs - long_size, rtype, operand);
 967   } else {
 968     InstructionMark im(this);
 969     // 1110 1000 #32-bit disp
 970     L.add_patch_at(code(), locator());
 971 
 972     emit_byte(0xE8);
 973     emit_data(int(0), rtype, operand);
 974   }
 975 }
 976 
 977 void Assembler::call(Register dst) {
 978   // This was originally using a 32bit register encoding
 979   // and surely we want 64bit!
 980   // this is a 32bit encoding but in 64bit mode the default
 981   // operand size is 64bit so there is no need for the
 982   // wide prefix. So prefix only happens if we use the
 983   // new registers. Much like push/pop.
 984   int x = offset();
 985   // this may be true but dbx disassembles it as if it
 986   // were 32bits...
 987   // int encode = prefix_and_encode(dst->encoding());
 988   // if (offset() != x) assert(dst->encoding() >= 8, "what?");
 989   int encode = prefixq_and_encode(dst->encoding());
 990 
 991   emit_byte(0xFF);
 992   emit_byte(0xD0 | encode);
 993 }
 994 
 995 
// Indirect near call through memory: prefix for adr, opcode 0xFF, then the
// memory operand.
void Assembler::call(Address adr) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  // rdx is not a real operand here; presumably its register number (2)
  // supplies the /2 opcode extension in the ModRM reg field -- verify
  // against emit_operand().
  emit_operand(rdx, adr);
}
1002 
1003 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1004   assert(entry != NULL, "call most probably wrong");
1005   InstructionMark im(this);
1006   emit_byte(0xE8);
1007   intptr_t disp = entry - (_code_pos + sizeof(int32_t));
1008   assert(is_simm32(disp), "must be 32bit offset (call2)");
1009   // Technically, should use call32_operand, but this format is
1010   // implied by the fact that we're emitting a call instruction.
1011 
1012   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1013   emit_data((int) disp, rspec, operand);
1014 }
1015 
// CDQ: emits the single opcode byte 0x99.
void Assembler::cdql() {
  emit_byte(0x99);
}
1019 
1020 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1021   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1022   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1023   emit_byte(0x0F);
1024   emit_byte(0x40 | cc);
1025   emit_byte(0xC0 | encode);
1026 }
1027 
1028 
1029 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1030   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1031   prefix(src, dst);
1032   emit_byte(0x0F);
1033   emit_byte(0x40 | cc);
1034   emit_operand(dst, src);
1035 }
1036 
// CMP m8, imm8: prefix for dst, opcode 0x80, the memory operand, then the
// 8-bit immediate.
void Assembler::cmpb(Address dst, int imm8) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  // rdi is not a real operand; presumably its register number (7) supplies
  // the /7 opcode extension. The trailing 1 matches the size in bytes of
  // the immediate emitted next -- TODO confirm its exact meaning in
  // emit_operand().
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}
1044 
// CMP m32, imm32: prefix for dst, opcode 0x81, the memory operand, then the
// 32-bit immediate.
void Assembler::cmpl(Address dst, int32_t imm32) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  // rdi is not a real operand; presumably its register number (7) supplies
  // the /7 opcode extension. The trailing 4 matches the size in bytes of
  // the immediate emitted next -- TODO confirm its exact meaning in
  // emit_operand().
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}
1052 
// CMP r32, imm32: emits the prefix for dst, then delegates to emit_arith()
// with opcode 0x81 and ModRM base 0xF8 (register-direct, reg field /7).
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}
1057 
// CMP r32, r32.
// prefix_and_encode() is called only for whatever prefix it emits (its
// return value is discarded); emit_arith() is then handed opcode 0x3B and
// the ModRM base 0xC0 together with both registers.
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
1062 
1063 
// CMP r32, m32: prefix for (src, dst), opcode 0x3B, then the memory operand.
void Assembler::cmpl(Register dst, Address  src) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}
1070 
// CMP m16, imm16: 0x66 operand-size prefix, opcode 0x81, the memory operand,
// then the 16-bit immediate.
// No prefix() call is made, which is why the address must not use registers
// that need REX (asserted below).
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  // rdi is not a real operand; presumably its register number (7) supplies
  // the /7 opcode extension. The trailing 2 matches the size in bytes of
  // the immediate emitted next -- TODO confirm its exact meaning in
  // emit_operand().
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}
1079 
// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if they are equal; otherwise, the value at adr is
// loaded into rax. The ZF is set if the compared values were equal, and
// cleared otherwise.
1083 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1084   if (Atomics & 2) {
1085      // caveat: no instructionmark, so this isn't relocatable.
1086      // Emit a synthetic, non-atomic, CAS equivalent.
1087      // Beware.  The synthetic form sets all ICCs, not just ZF.
1088      // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
1089      cmpl(rax, adr);
1090      movl(rax, adr);
1091      if (reg != rax) {
1092         Label L ;
1093         jcc(Assembler::notEqual, L);
1094         movl(adr, reg);
1095         bind(L);
1096      }
1097   } else {
1098      InstructionMark im(this);
1099      prefix(adr, reg);
1100      emit_byte(0x0F);
1101      emit_byte(0xB1);
1102      emit_operand(reg, adr);
1103   }
1104 }
1105 
// COMISD xmm, m64: emits the 0x66 prefix and then reuses comiss() for the
// rest of the encoding.
// NOTE(review): the 0x66 byte is emitted before comiss() creates its
// InstructionMark, so the mark appears to start after the prefix byte --
// confirm this is intended.
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  comiss(dst, src);
}
1113 
// COMISS xmm, m32: prefix for (src, dst), opcode 0F 2F, then the memory
// operand. Also called by comisd() after it has emitted a 0x66 prefix.
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2F);
  emit_operand(dst, src);
}
1123 
1124 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1125   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1126   emit_byte(0xF3);
1127   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1128   emit_byte(0x0F);
1129   emit_byte(0xE6);
1130   emit_byte(0xC0 | encode);
1131 }
1132 
1133 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1134   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1135   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1136   emit_byte(0x0F);
1137   emit_byte(0x5B);
1138   emit_byte(0xC0 | encode);
1139 }
1140 
1141 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1142   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1143   emit_byte(0xF2);
1144   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1145   emit_byte(0x0F);
1146   emit_byte(0x5A);
1147   emit_byte(0xC0 | encode);
1148 }
1149 
1150 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1151   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1152   emit_byte(0xF2);
1153   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1154   emit_byte(0x0F);
1155   emit_byte(0x2A);
1156   emit_byte(0xC0 | encode);
1157 }
1158 
1159 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1160   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1161   emit_byte(0xF3);
1162   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1163   emit_byte(0x0F);
1164   emit_byte(0x2A);
1165   emit_byte(0xC0 | encode);
1166 }
1167 
1168 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1169   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1170   emit_byte(0xF3);
1171   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1172   emit_byte(0x0F);
1173   emit_byte(0x5A);
1174   emit_byte(0xC0 | encode);
1175 }
1176 
1177 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1178   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1179   emit_byte(0xF2);
1180   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1181   emit_byte(0x0F);
1182   emit_byte(0x2C);
1183   emit_byte(0xC0 | encode);
1184 }
1185 
1186 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1187   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1188   emit_byte(0xF3);
1189   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1190   emit_byte(0x0F);
1191   emit_byte(0x2C);
1192   emit_byte(0xC0 | encode);
1193 }
1194 
// DEC m32: prefix for dst, opcode 0xFF, then the memory operand.
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  // rcx is not a real operand; presumably its register number (1) supplies
  // the /1 opcode extension -- verify against emit_operand().
  emit_operand(rcx, dst);
}
1202 
// DIVSD xmm, m64: emits F2, the prefix required by (src, dst), opcode
// 0F 5E, and finally the memory operand via emit_operand().
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}
1212 
1213 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1214   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1215   emit_byte(0xF2);
1216   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1217   emit_byte(0x0F);
1218   emit_byte(0x5E);
1219   emit_byte(0xC0 | encode);
1220 }
1221 
// DIVSS xmm, m32: emits F3, the prefix required by (src, dst), opcode
// 0F 5E, and finally the memory operand via emit_operand().
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}
1231 
1232 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1233   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1234   emit_byte(0xF3);
1235   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1236   emit_byte(0x0F);
1237   emit_byte(0x5E);
1238   emit_byte(0xC0 | encode);
1239 }
1240 
// EMMS: emits the two opcode bytes 0F 77. On non-LP64 builds MMX support is
// asserted first.
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}
1246 
// HLT: emits the single opcode byte 0xF4.
void Assembler::hlt() {
  emit_byte(0xF4);
}
1250 
1251 void Assembler::idivl(Register src) {
1252   int encode = prefix_and_encode(src->encoding());
1253   emit_byte(0xF7);
1254   emit_byte(0xF8 | encode);
1255 }
1256 
1257 void Assembler::imull(Register dst, Register src) {
1258   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1259   emit_byte(0x0F);
1260   emit_byte(0xAF);
1261   emit_byte(0xC0 | encode);
1262 }
1263 
1264 
1265 void Assembler::imull(Register dst, Register src, int value) {
1266   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1267   if (is8bit(value)) {
1268     emit_byte(0x6B);
1269     emit_byte(0xC0 | encode);
1270     emit_byte(value);
1271   } else {
1272     emit_byte(0x69);
1273     emit_byte(0xC0 | encode);
1274     emit_long(value);
1275   }
1276 }
1277 
// INC m32: prefix for dst, opcode 0xFF, then the memory operand.
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  // rax is not a real operand; presumably its register number (0) supplies
  // the /0 opcode extension -- verify against emit_operand().
  emit_operand(rax, dst);
}
1285 
1286 void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
1287   InstructionMark im(this);
1288   relocate(rtype);
1289   assert((0 <= cc) && (cc < 16), "illegal cc");
1290   if (L.is_bound()) {
1291     address dst = target(L);
1292     assert(dst != NULL, "jcc most probably wrong");
1293 
1294     const int short_size = 2;
1295     const int long_size = 6;
1296     intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
1297     if (rtype == relocInfo::none && is8bit(offs - short_size)) {
1298       // 0111 tttn #8-bit disp
1299       emit_byte(0x70 | cc);
1300       emit_byte((offs - short_size) & 0xFF);
1301     } else {
1302       // 0000 1111 1000 tttn #32-bit disp
1303       assert(is_simm32(offs - long_size),
1304              "must be 32bit offset (call4)");
1305       emit_byte(0x0F);
1306       emit_byte(0x80 | cc);
1307       emit_long(offs - long_size);
1308     }
1309   } else {
1310     // Note: could eliminate cond. jumps to this jump if condition
1311     //       is the same however, seems to be rather unlikely case.
1312     // Note: use jccb() if label to be bound is very close to get
1313     //       an 8-bit displacement
1314     L.add_patch_at(code(), locator());
1315     emit_byte(0x0F);
1316     emit_byte(0x80 | cc);
1317     emit_long(0);
1318   }
1319 }
1320 
1321 void Assembler::jccb(Condition cc, Label& L) {
1322   if (L.is_bound()) {
1323     const int short_size = 2;
1324     address entry = target(L);
1325     assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
1326            "Dispacement too large for a short jmp");
1327     intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
1328     // 0111 tttn #8-bit disp
1329     emit_byte(0x70 | cc);
1330     emit_byte((offs - short_size) & 0xFF);
1331   } else {
1332     InstructionMark im(this);
1333     L.add_patch_at(code(), locator());
1334     emit_byte(0x70 | cc);
1335     emit_byte(0);
1336   }
1337 }
1338 
// Indirect near jump through memory: prefix for adr, opcode 0xFF, then the
// memory operand.
void Assembler::jmp(Address adr) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  // rsp is not a real operand; presumably its register number (4) supplies
  // the /4 opcode extension -- verify against emit_operand().
  emit_operand(rsp, adr);
}
1345 
1346 void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
1347   if (L.is_bound()) {
1348     address entry = target(L);
1349     assert(entry != NULL, "jmp most probably wrong");
1350     InstructionMark im(this);
1351     const int short_size = 2;
1352     const int long_size = 5;
1353     intptr_t offs = entry - _code_pos;
1354     if (rtype == relocInfo::none && is8bit(offs - short_size)) {
1355       emit_byte(0xEB);
1356       emit_byte((offs - short_size) & 0xFF);
1357     } else {
1358       emit_byte(0xE9);
1359       emit_long(offs - long_size);
1360     }
1361   } else {
1362     // By default, forward jumps are always 32-bit displacements, since
1363     // we can't yet know where the label will be bound.  If you're sure that
1364     // the forward jump will not run beyond 256 bytes, use jmpb to
1365     // force an 8-bit displacement.
1366     InstructionMark im(this);
1367     relocate(rtype);
1368     L.add_patch_at(code(), locator());
1369     emit_byte(0xE9);
1370     emit_long(0);
1371   }
1372 }
1373 
1374 void Assembler::jmp(Register entry) {
1375   int encode = prefix_and_encode(entry->encoding());
1376   emit_byte(0xFF);
1377   emit_byte(0xE0 | encode);
1378 }
1379 
1380 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
1381   InstructionMark im(this);
1382   emit_byte(0xE9);
1383   assert(dest != NULL, "must have a target");
1384   intptr_t disp = dest - (_code_pos + sizeof(int32_t));
1385   assert(is_simm32(disp), "must be 32bit offset (jmp)");
1386   emit_data(disp, rspec.reloc(), call32_operand);
1387 }
1388 
1389 void Assembler::jmpb(Label& L) {
1390   if (L.is_bound()) {
1391     const int short_size = 2;
1392     address entry = target(L);
1393     assert(is8bit((entry - _code_pos) + short_size),
1394            "Dispacement too large for a short jmp");
1395     assert(entry != NULL, "jmp most probably wrong");
1396     intptr_t offs = entry - _code_pos;
1397     emit_byte(0xEB);
1398     emit_byte((offs - short_size) & 0xFF);
1399   } else {
1400     InstructionMark im(this);
1401     L.add_patch_at(code(), locator());
1402     emit_byte(0xEB);
1403     emit_byte(0);
1404   }
1405 }
1406 
// LDMXCSR m32: prefix for src, opcode 0F AE, then the memory operand.
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  // Register number 2 is not a real operand; presumably it supplies the /2
  // opcode extension -- verify against emit_operand().
  emit_operand(as_Register(2), src);
}
1415 
// LEA r32, m: opcode 0x8D followed by the memory operand.
// When _LP64 is defined, a 0x67 address-size prefix and the prefix for
// (src, dst) are emitted first; otherwise no prefix is emitted at all.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}
1425 
1426 void Assembler::lock() {
1427   if (Atomics & 1) {
1428      // Emit either nothing, a NOP, or a NOP: prefix
1429      emit_byte(0x90) ;
1430   } else {
1431      emit_byte(0xF0);
1432   }
1433 }
1434 
1435 // Serializes memory.
1436 void Assembler::mfence() {
1437     // Memory barriers are only needed on multiprocessors
1438   if (os::is_MP()) {
1439     if( LP64_ONLY(true ||) VM_Version::supports_sse2() ) {
1440       emit_byte( 0x0F );                // MFENCE; faster blows no regs
1441       emit_byte( 0xAE );
1442       emit_byte( 0xF0 );
1443     } else {
1444       // All usable chips support "locked" instructions which suffice
1445       // as barriers, and are much faster than the alternative of
1446       // using cpuid instruction. We use here a locked add [esp],0.
1447       // This is conveniently otherwise a no-op except for blowing
1448       // flags (which we save and restore.)
1449       pushf();                // Save eflags register
1450       lock();
1451       addl(Address(rsp, 0), 0);// Assert the lock# signal here
1452       popf();                 // Restore eflags register
1453     }
1454   }
1455 }
1456 
// Register-to-register move that forwards to movq() under LP64_ONLY and to
// movl() under NOT_LP64.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
1460 
1461 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1462   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1463   int dstenc = dst->encoding();
1464   int srcenc = src->encoding();
1465   emit_byte(0x66);
1466   if (dstenc < 8) {
1467     if (srcenc >= 8) {
1468       prefix(REX_B);
1469       srcenc -= 8;
1470     }
1471   } else {
1472     if (srcenc < 8) {
1473       prefix(REX_R);
1474     } else {
1475       prefix(REX_RB);
1476       srcenc -= 8;
1477     }
1478     dstenc -= 8;
1479   }
1480   emit_byte(0x0F);
1481   emit_byte(0x28);
1482   emit_byte(0xC0 | dstenc << 3 | srcenc);
1483 }
1484 
1485 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1486   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1487   int dstenc = dst->encoding();
1488   int srcenc = src->encoding();
1489   if (dstenc < 8) {
1490     if (srcenc >= 8) {
1491       prefix(REX_B);
1492       srcenc -= 8;
1493     }
1494   } else {
1495     if (srcenc < 8) {
1496       prefix(REX_R);
1497     } else {
1498       prefix(REX_RB);
1499       srcenc -= 8;
1500     }
1501     dstenc -= 8;
1502   }
1503   emit_byte(0x0F);
1504   emit_byte(0x28);
1505   emit_byte(0xC0 | dstenc << 3 | srcenc);
1506 }
1507 
// MOV r8, m8: prefix for (src, dst), opcode 0x8A, then the memory operand.
// On non-LP64 builds dst is asserted to have a byte register.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  // The extra 'true' presumably flags a byte-register operand to prefix()
  // -- verify against its declaration.
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
}
1515 
1516 
// MOV m8, imm8: prefix for dst, opcode 0xC6, the memory operand, then the
// 8-bit immediate.
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
   prefix(dst);
  emit_byte(0xC6);
  // rax is not a real operand; presumably its register number (0) supplies
  // the /0 opcode extension. The trailing 1 matches the size in bytes of
  // the immediate emitted next -- TODO confirm its exact meaning in
  // emit_operand().
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}
1524 
1525 
// MOV m8, r8: prefix for (dst, src), opcode 0x88, then the memory operand.
// src is asserted to have a byte register on all builds (the load form
// above only asserts this on non-LP64 builds).
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  // The extra 'true' presumably flags a byte-register operand to prefix()
  // -- verify against its declaration.
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}
1533 
1534 void Assembler::movdl(XMMRegister dst, Register src) {
1535   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1536   emit_byte(0x66);
1537   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1538   emit_byte(0x0F);
1539   emit_byte(0x6E);
1540   emit_byte(0xC0 | encode);
1541 }
1542 
1543 void Assembler::movdl(Register dst, XMMRegister src) {
1544   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1545   emit_byte(0x66);
1546   // swap src/dst to get correct prefix
1547   int encode = prefix_and_encode(src->encoding(), dst->encoding());
1548   emit_byte(0x0F);
1549   emit_byte(0x7E);
1550   emit_byte(0xC0 | encode);
1551 }
1552 
// MOVDQA xmm, m128 (load): emits 66, the prefix required by (src, dst),
// opcode 0F 6F, and finally the memory operand via emit_operand().
void Assembler::movdqa(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1562 
1563 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1564   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1565   emit_byte(0x66);
1566   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1567   emit_byte(0x0F);
1568   emit_byte(0x6F);
1569   emit_byte(0xC0 | encode);
1570 }
1571 
// MOVDQA m128, xmm (store): emits 66, the prefix required by (dst, src),
// opcode 0F 7F, and finally the memory operand via emit_operand().
void Assembler::movdqa(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1581 
// MOVDQU xmm, m128 (load): emits F3, the prefix required by (src, dst),
// opcode 0F 6F, and finally the memory operand via emit_operand().
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1591 
1592 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1593   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1594   emit_byte(0xF3);
1595   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1596   emit_byte(0x0F);
1597   emit_byte(0x6F);
1598   emit_byte(0xC0 | encode);
1599 }
1600 
// MOVDQU m128, xmm (store): emits F3, the prefix required by (dst, src),
// opcode 0F 7F, and finally the memory operand via emit_operand().
void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1610 
1611 // Uses zero extension on 64bit
1612 
1613 void Assembler::movl(Register dst, int32_t imm32) {
1614   int encode = prefix_and_encode(dst->encoding());
1615   emit_byte(0xB8 | encode);
1616   emit_long(imm32);
1617 }
1618 
1619 void Assembler::movl(Register dst, Register src) {
1620   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1621   emit_byte(0x8B);
1622   emit_byte(0xC0 | encode);
1623 }
1624 
// MOV r32, m32 (load): prefix for (src, dst), opcode 0x8B, then the memory
// operand.
void Assembler::movl(Register dst, Address src) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
1631 
// MOV m32, imm32: prefix for dst, opcode 0xC7, the memory operand, then the
// 32-bit immediate.
void Assembler::movl(Address dst, int32_t imm32) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  // rax is not a real operand; presumably its register number (0) supplies
  // the /0 opcode extension. The trailing 4 matches the size in bytes of
  // the immediate emitted next -- TODO confirm its exact meaning in
  // emit_operand().
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}
1639 
// MOV m32, r32 (store): prefix for (dst, src), opcode 0x89, then the memory
// operand.
void Assembler::movl(Address dst, Register src) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1646 
// Newer CPUs require movsd and movss to be used when loading from memory,
// to avoid partial register stalls. On old Opterons, however, movlpd should
// be used instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
// MOVLPD xmm, m64: emits 66, the prefix required by (src, dst), opcode
// 0F 12, and finally the memory operand via emit_operand().
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F - 0x5D);
  emit_operand(dst, src);
}
1659 
// MOVQ mm, m64 (MMX load): opcode 0F 6F followed by the memory operand.
// NOTE(review): unlike the XMM memory forms in this file, no
// InstructionMark is created and prefix() is not called -- confirm this is
// intended for the MMX forms.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1666 
// MOVQ m64, mm (MMX store): opcode 0F 7F followed by the memory operand.
// NOTE(review): unlike the XMM memory forms in this file, no
// InstructionMark is created and prefix() is not called -- confirm this is
// intended for the MMX forms.
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}
1679 
// MOVQ xmm, m64 (load): emits F3, the prefix required by (src, dst), opcode
// 0F 7E, and finally the memory operand via emit_operand().
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_operand(dst, src);
}
1689 
// MOVQ m64, xmm (store): emits 66, the prefix required by (dst, src),
// opcode 0F D6, and finally the memory operand via emit_operand().
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xD6);
  emit_operand(src, dst);
}
1699 
// MOVSX r32, m8: prefix for (src, dst), opcode 0F BE, then the memory
// operand.
void Assembler::movsbl(Register dst, Address src) { // movsxb
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}
1707 
1708 void Assembler::movsbl(Register dst, Register src) { // movsxb
1709   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1710   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1711   emit_byte(0x0F);
1712   emit_byte(0xBE);
1713   emit_byte(0xC0 | encode);
1714 }
1715 
1716 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1717   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1718   emit_byte(0xF2);
1719   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1720   emit_byte(0x0F);
1721   emit_byte(0x10);
1722   emit_byte(0xC0 | encode);
1723 }
1724 
// MOVSD xmm, m64 (load): emits F2, the prefix required by (src, dst),
// opcode 0F 10, and finally the memory operand via emit_operand().
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}
1734 
// MOVSD m64, xmm (store): emits F2, the prefix required by (dst, src),
// opcode 0F 11, and finally the memory operand via emit_operand().
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}
1744 
1745 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1746   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1747   emit_byte(0xF3);
1748   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1749   emit_byte(0x0F);
1750   emit_byte(0x10);
1751   emit_byte(0xC0 | encode);
1752 }
1753 
// MOVSS xmm, m32 (load): emits F3, the prefix required by (src, dst),
// opcode 0F 10, and finally the memory operand via emit_operand().
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}
1763 
// MOVSS m32, xmm (store): emits F3, the prefix required by (dst, src),
// opcode 0F 11, and finally the memory operand via emit_operand().
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}
1773 
// MOVSX r32, m16: prefix for (src, dst), opcode 0F BF, then the memory
// operand.
void Assembler::movswl(Register dst, Address src) { // movsxw
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}
1781 
1782 void Assembler::movswl(Register dst, Register src) { // movsxw
1783   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1784   emit_byte(0x0F);
1785   emit_byte(0xBF);
1786   emit_byte(0xC0 | encode);
1787 }
1788 
// MOV m16, imm16: 0x66 operand-size prefix, the prefix for dst, opcode
// 0xC7, the memory operand, then the 16-bit immediate.
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  // rax is not a real operand; presumably its register number (0) supplies
  // the /0 opcode extension. The trailing 2 matches the size in bytes of
  // the immediate emitted next -- TODO confirm its exact meaning in
  // emit_operand().
  emit_operand(rax, dst, 2);
  emit_word(imm16);
}
1798 
// MOV r16, m16 (load): 0x66 operand-size prefix, the prefix for (src, dst),
// opcode 0x8B, then the memory operand.
void Assembler::movw(Register dst, Address src) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
1806 
// MOV m16, r16 (store): 0x66 operand-size prefix, the prefix for
// (dst, src), opcode 0x89, then the memory operand.
void Assembler::movw(Address dst, Register src) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1814 
// MOVZX r32, m8: prefix for (src, dst), opcode 0F B6, then the memory
// operand.
void Assembler::movzbl(Register dst, Address src) { // movzxb
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}
1822 
1823 void Assembler::movzbl(Register dst, Register src) { // movzxb
1824   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1825   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1826   emit_byte(0x0F);
1827   emit_byte(0xB6);
1828   emit_byte(0xC0 | encode);
1829 }
1830 
// MOVZX r32, m16: prefix for (src, dst), opcode 0F B7, then the memory
// operand.
void Assembler::movzwl(Register dst, Address src) { // movzxw
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}
1838 
1839 void Assembler::movzwl(Register dst, Register src) { // movzxw
1840   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1841   emit_byte(0x0F);
1842   emit_byte(0xB7);
1843   emit_byte(0xC0 | encode);
1844 }
1845 
// MUL m32: prefix for src, opcode 0xF7, then the memory operand.
void Assembler::mull(Address src) {
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  // rsp is not a real operand; presumably its register number (4) supplies
  // the /4 opcode extension -- verify against emit_operand().
  emit_operand(rsp, src);
}
1852 
1853 void Assembler::mull(Register src) {
1854   int encode = prefix_and_encode(src->encoding());
1855   emit_byte(0xF7);
1856   emit_byte(0xE0 | encode);
1857 }
1858 
// MULSD xmm, m64: emits F2, the prefix required by (src, dst), opcode
// 0F 59, and finally the memory operand via emit_operand().
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}
1868 
1869 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
1870   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1871   emit_byte(0xF2);
1872   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1873   emit_byte(0x0F);
1874   emit_byte(0x59);
1875   emit_byte(0xC0 | encode);
1876 }
1877 
// MULSS xmm, m32: emits F3, the prefix required by (src, dst), opcode
// 0F 59, and finally the memory operand via emit_operand().
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  // Mark the instruction start before any byte is emitted.
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}
1887 
1888 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
1889   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1890   emit_byte(0xF3);
1891   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1892   emit_byte(0x0F);
1893   emit_byte(0x59);
1894   emit_byte(0xC0 | encode);
1895 }
1896 
1897 void Assembler::negl(Register dst) {
1898   int encode = prefix_and_encode(dst->encoding());
1899   emit_byte(0xF7);
1900   emit_byte(0xD8 | encode);
1901 }
1902 
// Emits no-op padding for a requested size of i bytes.
//
// Three strategies are used, selected at run time:
//   * ASSERT builds: i single-byte 0x90 nops (everything below is then
//     unreachable because of the early return).
//   * UseAddressNop on Intel / AMD: multi-byte "address" nops emitted via the
//     addr_nop_4/5/7/8 helpers, combined with 0x66 size prefixes.
//     NOTE(review): the helpers are presumed to emit exactly 4/5/7/8 bytes,
//     as the encoding tables in the comments below suggest - confirm.
//   * Otherwise: 0x90 nops padded with 0x66 size prefixes.
//
// The switch statements below rely on deliberate case fall-through to add
// one extra 0x66 prefix per additional byte of padding.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    // Consume the request in 15-byte chunks first.
    while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    // Remaining 0-14 bytes; cases fall through on purpose to stack prefixes.
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix
      case 13:
        emit_byte(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    // Consume the request in 11-byte chunks until fewer than 22 bytes remain.
    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    // (i is decremented alongside each emission so the second switch below
    // sees only what is still left to pad).
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  // Emit 4-byte nops until at most 12 bytes remain.
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  // Emit one 3- or 4-byte nop so that at most 8 bytes remain.
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  // Emit one 3- or 4-byte nop so that at most 4 bytes remain.
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // Final nop of 1-4 bytes; cases fall through to stack the prefixes.
  switch (i) {
    case 4:
      emit_byte(0x66);
    case 3:
      emit_byte(0x66);
    case 2:
      emit_byte(0x66);
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
2150 
2151 void Assembler::notl(Register dst) {
2152   int encode = prefix_and_encode(dst->encoding());
2153   emit_byte(0xF7);
2154   emit_byte(0xD0 | encode );
2155 }
2156 
2157 void Assembler::orl(Address dst, int32_t imm32) {
2158   InstructionMark im(this);
2159   prefix(dst);
2160   emit_byte(0x81);
2161   emit_operand(rcx, dst, 4);
2162   emit_long(imm32);
2163 }
2164 
2165 void Assembler::orl(Register dst, int32_t imm32) {
2166   prefix(dst);
2167   emit_arith(0x81, 0xC8, dst, imm32);
2168 }
2169 
2170 
2171 void Assembler::orl(Register dst, Address src) {
2172   InstructionMark im(this);
2173   prefix(src, dst);
2174   emit_byte(0x0B);
2175   emit_operand(dst, src);
2176 }
2177 
2178 
2179 void Assembler::orl(Register dst, Register src) {
2180   (void) prefix_and_encode(dst->encoding(), src->encoding());
2181   emit_arith(0x0B, 0xC0, dst, src);
2182 }
2183 
2184 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2185   assert(VM_Version::supports_sse4_2(), "");
2186 
2187   InstructionMark im(this);
2188   emit_byte(0x66);
2189   prefix(src, dst);
2190   emit_byte(0x0F);
2191   emit_byte(0x3A);
2192   emit_byte(0x61);
2193   emit_operand(dst, src);
2194   emit_byte(imm8);
2195 }
2196 
2197 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2198   assert(VM_Version::supports_sse4_2(), "");
2199 
2200   emit_byte(0x66);
2201   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2202   emit_byte(0x0F);
2203   emit_byte(0x3A);
2204   emit_byte(0x61);
2205   emit_byte(0xC0 | encode);
2206   emit_byte(imm8);
2207 }
2208 
2209 // generic
2210 void Assembler::pop(Register dst) {
2211   int encode = prefix_and_encode(dst->encoding());
2212   emit_byte(0x58 | encode);
2213 }
2214 
2215 void Assembler::popf() {
2216   emit_byte(0x9D);
2217 }
2218 
2219 void Assembler::popl(Address dst) {
2220   // NOTE: this will adjust stack by 8byte on 64bits
2221   InstructionMark im(this);
2222   prefix(dst);
2223   emit_byte(0x8F);
2224   emit_operand(rax, dst);
2225 }
2226 
2227 void Assembler::prefetch_prefix(Address src) {
2228   prefix(src);
2229   emit_byte(0x0F);
2230 }
2231 
2232 void Assembler::prefetchnta(Address src) {
2233   NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
2234   InstructionMark im(this);
2235   prefetch_prefix(src);
2236   emit_byte(0x18);
2237   emit_operand(rax, src); // 0, src
2238 }
2239 
2240 void Assembler::prefetchr(Address src) {
2241   NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2242   InstructionMark im(this);
2243   prefetch_prefix(src);
2244   emit_byte(0x0D);
2245   emit_operand(rax, src); // 0, src
2246 }
2247 
2248 void Assembler::prefetcht0(Address src) {
2249   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2250   InstructionMark im(this);
2251   prefetch_prefix(src);
2252   emit_byte(0x18);
2253   emit_operand(rcx, src); // 1, src
2254 }
2255 
2256 void Assembler::prefetcht1(Address src) {
2257   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2258   InstructionMark im(this);
2259   prefetch_prefix(src);
2260   emit_byte(0x18);
2261   emit_operand(rdx, src); // 2, src
2262 }
2263 
2264 void Assembler::prefetcht2(Address src) {
2265   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2266   InstructionMark im(this);
2267   prefetch_prefix(src);
2268   emit_byte(0x18);
2269   emit_operand(rbx, src); // 3, src
2270 }
2271 
2272 void Assembler::prefetchw(Address src) {
2273   NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2274   InstructionMark im(this);
2275   prefetch_prefix(src);
2276   emit_byte(0x0D);
2277   emit_operand(rcx, src); // 1, src
2278 }
2279 
2280 void Assembler::prefix(Prefix p) {
2281   a_byte(p);
2282 }
2283 
2284 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2285   assert(isByte(mode), "invalid value");
2286   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2287 
2288   emit_byte(0x66);
2289   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2290   emit_byte(0x0F);
2291   emit_byte(0x70);
2292   emit_byte(0xC0 | encode);
2293   emit_byte(mode & 0xFF);
2294 
2295 }
2296 
2297 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2298   assert(isByte(mode), "invalid value");
2299   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2300 
2301   InstructionMark im(this);
2302   emit_byte(0x66);
2303   prefix(src, dst);
2304   emit_byte(0x0F);
2305   emit_byte(0x70);
2306   emit_operand(dst, src);
2307   emit_byte(mode & 0xFF);
2308 }
2309 
2310 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2311   assert(isByte(mode), "invalid value");
2312   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2313 
2314   emit_byte(0xF2);
2315   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2316   emit_byte(0x0F);
2317   emit_byte(0x70);
2318   emit_byte(0xC0 | encode);
2319   emit_byte(mode & 0xFF);
2320 }
2321 
2322 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2323   assert(isByte(mode), "invalid value");
2324   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2325 
2326   InstructionMark im(this);
2327   emit_byte(0xF2);
2328   prefix(src, dst); // QQ new
2329   emit_byte(0x0F);
2330   emit_byte(0x70);
2331   emit_operand(dst, src);
2332   emit_byte(mode & 0xFF);
2333 }
2334 
2335 void Assembler::psrlq(XMMRegister dst, int shift) {
2336   // HMM Table D-1 says sse2 or mmx
2337   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2338 
2339   int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2340   emit_byte(0x66);
2341   emit_byte(0x0F);
2342   emit_byte(0x73);
2343   emit_byte(0xC0 | encode);
2344   emit_byte(shift);
2345 }
2346 
2347 void Assembler::ptest(XMMRegister dst, Address src) {
2348   assert(VM_Version::supports_sse4_1(), "");
2349 
2350   InstructionMark im(this);
2351   emit_byte(0x66);
2352   prefix(src, dst);
2353   emit_byte(0x0F);
2354   emit_byte(0x38);
2355   emit_byte(0x17);
2356   emit_operand(dst, src);
2357 }
2358 
2359 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2360   assert(VM_Version::supports_sse4_1(), "");
2361 
2362   emit_byte(0x66);
2363   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2364   emit_byte(0x0F);
2365   emit_byte(0x38);
2366   emit_byte(0x17);
2367   emit_byte(0xC0 | encode);
2368 }
2369 
2370 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2371   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2372   emit_byte(0x66);
2373   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2374   emit_byte(0x0F);
2375   emit_byte(0x60);
2376   emit_byte(0xC0 | encode);
2377 }
2378 
2379 void Assembler::push(int32_t imm32) {
2380   // in 64bits we push 64bits onto the stack but only
2381   // take a 32bit immediate
2382   emit_byte(0x68);
2383   emit_long(imm32);
2384 }
2385 
2386 void Assembler::push(Register src) {
2387   int encode = prefix_and_encode(src->encoding());
2388 
2389   emit_byte(0x50 | encode);
2390 }
2391 
2392 void Assembler::pushf() {
2393   emit_byte(0x9C);
2394 }
2395 
2396 void Assembler::pushl(Address src) {
2397   // Note this will push 64bit on 64bit
2398   InstructionMark im(this);
2399   prefix(src);
2400   emit_byte(0xFF);
2401   emit_operand(rsi, src);
2402 }
2403 
2404 void Assembler::pxor(XMMRegister dst, Address src) {
2405   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2406   InstructionMark im(this);
2407   emit_byte(0x66);
2408   prefix(src, dst);
2409   emit_byte(0x0F);
2410   emit_byte(0xEF);
2411   emit_operand(dst, src);
2412 }
2413 
2414 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2415   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2416   InstructionMark im(this);
2417   emit_byte(0x66);
2418   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2419   emit_byte(0x0F);
2420   emit_byte(0xEF);
2421   emit_byte(0xC0 | encode);
2422 }
2423 
2424 void Assembler::rcll(Register dst, int imm8) {
2425   assert(isShiftCount(imm8), "illegal shift count");
2426   int encode = prefix_and_encode(dst->encoding());
2427   if (imm8 == 1) {
2428     emit_byte(0xD1);
2429     emit_byte(0xD0 | encode);
2430   } else {
2431     emit_byte(0xC1);
2432     emit_byte(0xD0 | encode);
2433     emit_byte(imm8);
2434   }
2435 }
2436 
2437 // copies data from [esi] to [edi] using rcx pointer sized words
2438 // generic
2439 void Assembler::rep_mov() {
2440   emit_byte(0xF3);
2441   // MOVSQ
2442   LP64_ONLY(prefix(REX_W));
2443   emit_byte(0xA5);
2444 }
2445 
2446 // sets rcx pointer sized words with rax, value at [edi]
2447 // generic
2448 void Assembler::rep_set() { // rep_set
2449   emit_byte(0xF3);
2450   // STOSQ
2451   LP64_ONLY(prefix(REX_W));
2452   emit_byte(0xAB);
2453 }
2454 
2455 // scans rcx pointer sized words at [edi] for occurance of rax,
2456 // generic
2457 void Assembler::repne_scan() { // repne_scan
2458   emit_byte(0xF2);
2459   // SCASQ
2460   LP64_ONLY(prefix(REX_W));
2461   emit_byte(0xAF);
2462 }
2463 
2464 #ifdef _LP64
2465 // scans rcx 4 byte words at [edi] for occurance of rax,
2466 // generic
2467 void Assembler::repne_scanl() { // repne_scan
2468   emit_byte(0xF2);
2469   // SCASL
2470   emit_byte(0xAF);
2471 }
2472 #endif
2473 
2474 void Assembler::ret(int imm16) {
2475   if (imm16 == 0) {
2476     emit_byte(0xC3);
2477   } else {
2478     emit_byte(0xC2);
2479     emit_word(imm16);
2480   }
2481 }
2482 
2483 void Assembler::sahf() {
2484 #ifdef _LP64
2485   // Not supported in 64bit mode
2486   ShouldNotReachHere();
2487 #endif
2488   emit_byte(0x9E);
2489 }
2490 
2491 void Assembler::sarl(Register dst, int imm8) {
2492   int encode = prefix_and_encode(dst->encoding());
2493   assert(isShiftCount(imm8), "illegal shift count");
2494   if (imm8 == 1) {
2495     emit_byte(0xD1);
2496     emit_byte(0xF8 | encode);
2497   } else {
2498     emit_byte(0xC1);
2499     emit_byte(0xF8 | encode);
2500     emit_byte(imm8);
2501   }
2502 }
2503 
2504 void Assembler::sarl(Register dst) {
2505   int encode = prefix_and_encode(dst->encoding());
2506   emit_byte(0xD3);
2507   emit_byte(0xF8 | encode);
2508 }
2509 
2510 void Assembler::sbbl(Address dst, int32_t imm32) {
2511   InstructionMark im(this);
2512   prefix(dst);
2513   emit_arith_operand(0x81, rbx, dst, imm32);
2514 }
2515 
2516 void Assembler::sbbl(Register dst, int32_t imm32) {
2517   prefix(dst);
2518   emit_arith(0x81, 0xD8, dst, imm32);
2519 }
2520 
2521 
2522 void Assembler::sbbl(Register dst, Address src) {
2523   InstructionMark im(this);
2524   prefix(src, dst);
2525   emit_byte(0x1B);
2526   emit_operand(dst, src);
2527 }
2528 
2529 void Assembler::sbbl(Register dst, Register src) {
2530   (void) prefix_and_encode(dst->encoding(), src->encoding());
2531   emit_arith(0x1B, 0xC0, dst, src);
2532 }
2533 
2534 void Assembler::setb(Condition cc, Register dst) {
2535   assert(0 <= cc && cc < 16, "illegal cc");
2536   int encode = prefix_and_encode(dst->encoding(), true);
2537   emit_byte(0x0F);
2538   emit_byte(0x90 | cc);
2539   emit_byte(0xC0 | encode);
2540 }
2541 
2542 void Assembler::shll(Register dst, int imm8) {
2543   assert(isShiftCount(imm8), "illegal shift count");
2544   int encode = prefix_and_encode(dst->encoding());
2545   if (imm8 == 1 ) {
2546     emit_byte(0xD1);
2547     emit_byte(0xE0 | encode);
2548   } else {
2549     emit_byte(0xC1);
2550     emit_byte(0xE0 | encode);
2551     emit_byte(imm8);
2552   }
2553 }
2554 
2555 void Assembler::shll(Register dst) {
2556   int encode = prefix_and_encode(dst->encoding());
2557   emit_byte(0xD3);
2558   emit_byte(0xE0 | encode);
2559 }
2560 
2561 void Assembler::shrl(Register dst, int imm8) {
2562   assert(isShiftCount(imm8), "illegal shift count");
2563   int encode = prefix_and_encode(dst->encoding());
2564   emit_byte(0xC1);
2565   emit_byte(0xE8 | encode);
2566   emit_byte(imm8);
2567 }
2568 
2569 void Assembler::shrl(Register dst) {
2570   int encode = prefix_and_encode(dst->encoding());
2571   emit_byte(0xD3);
2572   emit_byte(0xE8 | encode);
2573 }
2574 
2575 // copies a single word from [esi] to [edi]
2576 void Assembler::smovl() {
2577   emit_byte(0xA5);
2578 }
2579 
2580 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2581   // HMM Table D-1 says sse2
2582   // NOT_LP64(assert(VM_Version::supports_sse(), ""));
2583   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2584   emit_byte(0xF2);
2585   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2586   emit_byte(0x0F);
2587   emit_byte(0x51);
2588   emit_byte(0xC0 | encode);
2589 }
2590 
2591 void Assembler::stmxcsr( Address dst) {
2592   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2593   InstructionMark im(this);
2594   prefix(dst);
2595   emit_byte(0x0F);
2596   emit_byte(0xAE);
2597   emit_operand(as_Register(3), dst);
2598 }
2599 
2600 void Assembler::subl(Address dst, int32_t imm32) {
2601   InstructionMark im(this);
2602   prefix(dst);
2603   if (is8bit(imm32)) {
2604     emit_byte(0x83);
2605     emit_operand(rbp, dst, 1);
2606     emit_byte(imm32 & 0xFF);
2607   } else {
2608     emit_byte(0x81);
2609     emit_operand(rbp, dst, 4);
2610     emit_long(imm32);
2611   }
2612 }
2613 
2614 void Assembler::subl(Register dst, int32_t imm32) {
2615   prefix(dst);
2616   emit_arith(0x81, 0xE8, dst, imm32);
2617 }
2618 
2619 void Assembler::subl(Address dst, Register src) {
2620   InstructionMark im(this);
2621   prefix(dst, src);
2622   emit_byte(0x29);
2623   emit_operand(src, dst);
2624 }
2625 
2626 void Assembler::subl(Register dst, Address src) {
2627   InstructionMark im(this);
2628   prefix(src, dst);
2629   emit_byte(0x2B);
2630   emit_operand(dst, src);
2631 }
2632 
2633 void Assembler::subl(Register dst, Register src) {
2634   (void) prefix_and_encode(dst->encoding(), src->encoding());
2635   emit_arith(0x2B, 0xC0, dst, src);
2636 }
2637 
2638 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2639   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2640   emit_byte(0xF2);
2641   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2642   emit_byte(0x0F);
2643   emit_byte(0x5C);
2644   emit_byte(0xC0 | encode);
2645 }
2646 
2647 void Assembler::subsd(XMMRegister dst, Address src) {
2648   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2649   InstructionMark im(this);
2650   emit_byte(0xF2);
2651   prefix(src, dst);
2652   emit_byte(0x0F);
2653   emit_byte(0x5C);
2654   emit_operand(dst, src);
2655 }
2656 
2657 void Assembler::subss(XMMRegister dst, XMMRegister src) {
2658   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2659   emit_byte(0xF3);
2660   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2661   emit_byte(0x0F);
2662   emit_byte(0x5C);
2663   emit_byte(0xC0 | encode);
2664 }
2665 
2666 void Assembler::subss(XMMRegister dst, Address src) {
2667   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2668   InstructionMark im(this);
2669   emit_byte(0xF3);
2670   prefix(src, dst);
2671   emit_byte(0x0F);
2672   emit_byte(0x5C);
2673   emit_operand(dst, src);
2674 }
2675 
2676 void Assembler::testb(Register dst, int imm8) {
2677   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2678   (void) prefix_and_encode(dst->encoding(), true);
2679   emit_arith_b(0xF6, 0xC0, dst, imm8);
2680 }
2681 
2682 void Assembler::testl(Register dst, int32_t imm32) {
2683   // not using emit_arith because test
2684   // doesn't support sign-extension of
2685   // 8bit operands
2686   int encode = dst->encoding();
2687   if (encode == 0) {
2688     emit_byte(0xA9);
2689   } else {
2690     encode = prefix_and_encode(encode);
2691     emit_byte(0xF7);
2692     emit_byte(0xC0 | encode);
2693   }
2694   emit_long(imm32);
2695 }
2696 
2697 void Assembler::testl(Register dst, Register src) {
2698   (void) prefix_and_encode(dst->encoding(), src->encoding());
2699   emit_arith(0x85, 0xC0, dst, src);
2700 }
2701 
2702 void Assembler::testl(Register dst, Address  src) {
2703   InstructionMark im(this);
2704   prefix(src, dst);
2705   emit_byte(0x85);
2706   emit_operand(dst, src);
2707 }
2708 
2709 void Assembler::ucomisd(XMMRegister dst, Address src) {
2710   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2711   emit_byte(0x66);
2712   ucomiss(dst, src);
2713 }
2714 
2715 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2716   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2717   emit_byte(0x66);
2718   ucomiss(dst, src);
2719 }
2720 
2721 void Assembler::ucomiss(XMMRegister dst, Address src) {
2722   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2723 
2724   InstructionMark im(this);
2725   prefix(src, dst);
2726   emit_byte(0x0F);
2727   emit_byte(0x2E);
2728   emit_operand(dst, src);
2729 }
2730 
2731 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2732   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2733   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2734   emit_byte(0x0F);
2735   emit_byte(0x2E);
2736   emit_byte(0xC0 | encode);
2737 }
2738 
2739 
2740 void Assembler::xaddl(Address dst, Register src) {
2741   InstructionMark im(this);
2742   prefix(dst, src);
2743   emit_byte(0x0F);
2744   emit_byte(0xC1);
2745   emit_operand(src, dst);
2746 }
2747 
2748 void Assembler::xchgl(Register dst, Address src) { // xchg
2749   InstructionMark im(this);
2750   prefix(src, dst);
2751   emit_byte(0x87);
2752   emit_operand(dst, src);
2753 }
2754 
2755 void Assembler::xchgl(Register dst, Register src) {
2756   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2757   emit_byte(0x87);
2758   emit_byte(0xc0 | encode);
2759 }
2760 
2761 void Assembler::xorl(Register dst, int32_t imm32) {
2762   prefix(dst);
2763   emit_arith(0x81, 0xF0, dst, imm32);
2764 }
2765 
2766 void Assembler::xorl(Register dst, Address src) {
2767   InstructionMark im(this);
2768   prefix(src, dst);
2769   emit_byte(0x33);
2770   emit_operand(dst, src);
2771 }
2772 
2773 void Assembler::xorl(Register dst, Register src) {
2774   (void) prefix_and_encode(dst->encoding(), src->encoding());
2775   emit_arith(0x33, 0xC0, dst, src);
2776 }
2777 
2778 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
2779   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2780   emit_byte(0x66);
2781   xorps(dst, src);
2782 }
2783 
2784 void Assembler::xorpd(XMMRegister dst, Address src) {
2785   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2786   InstructionMark im(this);
2787   emit_byte(0x66);
2788   prefix(src, dst);
2789   emit_byte(0x0F);
2790   emit_byte(0x57);
2791   emit_operand(dst, src);
2792 }
2793 
2794 
2795 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
2796   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2797   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2798   emit_byte(0x0F);
2799   emit_byte(0x57);
2800   emit_byte(0xC0 | encode);
2801 }
2802 
2803 void Assembler::xorps(XMMRegister dst, Address src) {
2804   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2805   InstructionMark im(this);
2806   prefix(src, dst);
2807   emit_byte(0x0F);
2808   emit_byte(0x57);
2809   emit_operand(dst, src);
2810 }
2811 
2812 #ifndef _LP64
2813 // 32bit only pieces of the assembler
2814 
2815 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
2816   // NO PREFIX AS NEVER 64BIT
2817   InstructionMark im(this);
2818   emit_byte(0x81);
2819   emit_byte(0xF8 | src1->encoding());
2820   emit_data(imm32, rspec, 0);
2821 }
2822 
2823 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
2824   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
2825   InstructionMark im(this);
2826   emit_byte(0x81);
2827   emit_operand(rdi, src1);
2828   emit_data(imm32, rspec, 0);
2829 }
2830 
2831 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
2832 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
2833 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
2834 void Assembler::cmpxchg8(Address adr) {
2835   InstructionMark im(this);
2836   emit_byte(0x0F);
2837   emit_byte(0xc7);
2838   emit_operand(rcx, adr);
2839 }
2840 
2841 void Assembler::decl(Register dst) {
2842   // Don't use it directly. Use MacroAssembler::decrementl() instead.
2843  emit_byte(0x48 | dst->encoding());
2844 }
2845 
2846 #endif // _LP64
2847 
2848 // 64bit typically doesn't use the x87 but needs to for the trig funcs
2849 
2850 void Assembler::fabs() {
2851   emit_byte(0xD9);
2852   emit_byte(0xE1);
2853 }
2854 
2855 void Assembler::fadd(int i) {
2856   emit_farith(0xD8, 0xC0, i);
2857 }
2858 
2859 void Assembler::fadd_d(Address src) {
2860   InstructionMark im(this);
2861   emit_byte(0xDC);
2862   emit_operand32(rax, src);
2863 }
2864 
2865 void Assembler::fadd_s(Address src) {
2866   InstructionMark im(this);
2867   emit_byte(0xD8);
2868   emit_operand32(rax, src);
2869 }
2870 
2871 void Assembler::fadda(int i) {
2872   emit_farith(0xDC, 0xC0, i);
2873 }
2874 
2875 void Assembler::faddp(int i) {
2876   emit_farith(0xDE, 0xC0, i);
2877 }
2878 
2879 void Assembler::fchs() {
2880   emit_byte(0xD9);
2881   emit_byte(0xE0);
2882 }
2883 
2884 void Assembler::fcom(int i) {
2885   emit_farith(0xD8, 0xD0, i);
2886 }
2887 
2888 void Assembler::fcomp(int i) {
2889   emit_farith(0xD8, 0xD8, i);
2890 }
2891 
2892 void Assembler::fcomp_d(Address src) {
2893   InstructionMark im(this);
2894   emit_byte(0xDC);
2895   emit_operand32(rbx, src);
2896 }
2897 
2898 void Assembler::fcomp_s(Address src) {
2899   InstructionMark im(this);
2900   emit_byte(0xD8);
2901   emit_operand32(rbx, src);
2902 }
2903 
2904 void Assembler::fcompp() {
2905   emit_byte(0xDE);
2906   emit_byte(0xD9);
2907 }
2908 
2909 void Assembler::fcos() {
2910   emit_byte(0xD9);
2911   emit_byte(0xFF);
2912 }
2913 
2914 void Assembler::fdecstp() {
2915   emit_byte(0xD9);
2916   emit_byte(0xF6);
2917 }
2918 
2919 void Assembler::fdiv(int i) {
2920   emit_farith(0xD8, 0xF0, i);
2921 }
2922 
2923 void Assembler::fdiv_d(Address src) {
2924   InstructionMark im(this);
2925   emit_byte(0xDC);
2926   emit_operand32(rsi, src);
2927 }
2928 
2929 void Assembler::fdiv_s(Address src) {
2930   InstructionMark im(this);
2931   emit_byte(0xD8);
2932   emit_operand32(rsi, src);
2933 }
2934 
2935 void Assembler::fdiva(int i) {
2936   emit_farith(0xDC, 0xF8, i);
2937 }
2938 
2939 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
2940 //       is erroneous for some of the floating-point instructions below.
2941 
2942 void Assembler::fdivp(int i) {
2943   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
2944 }
2945 
2946 void Assembler::fdivr(int i) {
2947   emit_farith(0xD8, 0xF8, i);
2948 }
2949 
2950 void Assembler::fdivr_d(Address src) {
2951   InstructionMark im(this);
2952   emit_byte(0xDC);
2953   emit_operand32(rdi, src);
2954 }
2955 
2956 void Assembler::fdivr_s(Address src) {
2957   InstructionMark im(this);
2958   emit_byte(0xD8);
2959   emit_operand32(rdi, src);
2960 }
2961 
2962 void Assembler::fdivra(int i) {
2963   emit_farith(0xDC, 0xF0, i);
2964 }
2965 
2966 void Assembler::fdivrp(int i) {
2967   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
2968 }
2969 
2970 void Assembler::ffree(int i) {
2971   emit_farith(0xDD, 0xC0, i);
2972 }
2973 
2974 void Assembler::fild_d(Address adr) {
2975   InstructionMark im(this);
2976   emit_byte(0xDF);
2977   emit_operand32(rbp, adr);
2978 }
2979 
2980 void Assembler::fild_s(Address adr) {
2981   InstructionMark im(this);
2982   emit_byte(0xDB);
2983   emit_operand32(rax, adr);
2984 }
2985 
2986 void Assembler::fincstp() {
2987   emit_byte(0xD9);
2988   emit_byte(0xF7);
2989 }
2990 
2991 void Assembler::finit() {
2992   emit_byte(0x9B);
2993   emit_byte(0xDB);
2994   emit_byte(0xE3);
2995 }
2996 
2997 void Assembler::fist_s(Address adr) {
2998   InstructionMark im(this);
2999   emit_byte(0xDB);
3000   emit_operand32(rdx, adr);
3001 }
3002 
3003 void Assembler::fistp_d(Address adr) {
3004   InstructionMark im(this);
3005   emit_byte(0xDF);
3006   emit_operand32(rdi, adr);
3007 }
3008 
3009 void Assembler::fistp_s(Address adr) {
3010   InstructionMark im(this);
3011   emit_byte(0xDB);
3012   emit_operand32(rbx, adr);
3013 }
3014 
3015 void Assembler::fld1() {
3016   emit_byte(0xD9);
3017   emit_byte(0xE8);
3018 }
3019 
3020 void Assembler::fld_d(Address adr) {
3021   InstructionMark im(this);
3022   emit_byte(0xDD);
3023   emit_operand32(rax, adr);
3024 }
3025 
3026 void Assembler::fld_s(Address adr) {
3027   InstructionMark im(this);
3028   emit_byte(0xD9);
3029   emit_operand32(rax, adr);
3030 }
3031 
3032 
3033 void Assembler::fld_s(int index) {
3034   emit_farith(0xD9, 0xC0, index);
3035 }
3036 
3037 void Assembler::fld_x(Address adr) {
3038   InstructionMark im(this);
3039   emit_byte(0xDB);
3040   emit_operand32(rbp, adr);
3041 }
3042 
3043 void Assembler::fldcw(Address src) {
3044   InstructionMark im(this);
3045   emit_byte(0xd9);
3046   emit_operand32(rbp, src);
3047 }
3048 
3049 void Assembler::fldenv(Address src) {
3050   InstructionMark im(this);
3051   emit_byte(0xD9);
3052   emit_operand32(rsp, src);
3053 }
3054 
3055 void Assembler::fldlg2() {
3056   emit_byte(0xD9);
3057   emit_byte(0xEC);
3058 }
3059 
3060 void Assembler::fldln2() {
3061   emit_byte(0xD9);
3062   emit_byte(0xED);
3063 }
3064 
3065 void Assembler::fldz() {
3066   emit_byte(0xD9);
3067   emit_byte(0xEE);
3068 }
3069 
3070 void Assembler::flog() {
3071   fldln2();
3072   fxch();
3073   fyl2x();
3074 }
3075 
3076 void Assembler::flog10() {
3077   fldlg2();
3078   fxch();
3079   fyl2x();
3080 }
3081 
3082 void Assembler::fmul(int i) {
3083   emit_farith(0xD8, 0xC8, i);
3084 }
3085 
3086 void Assembler::fmul_d(Address src) {
3087   InstructionMark im(this);
3088   emit_byte(0xDC);
3089   emit_operand32(rcx, src);
3090 }
3091 
3092 void Assembler::fmul_s(Address src) {
3093   InstructionMark im(this);
3094   emit_byte(0xD8);
3095   emit_operand32(rcx, src);
3096 }
3097 
3098 void Assembler::fmula(int i) {
3099   emit_farith(0xDC, 0xC8, i);
3100 }
3101 
3102 void Assembler::fmulp(int i) {
3103   emit_farith(0xDE, 0xC8, i);
3104 }
3105 
3106 void Assembler::fnsave(Address dst) {
3107   InstructionMark im(this);
3108   emit_byte(0xDD);
3109   emit_operand32(rsi, dst);
3110 }
3111 
3112 void Assembler::fnstcw(Address src) {
3113   InstructionMark im(this);
3114   emit_byte(0x9B);
3115   emit_byte(0xD9);
3116   emit_operand32(rdi, src);
3117 }
3118 
3119 void Assembler::fnstsw_ax() {
3120   emit_byte(0xdF);
3121   emit_byte(0xE0);
3122 }
3123 
3124 void Assembler::fprem() {
3125   emit_byte(0xD9);
3126   emit_byte(0xF8);
3127 }
3128 
3129 void Assembler::fprem1() {
3130   emit_byte(0xD9);
3131   emit_byte(0xF5);
3132 }
3133 
3134 void Assembler::frstor(Address src) {
3135   InstructionMark im(this);
3136   emit_byte(0xDD);
3137   emit_operand32(rsp, src);
3138 }
3139 
3140 void Assembler::fsin() {
3141   emit_byte(0xD9);
3142   emit_byte(0xFE);
3143 }
3144 
3145 void Assembler::fsqrt() {
3146   emit_byte(0xD9);
3147   emit_byte(0xFA);
3148 }
3149 
3150 void Assembler::fst_d(Address adr) {
3151   InstructionMark im(this);
3152   emit_byte(0xDD);
3153   emit_operand32(rdx, adr);
3154 }
3155 
3156 void Assembler::fst_s(Address adr) {
3157   InstructionMark im(this);
3158   emit_byte(0xD9);
3159   emit_operand32(rdx, adr);
3160 }
3161 
3162 void Assembler::fstp_d(Address adr) {
3163   InstructionMark im(this);
3164   emit_byte(0xDD);
3165   emit_operand32(rbx, adr);
3166 }
3167 
3168 void Assembler::fstp_d(int index) {
3169   emit_farith(0xDD, 0xD8, index);
3170 }
3171 
3172 void Assembler::fstp_s(Address adr) {
3173   InstructionMark im(this);
3174   emit_byte(0xD9);
3175   emit_operand32(rbx, adr);
3176 }
3177 
3178 void Assembler::fstp_x(Address adr) {
3179   InstructionMark im(this);
3180   emit_byte(0xDB);
3181   emit_operand32(rdi, adr);
3182 }
3183 
3184 void Assembler::fsub(int i) {
3185   emit_farith(0xD8, 0xE0, i);
3186 }
3187 
3188 void Assembler::fsub_d(Address src) {
3189   InstructionMark im(this);
3190   emit_byte(0xDC);
3191   emit_operand32(rsp, src);
3192 }
3193 
3194 void Assembler::fsub_s(Address src) {
3195   InstructionMark im(this);
3196   emit_byte(0xD8);
3197   emit_operand32(rsp, src);
3198 }
3199 
3200 void Assembler::fsuba(int i) {
3201   emit_farith(0xDC, 0xE8, i);
3202 }
3203 
3204 void Assembler::fsubp(int i) {
3205   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
3206 }
3207 
3208 void Assembler::fsubr(int i) {
3209   emit_farith(0xD8, 0xE8, i);
3210 }
3211 
3212 void Assembler::fsubr_d(Address src) {
3213   InstructionMark im(this);
3214   emit_byte(0xDC);
3215   emit_operand32(rbp, src);
3216 }
3217 
3218 void Assembler::fsubr_s(Address src) {
3219   InstructionMark im(this);
3220   emit_byte(0xD8);
3221   emit_operand32(rbp, src);
3222 }
3223 
3224 void Assembler::fsubra(int i) {
3225   emit_farith(0xDC, 0xE0, i);
3226 }
3227 
3228 void Assembler::fsubrp(int i) {
3229   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
3230 }
3231 
3232 void Assembler::ftan() {
3233   emit_byte(0xD9);
3234   emit_byte(0xF2);
3235   emit_byte(0xDD);
3236   emit_byte(0xD8);
3237 }
3238 
3239 void Assembler::ftst() {
3240   emit_byte(0xD9);
3241   emit_byte(0xE4);
3242 }
3243 
3244 void Assembler::fucomi(int i) {
3245   // make sure the instruction is supported (introduced for P6, together with cmov)
3246   guarantee(VM_Version::supports_cmov(), "illegal instruction");
3247   emit_farith(0xDB, 0xE8, i);
3248 }
3249 
3250 void Assembler::fucomip(int i) {
3251   // make sure the instruction is supported (introduced for P6, together with cmov)
3252   guarantee(VM_Version::supports_cmov(), "illegal instruction");
3253   emit_farith(0xDF, 0xE8, i);
3254 }
3255 
3256 void Assembler::fwait() {
3257   emit_byte(0x9B);
3258 }
3259 
3260 void Assembler::fxch(int i) {
3261   emit_farith(0xD9, 0xC8, i);
3262 }
3263 
3264 void Assembler::fyl2x() {
3265   emit_byte(0xD9);
3266   emit_byte(0xF1);
3267 }
3268 
3269 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format) {
3270   InstructionMark im(this);
3271   int encode = prefix_and_encode(dst->encoding());
3272   emit_byte(0xB8 | encode);
3273   emit_data((int)imm32, rspec, format);
3274 }
3275 
3276 #ifndef _LP64
3277 
3278 void Assembler::incl(Register dst) {
3279   // Don't use it directly. Use MacroAssembler::incrementl() instead.
3280  emit_byte(0x40 | dst->encoding());
3281 }
3282 
3283 void Assembler::lea(Register dst, Address src) {
3284   leal(dst, src);
3285 }
3286 
3287 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
3288   InstructionMark im(this);
3289   emit_byte(0xC7);
3290   emit_operand(rax, dst);
3291   emit_data((int)imm32, rspec, 0);
3292 }
3293 
3294 
3295 void Assembler::popa() { // 32bit
3296   emit_byte(0x61);
3297 }
3298 
3299 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
3300   InstructionMark im(this);
3301   emit_byte(0x68);
3302   emit_data(imm32, rspec, 0);
3303 }
3304 
3305 void Assembler::pusha() { // 32bit
3306   emit_byte(0x60);
3307 }
3308 
3309 void Assembler::set_byte_if_not_zero(Register dst) {
3310   emit_byte(0x0F);
3311   emit_byte(0x95);
3312   emit_byte(0xE0 | dst->encoding());
3313 }
3314 
3315 void Assembler::shldl(Register dst, Register src) {
3316   emit_byte(0x0F);
3317   emit_byte(0xA5);
3318   emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3319 }
3320 
3321 void Assembler::shrdl(Register dst, Register src) {
3322   emit_byte(0x0F);
3323   emit_byte(0xAD);
3324   emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3325 }
3326 
3327 #else // LP64
3328 
3329 // 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// addressing; it cannot be used by instructions that want an immediate value.
3332 
3333 bool Assembler::reachable(AddressLiteral adr) {
3334   int64_t disp;
3335   // None will force a 64bit literal to the code stream. Likely a placeholder
3336   // for something that will be patched later and we need to certain it will
3337   // always be reachable.
3338   if (adr.reloc() == relocInfo::none) {
3339     return false;
3340   }
3341   if (adr.reloc() == relocInfo::internal_word_type) {
3342     // This should be rip relative and easily reachable.
3343     return true;
3344   }
3345   if (adr.reloc() == relocInfo::virtual_call_type ||
3346       adr.reloc() == relocInfo::opt_virtual_call_type ||
3347       adr.reloc() == relocInfo::static_call_type ||
3348       adr.reloc() == relocInfo::static_stub_type ) {
3349     // This should be rip relative within the code cache and easily
3350     // reachable until we get huge code caches. (At which point
3351     // ic code is going to have issues).
3352     return true;
3353   }
3354   if (adr.reloc() != relocInfo::external_word_type &&
3355       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
3356       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
3357       adr.reloc() != relocInfo::runtime_call_type ) {
3358     return false;
3359   }
3360 
3361   // Stress the correction code
3362   if (ForceUnreachable) {
3363     // Must be runtimecall reloc, see if it is in the codecache
3364     // Flipping stuff in the codecache to be unreachable causes issues
3365     // with things like inline caches where the additional instructions
3366     // are not handled.
3367     if (CodeCache::find_blob(adr._target) == NULL) {
3368       return false;
3369     }
3370   }
3371   // For external_word_type/runtime_call_type if it is reachable from where we
3372   // are now (possibly a temp buffer) and where we might end up
3373   // anywhere in the codeCache then we are always reachable.
3374   // This would have to change if we ever save/restore shared code
3375   // to be more pessimistic.
3376 
3377   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
3378   if (!is_simm32(disp)) return false;
3379   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
3380   if (!is_simm32(disp)) return false;
3381 
3382   disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));
3383 
3384   // Because rip relative is a disp + address_of_next_instruction and we
3385   // don't know the value of address_of_next_instruction we apply a fudge factor
3386   // to make sure we will be ok no matter the size of the instruction we get placed into.
3387   // We don't have to fudge the checks above here because they are already worst case.
3388 
3389   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
3390   // + 4 because better safe than sorry.
3391   const int fudge = 12 + 4;
3392   if (disp < 0) {
3393     disp -= fudge;
3394   } else {
3395     disp += fudge;
3396   }
3397   return is_simm32(disp);
3398 }
3399 
3400 void Assembler::emit_data64(jlong data,
3401                             relocInfo::relocType rtype,
3402                             int format) {
3403   if (rtype == relocInfo::none) {
3404     emit_long64(data);
3405   } else {
3406     emit_data64(data, Relocation::spec_simple(rtype), format);
3407   }
3408 }
3409 
3410 void Assembler::emit_data64(jlong data,
3411                             RelocationHolder const& rspec,
3412                             int format) {
3413   assert(imm_operand == 0, "default format must be immediate in this file");
3414   assert(imm_operand == format, "must be immediate");
3415   assert(inst_mark() != NULL, "must be inside InstructionMark");
3416   // Do not use AbstractAssembler::relocate, which is not intended for
3417   // embedded words.  Instead, relocate to the enclosing instruction.
3418   code_section()->relocate(inst_mark(), rspec, format);
3419 #ifdef ASSERT
3420   check_relocation(rspec, format);
3421 #endif
3422   emit_long64(data);
3423 }
3424 
3425 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
3426   if (reg_enc >= 8) {
3427     prefix(REX_B);
3428     reg_enc -= 8;
3429   } else if (byteinst && reg_enc >= 4) {
3430     prefix(REX);
3431   }
3432   return reg_enc;
3433 }
3434 
3435 int Assembler::prefixq_and_encode(int reg_enc) {
3436   if (reg_enc < 8) {
3437     prefix(REX_W);
3438   } else {
3439     prefix(REX_WB);
3440     reg_enc -= 8;
3441   }
3442   return reg_enc;
3443 }
3444 
3445 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
3446   if (dst_enc < 8) {
3447     if (src_enc >= 8) {
3448       prefix(REX_B);
3449       src_enc -= 8;
3450     } else if (byteinst && src_enc >= 4) {
3451       prefix(REX);
3452     }
3453   } else {
3454     if (src_enc < 8) {
3455       prefix(REX_R);
3456     } else {
3457       prefix(REX_RB);
3458       src_enc -= 8;
3459     }
3460     dst_enc -= 8;
3461   }
3462   return dst_enc << 3 | src_enc;
3463 }
3464 
3465 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
3466   if (dst_enc < 8) {
3467     if (src_enc < 8) {
3468       prefix(REX_W);
3469     } else {
3470       prefix(REX_WB);
3471       src_enc -= 8;
3472     }
3473   } else {
3474     if (src_enc < 8) {
3475       prefix(REX_WR);
3476     } else {
3477       prefix(REX_WRB);
3478       src_enc -= 8;
3479     }
3480     dst_enc -= 8;
3481   }
3482   return dst_enc << 3 | src_enc;
3483 }
3484 
3485 void Assembler::prefix(Register reg) {
3486   if (reg->encoding() >= 8) {
3487     prefix(REX_B);
3488   }
3489 }
3490 
3491 void Assembler::prefix(Address adr) {
3492   if (adr.base_needs_rex()) {
3493     if (adr.index_needs_rex()) {
3494       prefix(REX_XB);
3495     } else {
3496       prefix(REX_B);
3497     }
3498   } else {
3499     if (adr.index_needs_rex()) {
3500       prefix(REX_X);
3501     }
3502   }
3503 }
3504 
3505 void Assembler::prefixq(Address adr) {
3506   if (adr.base_needs_rex()) {
3507     if (adr.index_needs_rex()) {
3508       prefix(REX_WXB);
3509     } else {
3510       prefix(REX_WB);
3511     }
3512   } else {
3513     if (adr.index_needs_rex()) {
3514       prefix(REX_WX);
3515     } else {
3516       prefix(REX_W);
3517     }
3518   }
3519 }
3520 
3521 
3522 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
3523   if (reg->encoding() < 8) {
3524     if (adr.base_needs_rex()) {
3525       if (adr.index_needs_rex()) {
3526         prefix(REX_XB);
3527       } else {
3528         prefix(REX_B);
3529       }
3530     } else {
3531       if (adr.index_needs_rex()) {
3532         prefix(REX_X);
3533       } else if (reg->encoding() >= 4 ) {
3534         prefix(REX);
3535       }
3536     }
3537   } else {
3538     if (adr.base_needs_rex()) {
3539       if (adr.index_needs_rex()) {
3540         prefix(REX_RXB);
3541       } else {
3542         prefix(REX_RB);
3543       }
3544     } else {
3545       if (adr.index_needs_rex()) {
3546         prefix(REX_RX);
3547       } else {
3548         prefix(REX_R);
3549       }
3550     }
3551   }
3552 }
3553 
3554 void Assembler::prefixq(Address adr, Register src) {
3555   if (src->encoding() < 8) {
3556     if (adr.base_needs_rex()) {
3557       if (adr.index_needs_rex()) {
3558         prefix(REX_WXB);
3559       } else {
3560         prefix(REX_WB);
3561       }
3562     } else {
3563       if (adr.index_needs_rex()) {
3564         prefix(REX_WX);
3565       } else {
3566         prefix(REX_W);
3567       }
3568     }
3569   } else {
3570     if (adr.base_needs_rex()) {
3571       if (adr.index_needs_rex()) {
3572         prefix(REX_WRXB);
3573       } else {
3574         prefix(REX_WRB);
3575       }
3576     } else {
3577       if (adr.index_needs_rex()) {
3578         prefix(REX_WRX);
3579       } else {
3580         prefix(REX_WR);
3581       }
3582     }
3583   }
3584 }
3585 
3586 void Assembler::prefix(Address adr, XMMRegister reg) {
3587   if (reg->encoding() < 8) {
3588     if (adr.base_needs_rex()) {
3589       if (adr.index_needs_rex()) {
3590         prefix(REX_XB);
3591       } else {
3592         prefix(REX_B);
3593       }
3594     } else {
3595       if (adr.index_needs_rex()) {
3596         prefix(REX_X);
3597       }
3598     }
3599   } else {
3600     if (adr.base_needs_rex()) {
3601       if (adr.index_needs_rex()) {
3602         prefix(REX_RXB);
3603       } else {
3604         prefix(REX_RB);
3605       }
3606     } else {
3607       if (adr.index_needs_rex()) {
3608         prefix(REX_RX);
3609       } else {
3610         prefix(REX_R);
3611       }
3612     }
3613   }
3614 }
3615 
3616 void Assembler::adcq(Register dst, int32_t imm32) {
3617   (void) prefixq_and_encode(dst->encoding());
3618   emit_arith(0x81, 0xD0, dst, imm32);
3619 }
3620 
3621 void Assembler::adcq(Register dst, Address src) {
3622   InstructionMark im(this);
3623   prefixq(src, dst);
3624   emit_byte(0x13);
3625   emit_operand(dst, src);
3626 }
3627 
3628 void Assembler::adcq(Register dst, Register src) {
3629   (int) prefixq_and_encode(dst->encoding(), src->encoding());
3630   emit_arith(0x13, 0xC0, dst, src);
3631 }
3632 
3633 void Assembler::addq(Address dst, int32_t imm32) {
3634   InstructionMark im(this);
3635   prefixq(dst);
3636   emit_arith_operand(0x81, rax, dst,imm32);
3637 }
3638 
3639 void Assembler::addq(Address dst, Register src) {
3640   InstructionMark im(this);
3641   prefixq(dst, src);
3642   emit_byte(0x01);
3643   emit_operand(src, dst);
3644 }
3645 
3646 void Assembler::addq(Register dst, int32_t imm32) {
3647   (void) prefixq_and_encode(dst->encoding());
3648   emit_arith(0x81, 0xC0, dst, imm32);
3649 }
3650 
3651 void Assembler::addq(Register dst, Address src) {
3652   InstructionMark im(this);
3653   prefixq(src, dst);
3654   emit_byte(0x03);
3655   emit_operand(dst, src);
3656 }
3657 
3658 void Assembler::addq(Register dst, Register src) {
3659   (void) prefixq_and_encode(dst->encoding(), src->encoding());
3660   emit_arith(0x03, 0xC0, dst, src);
3661 }
3662 
3663 void Assembler::andq(Register dst, int32_t imm32) {
3664   (void) prefixq_and_encode(dst->encoding());
3665   emit_arith(0x81, 0xE0, dst, imm32);
3666 }
3667 
3668 void Assembler::andq(Register dst, Address src) {
3669   InstructionMark im(this);
3670   prefixq(src, dst);
3671   emit_byte(0x23);
3672   emit_operand(dst, src);
3673 }
3674 
3675 void Assembler::andq(Register dst, Register src) {
3676   (int) prefixq_and_encode(dst->encoding(), src->encoding());
3677   emit_arith(0x23, 0xC0, dst, src);
3678 }
3679 
3680 void Assembler::bswapq(Register reg) {
3681   int encode = prefixq_and_encode(reg->encoding());
3682   emit_byte(0x0F);
3683   emit_byte(0xC8 | encode);
3684 }
3685 
3686 void Assembler::cdqq() {
3687   prefix(REX_W);
3688   emit_byte(0x99);
3689 }
3690 
3691 void Assembler::clflush(Address adr) {
3692   prefix(adr);
3693   emit_byte(0x0F);
3694   emit_byte(0xAE);
3695   emit_operand(rdi, adr);
3696 }
3697 
3698 void Assembler::cmovq(Condition cc, Register dst, Register src) {
3699   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3700   emit_byte(0x0F);
3701   emit_byte(0x40 | cc);
3702   emit_byte(0xC0 | encode);
3703 }
3704 
3705 void Assembler::cmovq(Condition cc, Register dst, Address src) {
3706   InstructionMark im(this);
3707   prefixq(src, dst);
3708   emit_byte(0x0F);
3709   emit_byte(0x40 | cc);
3710   emit_operand(dst, src);
3711 }
3712 
3713 void Assembler::cmpq(Address dst, int32_t imm32) {
3714   InstructionMark im(this);
3715   prefixq(dst);
3716   emit_byte(0x81);
3717   emit_operand(rdi, dst, 4);
3718   emit_long(imm32);
3719 }
3720 
3721 void Assembler::cmpq(Register dst, int32_t imm32) {
3722   (void) prefixq_and_encode(dst->encoding());
3723   emit_arith(0x81, 0xF8, dst, imm32);
3724 }
3725 
3726 void Assembler::cmpq(Address dst, Register src) {
3727   InstructionMark im(this);
3728   prefixq(dst, src);
3729   emit_byte(0x3B);
3730   emit_operand(src, dst);
3731 }
3732 
3733 void Assembler::cmpq(Register dst, Register src) {
3734   (void) prefixq_and_encode(dst->encoding(), src->encoding());
3735   emit_arith(0x3B, 0xC0, dst, src);
3736 }
3737 
3738 void Assembler::cmpq(Register dst, Address  src) {
3739   InstructionMark im(this);
3740   prefixq(src, dst);
3741   emit_byte(0x3B);
3742   emit_operand(dst, src);
3743 }
3744 
3745 void Assembler::cmpxchgq(Register reg, Address adr) {
3746   InstructionMark im(this);
3747   prefixq(adr, reg);
3748   emit_byte(0x0F);
3749   emit_byte(0xB1);
3750   emit_operand(reg, adr);
3751 }
3752 
3753 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
3754   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3755   emit_byte(0xF2);
3756   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3757   emit_byte(0x0F);
3758   emit_byte(0x2A);
3759   emit_byte(0xC0 | encode);
3760 }
3761 
3762 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
3763   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3764   emit_byte(0xF3);
3765   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3766   emit_byte(0x0F);
3767   emit_byte(0x2A);
3768   emit_byte(0xC0 | encode);
3769 }
3770 
3771 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
3772   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3773   emit_byte(0xF2);
3774   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3775   emit_byte(0x0F);
3776   emit_byte(0x2C);
3777   emit_byte(0xC0 | encode);
3778 }
3779 
3780 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
3781   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3782   emit_byte(0xF3);
3783   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3784   emit_byte(0x0F);
3785   emit_byte(0x2C);
3786   emit_byte(0xC0 | encode);
3787 }
3788 
3789 void Assembler::decl(Register dst) {
3790   // Don't use it directly. Use MacroAssembler::decrementl() instead.
3791   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3792   int encode = prefix_and_encode(dst->encoding());
3793   emit_byte(0xFF);
3794   emit_byte(0xC8 | encode);
3795 }
3796 
3797 void Assembler::decq(Register dst) {
3798   // Don't use it directly. Use MacroAssembler::decrementq() instead.
3799   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3800   int encode = prefixq_and_encode(dst->encoding());
3801   emit_byte(0xFF);
3802   emit_byte(0xC8 | encode);
3803 }
3804 
3805 void Assembler::decq(Address dst) {
3806   // Don't use it directly. Use MacroAssembler::decrementq() instead.
3807   InstructionMark im(this);
3808   prefixq(dst);
3809   emit_byte(0xFF);
3810   emit_operand(rcx, dst);
3811 }
3812 
3813 void Assembler::fxrstor(Address src) {
3814   prefixq(src);
3815   emit_byte(0x0F);
3816   emit_byte(0xAE);
3817   emit_operand(as_Register(1), src);
3818 }
3819 
3820 void Assembler::fxsave(Address dst) {
3821   prefixq(dst);
3822   emit_byte(0x0F);
3823   emit_byte(0xAE);
3824   emit_operand(as_Register(0), dst);
3825 }
3826 
3827 void Assembler::idivq(Register src) {
3828   int encode = prefixq_and_encode(src->encoding());
3829   emit_byte(0xF7);
3830   emit_byte(0xF8 | encode);
3831 }
3832 
3833 void Assembler::imulq(Register dst, Register src) {
3834   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3835   emit_byte(0x0F);
3836   emit_byte(0xAF);
3837   emit_byte(0xC0 | encode);
3838 }
3839 
3840 void Assembler::imulq(Register dst, Register src, int value) {
3841   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3842   if (is8bit(value)) {
3843     emit_byte(0x6B);
3844     emit_byte(0xC0 | encode);
3845     emit_byte(value);
3846   } else {
3847     emit_byte(0x69);
3848     emit_byte(0xC0 | encode);
3849     emit_long(value);
3850   }
3851 }
3852 
3853 void Assembler::incl(Register dst) {
3854   // Don't use it directly. Use MacroAssembler::incrementl() instead.
3855   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3856   int encode = prefix_and_encode(dst->encoding());
3857   emit_byte(0xFF);
3858   emit_byte(0xC0 | encode);
3859 }
3860 
3861 void Assembler::incq(Register dst) {
3862   // Don't use it directly. Use MacroAssembler::incrementq() instead.
3863   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3864   int encode = prefixq_and_encode(dst->encoding());
3865   emit_byte(0xFF);
3866   emit_byte(0xC0 | encode);
3867 }
3868 
3869 void Assembler::incq(Address dst) {
3870   // Don't use it directly. Use MacroAssembler::incrementq() instead.
3871   InstructionMark im(this);
3872   prefixq(dst);
3873   emit_byte(0xFF);
3874   emit_operand(rax, dst);
3875 }
3876 
3877 void Assembler::lea(Register dst, Address src) {
3878   leaq(dst, src);
3879 }
3880 
3881 void Assembler::leaq(Register dst, Address src) {
3882   InstructionMark im(this);
3883   prefixq(src, dst);
3884   emit_byte(0x8D);
3885   emit_operand(dst, src);
3886 }
3887 
3888 void Assembler::mov64(Register dst, int64_t imm64) {
3889   InstructionMark im(this);
3890   int encode = prefixq_and_encode(dst->encoding());
3891   emit_byte(0xB8 | encode);
3892   emit_long64(imm64);
3893 }
3894 
3895 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
3896   InstructionMark im(this);
3897   int encode = prefixq_and_encode(dst->encoding());
3898   emit_byte(0xB8 | encode);
3899   emit_data64(imm64, rspec);
3900 }
3901 
3902 void Assembler::movdq(XMMRegister dst, Register src) {
3903   // table D-1 says MMX/SSE2
3904   NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
3905   emit_byte(0x66);
3906   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3907   emit_byte(0x0F);
3908   emit_byte(0x6E);
3909   emit_byte(0xC0 | encode);
3910 }
3911 
3912 void Assembler::movdq(Register dst, XMMRegister src) {
3913   // table D-1 says MMX/SSE2
3914   NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
3915   emit_byte(0x66);
3916   // swap src/dst to get correct prefix
3917   int encode = prefixq_and_encode(src->encoding(), dst->encoding());
3918   emit_byte(0x0F);
3919   emit_byte(0x7E);
3920   emit_byte(0xC0 | encode);
3921 }
3922 
3923 void Assembler::movq(Register dst, Register src) {
3924   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3925   emit_byte(0x8B);
3926   emit_byte(0xC0 | encode);
3927 }
3928 
3929 void Assembler::movq(Register dst, Address src) {
3930   InstructionMark im(this);
3931   prefixq(src, dst);
3932   emit_byte(0x8B);
3933   emit_operand(dst, src);
3934 }
3935 
3936 void Assembler::movq(Address dst, Register src) {
3937   InstructionMark im(this);
3938   prefixq(dst, src);
3939   emit_byte(0x89);
3940   emit_operand(src, dst);
3941 }
3942 
3943 void Assembler::movslq(Register dst, int32_t imm32) {
3944   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
3945   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
3946   // as a result we shouldn't use until tested at runtime...
3947   ShouldNotReachHere();
3948   InstructionMark im(this);
3949   int encode = prefixq_and_encode(dst->encoding());
3950   emit_byte(0xC7 | encode);
3951   emit_long(imm32);
3952 }
3953 
3954 void Assembler::movslq(Address dst, int32_t imm32) {
3955   assert(is_simm32(imm32), "lost bits");
3956   InstructionMark im(this);
3957   prefixq(dst);
3958   emit_byte(0xC7);
3959   emit_operand(rax, dst, 4);
3960   emit_long(imm32);
3961 }
3962 
3963 void Assembler::movslq(Register dst, Address src) {
3964   InstructionMark im(this);
3965   prefixq(src, dst);
3966   emit_byte(0x63);
3967   emit_operand(dst, src);
3968 }
3969 
3970 void Assembler::movslq(Register dst, Register src) {
3971   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3972   emit_byte(0x63);
3973   emit_byte(0xC0 | encode);
3974 }
3975 
3976 void Assembler::negq(Register dst) {
3977   int encode = prefixq_and_encode(dst->encoding());
3978   emit_byte(0xF7);
3979   emit_byte(0xD8 | encode);
3980 }
3981 
3982 void Assembler::notq(Register dst) {
3983   int encode = prefixq_and_encode(dst->encoding());
3984   emit_byte(0xF7);
3985   emit_byte(0xD0 | encode);
3986 }
3987 
3988 void Assembler::orq(Address dst, int32_t imm32) {
3989   InstructionMark im(this);
3990   prefixq(dst);
3991   emit_byte(0x81);
3992   emit_operand(rcx, dst, 4);
3993   emit_long(imm32);
3994 }
3995 
3996 void Assembler::orq(Register dst, int32_t imm32) {
3997   (void) prefixq_and_encode(dst->encoding());
3998   emit_arith(0x81, 0xC8, dst, imm32);
3999 }
4000 
4001 void Assembler::orq(Register dst, Address src) {
4002   InstructionMark im(this);
4003   prefixq(src, dst);
4004   emit_byte(0x0B);
4005   emit_operand(dst, src);
4006 }
4007 
4008 void Assembler::orq(Register dst, Register src) {
4009   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4010   emit_arith(0x0B, 0xC0, dst, src);
4011 }
4012 
4013 void Assembler::popa() { // 64bit
4014   movq(r15, Address(rsp, 0));
4015   movq(r14, Address(rsp, wordSize));
4016   movq(r13, Address(rsp, 2 * wordSize));
4017   movq(r12, Address(rsp, 3 * wordSize));
4018   movq(r11, Address(rsp, 4 * wordSize));
4019   movq(r10, Address(rsp, 5 * wordSize));
4020   movq(r9,  Address(rsp, 6 * wordSize));
4021   movq(r8,  Address(rsp, 7 * wordSize));
4022   movq(rdi, Address(rsp, 8 * wordSize));
4023   movq(rsi, Address(rsp, 9 * wordSize));
4024   movq(rbp, Address(rsp, 10 * wordSize));
4025   // skip rsp
4026   movq(rbx, Address(rsp, 12 * wordSize));
4027   movq(rdx, Address(rsp, 13 * wordSize));
4028   movq(rcx, Address(rsp, 14 * wordSize));
4029   movq(rax, Address(rsp, 15 * wordSize));
4030 
4031   addq(rsp, 16 * wordSize);
4032 }
4033 
4034 void Assembler::popq(Address dst) {
4035   InstructionMark im(this);
4036   prefixq(dst);
4037   emit_byte(0x8F);
4038   emit_operand(rax, dst);
4039 }
4040 
4041 void Assembler::pusha() { // 64bit
4042   // we have to store original rsp.  ABI says that 128 bytes
4043   // below rsp are local scratch.
4044   movq(Address(rsp, -5 * wordSize), rsp);
4045 
4046   subq(rsp, 16 * wordSize);
4047 
4048   movq(Address(rsp, 15 * wordSize), rax);
4049   movq(Address(rsp, 14 * wordSize), rcx);
4050   movq(Address(rsp, 13 * wordSize), rdx);
4051   movq(Address(rsp, 12 * wordSize), rbx);
4052   // skip rsp
4053   movq(Address(rsp, 10 * wordSize), rbp);
4054   movq(Address(rsp, 9 * wordSize), rsi);
4055   movq(Address(rsp, 8 * wordSize), rdi);
4056   movq(Address(rsp, 7 * wordSize), r8);
4057   movq(Address(rsp, 6 * wordSize), r9);
4058   movq(Address(rsp, 5 * wordSize), r10);
4059   movq(Address(rsp, 4 * wordSize), r11);
4060   movq(Address(rsp, 3 * wordSize), r12);
4061   movq(Address(rsp, 2 * wordSize), r13);
4062   movq(Address(rsp, wordSize), r14);
4063   movq(Address(rsp, 0), r15);
4064 }
4065 
4066 void Assembler::pushq(Address src) {
4067   InstructionMark im(this);
4068   prefixq(src);
4069   emit_byte(0xFF);
4070   emit_operand(rsi, src);
4071 }
4072 
4073 void Assembler::rclq(Register dst, int imm8) {
4074   assert(isShiftCount(imm8 >> 1), "illegal shift count");
4075   int encode = prefixq_and_encode(dst->encoding());
4076   if (imm8 == 1) {
4077     emit_byte(0xD1);
4078     emit_byte(0xD0 | encode);
4079   } else {
4080     emit_byte(0xC1);
4081     emit_byte(0xD0 | encode);
4082     emit_byte(imm8);
4083   }
4084 }
4085 void Assembler::sarq(Register dst, int imm8) {
4086   assert(isShiftCount(imm8 >> 1), "illegal shift count");
4087   int encode = prefixq_and_encode(dst->encoding());
4088   if (imm8 == 1) {
4089     emit_byte(0xD1);
4090     emit_byte(0xF8 | encode);
4091   } else {
4092     emit_byte(0xC1);
4093     emit_byte(0xF8 | encode);
4094     emit_byte(imm8);
4095   }
4096 }
4097 
4098 void Assembler::sarq(Register dst) {
4099   int encode = prefixq_and_encode(dst->encoding());
4100   emit_byte(0xD3);
4101   emit_byte(0xF8 | encode);
4102 }
4103 void Assembler::sbbq(Address dst, int32_t imm32) {
4104   InstructionMark im(this);
4105   prefixq(dst);
4106   emit_arith_operand(0x81, rbx, dst, imm32);
4107 }
4108 
4109 void Assembler::sbbq(Register dst, int32_t imm32) {
4110   (void) prefixq_and_encode(dst->encoding());
4111   emit_arith(0x81, 0xD8, dst, imm32);
4112 }
4113 
4114 void Assembler::sbbq(Register dst, Address src) {
4115   InstructionMark im(this);
4116   prefixq(src, dst);
4117   emit_byte(0x1B);
4118   emit_operand(dst, src);
4119 }
4120 
4121 void Assembler::sbbq(Register dst, Register src) {
4122   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4123   emit_arith(0x1B, 0xC0, dst, src);
4124 }
4125 
4126 void Assembler::shlq(Register dst, int imm8) {
4127   assert(isShiftCount(imm8 >> 1), "illegal shift count");
4128   int encode = prefixq_and_encode(dst->encoding());
4129   if (imm8 == 1) {
4130     emit_byte(0xD1);
4131     emit_byte(0xE0 | encode);
4132   } else {
4133     emit_byte(0xC1);
4134     emit_byte(0xE0 | encode);
4135     emit_byte(imm8);
4136   }
4137 }
4138 
4139 void Assembler::shlq(Register dst) {
4140   int encode = prefixq_and_encode(dst->encoding());
4141   emit_byte(0xD3);
4142   emit_byte(0xE0 | encode);
4143 }
4144 
4145 void Assembler::shrq(Register dst, int imm8) {
4146   assert(isShiftCount(imm8 >> 1), "illegal shift count");
4147   int encode = prefixq_and_encode(dst->encoding());
4148   emit_byte(0xC1);
4149   emit_byte(0xE8 | encode);
4150   emit_byte(imm8);
4151 }
4152 
4153 void Assembler::shrq(Register dst) {
4154   int encode = prefixq_and_encode(dst->encoding());
4155   emit_byte(0xD3);
4156   emit_byte(0xE8 | encode);
4157 }
4158 
4159 void Assembler::sqrtsd(XMMRegister dst, Address src) {
4160   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4161   InstructionMark im(this);
4162   emit_byte(0xF2);
4163   prefix(src, dst);
4164   emit_byte(0x0F);
4165   emit_byte(0x51);
4166   emit_operand(dst, src);
4167 }
4168 
4169 void Assembler::subq(Address dst, int32_t imm32) {
4170   InstructionMark im(this);
4171   prefixq(dst);
4172   if (is8bit(imm32)) {
4173     emit_byte(0x83);
4174     emit_operand(rbp, dst, 1);
4175     emit_byte(imm32 & 0xFF);
4176   } else {
4177     emit_byte(0x81);
4178     emit_operand(rbp, dst, 4);
4179     emit_long(imm32);
4180   }
4181 }
4182 
4183 void Assembler::subq(Register dst, int32_t imm32) {
4184   (void) prefixq_and_encode(dst->encoding());
4185   emit_arith(0x81, 0xE8, dst, imm32);
4186 }
4187 
4188 void Assembler::subq(Address dst, Register src) {
4189   InstructionMark im(this);
4190   prefixq(dst, src);
4191   emit_byte(0x29);
4192   emit_operand(src, dst);
4193 }
4194 
4195 void Assembler::subq(Register dst, Address src) {
4196   InstructionMark im(this);
4197   prefixq(src, dst);
4198   emit_byte(0x2B);
4199   emit_operand(dst, src);
4200 }
4201 
4202 void Assembler::subq(Register dst, Register src) {
4203   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4204   emit_arith(0x2B, 0xC0, dst, src);
4205 }
4206 
4207 void Assembler::testq(Register dst, int32_t imm32) {
4208   // not using emit_arith because test
4209   // doesn't support sign-extension of
4210   // 8bit operands
4211   int encode = dst->encoding();
4212   if (encode == 0) {
4213     prefix(REX_W);
4214     emit_byte(0xA9);
4215   } else {
4216     encode = prefixq_and_encode(encode);
4217     emit_byte(0xF7);
4218     emit_byte(0xC0 | encode);
4219   }
4220   emit_long(imm32);
4221 }
4222 
4223 void Assembler::testq(Register dst, Register src) {
4224   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4225   emit_arith(0x85, 0xC0, dst, src);
4226 }
4227 
4228 void Assembler::xaddq(Address dst, Register src) {
4229   InstructionMark im(this);
4230   prefixq(dst, src);
4231   emit_byte(0x0F);
4232   emit_byte(0xC1);
4233   emit_operand(src, dst);
4234 }
4235 
4236 void Assembler::xchgq(Register dst, Address src) {
4237   InstructionMark im(this);
4238   prefixq(src, dst);
4239   emit_byte(0x87);
4240   emit_operand(dst, src);
4241 }
4242 
4243 void Assembler::xchgq(Register dst, Register src) {
4244   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4245   emit_byte(0x87);
4246   emit_byte(0xc0 | encode);
4247 }
4248 
4249 void Assembler::xorq(Register dst, Register src) {
4250   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4251   emit_arith(0x33, 0xC0, dst, src);
4252 }
4253 
4254 void Assembler::xorq(Register dst, Address src) {
4255   InstructionMark im(this);
4256   prefixq(src, dst);
4257   emit_byte(0x33);
4258   emit_operand(dst, src);
4259 }
4260 
4261 #endif // !LP64
4262 
4263 static Assembler::Condition reverse[] = {
4264     Assembler::noOverflow     /* overflow      = 0x0 */ ,
4265     Assembler::overflow       /* noOverflow    = 0x1 */ ,
4266     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
4267     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
4268     Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
4269     Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
4270     Assembler::above          /* belowEqual    = 0x6 */ ,
4271     Assembler::belowEqual     /* above         = 0x7 */ ,
4272     Assembler::positive       /* negative      = 0x8 */ ,
4273     Assembler::negative       /* positive      = 0x9 */ ,
4274     Assembler::noParity       /* parity        = 0xa */ ,
4275     Assembler::parity         /* noParity      = 0xb */ ,
4276     Assembler::greaterEqual   /* less          = 0xc */ ,
4277     Assembler::less           /* greaterEqual  = 0xd */ ,
4278     Assembler::greater        /* lessEqual     = 0xe */ ,
4279     Assembler::lessEqual      /* greater       = 0xf, */
4280 
4281 };
4282 
4283 
4284 // Implementation of MacroAssembler
4285 
4286 // First all the versions that have distinct versions depending on 32/64 bit
4287 // Unless the difference is trivial (1 line or so).
4288 
4289 #ifndef _LP64
4290 
4291 // 32bit versions
4292 
4293 Address MacroAssembler::as_Address(AddressLiteral adr) {
4294   return Address(adr.target(), adr.rspec());
4295 }
4296 
4297 Address MacroAssembler::as_Address(ArrayAddress adr) {
4298   return Address::make_array(adr);
4299 }
4300 
// Emits the 32-bit biased-locking entry sequence for the object in obj_reg.
//
// Parameters:
//   lock_reg  - base register of the slot (offset 0) where the mark word is
//               parked while it is being examined; also borrowed as the
//               scratch register when tmp_reg is noreg.
//   obj_reg   - register holding the object whose header is inspected/CASed.
//   swap_reg  - must be rax (asserted); receives the mark word and is the
//               compare value of the cmpxchg instructions.
//   tmp_reg   - scratch register, or noreg to borrow lock_reg instead.
//   swap_reg_contains_mark - true if the caller already loaded the mark word
//               into swap_reg, in which case no mark load is emitted here.
//   done      - label jumped to when the bias check or a bias-acquiring CAS
//               path finishes.
//   slow_case - optional label; when non-NULL a branch to it is emitted for
//               a failed bias-acquiring CAS. When NULL no such branch is
//               emitted and control reaches `done` either way.
//   counters  - optional statistics counters; when NULL and
//               PrintBiasedLockingStatistics is set, BiasedLocking::counters()
//               is used.
//
// Returns the code offset recorded just before the first emitted load
// through obj_reg: the mark-word load when it is emitted here, otherwise the
// klass load.
//
// The code emitted after the final bind(cas_label) belongs to the caller:
// objects without the bias pattern, and the revoke path, fall through to it.
4301 int MacroAssembler::biased_locking_enter(Register lock_reg,
4302                                          Register obj_reg,
4303                                          Register swap_reg,
4304                                          Register tmp_reg,
4305                                          bool swap_reg_contains_mark,
4306                                          Label& done,
4307                                          Label* slow_case,
4308                                          BiasedLockingCounters* counters) {
4309   assert(UseBiasedLocking, "why call this otherwise?");
4310   assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
4311   assert_different_registers(lock_reg, obj_reg, swap_reg);
4312 
4313   if (PrintBiasedLockingStatistics && counters == NULL)
4314     counters = BiasedLocking::counters();
4315 
       // No scratch register supplied: borrow lock_reg and bracket every use
       // below with push/pop so that its value survives.
4316   bool need_tmp_reg = false;
4317   if (tmp_reg == noreg) {
4318     need_tmp_reg = true;
4319     tmp_reg = lock_reg;
4320   } else {
4321     assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4322   }
4323   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4324   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
4325   Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
4326   Address saved_mark_addr(lock_reg, 0);
4327 
4328   // Biased locking
4329   // See whether the lock is currently biased toward our thread and
4330   // whether the epoch is still valid
4331   // Note that the runtime guarantees sufficient alignment of JavaThread
4332   // pointers to allow age to be placed into low bits
4333   // First check to see whether biasing is even enabled for this object
4334   Label cas_label;
4335   int null_check_offset = -1;
4336   if (!swap_reg_contains_mark) {
         // The mark load is the first access through obj_reg; record its offset.
4337     null_check_offset = offset();
4338     movl(swap_reg, mark_addr);
4339   }
4340   if (need_tmp_reg) {
4341     push(tmp_reg);
4342   }
4343   movl(tmp_reg, swap_reg);
4344   andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4345   cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
       // The pop sits between the cmpl and the jcc that consumes its flags;
       // the x86 pop instruction leaves EFLAGS untouched.
4346   if (need_tmp_reg) {
4347     pop(tmp_reg);
4348   }
4349   jcc(Assembler::notEqual, cas_label);
4350   // The bias pattern is present in the object's header. Need to check
4351   // whether the bias owner and the epoch are both still current.
4352   // Note that because there is no current thread register on x86 we
4353   // need to store off the mark word we read out of the object to
4354   // avoid reloading it and needing to recheck invariants below. This
4355   // store is unfortunate but it makes the overall code shorter and
4356   // simpler.
4357   movl(saved_mark_addr, swap_reg);
4358   if (need_tmp_reg) {
4359     push(tmp_reg);
4360   }
4361   get_thread(tmp_reg);
4362   xorl(swap_reg, tmp_reg);
4363   if (swap_reg_contains_mark) {
         // The mark was preloaded by the caller, so the klass load below is
         // the first access through obj_reg emitted by this routine.
4364     null_check_offset = offset();
4365   }
4366   movl(tmp_reg, klass_addr);
4367   xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
       // swap_reg now holds mark ^ thread ^ prototype header; the age bits are
       // masked off because they are irrelevant to the ownership test.
4368   andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
4369   if (need_tmp_reg) {
4370     pop(tmp_reg);
4371   }
       // NOTE(review): the jcc after this block consumes the flags set by the
       // andl above, so cond_inc32 is presumably flag-preserving - confirm.
4372   if (counters != NULL) {
4373     cond_inc32(Assembler::zero,
4374                ExternalAddress((address)counters->biased_lock_entry_count_addr()));
4375   }
4376   jcc(Assembler::equal, done);
4377 
4378   Label try_revoke_bias;
4379   Label try_rebias;
4380 
4381   // At this point we know that the header has the bias pattern and
4382   // that we are not the bias owner in the current epoch. We need to
4383   // figure out more details about the state of the header in order to
4384   // know what operations can be legally performed on the object's
4385   // header.
4386 
4387   // If the low three bits in the xor result aren't clear, that means
4388   // the prototype header is no longer biased and we have to revoke
4389   // the bias on this object.
4390   testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
4391   jcc(Assembler::notZero, try_revoke_bias);
4392 
4393   // Biasing is still enabled for this data type. See whether the
4394   // epoch of the current bias is still valid, meaning that the epoch
4395   // bits of the mark word are equal to the epoch bits of the
4396   // prototype header. (Note that the prototype header's epoch bits
4397   // only change at a safepoint.) If not, attempt to rebias the object
4398   // toward the current thread. Note that we must be absolutely sure
4399   // that the current epoch is invalid in order to do this because
4400   // otherwise the manipulations it performs on the mark word are
4401   // illegal.
4402   testl(swap_reg, markOopDesc::epoch_mask_in_place);
4403   jcc(Assembler::notZero, try_rebias);
4404 
4405   // The epoch of the current bias is still valid but we know nothing
4406   // about the owner; it might be set or it might be clear. Try to
4407   // acquire the bias of the object using an atomic operation. If this
4408   // fails we will go in to the runtime to revoke the object's bias.
4409   // Note that we first construct the presumed unbiased header so we
4410   // don't accidentally blow away another thread's valid bias.
4411   movl(swap_reg, saved_mark_addr);
4412   andl(swap_reg,
4413        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4414   if (need_tmp_reg) {
4415     push(tmp_reg);
4416   }
4417   get_thread(tmp_reg);
4418   orl(tmp_reg, swap_reg);
       // The lock prefix is only emitted when os::is_MP() reports true.
4419   if (os::is_MP()) {
4420     lock();
4421   }
4422   cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4423   if (need_tmp_reg) {
4424     pop(tmp_reg);
4425   }
4426   // If the biasing toward our thread failed, this means that
4427   // another thread succeeded in biasing it toward itself and we
4428   // need to revoke that bias. The revocation will occur in the
4429   // interpreter runtime in the slow case.
4430   if (counters != NULL) {
4431     cond_inc32(Assembler::zero,
4432                ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
4433   }
4434   if (slow_case != NULL) {
4435     jcc(Assembler::notZero, *slow_case);
4436   }
4437   jmp(done);
4438 
4439   bind(try_rebias);
4440   // At this point we know the epoch has expired, meaning that the
4441   // current "bias owner", if any, is actually invalid. Under these
4442   // circumstances _only_, we are allowed to use the current header's
4443   // value as the comparison value when doing the cas to acquire the
4444   // bias in the current epoch. In other words, we allow transfer of
4445   // the bias from one thread to another directly in this situation.
4446   //
4447   // FIXME: due to a lack of registers we currently blow away the age
4448   // bits in this situation. Should attempt to preserve them.
4449   if (need_tmp_reg) {
4450     push(tmp_reg);
4451   }
4452   get_thread(tmp_reg);
       // swap_reg is used briefly as a klass pointer, then reloaded with the
       // saved mark so that it is the compare value of the cmpxchg.
4453   movl(swap_reg, klass_addr);
4454   orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4455   movl(swap_reg, saved_mark_addr);
4456   if (os::is_MP()) {
4457     lock();
4458   }
4459   cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4460   if (need_tmp_reg) {
4461     pop(tmp_reg);
4462   }
4463   // If the biasing toward our thread failed, then another thread
4464   // succeeded in biasing it toward itself and we need to revoke that
4465   // bias. The revocation will occur in the runtime in the slow case.
4466   if (counters != NULL) {
4467     cond_inc32(Assembler::zero,
4468                ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
4469   }
4470   if (slow_case != NULL) {
4471     jcc(Assembler::notZero, *slow_case);
4472   }
4473   jmp(done);
4474 
4475   bind(try_revoke_bias);
4476   // The prototype mark in the klass doesn't have the bias bit set any
4477   // more, indicating that objects of this data type are not supposed
4478   // to be biased any more. We are going to try to reset the mark of
4479   // this object to the prototype value and fall through to the
4480   // CAS-based locking scheme. Note that if our CAS fails, it means
4481   // that another thread raced us for the privilege of revoking the
4482   // bias of this particular object, so it's okay to continue in the
4483   // normal locking code.
4484   //
4485   // FIXME: due to a lack of registers we currently blow away the age
4486   // bits in this situation. Should attempt to preserve them.
4487   movl(swap_reg, saved_mark_addr);
4488   if (need_tmp_reg) {
4489     push(tmp_reg);
4490   }
       // tmp_reg: klass pointer first, then the klass's prototype header,
       // which is the replacement value for the cmpxchg.
4491   movl(tmp_reg, klass_addr);
4492   movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4493   if (os::is_MP()) {
4494     lock();
4495   }
4496   cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4497   if (need_tmp_reg) {
4498     pop(tmp_reg);
4499   }
4500   // Fall through to the normal CAS-based lock, because no matter what
4501   // the result of the above CAS, some thread must have succeeded in
4502   // removing the bias bit from the object's header.
4503   if (counters != NULL) {
4504     cond_inc32(Assembler::zero,
4505                ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
4506   }
4507 
       // Entry point of whatever locking code the caller emits next.
4508   bind(cas_label);
4509 
4510   return null_check_offset;
4511 }
4512 void MacroAssembler::call_VM_leaf_base(address entry_point,
4513                                        int number_of_arguments) {
4514   call(RuntimeAddress(entry_point));
4515   increment(rsp, number_of_arguments * wordSize);
4516 }
4517 
4518 void MacroAssembler::cmpoop(Address src1, jobject obj) {
4519   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4520 }
4521 
4522 void MacroAssembler::cmpoop(Register src1, jobject obj) {
4523   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4524 }
4525 
4526 void MacroAssembler::extend_sign(Register hi, Register lo) {
4527   // According to Intel Doc. AP-526, "Integer Divide", p.18.
4528   if (VM_Version::is_P6() && hi == rdx && lo == rax) {
4529     cdql();
4530   } else {
4531     movl(hi, lo);
4532     sarl(hi, 31);
4533   }
4534 }
4535 
4536 void MacroAssembler::fat_nop() {
4537   // A 5 byte nop that is safe for patching (see patch_verified_entry)
4538   emit_byte(0x26); // es:
4539   emit_byte(0x2e); // cs:
4540   emit_byte(0x64); // fs:
4541   emit_byte(0x65); // gs:
4542   emit_byte(0x90);
4543 }
4544 
4545 void MacroAssembler::jC2(Register tmp, Label& L) {
4546   // set parity bit if FPU flag C2 is set (via rax)
4547   save_rax(tmp);
4548   fwait(); fnstsw_ax();
4549   sahf();
4550   restore_rax(tmp);
4551   // branch
4552   jcc(Assembler::parity, L);
4553 }
4554 
4555 void MacroAssembler::jnC2(Register tmp, Label& L) {
4556   // set parity bit if FPU flag C2 is set (via rax)
4557   save_rax(tmp);
4558   fwait(); fnstsw_ax();
4559   sahf();
4560   restore_rax(tmp);
4561   // branch
4562   jcc(Assembler::noParity, L);
4563 }
4564 
4565 // 32bit can do a case table jump in one instruction but we no longer allow the base
4566 // to be installed in the Address class
4567 void MacroAssembler::jump(ArrayAddress entry) {
4568   jmp(as_Address(entry));
4569 }
4570 
4571 // Note: y_lo will be destroyed
4572 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
4573   // Long compare for Java (semantics as described in JVM spec.)
4574   Label high, low, done;
4575 
4576   cmpl(x_hi, y_hi);
4577   jcc(Assembler::less, low);
4578   jcc(Assembler::greater, high);
4579   // x_hi is the return register
4580   xorl(x_hi, x_hi);
4581   cmpl(x_lo, y_lo);
4582   jcc(Assembler::below, low);
4583   jcc(Assembler::equal, done);
4584 
4585   bind(high);
4586   xorl(x_hi, x_hi);
4587   increment(x_hi);
4588   jmp(done);
4589 
4590   bind(low);
4591   xorl(x_hi, x_hi);
4592   decrementl(x_hi);
4593 
4594   bind(done);
4595 }
4596 
4597 void MacroAssembler::lea(Register dst, AddressLiteral src) {
4598     mov_literal32(dst, (int32_t)src.target(), src.rspec());
4599 }
4600 
4601 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
4602   // leal(dst, as_Address(adr));
4603   // see note in movl as to why we must use a move
4604   mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
4605 }
4606 
4607 void MacroAssembler::leave() {
4608   mov(rsp, rbp);
4609   pop(rbp);
4610 }
4611 
4612 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
4613   // Multiplication of two Java long values stored on the stack
4614   // as illustrated below. Result is in rdx:rax.
4615   //
4616   // rsp ---> [  ??  ] \               \
4617   //            ....    | y_rsp_offset  |
4618   //          [ y_lo ] /  (in bytes)    | x_rsp_offset
4619   //          [ y_hi ]                  | (in bytes)
4620   //            ....                    |
4621   //          [ x_lo ]                 /
4622   //          [ x_hi ]
4623   //            ....
4624   //
4625   // Basic idea: lo(result) = lo(x_lo * y_lo)
4626   //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
4627   Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
4628   Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
4629   Label quick;
4630   // load x_hi, y_hi and check if quick
4631   // multiplication is possible
4632   movl(rbx, x_hi);
4633   movl(rcx, y_hi);
4634   movl(rax, rbx);
4635   orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
4636   jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
4637   // do full multiplication
4638   // 1st step
4639   mull(y_lo);                                    // x_hi * y_lo
4640   movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
4641   // 2nd step
4642   movl(rax, x_lo);
4643   mull(rcx);                                     // x_lo * y_hi
4644   addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
4645   // 3rd step
4646   bind(quick);                                   // note: rbx, = 0 if quick multiply!
4647   movl(rax, x_lo);
4648   mull(y_lo);                                    // x_lo * y_lo
4649   addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
4650 }
4651 
4652 void MacroAssembler::lneg(Register hi, Register lo) {
4653   negl(lo);
4654   adcl(hi, 0);
4655   negl(hi);
4656 }
4657 
4658 void MacroAssembler::lshl(Register hi, Register lo) {
4659   // Java shift left long support (semantics as described in JVM spec., p.305)
4660   // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
4661   // shift value is in rcx !
4662   assert(hi != rcx, "must not use rcx");
4663   assert(lo != rcx, "must not use rcx");
4664   const Register s = rcx;                        // shift count
4665   const int      n = BitsPerWord;
4666   Label L;
4667   andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
4668   cmpl(s, n);                                    // if (s < n)
4669   jcc(Assembler::less, L);                       // else (s >= n)
4670   movl(hi, lo);                                  // x := x << n
4671   xorl(lo, lo);
4672   // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4673   bind(L);                                       // s (mod n) < n
4674   shldl(hi, lo);                                 // x := x << s
4675   shll(lo);
4676 }
4677 
4678 
4679 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
4680   // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
4681   // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
4682   assert(hi != rcx, "must not use rcx");
4683   assert(lo != rcx, "must not use rcx");
4684   const Register s = rcx;                        // shift count
4685   const int      n = BitsPerWord;
4686   Label L;
4687   andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
4688   cmpl(s, n);                                    // if (s < n)
4689   jcc(Assembler::less, L);                       // else (s >= n)
4690   movl(lo, hi);                                  // x := x >> n
4691   if (sign_extension) sarl(hi, 31);
4692   else                xorl(hi, hi);
4693   // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4694   bind(L);                                       // s (mod n) < n
4695   shrdl(lo, hi);                                 // x := x >> s
4696   if (sign_extension) sarl(hi);
4697   else                shrl(hi);
4698 }
4699 
4700 void MacroAssembler::movoop(Register dst, jobject obj) {
4701   mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4702 }
4703 
4704 void MacroAssembler::movoop(Address dst, jobject obj) {
4705   mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4706 }
4707 
4708 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
4709   if (src.is_lval()) {
4710     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
4711   } else {
4712     movl(dst, as_Address(src));
4713   }
4714 }
4715 
4716 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
4717   movl(as_Address(dst), src);
4718 }
4719 
4720 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
4721   movl(dst, as_Address(src));
4722 }
4723 
4724 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
4725 void MacroAssembler::movptr(Address dst, intptr_t src) {
4726   movl(dst, src);
4727 }
4728 
4729 
4730 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
4731   movsd(dst, as_Address(src));
4732 }
4733 
4734 void MacroAssembler::pop_callee_saved_registers() {
4735   pop(rcx);
4736   pop(rdx);
4737   pop(rdi);
4738   pop(rsi);
4739 }
4740 
4741 void MacroAssembler::pop_fTOS() {
4742   fld_d(Address(rsp, 0));
4743   addl(rsp, 2 * wordSize);
4744 }
4745 
4746 void MacroAssembler::push_callee_saved_registers() {
4747   push(rsi);
4748   push(rdi);
4749   push(rdx);
4750   push(rcx);
4751 }
4752 
4753 void MacroAssembler::push_fTOS() {
4754   subl(rsp, 2 * wordSize);
4755   fstp_d(Address(rsp, 0));
4756 }
4757 
4758 
4759 void MacroAssembler::pushoop(jobject obj) {
4760   push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
4761 }
4762 
4763 
4764 void MacroAssembler::pushptr(AddressLiteral src) {
4765   if (src.is_lval()) {
4766     push_literal32((int32_t)src.target(), src.rspec());
4767   } else {
4768     pushl(as_Address(src));
4769   }
4770 }
4771 
4772 void MacroAssembler::set_word_if_not_zero(Register dst) {
4773   xorl(dst, dst);
4774   set_byte_if_not_zero(dst);
4775 }
4776 
4777 static void pass_arg0(MacroAssembler* masm, Register arg) {
4778   masm->push(arg);
4779 }
4780 
4781 static void pass_arg1(MacroAssembler* masm, Register arg) {
4782   masm->push(arg);
4783 }
4784 
4785 static void pass_arg2(MacroAssembler* masm, Register arg) {
4786   masm->push(arg);
4787 }
4788 
4789 static void pass_arg3(MacroAssembler* masm, Register arg) {
4790   masm->push(arg);
4791 }
4792 
4793 #ifndef PRODUCT
4794 extern "C" void findpc(intptr_t x);
4795 #endif
4796 
4797 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
4798   // In order to get locks to work, we need to fake a in_VM state
4799   JavaThread* thread = JavaThread::current();
4800   JavaThreadState saved_state = thread->thread_state();
4801   thread->set_thread_state(_thread_in_vm);
4802   if (ShowMessageBoxOnError) {
4803     JavaThread* thread = JavaThread::current();
4804     JavaThreadState saved_state = thread->thread_state();
4805     thread->set_thread_state(_thread_in_vm);
4806     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
4807       ttyLocker ttyl;
4808       BytecodeCounter::print();
4809     }
4810     // To see where a verify_oop failed, get $ebx+40/X for this frame.
4811     // This is the value of eip which points to where verify_oop will return.
4812     if (os::message_box(msg, "Execution stopped, print registers?")) {
4813       ttyLocker ttyl;
4814       tty->print_cr("eip = 0x%08x", eip);
4815 #ifndef PRODUCT
4816       tty->cr();
4817       findpc(eip);
4818       tty->cr();
4819 #endif
4820       tty->print_cr("rax, = 0x%08x", rax);
4821       tty->print_cr("rbx, = 0x%08x", rbx);
4822       tty->print_cr("rcx = 0x%08x", rcx);
4823       tty->print_cr("rdx = 0x%08x", rdx);
4824       tty->print_cr("rdi = 0x%08x", rdi);
4825       tty->print_cr("rsi = 0x%08x", rsi);
4826       tty->print_cr("rbp, = 0x%08x", rbp);
4827       tty->print_cr("rsp = 0x%08x", rsp);
4828       BREAKPOINT;
4829     }
4830   } else {
4831     ttyLocker ttyl;
4832     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
4833     assert(false, "DEBUG MESSAGE");
4834   }
4835   ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
4836 }
4837 
4838 void MacroAssembler::stop(const char* msg) {
4839   ExternalAddress message((address)msg);
4840   // push address of message
4841   pushptr(message.addr());
4842   { Label L; call(L, relocInfo::none); bind(L); }     // push eip
4843   pusha();                                           // push registers
4844   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
4845   hlt();
4846 }
4847 
4848 void MacroAssembler::warn(const char* msg) {
4849   push_CPU_state();
4850 
4851   ExternalAddress message((address) msg);
4852   // push address of message
4853   pushptr(message.addr());
4854 
4855   call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
4856   addl(rsp, wordSize);       // discard argument
4857   pop_CPU_state();
4858 }
4859 
4860 #else // _LP64
4861 
4862 // 64 bit versions
4863 
4864 Address MacroAssembler::as_Address(AddressLiteral adr) {
4865   // amd64 always does this as a pc-rel
4866   // we can be absolute or disp based on the instruction type
4867   // jmp/call are displacements others are absolute
4868   assert(!adr.is_lval(), "must be rval");
4869   assert(reachable(adr), "must be");
4870   return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
4871 
4872 }
4873 
4874 Address MacroAssembler::as_Address(ArrayAddress adr) {
4875   AddressLiteral base = adr.base();
4876   lea(rscratch1, base);
4877   Address index = adr.index();
4878   assert(index._disp == 0, "must not have disp"); // maybe it can?
4879   Address array(rscratch1, index._index, index._scale, index._disp);
4880   return array;
4881 }
4882 
4883 int MacroAssembler::biased_locking_enter(Register lock_reg,
4884                                          Register obj_reg,
4885                                          Register swap_reg,
4886                                          Register tmp_reg,
4887                                          bool swap_reg_contains_mark,
4888                                          Label& done,
4889                                          Label* slow_case,
4890                                          BiasedLockingCounters* counters) {
4891   assert(UseBiasedLocking, "why call this otherwise?");
4892   assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
4893   assert(tmp_reg != noreg, "tmp_reg must be supplied");
4894   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4895   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4896   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
4897   Address saved_mark_addr(lock_reg, 0);
4898 
4899   if (PrintBiasedLockingStatistics && counters == NULL)
4900     counters = BiasedLocking::counters();
4901 
4902   // Biased locking
4903   // See whether the lock is currently biased toward our thread and
4904   // whether the epoch is still valid
4905   // Note that the runtime guarantees sufficient alignment of JavaThread
4906   // pointers to allow age to be placed into low bits
4907   // First check to see whether biasing is even enabled for this object
4908   Label cas_label;
4909   int null_check_offset = -1;
4910   if (!swap_reg_contains_mark) {
4911     null_check_offset = offset();
4912     movq(swap_reg, mark_addr);
4913   }
4914   movq(tmp_reg, swap_reg);
4915   andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4916   cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
4917   jcc(Assembler::notEqual, cas_label);
4918   // The bias pattern is present in the object's header. Need to check
4919   // whether the bias owner and the epoch are both still current.
4920   load_prototype_header(tmp_reg, obj_reg);
4921   orq(tmp_reg, r15_thread);
4922   xorq(tmp_reg, swap_reg);
4923   andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
4924   if (counters != NULL) {
4925     cond_inc32(Assembler::zero,
4926                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
4927   }
4928   jcc(Assembler::equal, done);
4929 
4930   Label try_revoke_bias;
4931   Label try_rebias;
4932 
4933   // At this point we know that the header has the bias pattern and
4934   // that we are not the bias owner in the current epoch. We need to
4935   // figure out more details about the state of the header in order to
4936   // know what operations can be legally performed on the object's
4937   // header.
4938 
4939   // If the low three bits in the xor result aren't clear, that means
4940   // the prototype header is no longer biased and we have to revoke
4941   // the bias on this object.
4942   testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4943   jcc(Assembler::notZero, try_revoke_bias);
4944 
4945   // Biasing is still enabled for this data type. See whether the
4946   // epoch of the current bias is still valid, meaning that the epoch
4947   // bits of the mark word are equal to the epoch bits of the
4948   // prototype header. (Note that the prototype header's epoch bits
4949   // only change at a safepoint.) If not, attempt to rebias the object
4950   // toward the current thread. Note that we must be absolutely sure
4951   // that the current epoch is invalid in order to do this because
4952   // otherwise the manipulations it performs on the mark word are
4953   // illegal.
4954   testq(tmp_reg, markOopDesc::epoch_mask_in_place);
4955   jcc(Assembler::notZero, try_rebias);
4956 
4957   // The epoch of the current bias is still valid but we know nothing
4958   // about the owner; it might be set or it might be clear. Try to
4959   // acquire the bias of the object using an atomic operation. If this
4960   // fails we will go in to the runtime to revoke the object's bias.
4961   // Note that we first construct the presumed unbiased header so we
4962   // don't accidentally blow away another thread's valid bias.
4963   andq(swap_reg,
4964        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4965   movq(tmp_reg, swap_reg);
4966   orq(tmp_reg, r15_thread);
4967   if (os::is_MP()) {
4968     lock();
4969   }
4970   cmpxchgq(tmp_reg, Address(obj_reg, 0));
4971   // If the biasing toward our thread failed, this means that
4972   // another thread succeeded in biasing it toward itself and we
4973   // need to revoke that bias. The revocation will occur in the
4974   // interpreter runtime in the slow case.
4975   if (counters != NULL) {
4976     cond_inc32(Assembler::zero,
4977                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
4978   }
4979   if (slow_case != NULL) {
4980     jcc(Assembler::notZero, *slow_case);
4981   }
4982   jmp(done);
4983 
4984   bind(try_rebias);
4985   // At this point we know the epoch has expired, meaning that the
4986   // current "bias owner", if any, is actually invalid. Under these
4987   // circumstances _only_, we are allowed to use the current header's
4988   // value as the comparison value when doing the cas to acquire the
4989   // bias in the current epoch. In other words, we allow transfer of
4990   // the bias from one thread to another directly in this situation.
4991   //
4992   // FIXME: due to a lack of registers we currently blow away the age
4993   // bits in this situation. Should attempt to preserve them.
4994   load_prototype_header(tmp_reg, obj_reg);
4995   orq(tmp_reg, r15_thread);
4996   if (os::is_MP()) {
4997     lock();
4998   }
4999   cmpxchgq(tmp_reg, Address(obj_reg, 0));
5000   // If the biasing toward our thread failed, then another thread
5001   // succeeded in biasing it toward itself and we need to revoke that
5002   // bias. The revocation will occur in the runtime in the slow case.
5003   if (counters != NULL) {
5004     cond_inc32(Assembler::zero,
5005                ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5006   }
5007   if (slow_case != NULL) {
5008     jcc(Assembler::notZero, *slow_case);
5009   }
5010   jmp(done);
5011 
5012   bind(try_revoke_bias);
5013   // The prototype mark in the klass doesn't have the bias bit set any
5014   // more, indicating that objects of this data type are not supposed
5015   // to be biased any more. We are going to try to reset the mark of
5016   // this object to the prototype value and fall through to the
5017   // CAS-based locking scheme. Note that if our CAS fails, it means
5018   // that another thread raced us for the privilege of revoking the
5019   // bias of this particular object, so it's okay to continue in the
5020   // normal locking code.
5021   //
5022   // FIXME: due to a lack of registers we currently blow away the age
5023   // bits in this situation. Should attempt to preserve them.
5024   load_prototype_header(tmp_reg, obj_reg);
5025   if (os::is_MP()) {
5026     lock();
5027   }
5028   cmpxchgq(tmp_reg, Address(obj_reg, 0));
5029   // Fall through to the normal CAS-based lock, because no matter what
5030   // the result of the above CAS, some thread must have succeeded in
5031   // removing the bias bit from the object's header.
5032   if (counters != NULL) {
5033     cond_inc32(Assembler::zero,
5034                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5035   }
5036 
5037   bind(cas_label);
5038 
5039   return null_check_offset;
5040 }
5041 
5042 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
5043   Label L, E;
5044 
5045 #ifdef _WIN64
5046   // Windows always allocates space for it's register args
5047   assert(num_args <= 4, "only register arguments supported");
5048   subq(rsp,  frame::arg_reg_save_area_bytes);
5049 #endif
5050 
5051   // Align stack if necessary
5052   testl(rsp, 15);
5053   jcc(Assembler::zero, L);
5054 
5055   subq(rsp, 8);
5056   {
5057     call(RuntimeAddress(entry_point));
5058   }
5059   addq(rsp, 8);
5060   jmp(E);
5061 
5062   bind(L);
5063   {
5064     call(RuntimeAddress(entry_point));
5065   }
5066 
5067   bind(E);
5068 
5069 #ifdef _WIN64
5070   // restore stack pointer
5071   addq(rsp, frame::arg_reg_save_area_bytes);
5072 #endif
5073 
5074 }
5075 
5076 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
5077   assert(!src2.is_lval(), "should use cmpptr");
5078 
5079   if (reachable(src2)) {
5080     cmpq(src1, as_Address(src2));
5081   } else {
5082     lea(rscratch1, src2);
5083     Assembler::cmpq(src1, Address(rscratch1, 0));
5084   }
5085 }
5086 
5087 int MacroAssembler::corrected_idivq(Register reg) {
5088   // Full implementation of Java ldiv and lrem; checks for special
5089   // case as described in JVM spec., p.243 & p.271.  The function
5090   // returns the (pc) offset of the idivl instruction - may be needed
5091   // for implicit exceptions.
5092   //
5093   //         normal case                           special case
5094   //
5095   // input : rax: dividend                         min_long
5096   //         reg: divisor   (may not be eax/edx)   -1
5097   //
5098   // output: rax: quotient  (= rax idiv reg)       min_long
5099   //         rdx: remainder (= rax irem reg)       0
5100   assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
5101   static const int64_t min_long = 0x8000000000000000;
5102   Label normal_case, special_case;
5103 
5104   // check for special case
5105   cmp64(rax, ExternalAddress((address) &min_long));
5106   jcc(Assembler::notEqual, normal_case);
5107   xorl(rdx, rdx); // prepare rdx for possible special case (where
5108                   // remainder = 0)
5109   cmpq(reg, -1);
5110   jcc(Assembler::equal, special_case);
5111 
5112   // handle normal case
5113   bind(normal_case);
5114   cdqq();
5115   int idivq_offset = offset();
5116   idivq(reg);
5117 
5118   // normal and special case exit
5119   bind(special_case);
5120 
5121   return idivq_offset;
5122 }
5123 
5124 void MacroAssembler::decrementq(Register reg, int value) {
5125   if (value == min_jint) { subq(reg, value); return; }
5126   if (value <  0) { incrementq(reg, -value); return; }
5127   if (value == 0) {                        ; return; }
5128   if (value == 1 && UseIncDec) { decq(reg) ; return; }
5129   /* else */      { subq(reg, value)       ; return; }
5130 }
5131 
5132 void MacroAssembler::decrementq(Address dst, int value) {
5133   if (value == min_jint) { subq(dst, value); return; }
5134   if (value <  0) { incrementq(dst, -value); return; }
5135   if (value == 0) {                        ; return; }
5136   if (value == 1 && UseIncDec) { decq(dst) ; return; }
5137   /* else */      { subq(dst, value)       ; return; }
5138 }
5139 
5140 void MacroAssembler::fat_nop() {
5141   // A 5 byte nop that is safe for patching (see patch_verified_entry)
5142   // Recommened sequence from 'Software Optimization Guide for the AMD
5143   // Hammer Processor'
5144   emit_byte(0x66);
5145   emit_byte(0x66);
5146   emit_byte(0x90);
5147   emit_byte(0x66);
5148   emit_byte(0x90);
5149 }
5150 
5151 void MacroAssembler::incrementq(Register reg, int value) {
5152   if (value == min_jint) { addq(reg, value); return; }
5153   if (value <  0) { decrementq(reg, -value); return; }
5154   if (value == 0) {                        ; return; }
5155   if (value == 1 && UseIncDec) { incq(reg) ; return; }
5156   /* else */      { addq(reg, value)       ; return; }
5157 }
5158 
5159 void MacroAssembler::incrementq(Address dst, int value) {
5160   if (value == min_jint) { addq(dst, value); return; }
5161   if (value <  0) { decrementq(dst, -value); return; }
5162   if (value == 0) {                        ; return; }
5163   if (value == 1 && UseIncDec) { incq(dst) ; return; }
5164   /* else */      { addq(dst, value)       ; return; }
5165 }
5166 
5167 // 32bit can do a case table jump in one instruction but we no longer allow the base
5168 // to be installed in the Address class
5169 void MacroAssembler::jump(ArrayAddress entry) {
5170   lea(rscratch1, entry.base());
5171   Address dispatch = entry.index();
5172   assert(dispatch._base == noreg, "must be");
5173   dispatch._base = rscratch1;
5174   jmp(dispatch);
5175 }
5176 
5177 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
5178   ShouldNotReachHere(); // 64bit doesn't use two regs
5179   cmpq(x_lo, y_lo);
5180 }
5181 
5182 void MacroAssembler::lea(Register dst, AddressLiteral src) {
5183     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5184 }
5185 
5186 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
5187   mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
5188   movptr(dst, rscratch1);
5189 }
5190 
5191 void MacroAssembler::leave() {
5192   // %%% is this really better? Why not on 32bit too?
5193   emit_byte(0xC9); // LEAVE
5194 }
5195 
5196 void MacroAssembler::lneg(Register hi, Register lo) {
5197   ShouldNotReachHere(); // 64bit doesn't use two regs
5198   negq(lo);
5199 }
5200 
5201 void MacroAssembler::movoop(Register dst, jobject obj) {
5202   mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5203 }
5204 
5205 void MacroAssembler::movoop(Address dst, jobject obj) {
5206   mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5207   movq(dst, rscratch1);
5208 }
5209 
5210 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
5211   if (src.is_lval()) {
5212     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5213   } else {
5214     if (reachable(src)) {
5215       movq(dst, as_Address(src));
5216     } else {
5217       lea(rscratch1, src);
5218       movq(dst, Address(rscratch1,0));
5219     }
5220   }
5221 }
5222 
5223 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
5224   movq(as_Address(dst), src);
5225 }
5226 
5227 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
5228   movq(dst, as_Address(src));
5229 }
5230 
5231 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
5232 void MacroAssembler::movptr(Address dst, intptr_t src) {
5233   mov64(rscratch1, src);
5234   movq(dst, rscratch1);
5235 }
5236 
5237 // These are mostly for initializing NULL
5238 void MacroAssembler::movptr(Address dst, int32_t src) {
5239   movslq(dst, src);
5240 }
5241 
5242 void MacroAssembler::movptr(Register dst, int32_t src) {
5243   mov64(dst, (intptr_t)src);
5244 }
5245 
5246 void MacroAssembler::pushoop(jobject obj) {
5247   movoop(rscratch1, obj);
5248   push(rscratch1);
5249 }
5250 
5251 void MacroAssembler::pushptr(AddressLiteral src) {
5252   lea(rscratch1, src);
5253   if (src.is_lval()) {
5254     push(rscratch1);
5255   } else {
5256     pushq(Address(rscratch1, 0));
5257   }
5258 }
5259 
5260 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
5261                                            bool clear_pc) {
5262   // we must set sp to zero to clear frame
5263   movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), (int32_t)NULL_WORD);
5264   // must clear fp, so that compiled frames are not confused; it is
5265   // possible that we need it only for debugging
5266   if (clear_fp) {
5267     movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), (int32_t)NULL_WORD);
5268   }
5269 
5270   if (clear_pc) {
5271     movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), (int32_t)NULL_WORD);
5272   }
5273 }
5274 
5275 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
5276                                          Register last_java_fp,
5277                                          address  last_java_pc) {
5278   // determine last_java_sp register
5279   if (!last_java_sp->is_valid()) {
5280     last_java_sp = rsp;
5281   }
5282 
5283   // last_java_fp is optional
5284   if (last_java_fp->is_valid()) {
5285     movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
5286            last_java_fp);
5287   }
5288 
5289   // last_java_pc is optional
5290   if (last_java_pc != NULL) {
5291     Address java_pc(r15_thread,
5292                     JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
5293     lea(rscratch1, InternalAddress(last_java_pc));
5294     movptr(java_pc, rscratch1);
5295   }
5296 
5297   movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
5298 }
5299 
5300 static void pass_arg0(MacroAssembler* masm, Register arg) {
5301   if (c_rarg0 != arg ) {
5302     masm->mov(c_rarg0, arg);
5303   }
5304 }
5305 
5306 static void pass_arg1(MacroAssembler* masm, Register arg) {
5307   if (c_rarg1 != arg ) {
5308     masm->mov(c_rarg1, arg);
5309   }
5310 }
5311 
5312 static void pass_arg2(MacroAssembler* masm, Register arg) {
5313   if (c_rarg2 != arg ) {
5314     masm->mov(c_rarg2, arg);
5315   }
5316 }
5317 
5318 static void pass_arg3(MacroAssembler* masm, Register arg) {
5319   if (c_rarg3 != arg ) {
5320     masm->mov(c_rarg3, arg);
5321   }
5322 }
5323 
5324 void MacroAssembler::stop(const char* msg) {
5325   address rip = pc();
5326   pusha(); // get regs on stack
5327   lea(c_rarg0, ExternalAddress((address) msg));
5328   lea(c_rarg1, InternalAddress(rip));
5329   movq(c_rarg2, rsp); // pass pointer to regs array
5330   andq(rsp, -16); // align stack as required by ABI
5331   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
5332   hlt();
5333 }
5334 
5335 void MacroAssembler::warn(const char* msg) {
5336   push(r12);
5337   movq(r12, rsp);
5338   andq(rsp, -16);     // align stack as required by push_CPU_state and call
5339 
5340   push_CPU_state();   // keeps alignment at 16 bytes
5341   lea(c_rarg0, ExternalAddress((address) msg));
5342   call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
5343   pop_CPU_state();
5344 
5345   movq(rsp, r12);
5346   pop(r12);
5347 }
5348 
5349 #ifndef PRODUCT
5350 extern "C" void findpc(intptr_t x);
5351 #endif
5352 
5353 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
5354   // In order to get locks to work, we need to fake a in_VM state
5355   if (ShowMessageBoxOnError ) {
5356     JavaThread* thread = JavaThread::current();
5357     JavaThreadState saved_state = thread->thread_state();
5358     thread->set_thread_state(_thread_in_vm);
5359 #ifndef PRODUCT
5360     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
5361       ttyLocker ttyl;
5362       BytecodeCounter::print();
5363     }
5364 #endif
5365     // To see where a verify_oop failed, get $ebx+40/X for this frame.
5366     // XXX correct this offset for amd64
5367     // This is the value of eip which points to where verify_oop will return.
5368     if (os::message_box(msg, "Execution stopped, print registers?")) {
5369       ttyLocker ttyl;
5370       tty->print_cr("rip = 0x%016lx", pc);
5371 #ifndef PRODUCT
5372       tty->cr();
5373       findpc(pc);
5374       tty->cr();
5375 #endif
5376       tty->print_cr("rax = 0x%016lx", regs[15]);
5377       tty->print_cr("rbx = 0x%016lx", regs[12]);
5378       tty->print_cr("rcx = 0x%016lx", regs[14]);
5379       tty->print_cr("rdx = 0x%016lx", regs[13]);
5380       tty->print_cr("rdi = 0x%016lx", regs[8]);
5381       tty->print_cr("rsi = 0x%016lx", regs[9]);
5382       tty->print_cr("rbp = 0x%016lx", regs[10]);
5383       tty->print_cr("rsp = 0x%016lx", regs[11]);
5384       tty->print_cr("r8  = 0x%016lx", regs[7]);
5385       tty->print_cr("r9  = 0x%016lx", regs[6]);
5386       tty->print_cr("r10 = 0x%016lx", regs[5]);
5387       tty->print_cr("r11 = 0x%016lx", regs[4]);
5388       tty->print_cr("r12 = 0x%016lx", regs[3]);
5389       tty->print_cr("r13 = 0x%016lx", regs[2]);
5390       tty->print_cr("r14 = 0x%016lx", regs[1]);
5391       tty->print_cr("r15 = 0x%016lx", regs[0]);
5392       BREAKPOINT;
5393     }
5394     ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5395   } else {
5396     ttyLocker ttyl;
5397     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
5398                     msg);
5399   }
5400 }
5401 
5402 #endif // _LP64
5403 
5404 // Now versions that are common to 32/64 bit
5405 
5406 void MacroAssembler::addptr(Register dst, int32_t imm32) {
5407   LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
5408 }
5409 
5410 void MacroAssembler::addptr(Register dst, Register src) {
5411   LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
5412 }
5413 
5414 void MacroAssembler::addptr(Address dst, Register src) {
5415   LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
5416 }
5417 
5418 void MacroAssembler::align(int modulus) {
5419   if (offset() % modulus != 0) {
5420     nop(modulus - (offset() % modulus));
5421   }
5422 }
5423 
5424 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
5425   andpd(dst, as_Address(src));
5426 }
5427 
5428 void MacroAssembler::andptr(Register dst, int32_t imm32) {
5429   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
5430 }
5431 
5432 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
5433   pushf();
5434   if (os::is_MP())
5435     lock();
5436   incrementl(counter_addr);
5437   popf();
5438 }
5439 
5440 // Writes to stack successive pages until offset reached to check for
5441 // stack overflow + shadow pages.  This clobbers tmp.
5442 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
5443   movptr(tmp, rsp);
5444   // Bang stack for total size given plus shadow page size.
5445   // Bang one page at a time because large size can bang beyond yellow and
5446   // red zones.
5447   Label loop;
5448   bind(loop);
5449   movl(Address(tmp, (-os::vm_page_size())), size );
5450   subptr(tmp, os::vm_page_size());
5451   subl(size, os::vm_page_size());
5452   jcc(Assembler::greater, loop);
5453 
5454   // Bang down shadow pages too.
5455   // The -1 because we already subtracted 1 page.
5456   for (int i = 0; i< StackShadowPages-1; i++) {
5457     // this could be any sized move but this is can be a debugging crumb
5458     // so the bigger the better.
5459     movptr(Address(tmp, (-i*os::vm_page_size())), size );
5460   }
5461 }
5462 
5463 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
5464   assert(UseBiasedLocking, "why call this otherwise?");
5465 
5466   // Check for biased locking unlock case, which is a no-op
5467   // Note: we do not have to check the thread ID for two reasons.
5468   // First, the interpreter checks for IllegalMonitorStateException at
5469   // a higher level. Second, if the bias was revoked while we held the
5470   // lock, the object could not be rebiased toward another thread, so
5471   // the bias bit would be clear.
5472   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
5473   andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
5474   cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
5475   jcc(Assembler::equal, done);
5476 }
5477 
5478 void MacroAssembler::c2bool(Register x) {
5479   // implements x == 0 ? 0 : 1
5480   // note: must only look at least-significant byte of x
5481   //       since C-style booleans are stored in one byte
5482   //       only! (was bug)
5483   andl(x, 0xFF);
5484   setb(Assembler::notZero, x);
5485 }
5486 
5487 // Wouldn't need if AddressLiteral version had new name
5488 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
5489   Assembler::call(L, rtype);
5490 }
5491 
5492 void MacroAssembler::call(Register entry) {
5493   Assembler::call(entry);
5494 }
5495 
5496 void MacroAssembler::call(AddressLiteral entry) {
5497   if (reachable(entry)) {
5498     Assembler::call_literal(entry.target(), entry.rspec());
5499   } else {
5500     lea(rscratch1, entry);
5501     Assembler::call(rscratch1);
5502   }
5503 }
5504 
5505 // Implementation of call_VM versions
5506 
5507 void MacroAssembler::call_VM(Register oop_result,
5508                              address entry_point,
5509                              bool check_exceptions) {
5510   Label C, E;
5511   call(C, relocInfo::none);
5512   jmp(E);
5513 
5514   bind(C);
5515   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
5516   ret(0);
5517 
5518   bind(E);
5519 }
5520 
5521 void MacroAssembler::call_VM(Register oop_result,
5522                              address entry_point,
5523                              Register arg_1,
5524                              bool check_exceptions) {
5525   Label C, E;
5526   call(C, relocInfo::none);
5527   jmp(E);
5528 
5529   bind(C);
5530   pass_arg1(this, arg_1);
5531   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
5532   ret(0);
5533 
5534   bind(E);
5535 }
5536 
5537 void MacroAssembler::call_VM(Register oop_result,
5538                              address entry_point,
5539                              Register arg_1,
5540                              Register arg_2,
5541                              bool check_exceptions) {
5542   Label C, E;
5543   call(C, relocInfo::none);
5544   jmp(E);
5545 
5546   bind(C);
5547 
5548   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5549 
5550   pass_arg2(this, arg_2);
5551   pass_arg1(this, arg_1);
5552   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
5553   ret(0);
5554 
5555   bind(E);
5556 }
5557 
5558 void MacroAssembler::call_VM(Register oop_result,
5559                              address entry_point,
5560                              Register arg_1,
5561                              Register arg_2,
5562                              Register arg_3,
5563                              bool check_exceptions) {
5564   Label C, E;
5565   call(C, relocInfo::none);
5566   jmp(E);
5567 
5568   bind(C);
5569 
5570   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
5571   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
5572   pass_arg3(this, arg_3);
5573 
5574   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5575   pass_arg2(this, arg_2);
5576 
5577   pass_arg1(this, arg_1);
5578   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
5579   ret(0);
5580 
5581   bind(E);
5582 }
5583 
5584 void MacroAssembler::call_VM(Register oop_result,
5585                              Register last_java_sp,
5586                              address entry_point,
5587                              int number_of_arguments,
5588                              bool check_exceptions) {
5589   Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
5590   call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
5591 }
5592 
5593 void MacroAssembler::call_VM(Register oop_result,
5594                              Register last_java_sp,
5595                              address entry_point,
5596                              Register arg_1,
5597                              bool check_exceptions) {
5598   pass_arg1(this, arg_1);
5599   call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
5600 }
5601 
5602 void MacroAssembler::call_VM(Register oop_result,
5603                              Register last_java_sp,
5604                              address entry_point,
5605                              Register arg_1,
5606                              Register arg_2,
5607                              bool check_exceptions) {
5608 
5609   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5610   pass_arg2(this, arg_2);
5611   pass_arg1(this, arg_1);
5612   call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
5613 }
5614 
5615 void MacroAssembler::call_VM(Register oop_result,
5616                              Register last_java_sp,
5617                              address entry_point,
5618                              Register arg_1,
5619                              Register arg_2,
5620                              Register arg_3,
5621                              bool check_exceptions) {
5622   LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
5623   LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
5624   pass_arg3(this, arg_3);
5625   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5626   pass_arg2(this, arg_2);
5627   pass_arg1(this, arg_1);
5628   call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
5629 }
5630 
5631 void MacroAssembler::call_VM_base(Register oop_result,
5632                                   Register java_thread,
5633                                   Register last_java_sp,
5634                                   address  entry_point,
5635                                   int      number_of_arguments,
5636                                   bool     check_exceptions) {
5637   // determine java_thread register
5638   if (!java_thread->is_valid()) {
5639 #ifdef _LP64
5640     java_thread = r15_thread;
5641 #else
5642     java_thread = rdi;
5643     get_thread(java_thread);
5644 #endif // LP64
5645   }
5646   // determine last_java_sp register
5647   if (!last_java_sp->is_valid()) {
5648     last_java_sp = rsp;
5649   }
5650   // debugging support
5651   assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
5652   LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
5653   assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
5654   assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
5655 
5656   // push java thread (becomes first argument of C function)
5657 
5658   NOT_LP64(push(java_thread); number_of_arguments++);
5659   LP64_ONLY(mov(c_rarg0, r15_thread));
5660 
5661   // set last Java frame before call
5662   assert(last_java_sp != rbp, "can't use ebp/rbp");
5663 
5664   // Only interpreter should have to set fp
5665   set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
5666 
5667   // do the call, remove parameters
5668   MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
5669 
5670   // restore the thread (cannot use the pushed argument since arguments
5671   // may be overwritten by C code generated by an optimizing compiler);
5672   // however can use the register value directly if it is callee saved.
5673   if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
5674     // rdi & rsi (also r15) are callee saved -> nothing to do
5675 #ifdef ASSERT
5676     guarantee(java_thread != rax, "change this code");
5677     push(rax);
5678     { Label L;
5679       get_thread(rax);
5680       cmpptr(java_thread, rax);
5681       jcc(Assembler::equal, L);
5682       stop("MacroAssembler::call_VM_base: rdi not callee saved?");
5683       bind(L);
5684     }
5685     pop(rax);
5686 #endif
5687   } else {
5688     get_thread(java_thread);
5689   }
5690   // reset last Java frame
5691   // Only interpreter should have to clear fp
5692   reset_last_Java_frame(java_thread, true, false);
5693 
5694 #ifndef CC_INTERP
5695    // C++ interp handles this in the interpreter
5696   check_and_handle_popframe(java_thread);
5697   check_and_handle_earlyret(java_thread);
5698 #endif /* CC_INTERP */
5699 
5700   if (check_exceptions) {
5701     // check for pending exceptions (java_thread is set upon return)
5702     cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
5703 #ifndef _LP64
5704     jump_cc(Assembler::notEqual,
5705             RuntimeAddress(StubRoutines::forward_exception_entry()));
5706 #else
5707     // This used to conditionally jump to forward_exception however it is
5708     // possible if we relocate that the branch will not reach. So we must jump
5709     // around so we can always reach
5710 
5711     Label ok;
5712     jcc(Assembler::equal, ok);
5713     jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
5714     bind(ok);
5715 #endif // LP64
5716   }
5717 
5718   // get oop result if there is one and reset the value in the thread
5719   if (oop_result->is_valid()) {
5720     movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
5721     movptr(Address(java_thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
5722     verify_oop(oop_result, "broken oop in call_VM_base");
5723   }
5724 }
5725 
5726 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
5727 
5728   // Calculate the value for last_Java_sp
5729   // somewhat subtle. call_VM does an intermediate call
5730   // which places a return address on the stack just under the
5731   // stack pointer as the user finsihed with it. This allows
5732   // use to retrieve last_Java_pc from last_Java_sp[-1].
5733   // On 32bit we then have to push additional args on the stack to accomplish
5734   // the actual requested call. On 64bit call_VM only can use register args
5735   // so the only extra space is the return address that call_VM created.
5736   // This hopefully explains the calculations here.
5737 
5738 #ifdef _LP64
5739   // We've pushed one address, correct last_Java_sp
5740   lea(rax, Address(rsp, wordSize));
5741 #else
5742   lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
5743 #endif // LP64
5744 
5745   call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
5746 
5747 }
5748 
5749 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
5750   call_VM_leaf_base(entry_point, number_of_arguments);
5751 }
5752 
5753 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
5754   pass_arg0(this, arg_0);
5755   call_VM_leaf(entry_point, 1);
5756 }
5757 
5758 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
5759 
5760   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
5761   pass_arg1(this, arg_1);
5762   pass_arg0(this, arg_0);
5763   call_VM_leaf(entry_point, 2);
5764 }
5765 
5766 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
5767   LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
5768   LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5769   pass_arg2(this, arg_2);
5770   LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
5771   pass_arg1(this, arg_1);
5772   pass_arg0(this, arg_0);
5773   call_VM_leaf(entry_point, 3);
5774 }
5775 
5776 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
5777 }
5778 
5779 void MacroAssembler::check_and_handle_popframe(Register java_thread) {
5780 }
5781 
5782 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
5783   if (reachable(src1)) {
5784     cmpl(as_Address(src1), imm);
5785   } else {
5786     lea(rscratch1, src1);
5787     cmpl(Address(rscratch1, 0), imm);
5788   }
5789 }
5790 
5791 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
5792   assert(!src2.is_lval(), "use cmpptr");
5793   if (reachable(src2)) {
5794     cmpl(src1, as_Address(src2));
5795   } else {
5796     lea(rscratch1, src2);
5797     cmpl(src1, Address(rscratch1, 0));
5798   }
5799 }
5800 
5801 void MacroAssembler::cmp32(Register src1, int32_t imm) {
5802   Assembler::cmpl(src1, imm);
5803 }
5804 
5805 void MacroAssembler::cmp32(Register src1, Address src2) {
5806   Assembler::cmpl(src1, src2);
5807 }
5808 
5809 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
5810   ucomisd(opr1, opr2);
5811 
5812   Label L;
5813   if (unordered_is_less) {
5814     movl(dst, -1);
5815     jcc(Assembler::parity, L);
5816     jcc(Assembler::below , L);
5817     movl(dst, 0);
5818     jcc(Assembler::equal , L);
5819     increment(dst);
5820   } else { // unordered is greater
5821     movl(dst, 1);
5822     jcc(Assembler::parity, L);
5823     jcc(Assembler::above , L);
5824     movl(dst, 0);
5825     jcc(Assembler::equal , L);
5826     decrementl(dst);
5827   }
5828   bind(L);
5829 }
5830 
5831 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
5832   ucomiss(opr1, opr2);
5833 
5834   Label L;
5835   if (unordered_is_less) {
5836     movl(dst, -1);
5837     jcc(Assembler::parity, L);
5838     jcc(Assembler::below , L);
5839     movl(dst, 0);
5840     jcc(Assembler::equal , L);
5841     increment(dst);
5842   } else { // unordered is greater
5843     movl(dst, 1);
5844     jcc(Assembler::parity, L);
5845     jcc(Assembler::above , L);
5846     movl(dst, 0);
5847     jcc(Assembler::equal , L);
5848     decrementl(dst);
5849   }
5850   bind(L);
5851 }
5852 
5853 
5854 void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
5855   if (reachable(src1)) {
5856     cmpb(as_Address(src1), imm);
5857   } else {
5858     lea(rscratch1, src1);
5859     cmpb(Address(rscratch1, 0), imm);
5860   }
5861 }
5862 
5863 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
5864 #ifdef _LP64
5865   if (src2.is_lval()) {
5866     movptr(rscratch1, src2);
5867     Assembler::cmpq(src1, rscratch1);
5868   } else if (reachable(src2)) {
5869     cmpq(src1, as_Address(src2));
5870   } else {
5871     lea(rscratch1, src2);
5872     Assembler::cmpq(src1, Address(rscratch1, 0));
5873   }
5874 #else
5875   if (src2.is_lval()) {
5876     cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
5877   } else {
5878     cmpl(src1, as_Address(src2));
5879   }
5880 #endif // _LP64
5881 }
5882 
5883 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
5884   assert(src2.is_lval(), "not a mem-mem compare");
5885 #ifdef _LP64
5886   // moves src2's literal address
5887   movptr(rscratch1, src2);
5888   Assembler::cmpq(src1, rscratch1);
5889 #else
5890   cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
5891 #endif // _LP64
5892 }
5893 
5894 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
5895   if (reachable(adr)) {
5896     if (os::is_MP())
5897       lock();
5898     cmpxchgptr(reg, as_Address(adr));
5899   } else {
5900     lea(rscratch1, adr);
5901     if (os::is_MP())
5902       lock();
5903     cmpxchgptr(reg, Address(rscratch1, 0));
5904   }
5905 }
5906 
5907 void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
5908   LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
5909 }
5910 
5911 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
5912   comisd(dst, as_Address(src));
5913 }
5914 
5915 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
5916   comiss(dst, as_Address(src));
5917 }
5918 
5919 
5920 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
5921   Condition negated_cond = negate_condition(cond);
5922   Label L;
5923   jcc(negated_cond, L);
5924   atomic_incl(counter_addr);
5925   bind(L);
5926 }
5927 
5928 int MacroAssembler::corrected_idivl(Register reg) {
5929   // Full implementation of Java idiv and irem; checks for
5930   // special case as described in JVM spec., p.243 & p.271.
5931   // The function returns the (pc) offset of the idivl
5932   // instruction - may be needed for implicit exceptions.
5933   //
5934   //         normal case                           special case
5935   //
5936   // input : rax,: dividend                         min_int
5937   //         reg: divisor   (may not be rax,/rdx)   -1
5938   //
5939   // output: rax,: quotient  (= rax, idiv reg)       min_int
5940   //         rdx: remainder (= rax, irem reg)       0
5941   assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
5942   const int min_int = 0x80000000;
5943   Label normal_case, special_case;
5944 
5945   // check for special case
5946   cmpl(rax, min_int);
5947   jcc(Assembler::notEqual, normal_case);
5948   xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
5949   cmpl(reg, -1);
5950   jcc(Assembler::equal, special_case);
5951 
5952   // handle normal case
5953   bind(normal_case);
5954   cdql();
5955   int idivl_offset = offset();
5956   idivl(reg);
5957 
5958   // normal and special case exit
5959   bind(special_case);
5960 
5961   return idivl_offset;
5962 }
5963 
5964 
5965 
5966 void MacroAssembler::decrementl(Register reg, int value) {
5967   if (value == min_jint) {subl(reg, value) ; return; }
5968   if (value <  0) { incrementl(reg, -value); return; }
5969   if (value == 0) {                        ; return; }
5970   if (value == 1 && UseIncDec) { decl(reg) ; return; }
5971   /* else */      { subl(reg, value)       ; return; }
5972 }
5973 
5974 void MacroAssembler::decrementl(Address dst, int value) {
5975   if (value == min_jint) {subl(dst, value) ; return; }
5976   if (value <  0) { incrementl(dst, -value); return; }
5977   if (value == 0) {                        ; return; }
5978   if (value == 1 && UseIncDec) { decl(dst) ; return; }
5979   /* else */      { subl(dst, value)       ; return; }
5980 }
5981 
5982 void MacroAssembler::division_with_shift (Register reg, int shift_value) {
5983   assert (shift_value > 0, "illegal shift value");
5984   Label _is_positive;
5985   testl (reg, reg);
5986   jcc (Assembler::positive, _is_positive);
5987   int offset = (1 << shift_value) - 1 ;
5988 
5989   if (offset == 1) {
5990     incrementl(reg);
5991   } else {
5992     addl(reg, offset);
5993   }
5994 
5995   bind (_is_positive);
5996   sarl(reg, shift_value);
5997 }
5998 
5999 // !defined(COMPILER2) is because of stupid core builds
6000 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
6001 void MacroAssembler::empty_FPU_stack() {
6002   if (VM_Version::supports_mmx()) {
6003     emms();
6004   } else {
6005     for (int i = 8; i-- > 0; ) ffree(i);
6006   }
6007 }
6008 #endif // !LP64 || C1 || !C2
6009 
6010 
6011 // Defines obj, preserves var_size_in_bytes
6012 void MacroAssembler::eden_allocate(Register obj,
6013                                    Register var_size_in_bytes,
6014                                    int con_size_in_bytes,
6015                                    Register t1,
6016                                    Label& slow_case) {
6017   assert(obj == rax, "obj must be in rax, for cmpxchg");
6018   assert_different_registers(obj, var_size_in_bytes, t1);
6019   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
6020     jmp(slow_case);
6021   } else {
6022     Register end = t1;
6023     Label retry;
6024     bind(retry);
6025     ExternalAddress heap_top((address) Universe::heap()->top_addr());
6026     movptr(obj, heap_top);
6027     if (var_size_in_bytes == noreg) {
6028       lea(end, Address(obj, con_size_in_bytes));
6029     } else {
6030       lea(end, Address(obj, var_size_in_bytes, Address::times_1));
6031     }
6032     // if end < obj then we wrapped around => object too long => slow case
6033     cmpptr(end, obj);
6034     jcc(Assembler::below, slow_case);
6035     cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
6036     jcc(Assembler::above, slow_case);
6037     // Compare obj with the top addr, and if still equal, store the new top addr in
6038     // end at the address of the top addr pointer. Sets ZF if was equal, and clears
6039     // it otherwise. Use lock prefix for atomicity on MPs.
6040     locked_cmpxchgptr(end, heap_top);
6041     jcc(Assembler::notEqual, retry);
6042   }
6043 }
6044 
6045 void MacroAssembler::enter() {
6046   push(rbp);
6047   mov(rbp, rsp);
6048 }
6049 
6050 void MacroAssembler::fcmp(Register tmp) {
6051   fcmp(tmp, 1, true, true);
6052 }
6053 
6054 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
6055   assert(!pop_right || pop_left, "usage error");
6056   if (VM_Version::supports_cmov()) {
6057     assert(tmp == noreg, "unneeded temp");
6058     if (pop_left) {
6059       fucomip(index);
6060     } else {
6061       fucomi(index);
6062     }
6063     if (pop_right) {
6064       fpop();
6065     }
6066   } else {
6067     assert(tmp != noreg, "need temp");
6068     if (pop_left) {
6069       if (pop_right) {
6070         fcompp();
6071       } else {
6072         fcomp(index);
6073       }
6074     } else {
6075       fcom(index);
6076     }
6077     // convert FPU condition into eflags condition via rax,
6078     save_rax(tmp);
6079     fwait(); fnstsw_ax();
6080     sahf();
6081     restore_rax(tmp);
6082   }
6083   // condition codes set as follows:
6084   //
6085   // CF (corresponds to C0) if x < y
6086   // PF (corresponds to C2) if unordered
6087   // ZF (corresponds to C3) if x = y
6088 }
6089 
6090 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
6091   fcmp2int(dst, unordered_is_less, 1, true, true);
6092 }
6093 
6094 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
6095   fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
6096   Label L;
6097   if (unordered_is_less) {
6098     movl(dst, -1);
6099     jcc(Assembler::parity, L);
6100     jcc(Assembler::below , L);
6101     movl(dst, 0);
6102     jcc(Assembler::equal , L);
6103     increment(dst);
6104   } else { // unordered is greater
6105     movl(dst, 1);
6106     jcc(Assembler::parity, L);
6107     jcc(Assembler::above , L);
6108     movl(dst, 0);
6109     jcc(Assembler::equal , L);
6110     decrementl(dst);
6111   }
6112   bind(L);
6113 }
6114 
6115 void MacroAssembler::fld_d(AddressLiteral src) {
6116   fld_d(as_Address(src));
6117 }
6118 
6119 void MacroAssembler::fld_s(AddressLiteral src) {
6120   fld_s(as_Address(src));
6121 }
6122 
6123 void MacroAssembler::fld_x(AddressLiteral src) {
6124   Assembler::fld_x(as_Address(src));
6125 }
6126 
6127 void MacroAssembler::fldcw(AddressLiteral src) {
6128   Assembler::fldcw(as_Address(src));
6129 }
6130 
6131 void MacroAssembler::fpop() {
6132   ffree();
6133   fincstp();
6134 }
6135 
6136 void MacroAssembler::fremr(Register tmp) {
6137   save_rax(tmp);
6138   { Label L;
6139     bind(L);
6140     fprem();
6141     fwait(); fnstsw_ax();
6142 #ifdef _LP64
6143     testl(rax, 0x400);
6144     jcc(Assembler::notEqual, L);
6145 #else
6146     sahf();
6147     jcc(Assembler::parity, L);
6148 #endif // _LP64
6149   }
6150   restore_rax(tmp);
6151   // Result is in ST0.
6152   // Note: fxch & fpop to get rid of ST1
6153   // (otherwise FPU stack could overflow eventually)
6154   fxch(1);
6155   fpop();
6156 }
6157 
6158 
6159 void MacroAssembler::incrementl(AddressLiteral dst) {
6160   if (reachable(dst)) {
6161     incrementl(as_Address(dst));
6162   } else {
6163     lea(rscratch1, dst);
6164     incrementl(Address(rscratch1, 0));
6165   }
6166 }
6167 
6168 void MacroAssembler::incrementl(ArrayAddress dst) {
6169   incrementl(as_Address(dst));
6170 }
6171 
6172 void MacroAssembler::incrementl(Register reg, int value) {
6173   if (value == min_jint) {addl(reg, value) ; return; }
6174   if (value <  0) { decrementl(reg, -value); return; }
6175   if (value == 0) {                        ; return; }
6176   if (value == 1 && UseIncDec) { incl(reg) ; return; }
6177   /* else */      { addl(reg, value)       ; return; }
6178 }
6179 
6180 void MacroAssembler::incrementl(Address dst, int value) {
6181   if (value == min_jint) {addl(dst, value) ; return; }
6182   if (value <  0) { decrementl(dst, -value); return; }
6183   if (value == 0) {                        ; return; }
6184   if (value == 1 && UseIncDec) { incl(dst) ; return; }
6185   /* else */      { addl(dst, value)       ; return; }
6186 }
6187 
6188 void MacroAssembler::jump(AddressLiteral dst) {
6189   if (reachable(dst)) {
6190     jmp_literal(dst.target(), dst.rspec());
6191   } else {
6192     lea(rscratch1, dst);
6193     jmp(rscratch1);
6194   }
6195 }
6196 
6197 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
6198   if (reachable(dst)) {
6199     InstructionMark im(this);
6200     relocate(dst.reloc());
6201     const int short_size = 2;
6202     const int long_size = 6;
6203     int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
6204     if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
6205       // 0111 tttn #8-bit disp
6206       emit_byte(0x70 | cc);
6207       emit_byte((offs - short_size) & 0xFF);
6208     } else {
6209       // 0000 1111 1000 tttn #32-bit disp
6210       emit_byte(0x0F);
6211       emit_byte(0x80 | cc);
6212       emit_long(offs - long_size);
6213     }
6214   } else {
6215 #ifdef ASSERT
6216     warning("reversing conditional branch");
6217 #endif /* ASSERT */
6218     Label skip;
6219     jccb(reverse[cc], skip);
6220     lea(rscratch1, dst);
6221     Assembler::jmp(rscratch1);
6222     bind(skip);
6223   }
6224 }
6225 
6226 void MacroAssembler::ldmxcsr(AddressLiteral src) {
6227   if (reachable(src)) {
6228     Assembler::ldmxcsr(as_Address(src));
6229   } else {
6230     lea(rscratch1, src);
6231     Assembler::ldmxcsr(Address(rscratch1, 0));
6232   }
6233 }
6234 
6235 int MacroAssembler::load_signed_byte(Register dst, Address src) {
6236   int off;
6237   if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6238     off = offset();
6239     movsbl(dst, src); // movsxb
6240   } else {
6241     off = load_unsigned_byte(dst, src);
6242     shll(dst, 24);
6243     sarl(dst, 24);
6244   }
6245   return off;
6246 }
6247 
6248 // word => int32 which seems bad for 64bit
6249 int MacroAssembler::load_signed_word(Register dst, Address src) {
6250   int off;
6251   if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6252     // This is dubious to me since it seems safe to do a signed 16 => 64 bit
6253     // version but this is what 64bit has always done. This seems to imply
6254     // that users are only using 32bits worth.
6255     off = offset();
6256     movswl(dst, src); // movsxw
6257   } else {
6258     off = load_unsigned_word(dst, src);
6259     shll(dst, 16);
6260     sarl(dst, 16);
6261   }
6262   return off;
6263 }
6264 
6265 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
6266   // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6267   // and "3.9 Partial Register Penalties", p. 22).
6268   int off;
6269   if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
6270     off = offset();
6271     movzbl(dst, src); // movzxb
6272   } else {
6273     xorl(dst, dst);
6274     off = offset();
6275     movb(dst, src);
6276   }
6277   return off;
6278 }
6279 
6280 int MacroAssembler::load_unsigned_word(Register dst, Address src) {
6281   // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6282   // and "3.9 Partial Register Penalties", p. 22).
6283   int off;
6284   if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
6285     off = offset();
6286     movzwl(dst, src); // movzxw
6287   } else {
6288     xorl(dst, dst);
6289     off = offset();
6290     movw(dst, src);
6291   }
6292   return off;
6293 }
6294 
6295 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
6296   if (reachable(dst)) {
6297     movl(as_Address(dst), src);
6298   } else {
6299     lea(rscratch1, dst);
6300     movl(Address(rscratch1, 0), src);
6301   }
6302 }
6303 
6304 void MacroAssembler::mov32(Register dst, AddressLiteral src) {
6305   if (reachable(src)) {
6306     movl(dst, as_Address(src));
6307   } else {
6308     lea(rscratch1, src);
6309     movl(dst, Address(rscratch1, 0));
6310   }
6311 }
6312 
6313 // C++ bool manipulation
6314 
6315 void MacroAssembler::movbool(Register dst, Address src) {
6316   if(sizeof(bool) == 1)
6317     movb(dst, src);
6318   else if(sizeof(bool) == 2)
6319     movw(dst, src);
6320   else if(sizeof(bool) == 4)
6321     movl(dst, src);
6322   else
6323     // unsupported
6324     ShouldNotReachHere();
6325 }
6326 
6327 void MacroAssembler::movbool(Address dst, bool boolconst) {
6328   if(sizeof(bool) == 1)
6329     movb(dst, (int) boolconst);
6330   else if(sizeof(bool) == 2)
6331     movw(dst, (int) boolconst);
6332   else if(sizeof(bool) == 4)
6333     movl(dst, (int) boolconst);
6334   else
6335     // unsupported
6336     ShouldNotReachHere();
6337 }
6338 
6339 void MacroAssembler::movbool(Address dst, Register src) {
6340   if(sizeof(bool) == 1)
6341     movb(dst, src);
6342   else if(sizeof(bool) == 2)
6343     movw(dst, src);
6344   else if(sizeof(bool) == 4)
6345     movl(dst, src);
6346   else
6347     // unsupported
6348     ShouldNotReachHere();
6349 }
6350 
6351 void MacroAssembler::movbyte(ArrayAddress dst, int src) {
6352   movb(as_Address(dst), src);
6353 }
6354 
6355 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
6356   if (reachable(src)) {
6357     if (UseXmmLoadAndClearUpper) {
6358       movsd (dst, as_Address(src));
6359     } else {
6360       movlpd(dst, as_Address(src));
6361     }
6362   } else {
6363     lea(rscratch1, src);
6364     if (UseXmmLoadAndClearUpper) {
6365       movsd (dst, Address(rscratch1, 0));
6366     } else {
6367       movlpd(dst, Address(rscratch1, 0));
6368     }
6369   }
6370 }
6371 
6372 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
6373   if (reachable(src)) {
6374     movss(dst, as_Address(src));
6375   } else {
6376     lea(rscratch1, src);
6377     movss(dst, Address(rscratch1, 0));
6378   }
6379 }
6380 
6381 void MacroAssembler::movptr(Register dst, Register src) {
6382   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6383 }
6384 
6385 void MacroAssembler::movptr(Register dst, Address src) {
6386   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6387 }
6388 
6389 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
6390 void MacroAssembler::movptr(Register dst, intptr_t src) {
6391   LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
6392 }
6393 
6394 void MacroAssembler::movptr(Address dst, Register src) {
6395   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6396 }
6397 
6398 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
6399   if (reachable(src)) {
6400     movss(dst, as_Address(src));
6401   } else {
6402     lea(rscratch1, src);
6403     movss(dst, Address(rscratch1, 0));
6404   }
6405 }
6406 
6407 void MacroAssembler::null_check(Register reg, int offset) {
6408   if (needs_explicit_null_check(offset)) {
6409     // provoke OS NULL exception if reg = NULL by
6410     // accessing M[reg] w/o changing any (non-CC) registers
6411     // NOTE: cmpl is plenty here to provoke a segv
6412     cmpptr(rax, Address(reg, 0));
6413     // Note: should probably use testl(rax, Address(reg, 0));
6414     //       may be shorter code (however, this version of
6415     //       testl needs to be implemented first)
6416   } else {
6417     // nothing to do, (later) access of M[reg + offset]
6418     // will provoke OS NULL exception if reg = NULL
6419   }
6420 }
6421 
6422 void MacroAssembler::os_breakpoint() {
6423   // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
6424   // (e.g., MSVC can't call ps() otherwise)
6425   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
6426 }
6427 
6428 void MacroAssembler::pop_CPU_state() {
6429   pop_FPU_state();
6430   pop_IU_state();
6431 }
6432 
6433 void MacroAssembler::pop_FPU_state() {
6434   NOT_LP64(frstor(Address(rsp, 0));)
6435   LP64_ONLY(fxrstor(Address(rsp, 0));)
6436   addptr(rsp, FPUStateSizeInWords * wordSize);
6437 }
6438 
6439 void MacroAssembler::pop_IU_state() {
6440   popa();
6441   LP64_ONLY(addq(rsp, 8));
6442   popf();
6443 }
6444 
6445 // Save Integer and Float state
6446 // Warning: Stack must be 16 byte aligned (64bit)
6447 void MacroAssembler::push_CPU_state() {
6448   push_IU_state();
6449   push_FPU_state();
6450 }
6451 
6452 void MacroAssembler::push_FPU_state() {
6453   subptr(rsp, FPUStateSizeInWords * wordSize);
6454 #ifndef _LP64
6455   fnsave(Address(rsp, 0));
6456   fwait();
6457 #else
6458   fxsave(Address(rsp, 0));
6459 #endif // LP64
6460 }
6461 
6462 void MacroAssembler::push_IU_state() {
6463   // Push flags first because pusha kills them
6464   pushf();
6465   // Make sure rsp stays 16-byte aligned
6466   LP64_ONLY(subq(rsp, 8));
6467   pusha();
6468 }
6469 
6470 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
6471   // determine java_thread register
6472   if (!java_thread->is_valid()) {
6473     java_thread = rdi;
6474     get_thread(java_thread);
6475   }
6476   // we must set sp to zero to clear frame
6477   movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), (int32_t)NULL_WORD);
6478   if (clear_fp) {
6479     movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), (int32_t)NULL_WORD);
6480   }
6481 
6482   if (clear_pc)
6483     movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), (int32_t)NULL_WORD);
6484 
6485 }
6486 
6487 void MacroAssembler::restore_rax(Register tmp) {
6488   if (tmp == noreg) pop(rax);
6489   else if (tmp != rax) mov(rax, tmp);
6490 }
6491 
6492 void MacroAssembler::round_to(Register reg, int modulus) {
6493   addptr(reg, modulus - 1);
6494   andptr(reg, -modulus);
6495 }
6496 
6497 void MacroAssembler::save_rax(Register tmp) {
6498   if (tmp == noreg) push(rax);
6499   else if (tmp != rax) mov(tmp, rax);
6500 }
6501 
6502 // Write serialization page so VM thread can do a pseudo remote membar.
6503 // We use the current thread pointer to calculate a thread specific
6504 // offset to write to within the page. This minimizes bus traffic
6505 // due to cache line collision.
6506 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
6507   movl(tmp, thread);
6508   shrl(tmp, os::get_serialize_page_shift_count());
6509   andl(tmp, (os::vm_page_size() - sizeof(int)));
6510 
6511   Address index(noreg, tmp, Address::times_1);
6512   ExternalAddress page(os::get_memory_serialize_page());
6513 
6514   movptr(ArrayAddress(page, index), tmp);
6515 }
6516 
6517 // Calls to C land
6518 //
6519 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
6520 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
6521 // has to be reset to 0. This is required to allow proper stack traversal.
6522 void MacroAssembler::set_last_Java_frame(Register java_thread,
6523                                          Register last_java_sp,
6524                                          Register last_java_fp,
6525                                          address  last_java_pc) {
6526   // determine java_thread register
6527   if (!java_thread->is_valid()) {
6528     java_thread = rdi;
6529     get_thread(java_thread);
6530   }
6531   // determine last_java_sp register
6532   if (!last_java_sp->is_valid()) {
6533     last_java_sp = rsp;
6534   }
6535 
6536   // last_java_fp is optional
6537 
6538   if (last_java_fp->is_valid()) {
6539     movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
6540   }
6541 
6542   // last_java_pc is optional
6543 
6544   if (last_java_pc != NULL) {
6545     lea(Address(java_thread,
6546                  JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
6547         InternalAddress(last_java_pc));
6548 
6549   }
6550   movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
6551 }
6552 
6553 void MacroAssembler::shlptr(Register dst, int imm8) {
6554   LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
6555 }
6556 
6557 void MacroAssembler::shrptr(Register dst, int imm8) {
6558   LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
6559 }
6560 
6561 void MacroAssembler::sign_extend_byte(Register reg) {
6562   if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
6563     movsbl(reg, reg); // movsxb
6564   } else {
6565     shll(reg, 24);
6566     sarl(reg, 24);
6567   }
6568 }
6569 
6570 void MacroAssembler::sign_extend_short(Register reg) {
6571   if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6572     movswl(reg, reg); // movsxw
6573   } else {
6574     shll(reg, 16);
6575     sarl(reg, 16);
6576   }
6577 }
6578 
6579 //////////////////////////////////////////////////////////////////////////////////
6580 #ifndef SERIALGC
6581 
6582 void MacroAssembler::g1_write_barrier_pre(Register obj,
6583 #ifndef _LP64
6584                                           Register thread,
6585 #endif
6586                                           Register tmp,
6587                                           Register tmp2,
6588                                           bool tosca_live) {
6589   LP64_ONLY(Register thread = r15_thread;)
6590   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6591                                        PtrQueue::byte_offset_of_active()));
6592 
6593   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6594                                        PtrQueue::byte_offset_of_index()));
6595   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6596                                        PtrQueue::byte_offset_of_buf()));
6597 
6598 
6599   Label done;
6600   Label runtime;
6601 
6602   // if (!marking_in_progress) goto done;
6603   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
6604     cmpl(in_progress, 0);
6605   } else {
6606     assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
6607     cmpb(in_progress, 0);
6608   }
6609   jcc(Assembler::equal, done);
6610 
6611   // if (x.f == NULL) goto done;
6612   cmpptr(Address(obj, 0), NULL_WORD);
6613   jcc(Assembler::equal, done);
6614 
6615   // Can we store original value in the thread's buffer?
6616 
6617   LP64_ONLY(movslq(tmp, index);)
6618   movptr(tmp2, Address(obj, 0));
6619 #ifdef _LP64
6620   cmpq(tmp, 0);
6621 #else
6622   cmpl(index, 0);
6623 #endif
6624   jcc(Assembler::equal, runtime);
6625 #ifdef _LP64
6626   subq(tmp, wordSize);
6627   movl(index, tmp);
6628   addq(tmp, buffer);
6629 #else
6630   subl(index, wordSize);
6631   movl(tmp, buffer);
6632   addl(tmp, index);
6633 #endif
6634   movptr(Address(tmp, 0), tmp2);
6635   jmp(done);
6636   bind(runtime);
6637   // save the live input values
6638   if(tosca_live) push(rax);
6639   push(obj);
6640 #ifdef _LP64
6641   movq(c_rarg0, Address(obj, 0));
6642   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
6643 #else
6644   push(thread);
6645   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
6646   pop(thread);
6647 #endif
6648   pop(obj);
6649   if(tosca_live) pop(rax);
6650   bind(done);
6651 
6652 }
6653 
6654 void MacroAssembler::g1_write_barrier_post(Register store_addr,
6655                                            Register new_val,
6656 #ifndef _LP64
6657                                            Register thread,
6658 #endif
6659                                            Register tmp,
6660                                            Register tmp2) {
6661 
6662   LP64_ONLY(Register thread = r15_thread;)
6663   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
6664                                        PtrQueue::byte_offset_of_index()));
6665   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
6666                                        PtrQueue::byte_offset_of_buf()));
6667   BarrierSet* bs = Universe::heap()->barrier_set();
6668   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
6669   Label done;
6670   Label runtime;
6671 
6672   // Does store cross heap regions?
6673 
6674   movptr(tmp, store_addr);
6675   xorptr(tmp, new_val);
6676   shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
6677   jcc(Assembler::equal, done);
6678 
6679   // crosses regions, storing NULL?
6680 
6681   cmpptr(new_val, (int32_t) NULL_WORD);
6682   jcc(Assembler::equal, done);
6683 
6684   // storing region crossing non-NULL, is card already dirty?
6685 
6686   ExternalAddress cardtable((address) ct->byte_map_base);
6687   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
6688 #ifdef _LP64
6689   const Register card_addr = tmp;
6690 
6691   movq(card_addr, store_addr);
6692   shrq(card_addr, CardTableModRefBS::card_shift);
6693 
6694   lea(tmp2, cardtable);
6695 
6696   // get the address of the card
6697   addq(card_addr, tmp2);
6698 #else
6699   const Register card_index = tmp;
6700 
6701   movl(card_index, store_addr);
6702   shrl(card_index, CardTableModRefBS::card_shift);
6703 
6704   Address index(noreg, card_index, Address::times_1);
6705   const Register card_addr = tmp;
6706   lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
6707 #endif
6708   cmpb(Address(card_addr, 0), 0);
6709   jcc(Assembler::equal, done);
6710 
6711   // storing a region crossing, non-NULL oop, card is clean.
6712   // dirty card and log.
6713 
6714   movb(Address(card_addr, 0), 0);
6715 
6716   cmpl(queue_index, 0);
6717   jcc(Assembler::equal, runtime);
6718   subl(queue_index, wordSize);
6719   movptr(tmp2, buffer);
6720 #ifdef _LP64
6721   movslq(rscratch1, queue_index);
6722   addq(tmp2, rscratch1);
6723   movq(Address(tmp2, 0), card_addr);
6724 #else
6725   addl(tmp2, queue_index);
6726   movl(Address(tmp2, 0), card_index);
6727 #endif
6728   jmp(done);
6729 
6730   bind(runtime);
6731   // save the live input values
6732   push(store_addr);
6733   push(new_val);
6734 #ifdef _LP64
6735   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
6736 #else
6737   push(thread);
6738   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
6739   pop(thread);
6740 #endif
6741   pop(new_val);
6742   pop(store_addr);
6743 
6744   bind(done);
6745 
6746 }
6747 
6748 #endif // SERIALGC
6749 //////////////////////////////////////////////////////////////////////////////////
6750 
6751 
6752 void MacroAssembler::store_check(Register obj) {
6753   // Does a store check for the oop in register obj. The content of
6754   // register obj is destroyed afterwards.
6755   store_check_part_1(obj);
6756   store_check_part_2(obj);
6757 }
6758 
6759 void MacroAssembler::store_check(Register obj, Address dst) {
6760   store_check(obj);
6761 }
6762 
6763 
6764 // split the store check operation so that other instructions can be scheduled inbetween
6765 void MacroAssembler::store_check_part_1(Register obj) {
6766   BarrierSet* bs = Universe::heap()->barrier_set();
6767   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
6768   shrptr(obj, CardTableModRefBS::card_shift);
6769 }
6770 
6771 void MacroAssembler::store_check_part_2(Register obj) {
6772   BarrierSet* bs = Universe::heap()->barrier_set();
6773   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
6774   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
6775   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
6776 
6777   // The calculation for byte_map_base is as follows:
6778   // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
6779   // So this essentially converts an address to a displacement and
6780   // it will never need to be relocated. On 64bit however the value may be too
6781   // large for a 32bit displacement
6782 
6783   intptr_t disp = (intptr_t) ct->byte_map_base;
6784   if (is_simm32(disp)) {
6785     Address cardtable(noreg, obj, Address::times_1, disp);
6786     movb(cardtable, 0);
6787   } else {
6788     // By doing it as an ExternalAddress disp could be converted to a rip-relative
6789     // displacement and done in a single instruction given favorable mapping and
6790     // a smarter version of as_Address. Worst case it is two instructions which
6791     // is no worse off then loading disp into a register and doing as a simple
6792     // Address() as above.
6793     // We can't do as ExternalAddress as the only style since if disp == 0 we'll
6794     // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
6795     // in some cases we'll get a single instruction version.
6796 
6797     ExternalAddress cardtable((address)disp);
6798     Address index(noreg, obj, Address::times_1);
6799     movb(as_Address(ArrayAddress(cardtable, index)), 0);
6800   }
6801 }
6802 
6803 void MacroAssembler::subptr(Register dst, int32_t imm32) {
6804   LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
6805 }
6806 
6807 void MacroAssembler::subptr(Register dst, Register src) {
6808   LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
6809 }
6810 
6811 void MacroAssembler::test32(Register src1, AddressLiteral src2) {
6812   // src2 must be rval
6813 
6814   if (reachable(src2)) {
6815     testl(src1, as_Address(src2));
6816   } else {
6817     lea(rscratch1, src2);
6818     testl(src1, Address(rscratch1, 0));
6819   }
6820 }
6821 
6822 // C++ bool manipulation
6823 void MacroAssembler::testbool(Register dst) {
6824   if(sizeof(bool) == 1)
6825     testb(dst, 0xff);
6826   else if(sizeof(bool) == 2) {
6827     // testw implementation needed for two byte bools
6828     ShouldNotReachHere();
6829   } else if(sizeof(bool) == 4)
6830     testl(dst, dst);
6831   else
6832     // unsupported
6833     ShouldNotReachHere();
6834 }
6835 
6836 void MacroAssembler::testptr(Register dst, Register src) {
6837   LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
6838 }
6839 
6840 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
6841 void MacroAssembler::tlab_allocate(Register obj,
6842                                    Register var_size_in_bytes,
6843                                    int con_size_in_bytes,
6844                                    Register t1,
6845                                    Register t2,
6846                                    Label& slow_case) {
6847   assert_different_registers(obj, t1, t2);
6848   assert_different_registers(obj, var_size_in_bytes, t1);
6849   Register end = t2;
6850   Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
6851 
6852   verify_tlab();
6853 
6854   NOT_LP64(get_thread(thread));
6855 
6856   movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
6857   if (var_size_in_bytes == noreg) {
6858     lea(end, Address(obj, con_size_in_bytes));
6859   } else {
6860     lea(end, Address(obj, var_size_in_bytes, Address::times_1));
6861   }
6862   cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
6863   jcc(Assembler::above, slow_case);
6864 
6865   // update the tlab top pointer
6866   movptr(Address(thread, JavaThread::tlab_top_offset()), end);
6867 
6868   // recover var_size_in_bytes if necessary
6869   if (var_size_in_bytes == end) {
6870     subptr(var_size_in_bytes, obj);
6871   }
6872   verify_tlab();
6873 }
6874 
6875 // Preserves rbx, and rdx.
6876 void MacroAssembler::tlab_refill(Label& retry,
6877                                  Label& try_eden,
6878                                  Label& slow_case) {
6879   Register top = rax;
6880   Register t1  = rcx;
6881   Register t2  = rsi;
6882   Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
6883   assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
6884   Label do_refill, discard_tlab;
6885 
6886   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
6887     // No allocation in the shared eden.
6888     jmp(slow_case);
6889   }
6890 
6891   NOT_LP64(get_thread(thread_reg));
6892 
6893   movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
6894   movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
6895 
6896   // calculate amount of free space
6897   subptr(t1, top);
6898   shrptr(t1, LogHeapWordSize);
6899 
6900   // Retain tlab and allocate object in shared space if
6901   // the amount free in the tlab is too large to discard.
6902   cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
6903   jcc(Assembler::lessEqual, discard_tlab);
6904 
6905   // Retain
6906   // %%% yuck as movptr...
6907   movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
6908   addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
6909   if (TLABStats) {
6910     // increment number of slow_allocations
6911     addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
6912   }
6913   jmp(try_eden);
6914 
6915   bind(discard_tlab);
6916   if (TLABStats) {
6917     // increment number of refills
6918     addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
6919     // accumulate wastage -- t1 is amount free in tlab
6920     addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
6921   }
6922 
6923   // if tlab is currently allocated (top or end != null) then
6924   // fill [top, end + alignment_reserve) with array object
6925   testptr (top, top);
6926   jcc(Assembler::zero, do_refill);
6927 
6928   // set up the mark word
6929   movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
6930   // set the length to the remaining space
6931   subptr(t1, typeArrayOopDesc::header_size(T_INT));
6932   addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
6933   shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
6934   movptr(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
6935   // set klass to intArrayKlass
6936   // dubious reloc why not an oop reloc?
6937   movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
6938   // store klass last.  concurrent gcs assumes klass length is valid if
6939   // klass field is not null.
6940   store_klass(top, t1);
6941 
6942   // refill the tlab with an eden allocation
6943   bind(do_refill);
6944   movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
6945   shlptr(t1, LogHeapWordSize);
6946   // add object_size ??
6947   eden_allocate(top, t1, 0, t2, slow_case);
6948 
6949   // Check that t1 was preserved in eden_allocate.
6950 #ifdef ASSERT
6951   if (UseTLAB) {
6952     Label ok;
6953     Register tsize = rsi;
6954     assert_different_registers(tsize, thread_reg, t1);
6955     push(tsize);
6956     movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
6957     shlptr(tsize, LogHeapWordSize);
6958     cmpptr(t1, tsize);
6959     jcc(Assembler::equal, ok);
6960     stop("assert(t1 != tlab size)");
6961     should_not_reach_here();
6962 
6963     bind(ok);
6964     pop(tsize);
6965   }
6966 #endif
6967   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
6968   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
6969   addptr(top, t1);
6970   subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
6971   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
6972   verify_tlab();
6973   jmp(retry);
6974 }
6975 
6976 static const double     pi_4 =  0.7853981633974483;
6977 
6978 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
6979   // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
6980   // was attempted in this code; unfortunately it appears that the
6981   // switch to 80-bit precision and back causes this to be
6982   // unprofitable compared with simply performing a runtime call if
6983   // the argument is out of the (-pi/4, pi/4) range.
6984 
6985   Register tmp = noreg;
6986   if (!VM_Version::supports_cmov()) {
6987     // fcmp needs a temporary so preserve rbx,
6988     tmp = rbx;
6989     push(tmp);
6990   }
6991 
6992   Label slow_case, done;
6993 
6994   // x ?<= pi/4
6995   fld_d(ExternalAddress((address)&pi_4));
6996   fld_s(1);                // Stack:  X  PI/4  X
6997   fabs();                  // Stack: |X| PI/4  X
6998   fcmp(tmp);
6999   jcc(Assembler::above, slow_case);
7000 
7001   // fastest case: -pi/4 <= x <= pi/4
7002   switch(trig) {
7003   case 's':
7004     fsin();
7005     break;
7006   case 'c':
7007     fcos();
7008     break;
7009   case 't':
7010     ftan();
7011     break;
7012   default:
7013     assert(false, "bad intrinsic");
7014     break;
7015   }
7016   jmp(done);
7017 
7018   // slow case: runtime call
7019   bind(slow_case);
7020   // Preserve registers across runtime call
7021   pusha();
7022   int incoming_argument_and_return_value_offset = -1;
7023   if (num_fpu_regs_in_use > 1) {
7024     // Must preserve all other FPU regs (could alternatively convert
7025     // SharedRuntime::dsin and dcos into assembly routines known not to trash
7026     // FPU state, but can not trust C compiler)
7027     NEEDS_CLEANUP;
7028     // NOTE that in this case we also push the incoming argument to
7029     // the stack and restore it later; we also use this stack slot to
7030     // hold the return value from dsin or dcos.
7031     for (int i = 0; i < num_fpu_regs_in_use; i++) {
7032       subptr(rsp, sizeof(jdouble));
7033       fstp_d(Address(rsp, 0));
7034     }
7035     incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
7036     fld_d(Address(rsp, incoming_argument_and_return_value_offset));
7037   }
7038   subptr(rsp, sizeof(jdouble));
7039   fstp_d(Address(rsp, 0));
7040 #ifdef _LP64
7041   movdbl(xmm0, Address(rsp, 0));
7042 #endif // _LP64
7043 
7044   // NOTE: we must not use call_VM_leaf here because that requires a
7045   // complete interpreter frame in debug mode -- same bug as 4387334
7046   // MacroAssembler::call_VM_leaf_base is perfectly safe and will
7047   // do proper 64bit abi
7048 
7049   NEEDS_CLEANUP;
7050   // Need to add stack banging before this runtime call if it needs to
7051   // be taken; however, there is no generic stack banging routine at
7052   // the MacroAssembler level
7053   switch(trig) {
7054   case 's':
7055     {
7056       MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
7057     }
7058     break;
7059   case 'c':
7060     {
7061       MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
7062     }
7063     break;
7064   case 't':
7065     {
7066       MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
7067     }
7068     break;
7069   default:
7070     assert(false, "bad intrinsic");
7071     break;
7072   }
7073 #ifdef _LP64
7074     movsd(Address(rsp, 0), xmm0);
7075     fld_d(Address(rsp, 0));
7076 #endif // _LP64
7077   addptr(rsp, sizeof(jdouble));
7078   if (num_fpu_regs_in_use > 1) {
7079     // Must save return value to stack and then restore entire FPU stack
7080     fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
7081     for (int i = 0; i < num_fpu_regs_in_use; i++) {
7082       fld_d(Address(rsp, 0));
7083       addptr(rsp, sizeof(jdouble));
7084     }
7085   }
7086   popa();
7087 
7088   // Come here with result in F-TOS
7089   bind(done);
7090 
7091   if (tmp != noreg) {
7092     pop(tmp);
7093   }
7094 }
7095 
7096 
7097 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7098   ucomisd(dst, as_Address(src));
7099 }
7100 
7101 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7102   ucomiss(dst, as_Address(src));
7103 }
7104 
7105 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7106   if (reachable(src)) {
7107     xorpd(dst, as_Address(src));
7108   } else {
7109     lea(rscratch1, src);
7110     xorpd(dst, Address(rscratch1, 0));
7111   }
7112 }
7113 
7114 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7115   if (reachable(src)) {
7116     xorps(dst, as_Address(src));
7117   } else {
7118     lea(rscratch1, src);
7119     xorps(dst, Address(rscratch1, 0));
7120   }
7121 }
7122 
7123 void MacroAssembler::verify_oop(Register reg, const char* s) {
7124   if (!VerifyOops) return;
7125 
7126   // Pass register number to verify_oop_subroutine
7127   char* b = new char[strlen(s) + 50];
7128   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
7129   push(rax);                          // save rax,
7130   push(reg);                          // pass register argument
7131   ExternalAddress buffer((address) b);
7132   // avoid using pushptr, as it modifies scratch registers
7133   // and our contract is not to modify anything
7134   movptr(rax, buffer.addr());
7135   push(rax);
7136   // call indirectly to solve generation ordering problem
7137   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7138   call(rax);
7139 }
7140 
7141 
7142 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
7143   if (!VerifyOops) return;
7144 
7145   // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
7146   // Pass register number to verify_oop_subroutine
7147   char* b = new char[strlen(s) + 50];
7148   sprintf(b, "verify_oop_addr: %s", s);
7149 
7150   push(rax);                          // save rax,
7151   // addr may contain rsp so we will have to adjust it based on the push
7152   // we just did
7153   // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
7154   // stores rax into addr which is backwards of what was intended.
7155   if (addr.uses(rsp)) {
7156     lea(rax, addr);
7157     pushptr(Address(rax, BytesPerWord));
7158   } else {
7159     pushptr(addr);
7160   }
7161 
7162   ExternalAddress buffer((address) b);
7163   // pass msg argument
7164   // avoid using pushptr, as it modifies scratch registers
7165   // and our contract is not to modify anything
7166   movptr(rax, buffer.addr());
7167   push(rax);
7168 
7169   // call indirectly to solve generation ordering problem
7170   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7171   call(rax);
7172   // Caller pops the arguments and restores rax, from the stack
7173 }
7174 
7175 void MacroAssembler::verify_tlab() {
7176 #ifdef ASSERT
7177   if (UseTLAB && VerifyOops) {
7178     Label next, ok;
7179     Register t1 = rsi;
7180     Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
7181 
7182     push(t1);
7183     NOT_LP64(push(thread_reg));
7184     NOT_LP64(get_thread(thread_reg));
7185 
7186     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7187     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
7188     jcc(Assembler::aboveEqual, next);
7189     stop("assert(top >= start)");
7190     should_not_reach_here();
7191 
7192     bind(next);
7193     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
7194     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7195     jcc(Assembler::aboveEqual, ok);
7196     stop("assert(top <= end)");
7197     should_not_reach_here();
7198 
7199     bind(ok);
7200     NOT_LP64(pop(thread_reg));
7201     pop(t1);
7202   }
7203 #endif
7204 }
7205 
// Decoder for a control word value (a member of FPU_State below; presumably
// the x87 FPU control word). Only the low 16 bits of _value are printed.
class ControlWord {
 public:
  int32_t _value;

  // two-bit fields
  int  rounding_control() const        { return field2(10); }
  int  precision_control() const       { return field2(8); }
  // single-bit fields, bits 5..0
  bool precision() const               { return bit_set(5); }
  bool underflow() const               { return bit_set(4); }
  bool overflow() const                { return bit_set(3); }
  bool zero_divide() const             { return bit_set(2); }
  bool denormalized() const            { return bit_set(1); }
  bool invalid() const                 { return bit_set(0); }

  // Prints the low 16 bits in hex followed by the six bits above as
  // upper case (set) / lower case (clear) letters, the rounding mode and
  // the precision.
  void print() const {
    static const char* const rounding_names[4] = {
      "round near", "round down", "round up  ", "chop      "
    };
    static const char* const precision_names[4] = {
      "24 bits ", "reserved", "53 bits ", "64 bits "
    };
    const char* rc = rounding_names[rounding_control()];
    const char* pc = precision_names[precision_control()];

    // flags
    char f[9];
    int pos = 0;
    f[pos++] = ' ';
    f[pos++] = ' ';
    f[pos++] = precision()    ? 'P' : 'p';
    f[pos++] = underflow()    ? 'U' : 'u';
    f[pos++] = overflow()     ? 'O' : 'o';
    f[pos++] = zero_divide()  ? 'Z' : 'z';
    f[pos++] = denormalized() ? 'D' : 'd';
    f[pos++] = invalid()      ? 'I' : 'i';
    f[pos]   = '\x0';

    // output
    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

 private:
  bool bit_set(int n) const            { return ((_value >> n) & 1) != 0; }
  int  field2(int shift) const         { return (_value >> shift) & 3; }
};
7252 
// Decoder for a status word value (a member of FPU_State below; presumably
// the x87 FPU status word). Only the low 16 bits of _value are printed.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const                    { return bit_set(15); }
  // condition code bits
  bool C3() const                      { return bit_set(14); }
  bool C2() const                      { return bit_set(10); }
  bool C1() const                      { return bit_set(9); }
  bool C0() const                      { return bit_set(8); }
  // three-bit top-of-stack field, bits 13..11
  int  top() const                     { return  (_value >> 11) & 7; }
  bool error_status() const            { return bit_set(7); }
  bool stack_fault() const             { return bit_set(6); }
  bool precision() const               { return bit_set(5); }
  bool underflow() const               { return bit_set(4); }
  bool overflow() const                { return bit_set(3); }
  bool zero_divide() const             { return bit_set(2); }
  bool denormalized() const            { return bit_set(1); }
  bool invalid() const                 { return bit_set(0); }

  // Prints the low 16 bits in hex, the flag bits 7..0 as letters ('-' when
  // clear), the condition codes and the top-of-stack index.
  void print() const {
    // condition codes
    char c[5];
    c[0] = C3() ? '3' : '-';
    c[1] = C2() ? '2' : '-';
    c[2] = C1() ? '1' : '-';
    c[3] = C0() ? '0' : '-';
    c[4] = '\x0';

    // flags
    char f[9];
    int pos = 0;
    f[pos++] = error_status() ? 'E' : '-';
    f[pos++] = stack_fault()  ? 'S' : '-';
    f[pos++] = precision()    ? 'P' : '-';
    f[pos++] = underflow()    ? 'U' : '-';
    f[pos++] = overflow()     ? 'O' : '-';
    f[pos++] = zero_divide()  ? 'Z' : '-';
    f[pos++] = denormalized() ? 'D' : '-';
    f[pos++] = invalid()      ? 'I' : '-';
    f[pos]   = '\x0';

    // output
    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  }

 private:
  bool bit_set(int n) const            { return ((_value >> n) & 1) != 0; }
};
7296 
// Decoder for a tag word value: a sequence of two-bit tags, tag i living
// in bits 2*i+1 .. 2*i of _value.
class TagWord {
 public:
  int32_t _value;

  // Returns the two-bit tag number i.
  int tag_at(int i) const {
    const int shift = i * 2;
    return (_value >> shift) & 3;
  }

  // Prints the low 16 bits as four hex digits.
  void print() const {
    const int low16 = _value & 0xFFFF;
    printf("%04x", low16);
  }

};
7308 
// View of one FPU register image: two 32-bit words (_m0 low, _m1 high, as
// printed) followed by a 16-bit word _ex whose sign bit is printed as the
// sign of the value.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for the bit pattern _ex == all ones, _m1 == 0xC0000000, _m0 == 0.
  bool is_indefinite() const {
    if (_ex != -1)                   return false;
    if (_m1 != (int32_t)0xC0000000)  return false;
    return _m0 == 0;
  }

  // Prints sign, _ex, _m1 and _m0 in hex, plus "NaN" when _ex is 0x7FFF
  // or all ones.
  void print() const {
    const bool negative = (_ex < 0);
    const bool ex_all_set = (_ex == 0x7FFF) || (_ex == (int16_t)-1);
    printf("%c%04hx.%08x%08x  %s",
           negative ? '-' : '+', _ex, _m1, _m0,
           ex_all_set ? "NaN" : "   ");
  }

};
7326 
7327 class FPU_State {
7328  public:
7329   enum {
7330     register_size       = 10,
7331     number_of_registers =  8,
7332     register_mask       =  7
7333   };
7334 
7335   ControlWord  _control_word;
7336   StatusWord   _status_word;
7337   TagWord      _tag_word;
7338   int32_t      _error_offset;
7339   int32_t      _error_selector;
7340   int32_t      _data_offset;
7341   int32_t      _data_selector;
7342   int8_t       _register[register_size * number_of_registers];
7343 
7344   int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
7345   FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
7346 
7347   const char* tag_as_string(int tag) const {
7348     switch (tag) {
7349       case 0: return "valid";
7350       case 1: return "zero";
7351       case 2: return "special";
7352       case 3: return "empty";
7353     }
7354     ShouldNotReachHere()
7355     return NULL;
7356   }
7357 
7358   void print() const {
7359     // print computation registers
7360     { int t = _status_word.top();
7361       for (int i = 0; i < number_of_registers; i++) {
7362         int j = (i - t) & register_mask;
7363         printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
7364         st(j)->print();
7365         printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
7366       }
7367     }
7368     printf("\n");
7369     // print control registers
7370     printf("ctrl = "); _control_word.print(); printf("\n");
7371     printf("stat = "); _status_word .print(); printf("\n");
7372     printf("tags = "); _tag_word    .print(); printf("\n");
7373   }
7374 
7375 };
7376 
// Decoder for a flags register value (stored as _eflags in IU_State).
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const                { return bit_set(11); }
  bool direction() const               { return bit_set(10); }
  bool sign() const                    { return bit_set(7); }
  bool zero() const                    { return bit_set(6); }
  bool auxiliary_carry() const         { return bit_set(4); }
  bool parity() const                  { return bit_set(2); }
  bool carry() const                   { return bit_set(0); }

  // Prints the raw value in hex followed by one letter per decoded flag
  // ('-' when the flag is clear).
  void print() const {
    // flags
    char f[8];
    int pos = 0;
    f[pos++] = overflow()        ? 'O' : '-';
    f[pos++] = direction()       ? 'D' : '-';
    f[pos++] = sign()            ? 'S' : '-';
    f[pos++] = zero()            ? 'Z' : '-';
    f[pos++] = auxiliary_carry() ? 'A' : '-';
    f[pos++] = parity()          ? 'P' : '-';
    f[pos++] = carry()           ? 'C' : '-';
    f[pos]   = '\x0';
    // output
    printf("%08x  flags = %s", _value, f);
  }

 private:
  bool bit_set(int n) const            { return ((_value >> n) & 1) != 0; }
};
7405 
// A single 32-bit integer register value.
class IU_Register {
 public:
  int32_t _value;

  // Prints the value as 8 hex digits, then as a decimal number
  // right-aligned in 11 columns.
  void print() const {
    const int32_t v = _value;
    printf("%08x  %11d", v, v);
  }

};
7415 
7416 class IU_State {
7417  public:
7418   Flag_Register _eflags;
7419   IU_Register   _rdi;
7420   IU_Register   _rsi;
7421   IU_Register   _rbp;
7422   IU_Register   _rsp;
7423   IU_Register   _rbx;
7424   IU_Register   _rdx;
7425   IU_Register   _rcx;
7426   IU_Register   _rax;
7427 
7428   void print() const {
7429     // computation registers
7430     printf("rax,  = "); _rax.print(); printf("\n");
7431     printf("rbx,  = "); _rbx.print(); printf("\n");
7432     printf("rcx  = "); _rcx.print(); printf("\n");
7433     printf("rdx  = "); _rdx.print(); printf("\n");
7434     printf("rdi  = "); _rdi.print(); printf("\n");
7435     printf("rsi  = "); _rsi.print(); printf("\n");
7436     printf("rbp,  = "); _rbp.print(); printf("\n");
7437     printf("rsp  = "); _rsp.print(); printf("\n");
7438     printf("\n");
7439     // control registers
7440     printf("flgs = "); _eflags.print(); printf("\n");
7441   }
7442 };
7443 
7444 
7445 class CPU_State {
7446  public:
7447   FPU_State _fpu_state;
7448   IU_State  _iu_state;
7449 
7450   void print() const {
7451     printf("--------------------------------------------------\n");
7452     _iu_state .print();
7453     printf("\n");
7454     _fpu_state.print();
7455     printf("--------------------------------------------------\n");
7456   }
7457 
7458 };
7459 
7460 
7461 static void _print_CPU_state(CPU_State* state) {
7462   state->print();
7463 };
7464 
7465 
7466 void MacroAssembler::print_CPU_state() {
7467   push_CPU_state();
7468   push(rsp);                // pass CPU state
7469   call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
7470   addptr(rsp, wordSize);       // discard argument
7471   pop_CPU_state();
7472 }
7473 
7474 
7475 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
7476   static int counter = 0;
7477   FPU_State* fs = &state->_fpu_state;
7478   counter++;
7479   // For leaf calls, only verify that the top few elements remain empty.
7480   // We only need 1 empty at the top for C2 code.
7481   if( stack_depth < 0 ) {
7482     if( fs->tag_for_st(7) != 3 ) {
7483       printf("FPR7 not empty\n");
7484       state->print();
7485       assert(false, "error");
7486       return false;
7487     }
7488     return true;                // All other stack states do not matter
7489   }
7490 
7491   assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
7492          "bad FPU control word");
7493 
7494   // compute stack depth
7495   int i = 0;
7496   while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
7497   int d = i;
7498   while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
7499   // verify findings
7500   if (i != FPU_State::number_of_registers) {
7501     // stack not contiguous
7502     printf("%s: stack not contiguous at ST%d\n", s, i);
7503     state->print();
7504     assert(false, "error");
7505     return false;
7506   }
7507   // check if computed stack depth corresponds to expected stack depth
7508   if (stack_depth < 0) {
7509     // expected stack depth is -stack_depth or less
7510     if (d > -stack_depth) {
7511       // too many elements on the stack
7512       printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
7513       state->print();
7514       assert(false, "error");
7515       return false;
7516     }
7517   } else {
7518     // expected stack depth is stack_depth
7519     if (d != stack_depth) {
7520       // wrong stack depth
7521       printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
7522       state->print();
7523       assert(false, "error");
7524       return false;
7525     }
7526   }
7527   // everything is cool
7528   return true;
7529 }
7530 
7531 
7532 void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
7533   if (!VerifyFPU) return;
7534   push_CPU_state();
7535   push(rsp);                // pass CPU state
7536   ExternalAddress msg((address) s);
7537   // pass message string s
7538   pushptr(msg.addr());
7539   push(stack_depth);        // pass stack depth
7540   call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
7541   addptr(rsp, 3 * wordSize);   // discard arguments
7542   // check for error
7543   { Label L;
7544     testl(rax, rax);
7545     jcc(Assembler::notZero, L);
7546     int3();                  // break if error condition
7547     bind(L);
7548   }
7549   pop_CPU_state();
7550 }
7551 
7552 void MacroAssembler::load_klass(Register dst, Register src) {
7553 #ifdef _LP64
7554   if (UseCompressedOops) {
7555     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
7556     decode_heap_oop_not_null(dst);
7557   } else
7558 #endif
7559     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
7560 }
7561 
7562 void MacroAssembler::load_prototype_header(Register dst, Register src) {
7563 #ifdef _LP64
7564   if (UseCompressedOops) {
7565     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
7566     movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
7567   } else
7568 #endif
7569     {
7570       movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
7571       movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
7572     }
7573 }
7574 
7575 void MacroAssembler::store_klass(Register dst, Register src) {
7576 #ifdef _LP64
7577   if (UseCompressedOops) {
7578     encode_heap_oop_not_null(src);
7579     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
7580   } else
7581 #endif
7582     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
7583 }
7584 
7585 #ifdef _LP64
7586 void MacroAssembler::store_klass_gap(Register dst, Register src) {
7587   if (UseCompressedOops) {
7588     // Store to klass gap in destination
7589     movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
7590   }
7591 }
7592 
7593 void MacroAssembler::load_heap_oop(Register dst, Address src) {
7594   if (UseCompressedOops) {
7595     movl(dst, src);
7596     decode_heap_oop(dst);
7597   } else {
7598     movq(dst, src);
7599   }
7600 }
7601 
7602 void MacroAssembler::store_heap_oop(Address dst, Register src) {
7603   if (UseCompressedOops) {
7604     assert(!dst.uses(src), "not enough registers");
7605     encode_heap_oop(src);
7606     movl(dst, src);
7607   } else {
7608     movq(dst, src);
7609   }
7610 }
7611 
7612 // Algorithm must match oop.inline.hpp encode_heap_oop.
7613 void MacroAssembler::encode_heap_oop(Register r) {
7614   assert (UseCompressedOops, "should be compressed");
7615 #ifdef ASSERT
7616   if (CheckCompressedOops) {
7617     Label ok;
7618     push(rscratch1); // cmpptr trashes rscratch1
7619     cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
7620     jcc(Assembler::equal, ok);
7621     stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
7622     bind(ok);
7623     pop(rscratch1);
7624   }
7625 #endif
7626   verify_oop(r, "broken oop in encode_heap_oop");
7627   testq(r, r);
7628   cmovq(Assembler::equal, r, r12_heapbase);
7629   subq(r, r12_heapbase);
7630   shrq(r, LogMinObjAlignmentInBytes);
7631 }
7632 
7633 void MacroAssembler::encode_heap_oop_not_null(Register r) {
7634   assert (UseCompressedOops, "should be compressed");
7635 #ifdef ASSERT
7636   if (CheckCompressedOops) {
7637     Label ok;
7638     testq(r, r);
7639     jcc(Assembler::notEqual, ok);
7640     stop("null oop passed to encode_heap_oop_not_null");
7641     bind(ok);
7642   }
7643 #endif
7644   verify_oop(r, "broken oop in encode_heap_oop_not_null");
7645   subq(r, r12_heapbase);
7646   shrq(r, LogMinObjAlignmentInBytes);
7647 }
7648 
7649 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
7650   assert (UseCompressedOops, "should be compressed");
7651 #ifdef ASSERT
7652   if (CheckCompressedOops) {
7653     Label ok;
7654     testq(src, src);
7655     jcc(Assembler::notEqual, ok);
7656     stop("null oop passed to encode_heap_oop_not_null2");
7657     bind(ok);
7658   }
7659 #endif
7660   verify_oop(src, "broken oop in encode_heap_oop_not_null2");
7661   if (dst != src) {
7662     movq(dst, src);
7663   }
7664   subq(dst, r12_heapbase);
7665   shrq(dst, LogMinObjAlignmentInBytes);
7666 }
7667 
7668 void  MacroAssembler::decode_heap_oop(Register r) {
7669   assert (UseCompressedOops, "should be compressed");
7670 #ifdef ASSERT
7671   if (CheckCompressedOops) {
7672     Label ok;
7673     push(rscratch1);
7674     cmpptr(r12_heapbase,
7675            ExternalAddress((address)Universe::heap_base_addr()));
7676     jcc(Assembler::equal, ok);
7677     stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
7678     bind(ok);
7679     pop(rscratch1);
7680   }
7681 #endif
7682 
7683   Label done;
7684   shlq(r, LogMinObjAlignmentInBytes);
7685   jccb(Assembler::equal, done);
7686   addq(r, r12_heapbase);
7687 #if 0
7688    // alternate decoding probably a wash.
7689    testq(r, r);
7690    jccb(Assembler::equal, done);
7691    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
7692 #endif
7693   bind(done);
7694   verify_oop(r, "broken oop in decode_heap_oop");
7695 }
7696 
7697 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
7698   assert (UseCompressedOops, "should only be used for compressed headers");
7699   // Cannot assert, unverified entry point counts instructions (see .ad file)
7700   // vtableStubs also counts instructions in pd_code_size_limit.
7701   // Also do not verify_oop as this is called by verify_oop.
7702   assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
7703   leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
7704 }
7705 
7706 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
7707   assert (UseCompressedOops, "should only be used for compressed headers");
7708   // Cannot assert, unverified entry point counts instructions (see .ad file)
7709   // vtableStubs also counts instructions in pd_code_size_limit.
7710   // Also do not verify_oop as this is called by verify_oop.
7711   assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
7712   leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
7713 }
7714 
7715 void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
7716   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
7717   int oop_index = oop_recorder()->find_index(obj);
7718   RelocationHolder rspec = oop_Relocation::spec(oop_index);
7719   mov_literal32(dst, oop_index, rspec, narrow_oop_operand);
7720 }
7721 
7722 void MacroAssembler::reinit_heapbase() {
7723   if (UseCompressedOops) {
7724     movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
7725   }
7726 }
7727 #endif // _LP64
7728 
7729 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
7730   switch (cond) {
7731     // Note some conditions are synonyms for others
7732     case Assembler::zero:         return Assembler::notZero;
7733     case Assembler::notZero:      return Assembler::zero;
7734     case Assembler::less:         return Assembler::greaterEqual;
7735     case Assembler::lessEqual:    return Assembler::greater;
7736     case Assembler::greater:      return Assembler::lessEqual;
7737     case Assembler::greaterEqual: return Assembler::less;
7738     case Assembler::below:        return Assembler::aboveEqual;
7739     case Assembler::belowEqual:   return Assembler::above;
7740     case Assembler::above:        return Assembler::belowEqual;
7741     case Assembler::aboveEqual:   return Assembler::below;
7742     case Assembler::overflow:     return Assembler::noOverflow;
7743     case Assembler::noOverflow:   return Assembler::overflow;
7744     case Assembler::negative:     return Assembler::positive;
7745     case Assembler::positive:     return Assembler::negative;
7746     case Assembler::parity:       return Assembler::noParity;
7747     case Assembler::noParity:     return Assembler::parity;
7748   }
7749   ShouldNotReachHere(); return Assembler::overflow;
7750 }
7751 
7752 SkipIfEqual::SkipIfEqual(
7753     MacroAssembler* masm, const bool* flag_addr, bool value) {
7754   _masm = masm;
7755   _masm->cmp8(ExternalAddress((address)flag_addr), value);
7756   _masm->jcc(Assembler::equal, _label);
7757 }
7758 
7759 SkipIfEqual::~SkipIfEqual() {
7760   _masm->bind(_label);
7761 }