1 /*
   2  * Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20  * CA 95054 USA or visit www.sun.com if you need additional information or
  21  * have any questions.
  22  *
  23  */
  24 
  25 #include "incls/_precompiled.incl"
  26 #include "incls/_assembler_x86_64.cpp.incl"
  27 
  28 // Implementation of AddressLiteral
  29 
  30 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  31   _is_lval = false;
  32   _target = target;
  33   switch (rtype) {
  34   case relocInfo::oop_type:
  35     // Oops are a special case. Normally they would be their own section
  36     // but in cases like icBuffer they are literals in the code stream that
  37     // we don't have a section for. We use none so that we get a literal address
  38     // which is always patchable.
  39     break;
  40   case relocInfo::external_word_type:
  41     _rspec = external_word_Relocation::spec(target);
  42     break;
  43   case relocInfo::internal_word_type:
  44     _rspec = internal_word_Relocation::spec(target);
  45     break;
  46   case relocInfo::opt_virtual_call_type:
  47     _rspec = opt_virtual_call_Relocation::spec();
  48     break;
  49   case relocInfo::static_call_type:
  50     _rspec = static_call_Relocation::spec();
  51     break;
  52   case relocInfo::runtime_call_type:
  53     _rspec = runtime_call_Relocation::spec();
  54     break;
  55   case relocInfo::none:
  56     break;
  57   default:
  58     ShouldNotReachHere();
  59     break;
  60   }
  61 }
  62 
  63 // Implementation of Address
  64 
  65 Address Address::make_array(ArrayAddress adr) {
  66 #ifdef _LP64
  67   // Not implementable on 64bit machines
  68   // Should have been handled higher up the call chain.
  69   ShouldNotReachHere();
  70   return Address();
  71 #else
  72   AddressLiteral base = adr.base();
  73   Address index = adr.index();
  74   assert(index._disp == 0, "must not have disp"); // maybe it can?
  75   Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  76   array._rspec = base._rspec;
  77   return array;
  78 #endif // _LP64
  79 }
  80 
  81 // exceedingly dangerous constructor
  82 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  83   _base  = noreg;
  84   _index = noreg;
  85   _scale = no_scale;
  86   _disp  = disp;
  87   switch (rtype) {
  88     case relocInfo::external_word_type:
  89       _rspec = external_word_Relocation::spec(loc);
  90       break;
  91     case relocInfo::internal_word_type:
  92       _rspec = internal_word_Relocation::spec(loc);
  93       break;
  94     case relocInfo::runtime_call_type:
  95       // HMM
  96       _rspec = runtime_call_Relocation::spec();
  97       break;
  98     case relocInfo::none:
  99       break;
 100     default:
 101       ShouldNotReachHere();
 102   }
 103 }
 104 
 105 // Convert the raw encoding form into the form expected by the constructor for
 106 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 107 // that to noreg for the Address constructor.
 108 Address Address::make_raw(int base, int index, int scale, int disp) {
 109   bool valid_index = index != rsp->encoding();
 110   if (valid_index) {
 111     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 112     return madr;
 113   } else {
 114     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 115     return madr;
 116   }
 117 }
 118 
 119 
 120 // Implementation of Assembler
 121 int AbstractAssembler::code_fill_byte() {
 122   return (u_char)'\xF4'; // hlt
 123 }
 124 
 125 // This should only be used by 64bit instructions that can use rip-relative
 126 // it cannot be used by instructions that want an immediate value.
 127 
 128 bool Assembler::reachable(AddressLiteral adr) {
 129   int64_t disp;
 130 
 131   // None will force a 64bit literal to the code stream. Likely a placeholder
 132   // for something that will be patched later and we need to certain it will
 133   // always be reachable.
 134   if (adr.reloc() == relocInfo::none) {
 135     return false;
 136   }
 137   if (adr.reloc() == relocInfo::internal_word_type) {
 138     // This should be rip relative and easily reachable.
 139     return true;
 140   }
 141   if (adr.reloc() != relocInfo::external_word_type &&
 142       adr.reloc() != relocInfo::runtime_call_type ) {
 143     return false;
 144   }
 145 
 146   // Stress the correction code
 147   if (ForceUnreachable) {
 148     // Must be runtimecall reloc, see if it is in the codecache
 149     // Flipping stuff in the codecache to be unreachable causes issues
 150     // with things like inline caches where the additional instructions
 151     // are not handled.
 152     if (CodeCache::find_blob(adr._target) == NULL) {
 153       return false;
 154     }
 155   }
 156   // For external_word_type/runtime_call_type if it is reachable from where we
 157   // are now (possibly a temp buffer) and where we might end up
 158   // anywhere in the codeCache then we are always reachable.
 159   // This would have to change if we ever save/restore shared code
 160   // to be more pessimistic.
 161 
 162   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
 163   if (!is_simm32(disp)) return false;
 164   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
 165   if (!is_simm32(disp)) return false;
 166 
 167   disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));
 168 
 169   // Because rip relative is a disp + address_of_next_instruction and we
 170   // don't know the value of address_of_next_instruction we apply a fudge factor
 171   // to make sure we will be ok no matter the size of the instruction we get placed into.
 172   // We don't have to fudge the checks above here because they are already worst case.
 173 
 174   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
 175   // + 4 because better safe than sorry.
 176   const int fudge = 12 + 4;
 177   if (disp < 0) {
 178     disp -= fudge;
 179   } else {
 180     disp += fudge;
 181   }
 182   return is_simm32(disp);
 183 }
 184 
 185 
 186 // make this go away eventually
 187 void Assembler::emit_data(jint data,
 188                           relocInfo::relocType rtype,
 189                           int format) {
 190   if (rtype == relocInfo::none) {
 191     emit_long(data);
 192   } else {
 193     emit_data(data, Relocation::spec_simple(rtype), format);
 194   }
 195 }
 196 
 197 void Assembler::emit_data(jint data,
 198                           RelocationHolder const& rspec,
 199                           int format) {
 200   assert(imm64_operand == 0, "default format must be imm64 in this file");
 201   assert(imm64_operand != format, "must not be imm64");
 202   assert(inst_mark() != NULL, "must be inside InstructionMark");
 203   if (rspec.type() !=  relocInfo::none) {
 204     #ifdef ASSERT
 205       check_relocation(rspec, format);
 206     #endif
 207     // Do not use AbstractAssembler::relocate, which is not intended for
 208     // embedded words.  Instead, relocate to the enclosing instruction.
 209 
 210     // hack. call32 is too wide for mask so use disp32
 211     if (format == call32_operand)
 212       code_section()->relocate(inst_mark(), rspec, disp32_operand);
 213     else
 214       code_section()->relocate(inst_mark(), rspec, format);
 215   }
 216   emit_long(data);
 217 }
 218 
 219 void Assembler::emit_data64(jlong data,
 220                             relocInfo::relocType rtype,
 221                             int format) {
 222   if (rtype == relocInfo::none) {
 223     emit_long64(data);
 224   } else {
 225     emit_data64(data, Relocation::spec_simple(rtype), format);
 226   }
 227 }
 228 
 229 void Assembler::emit_data64(jlong data,
 230                             RelocationHolder const& rspec,
 231                             int format) {
 232   assert(imm64_operand == 0, "default format must be imm64 in this file");
 233   assert(imm64_operand == format, "must be imm64");
 234   assert(inst_mark() != NULL, "must be inside InstructionMark");
 235   // Do not use AbstractAssembler::relocate, which is not intended for
 236   // embedded words.  Instead, relocate to the enclosing instruction.
 237   code_section()->relocate(inst_mark(), rspec, format);
 238 #ifdef ASSERT
 239   check_relocation(rspec, format);
 240 #endif
 241   emit_long64(data);
 242 }
 243 
 244 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
 245   assert(isByte(op1) && isByte(op2), "wrong opcode");
 246   assert(isByte(imm8), "not a byte");
 247   assert((op1 & 0x01) == 0, "should be 8bit operation");
 248   int dstenc = dst->encoding();
 249   if (dstenc >= 8) {
 250     dstenc -= 8;
 251   }
 252   emit_byte(op1);
 253   emit_byte(op2 | dstenc);
 254   emit_byte(imm8);
 255 }
 256 
 257 void Assembler::emit_arith(int op1, int op2, Register dst, int imm32) {
 258   assert(isByte(op1) && isByte(op2), "wrong opcode");
 259   assert((op1 & 0x01) == 1, "should be 32bit operation");
 260   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 261   int dstenc = dst->encoding();
 262   if (dstenc >= 8) {
 263     dstenc -= 8;
 264   }
 265   if (is8bit(imm32)) {
 266     emit_byte(op1 | 0x02); // set sign bit
 267     emit_byte(op2 | dstenc);
 268     emit_byte(imm32 & 0xFF);
 269   } else {
 270     emit_byte(op1);
 271     emit_byte(op2 | dstenc);
 272     emit_long(imm32);
 273   }
 274 }
 275 
 276 // immediate-to-memory forms
 277 void Assembler::emit_arith_operand(int op1,
 278                                    Register rm, Address adr,
 279                                    int imm32) {
 280   assert((op1 & 0x01) == 1, "should be 32bit operation");
 281   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 282   if (is8bit(imm32)) {
 283     emit_byte(op1 | 0x02); // set sign bit
 284     emit_operand(rm, adr, 1);
 285     emit_byte(imm32 & 0xFF);
 286   } else {
 287     emit_byte(op1);
 288     emit_operand(rm, adr, 4);
 289     emit_long(imm32);
 290   }
 291 }
 292 
 293 
 294 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 295   assert(isByte(op1) && isByte(op2), "wrong opcode");
 296   int dstenc = dst->encoding();
 297   int srcenc = src->encoding();
 298   if (dstenc >= 8) {
 299     dstenc -= 8;
 300   }
 301   if (srcenc >= 8) {
 302     srcenc -= 8;
 303   }
 304   emit_byte(op1);
 305   emit_byte(op2 | dstenc << 3 | srcenc);
 306 }
 307 
// Emit the addressing bytes for a memory operand with general register 'reg'
// in the reg field: the first (mod/reg/rm) byte, an optional second
// (scale/index/base) byte, and an optional 8- or 32-bit displacement.
//
//   reg    - register placed in the reg field of the first byte
//   base   - base register, or an invalid register for none
//   index  - index register, or an invalid register for none
//   scale  - scale factor applied to index (must not be no_scale if index
//            is valid)
//   disp   - displacement; emitted as disp8 only when it fits and there is
//            no relocation, otherwise as disp32
//   rspec  - relocation recorded with the disp32, if any
//   rip_relative_correction - extra byte count added to the next-instruction
//            address in the RIP-relative case
//
// Only the low three bits of each register encoding are emitted here; no
// prefix byte is emitted by this function.
// NOTE(review): presumably the caller has already emitted the REX prefix
// carrying the high register bits - confirm against the callers.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
  int regenc = reg->encoding();
  if (regenc >= 8) {
    regenc -= 8;
  }
  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      int indexenc = index->encoding();
      if (indexenc >= 8) {
        indexenc -= 8;
      }
      int baseenc = base->encoding();
      if (baseenc >= 8) {
        baseenc -= 8;
      }
      // [base + index*scale + disp]
      // rbp/r13 as base are excluded from the no-displacement form and fall
      // through to the disp8 form below.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp && base != r13) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp || base == r12) {
      // rsp/r12 as base always get the two-byte form with the fixed
      // second byte 0x24.
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc << 3);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc << 3);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc << 3);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp && base != r12, "illegal addressing mode");
      int baseenc = base->encoding();
      if (baseenc >= 8) {
        baseenc -= 8;
      }
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp && base != r13) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc << 3 | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc << 3 | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc << 3 | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    // No base register.
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      int indexenc = index->encoding();
      if (indexenc >= 8) {
        indexenc -= 8;
      }
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc << 3);
      emit_byte(scale << 6 | indexenc << 3 | 0x05);
      emit_data(disp, rspec, disp32_operand);
#ifdef _LP64
    } else if (rtype != relocInfo::none ) {
      // A relocated address with neither base nor index is emitted
      // RIP-relative on 64-bit builds.
      // [disp] RIP-RELATIVE
      // [00 reg 101] disp32

      emit_byte(0x05 | regenc << 3);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      // next_ip = current pc + the 4 displacement bytes about to be emitted
      // + whatever the caller says still follows the displacement.
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = (int64_t) disp -  (next_ip - inst_mark());
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int) adjusted, rspec, disp32_operand);

#endif // _LP64
    } else {
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc << 3);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
 437 
// Emit the addressing bytes for a memory operand with XMM register 'reg' in
// the reg field: the first (mod/reg/rm) byte, an optional second
// (scale/index/base) byte, and an optional 8- or 32-bit displacement.
//
//   reg    - XMM register placed in the reg field of the first byte
//   base   - base register, or an invalid register for none
//   index  - index register, or an invalid register for none
//   scale  - scale factor applied to index (must not be no_scale if index
//            is valid)
//   disp   - displacement; emitted as disp8 only when it fits and there is
//            no relocation, otherwise as disp32
//   rspec  - relocation recorded with the disp32, if any
//   rip_relative_correction - extra byte count added to the next-instruction
//            address in the RIP-relative case
//
// Only the low three bits of each register encoding are emitted here; no
// prefix byte is emitted by this function.
// NOTE(review): presumably the caller has already emitted the REX prefix
// carrying the high register bits - confirm against the callers.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
  int regenc = reg->encoding();
  if (regenc >= 8) {
    regenc -= 8;
  }
  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      int indexenc = index->encoding();
      if (indexenc >= 8) {
        indexenc -= 8;
      }
      int baseenc = base->encoding();
      if (baseenc >= 8) {
        baseenc -= 8;
      }
      // [base + index*scale + disp]
      // rbp/r13 as base are excluded from the no-displacement form and fall
      // through to the disp8 form below.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp && base != r13) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + disp8]
        // [01 reg 100][ss index base] disp8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp || base == r12) {
      // rsp/r12 as base always get the two-byte form with the fixed
      // second byte 0x24.
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc << 3);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc << 3);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc << 3);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp && base != r12, "illegal addressing mode");
      int baseenc = base->encoding();
      if (baseenc >= 8) {
        baseenc -= 8;
      }
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp && base != r13) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc << 3 | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + imm8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc << 3 | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + imm32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc << 3 | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    // No base register.
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      int indexenc = index->encoding();
      if (indexenc >= 8) {
        indexenc -= 8;
      }
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc << 3);
      emit_byte(scale << 6 | indexenc << 3 | 0x05);
      emit_data(disp, rspec, disp32_operand);
#ifdef _LP64
    } else if ( rtype != relocInfo::none ) {
      // A relocated address with neither base nor index is emitted
      // RIP-relative on 64-bit builds.
      // [disp] RIP-RELATIVE
      // [00 reg 101] disp32
      emit_byte(0x05 | regenc << 3);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;

      assert(inst_mark() != NULL, "must be inside InstructionMark");
      // next_ip = current pc + the 4 displacement bytes about to be emitted
      // + whatever the caller says still follows the displacement.
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;

      int64_t adjusted = (int64_t) disp -  (next_ip - inst_mark());
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int) adjusted, rspec, disp32_operand);
#endif // _LP64
    } else {
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc << 3);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
 567 
// Secret local extension to Assembler::WhichOperand: a pseudo operand
// selector, aliased to _WhichOperand_limit, that asks locate_operand() for
// the end of the instruction instead of the address of an embedded operand.
#define end_pc_operand (_WhichOperand_limit)
 570 
 571 address Assembler::locate_operand(address inst, WhichOperand which) {
 572   // Decode the given instruction, and return the address of
 573   // an embedded 32-bit operand word.
 574 
 575   // If "which" is disp32_operand, selects the displacement portion
 576   // of an effective address specifier.
 577   // If "which" is imm64_operand, selects the trailing immediate constant.
 578   // If "which" is call32_operand, selects the displacement of a call or jump.
 579   // Caller is responsible for ensuring that there is such an operand,
 580   // and that it is 32/64 bits wide.
 581 
 582   // If "which" is end_pc_operand, find the end of the instruction.
 583 
 584   address ip = inst;
 585   bool is_64bit = false;
 586 
 587   debug_only(bool has_disp32 = false);
 588   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
 589 
 590   again_after_prefix:
 591   switch (0xFF & *ip++) {
 592 
 593   // These convenience macros generate groups of "case" labels for the switch.
 594 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
 595 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
 596              case (x)+4: case (x)+5: case (x)+6: case (x)+7
 597 #define REP16(x) REP8((x)+0): \
 598               case REP8((x)+8)
 599 
 600   case CS_segment:
 601   case SS_segment:
 602   case DS_segment:
 603   case ES_segment:
 604   case FS_segment:
 605   case GS_segment:
 606     assert(0, "shouldn't have that prefix");
 607     assert(ip == inst + 1 || ip == inst + 2, "only two prefixes allowed");
 608     goto again_after_prefix;
 609 
 610   case 0x67:
 611   case REX:
 612   case REX_B:
 613   case REX_X:
 614   case REX_XB:
 615   case REX_R:
 616   case REX_RB:
 617   case REX_RX:
 618   case REX_RXB:
 619 //     assert(ip == inst + 1, "only one prefix allowed");
 620     goto again_after_prefix;
 621 
 622   case REX_W:
 623   case REX_WB:
 624   case REX_WX:
 625   case REX_WXB:
 626   case REX_WR:
 627   case REX_WRB:
 628   case REX_WRX:
 629   case REX_WRXB:
 630     is_64bit = true;
 631 //     assert(ip == inst + 1, "only one prefix allowed");
 632     goto again_after_prefix;
 633 
 634   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
 635   case 0x88: // movb a, r
 636   case 0x89: // movl a, r
 637   case 0x8A: // movb r, a
 638   case 0x8B: // movl r, a
 639   case 0x8F: // popl a
 640     debug_only(has_disp32 = true;)
 641     break;
 642 
 643   case 0x68: // pushq #32
 644     if (which == end_pc_operand) {
 645       return ip + 4;
 646     }
 647     assert(0, "pushq has no disp32 or imm64");
 648     ShouldNotReachHere();
 649 
 650   case 0x66: // movw ... (size prefix)
 651     again_after_size_prefix2:
 652     switch (0xFF & *ip++) {
 653     case REX:
 654     case REX_B:
 655     case REX_X:
 656     case REX_XB:
 657     case REX_R:
 658     case REX_RB:
 659     case REX_RX:
 660     case REX_RXB:
 661     case REX_W:
 662     case REX_WB:
 663     case REX_WX:
 664     case REX_WXB:
 665     case REX_WR:
 666     case REX_WRB:
 667     case REX_WRX:
 668     case REX_WRXB:
 669       goto again_after_size_prefix2;
 670     case 0x8B: // movw r, a
 671     case 0x89: // movw a, r
 672       break;
 673     case 0xC7: // movw a, #16
 674       tail_size = 2;  // the imm16
 675       break;
 676     case 0x0F: // several SSE/SSE2 variants
 677       ip--;    // reparse the 0x0F
 678       goto again_after_prefix;
 679     default:
 680       ShouldNotReachHere();
 681     }
 682     break;
 683 
 684   case REP8(0xB8): // movl/q r, #32/#64(oop?)
 685     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
 686     assert((which == call32_operand || which == imm64_operand) && is_64bit, "");
 687     return ip;
 688 
 689   case 0x69: // imul r, a, #32
 690   case 0xC7: // movl a, #32(oop?)
 691     tail_size = 4;
 692     debug_only(has_disp32 = true); // has both kinds of operands!
 693     break;
 694 
 695   case 0x0F: // movx..., etc.
 696     switch (0xFF & *ip++) {
 697     case 0x12: // movlps
 698     case 0x28: // movaps
 699     case 0x2E: // ucomiss
 700     case 0x2F: // comiss
 701     case 0x54: // andps
 702     case 0x57: // xorps
 703     case 0x6E: // movd
 704     case 0x7E: // movd
 705     case 0xAE: // ldmxcsr   a
 706       debug_only(has_disp32 = true); // has both kinds of operands!
 707       break;
 708     case 0xAD: // shrd r, a, %cl
 709     case 0xAF: // imul r, a
 710     case 0xBE: // movsbl r, a
 711     case 0xBF: // movswl r, a
 712     case 0xB6: // movzbl r, a
 713     case 0xB7: // movzwl r, a
 714     case REP16(0x40): // cmovl cc, r, a
 715     case 0xB0: // cmpxchgb
 716     case 0xB1: // cmpxchg
 717     case 0xC1: // xaddl
 718     case 0xC7: // cmpxchg8
 719     case REP16(0x90): // setcc a
 720       debug_only(has_disp32 = true);
 721       // fall out of the switch to decode the address
 722       break;
 723     case 0xAC: // shrd r, a, #8
 724       debug_only(has_disp32 = true);
 725       tail_size = 1;  // the imm8
 726       break;
 727     case REP16(0x80): // jcc rdisp32
 728       if (which == end_pc_operand)  return ip + 4;
 729       assert(which == call32_operand, "jcc has no disp32 or imm64");
 730       return ip;
 731     default:
 732       ShouldNotReachHere();
 733     }
 734     break;
 735 
 736   case 0x81: // addl a, #32; addl r, #32
 737     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 738     tail_size = 4;
 739     debug_only(has_disp32 = true); // has both kinds of operands!
 740     break;
 741 
 742   case 0x83: // addl a, #8; addl r, #8
 743     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 744     debug_only(has_disp32 = true); // has both kinds of operands!
 745     tail_size = 1;
 746     break;
 747 
 748   case 0x9B:
 749     switch (0xFF & *ip++) {
 750     case 0xD9: // fnstcw a
 751       debug_only(has_disp32 = true);
 752       break;
 753     default:
 754       ShouldNotReachHere();
 755     }
 756     break;
 757 
 758   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
 759   case REP4(0x10): // adc...
 760   case REP4(0x20): // and...
 761   case REP4(0x30): // xor...
 762   case REP4(0x08): // or...
 763   case REP4(0x18): // sbb...
 764   case REP4(0x28): // sub...
 765   case 0xF7: // mull a
 766   case 0x87: // xchg r, a
 767     debug_only(has_disp32 = true);
 768     break;
 769   case REP4(0x38): // cmp...
 770   case 0x8D: // lea r, a
 771   case 0x85: // test r, a
 772     debug_only(has_disp32 = true); // has both kinds of operands!
 773     break;
 774 
 775   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
 776   case 0xC6: // movb a, #8
 777   case 0x80: // cmpb a, #8
 778   case 0x6B: // imul r, a, #8
 779     debug_only(has_disp32 = true); // has both kinds of operands!
 780     tail_size = 1; // the imm8
 781     break;
 782 
 783   case 0xE8: // call rdisp32
 784   case 0xE9: // jmp  rdisp32
 785     if (which == end_pc_operand)  return ip + 4;
 786     assert(which == call32_operand, "call has no disp32 or imm32");
 787     return ip;
 788 
 789   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 790   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 791   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 792   case 0xDD: // fld_d a; fst_d a; fstp_d a
 793   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 794   case 0xDF: // fild_d a; fistp_d a
 795   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 796   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 797   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 798     debug_only(has_disp32 = true);
 799     break;
 800 
 801   case 0xF3:                    // For SSE
 802   case 0xF2:                    // For SSE2
 803     switch (0xFF & *ip++) {
 804     case REX:
 805     case REX_B:
 806     case REX_X:
 807     case REX_XB:
 808     case REX_R:
 809     case REX_RB:
 810     case REX_RX:
 811     case REX_RXB:
 812     case REX_W:
 813     case REX_WB:
 814     case REX_WX:
 815     case REX_WXB:
 816     case REX_WR:
 817     case REX_WRB:
 818     case REX_WRX:
 819     case REX_WRXB:
 820       ip++;
 821     default:
 822       ip++;
 823     }
 824     debug_only(has_disp32 = true); // has both kinds of operands!
 825     break;
 826 
 827   default:
 828     ShouldNotReachHere();
 829 
 830 #undef REP8
 831 #undef REP16
 832   }
 833 
 834   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
 835   assert(which != imm64_operand, "instruction is not a movq reg, imm64");
 836   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
 837 
 838   // parse the output of emit_operand
 839   int op2 = 0xFF & *ip++;
 840   int base = op2 & 0x07;
 841   int op3 = -1;
 842   const int b100 = 4;
 843   const int b101 = 5;
 844   if (base == b100 && (op2 >> 6) != 3) {
 845     op3 = 0xFF & *ip++;
 846     base = op3 & 0x07;   // refetch the base
 847   }
 848   // now ip points at the disp (if any)
 849 
 850   switch (op2 >> 6) {
 851   case 0:
 852     // [00 reg  100][ss index base]
 853     // [00 reg  100][00   100  esp]
 854     // [00 reg base]
 855     // [00 reg  100][ss index  101][disp32]
 856     // [00 reg  101]               [disp32]
 857 
 858     if (base == b101) {
 859       if (which == disp32_operand)
 860         return ip;              // caller wants the disp32
 861       ip += 4;                  // skip the disp32
 862     }
 863     break;
 864 
 865   case 1:
 866     // [01 reg  100][ss index base][disp8]
 867     // [01 reg  100][00   100  esp][disp8]
 868     // [01 reg base]               [disp8]
 869     ip += 1;                    // skip the disp8
 870     break;
 871 
 872   case 2:
 873     // [10 reg  100][ss index base][disp32]
 874     // [10 reg  100][00   100  esp][disp32]
 875     // [10 reg base]               [disp32]
 876     if (which == disp32_operand)
 877       return ip;                // caller wants the disp32
 878     ip += 4;                    // skip the disp32
 879     break;
 880 
 881   case 3:
 882     // [11 reg base]  (not a memory addressing mode)
 883     break;
 884   }
 885 
 886   if (which == end_pc_operand) {
 887     return ip + tail_size;
 888   }
 889 
 890   assert(0, "fix locate_operand");
 891   return ip;
 892 }
 893 
 894 address Assembler::locate_next_instruction(address inst) {
 895   // Secretly share code with locate_operand:
 896   return locate_operand(inst, end_pc_operand);
 897 }
 898 
 899 #ifdef ASSERT
 900 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
 901   address inst = inst_mark();
 902   assert(inst != NULL && inst < pc(),
 903          "must point to beginning of instruction");
 904   address opnd;
 905 
 906   Relocation* r = rspec.reloc();
 907   if (r->type() == relocInfo::none) {
 908     return;
 909   } else if (r->is_call() || format == call32_operand) {
 910     opnd = locate_operand(inst, call32_operand);
 911   } else if (r->is_data()) {
 912     assert(format == imm64_operand || format == disp32_operand, "format ok");
 913     opnd = locate_operand(inst, (WhichOperand) format);
 914   } else {
 915     assert(format == 0, "cannot specify a format");
 916     return;
 917   }
 918   assert(opnd == pc(), "must put operand where relocs can find it");
 919 }
 920 #endif
 921 
 922 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
 923   if (reg_enc >= 8) {
 924     prefix(REX_B);
 925     reg_enc -= 8;
 926   } else if (byteinst && reg_enc >= 4) {
 927     prefix(REX);
 928   }
 929   return reg_enc;
 930 }
 931 
 932 int Assembler::prefixq_and_encode(int reg_enc) {
 933   if (reg_enc < 8) {
 934     prefix(REX_W);
 935   } else {
 936     prefix(REX_WB);
 937     reg_enc -= 8;
 938   }
 939   return reg_enc;
 940 }
 941 
 942 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
 943   if (dst_enc < 8) {
 944     if (src_enc >= 8) {
 945       prefix(REX_B);
 946       src_enc -= 8;
 947     } else if (byteinst && src_enc >= 4) {
 948       prefix(REX);
 949     }
 950   } else {
 951     if (src_enc < 8) {
 952       prefix(REX_R);
 953     } else {
 954       prefix(REX_RB);
 955       src_enc -= 8;
 956     }
 957     dst_enc -= 8;
 958   }
 959   return dst_enc << 3 | src_enc;
 960 }
 961 
 962 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
 963   if (dst_enc < 8) {
 964     if (src_enc < 8) {
 965       prefix(REX_W);
 966     } else {
 967       prefix(REX_WB);
 968       src_enc -= 8;
 969     }
 970   } else {
 971     if (src_enc < 8) {
 972       prefix(REX_WR);
 973     } else {
 974       prefix(REX_WRB);
 975       src_enc -= 8;
 976     }
 977     dst_enc -= 8;
 978   }
 979   return dst_enc << 3 | src_enc;
 980 }
 981 
 982 void Assembler::prefix(Register reg) {
 983   if (reg->encoding() >= 8) {
 984     prefix(REX_B);
 985   }
 986 }
 987 
 988 void Assembler::prefix(Address adr) {
 989   if (adr.base_needs_rex()) {
 990     if (adr.index_needs_rex()) {
 991       prefix(REX_XB);
 992     } else {
 993       prefix(REX_B);
 994     }
 995   } else {
 996     if (adr.index_needs_rex()) {
 997       prefix(REX_X);
 998     }
 999   }
1000 }
1001 
1002 void Assembler::prefixq(Address adr) {
1003   if (adr.base_needs_rex()) {
1004     if (adr.index_needs_rex()) {
1005       prefix(REX_WXB);
1006     } else {
1007       prefix(REX_WB);
1008     }
1009   } else {
1010     if (adr.index_needs_rex()) {
1011       prefix(REX_WX);
1012     } else {
1013       prefix(REX_W);
1014     }
1015   }
1016 }
1017 
1018 
1019 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
1020   if (reg->encoding() < 8) {
1021     if (adr.base_needs_rex()) {
1022       if (adr.index_needs_rex()) {
1023         prefix(REX_XB);
1024       } else {
1025         prefix(REX_B);
1026       }
1027     } else {
1028       if (adr.index_needs_rex()) {
1029         prefix(REX_X);
1030       } else if (reg->encoding() >= 4 ) {
1031         prefix(REX);
1032       }
1033     }
1034   } else {
1035     if (adr.base_needs_rex()) {
1036       if (adr.index_needs_rex()) {
1037         prefix(REX_RXB);
1038       } else {
1039         prefix(REX_RB);
1040       }
1041     } else {
1042       if (adr.index_needs_rex()) {
1043         prefix(REX_RX);
1044       } else {
1045         prefix(REX_R);
1046       }
1047     }
1048   }
1049 }
1050 
1051 void Assembler::prefixq(Address adr, Register src) {
1052   if (src->encoding() < 8) {
1053     if (adr.base_needs_rex()) {
1054       if (adr.index_needs_rex()) {
1055         prefix(REX_WXB);
1056       } else {
1057         prefix(REX_WB);
1058       }
1059     } else {
1060       if (adr.index_needs_rex()) {
1061         prefix(REX_WX);
1062       } else {
1063         prefix(REX_W);
1064       }
1065     }
1066   } else {
1067     if (adr.base_needs_rex()) {
1068       if (adr.index_needs_rex()) {
1069         prefix(REX_WRXB);
1070       } else {
1071         prefix(REX_WRB);
1072       }
1073     } else {
1074       if (adr.index_needs_rex()) {
1075         prefix(REX_WRX);
1076       } else {
1077         prefix(REX_WR);
1078       }
1079     }
1080   }
1081 }
1082 
1083 void Assembler::prefix(Address adr, XMMRegister reg) {
1084   if (reg->encoding() < 8) {
1085     if (adr.base_needs_rex()) {
1086       if (adr.index_needs_rex()) {
1087         prefix(REX_XB);
1088       } else {
1089         prefix(REX_B);
1090       }
1091     } else {
1092       if (adr.index_needs_rex()) {
1093         prefix(REX_X);
1094       }
1095     }
1096   } else {
1097     if (adr.base_needs_rex()) {
1098       if (adr.index_needs_rex()) {
1099         prefix(REX_RXB);
1100       } else {
1101         prefix(REX_RB);
1102       }
1103     } else {
1104       if (adr.index_needs_rex()) {
1105         prefix(REX_RX);
1106       } else {
1107         prefix(REX_R);
1108       }
1109     }
1110   }
1111 }
1112 
1113 void Assembler::emit_operand(Register reg, Address adr,
1114                              int rip_relative_correction) {
1115   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1116                adr._rspec,
1117                rip_relative_correction);
1118 }
1119 
1120 void Assembler::emit_operand(XMMRegister reg, Address adr,
1121                              int rip_relative_correction) {
1122   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1123                adr._rspec,
1124                rip_relative_correction);
1125 }
1126 
1127 void Assembler::emit_farith(int b1, int b2, int i) {
1128   assert(isByte(b1) && isByte(b2), "wrong opcode");
1129   assert(0 <= i &&  i < 8, "illegal stack offset");
1130   emit_byte(b1);
1131   emit_byte(b2 + i);
1132 }
1133 
1134 // pushad is invalid, use this instead.
1135 // NOTE: Kills flags!!
1136 void Assembler::pushaq() {
1137   // we have to store original rsp.  ABI says that 128 bytes
1138   // below rsp are local scratch.
1139   movq(Address(rsp, -5 * wordSize), rsp);
1140 
1141   subq(rsp, 16 * wordSize);
1142 
1143   movq(Address(rsp, 15 * wordSize), rax);
1144   movq(Address(rsp, 14 * wordSize), rcx);
1145   movq(Address(rsp, 13 * wordSize), rdx);
1146   movq(Address(rsp, 12 * wordSize), rbx);
1147   // skip rsp
1148   movq(Address(rsp, 10 * wordSize), rbp);
1149   movq(Address(rsp, 9 * wordSize), rsi);
1150   movq(Address(rsp, 8 * wordSize), rdi);
1151   movq(Address(rsp, 7 * wordSize), r8);
1152   movq(Address(rsp, 6 * wordSize), r9);
1153   movq(Address(rsp, 5 * wordSize), r10);
1154   movq(Address(rsp, 4 * wordSize), r11);
1155   movq(Address(rsp, 3 * wordSize), r12);
1156   movq(Address(rsp, 2 * wordSize), r13);
1157   movq(Address(rsp, wordSize), r14);
1158   movq(Address(rsp, 0), r15);
1159 }
1160 
1161 // popad is invalid, use this instead
1162 // NOTE: Kills flags!!
1163 void Assembler::popaq() {
1164   movq(r15, Address(rsp, 0));
1165   movq(r14, Address(rsp, wordSize));
1166   movq(r13, Address(rsp, 2 * wordSize));
1167   movq(r12, Address(rsp, 3 * wordSize));
1168   movq(r11, Address(rsp, 4 * wordSize));
1169   movq(r10, Address(rsp, 5 * wordSize));
1170   movq(r9,  Address(rsp, 6 * wordSize));
1171   movq(r8,  Address(rsp, 7 * wordSize));
1172   movq(rdi, Address(rsp, 8 * wordSize));
1173   movq(rsi, Address(rsp, 9 * wordSize));
1174   movq(rbp, Address(rsp, 10 * wordSize));
1175   // skip rsp
1176   movq(rbx, Address(rsp, 12 * wordSize));
1177   movq(rdx, Address(rsp, 13 * wordSize));
1178   movq(rcx, Address(rsp, 14 * wordSize));
1179   movq(rax, Address(rsp, 15 * wordSize));
1180 
1181   addq(rsp, 16 * wordSize);
1182 }
1183 
1184 void Assembler::pushfq() {
1185   emit_byte(0x9C);
1186 }
1187 
1188 void Assembler::popfq() {
1189   emit_byte(0x9D);
1190 }
1191 
1192 void Assembler::pushq(int imm32) {
1193   emit_byte(0x68);
1194   emit_long(imm32);
1195 }
1196 
1197 void Assembler::pushq(Register src) {
1198   int encode = prefix_and_encode(src->encoding());
1199 
1200   emit_byte(0x50 | encode);
1201 }
1202 
1203 void Assembler::pushq(Address src) {
1204   InstructionMark im(this);
1205   prefix(src);
1206   emit_byte(0xFF);
1207   emit_operand(rsi, src);
1208 }
1209 
1210 void Assembler::popq(Register dst) {
1211   int encode = prefix_and_encode(dst->encoding());
1212   emit_byte(0x58 | encode);
1213 }
1214 
1215 void Assembler::popq(Address dst) {
1216   InstructionMark im(this);
1217   prefix(dst);
1218   emit_byte(0x8F);
1219   emit_operand(rax, dst);
1220 }
1221 
1222 void Assembler::prefix(Prefix p) {
1223   a_byte(p);
1224 }
1225 
1226 void Assembler::movb(Register dst, Address src) {
1227   InstructionMark im(this);
1228   prefix(src, dst, true);
1229   emit_byte(0x8A);
1230   emit_operand(dst, src);
1231 }
1232 
1233 void Assembler::movb(Address dst, int imm8) {
1234   InstructionMark im(this);
1235   prefix(dst);
1236   emit_byte(0xC6);
1237   emit_operand(rax, dst, 1);
1238   emit_byte(imm8);
1239 }
1240 
1241 void Assembler::movb(Address dst, Register src) {
1242   InstructionMark im(this);
1243   prefix(dst, src, true);
1244   emit_byte(0x88);
1245   emit_operand(src, dst);
1246 }
1247 
1248 void Assembler::movw(Address dst, int imm16) {
1249   InstructionMark im(this);
1250   emit_byte(0x66); // switch to 16-bit mode
1251   prefix(dst);
1252   emit_byte(0xC7);
1253   emit_operand(rax, dst, 2);
1254   emit_word(imm16);
1255 }
1256 
1257 void Assembler::movw(Register dst, Address src) {
1258   InstructionMark im(this);
1259   emit_byte(0x66);
1260   prefix(src, dst);
1261   emit_byte(0x8B);
1262   emit_operand(dst, src);
1263 }
1264 
1265 void Assembler::movw(Address dst, Register src) {
1266   InstructionMark im(this);
1267   emit_byte(0x66);
1268   prefix(dst, src);
1269   emit_byte(0x89);
1270   emit_operand(src, dst);
1271 }
1272 
1273 // Uses zero extension.
1274 void Assembler::movl(Register dst, int imm32) {
1275   int encode = prefix_and_encode(dst->encoding());
1276   emit_byte(0xB8 | encode);
1277   emit_long(imm32);
1278 }
1279 
1280 void Assembler::movl(Register dst, Register src) {
1281   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1282   emit_byte(0x8B);
1283   emit_byte(0xC0 | encode);
1284 }
1285 
1286 void Assembler::movl(Register dst, Address src) {
1287   InstructionMark im(this);
1288   prefix(src, dst);
1289   emit_byte(0x8B);
1290   emit_operand(dst, src);
1291 }
1292 
1293 void Assembler::movl(Address dst, int imm32) {
1294   InstructionMark im(this);
1295   prefix(dst);
1296   emit_byte(0xC7);
1297   emit_operand(rax, dst, 4);
1298   emit_long(imm32);
1299 }
1300 
1301 void Assembler::movl(Address dst, Register src) {
1302   InstructionMark im(this);
1303   prefix(dst, src);
1304   emit_byte(0x89);
1305   emit_operand(src, dst);
1306 }
1307 
1308 void Assembler::mov64(Register dst, intptr_t imm64) {
1309   InstructionMark im(this);
1310   int encode = prefixq_and_encode(dst->encoding());
1311   emit_byte(0xB8 | encode);
1312   emit_long64(imm64);
1313 }
1314 
1315 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
1316   InstructionMark im(this);
1317   int encode = prefixq_and_encode(dst->encoding());
1318   emit_byte(0xB8 | encode);
1319   emit_data64(imm64, rspec);
1320 }
1321 
1322 void Assembler::movq(Register dst, Register src) {
1323   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1324   emit_byte(0x8B);
1325   emit_byte(0xC0 | encode);
1326 }
1327 
1328 void Assembler::movq(Register dst, Address src) {
1329   InstructionMark im(this);
1330   prefixq(src, dst);
1331   emit_byte(0x8B);
1332   emit_operand(dst, src);
1333 }
1334 
1335 void Assembler::mov64(Address dst, intptr_t imm32) {
1336   assert(is_simm32(imm32), "lost bits");
1337   InstructionMark im(this);
1338   prefixq(dst);
1339   emit_byte(0xC7);
1340   emit_operand(rax, dst, 4);
1341   emit_long(imm32);
1342 }
1343 
1344 void Assembler::movq(Address dst, Register src) {
1345   InstructionMark im(this);
1346   prefixq(dst, src);
1347   emit_byte(0x89);
1348   emit_operand(src, dst);
1349 }
1350 
1351 void Assembler::movsbl(Register dst, Address src) {
1352   InstructionMark im(this);
1353   prefix(src, dst);
1354   emit_byte(0x0F);
1355   emit_byte(0xBE);
1356   emit_operand(dst, src);
1357 }
1358 
1359 void Assembler::movsbl(Register dst, Register src) {
1360   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1361   emit_byte(0x0F);
1362   emit_byte(0xBE);
1363   emit_byte(0xC0 | encode);
1364 }
1365 
1366 void Assembler::movswl(Register dst, Address src) {
1367   InstructionMark im(this);
1368   prefix(src, dst);
1369   emit_byte(0x0F);
1370   emit_byte(0xBF);
1371   emit_operand(dst, src);
1372 }
1373 
1374 void Assembler::movswl(Register dst, Register src) {
1375   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1376   emit_byte(0x0F);
1377   emit_byte(0xBF);
1378   emit_byte(0xC0 | encode);
1379 }
1380 
1381 void Assembler::movslq(Register dst, Address src) {
1382   InstructionMark im(this);
1383   prefixq(src, dst);
1384   emit_byte(0x63);
1385   emit_operand(dst, src);
1386 }
1387 
1388 void Assembler::movslq(Register dst, Register src) {
1389   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1390   emit_byte(0x63);
1391   emit_byte(0xC0 | encode);
1392 }
1393 
1394 void Assembler::movzbl(Register dst, Address src) {
1395   InstructionMark im(this);
1396   prefix(src, dst);
1397   emit_byte(0x0F);
1398   emit_byte(0xB6);
1399   emit_operand(dst, src);
1400 }
1401 
1402 void Assembler::movzbl(Register dst, Register src) {
1403   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1404   emit_byte(0x0F);
1405   emit_byte(0xB6);
1406   emit_byte(0xC0 | encode);
1407 }
1408 
1409 void Assembler::movzwl(Register dst, Address src) {
1410   InstructionMark im(this);
1411   prefix(src, dst);
1412   emit_byte(0x0F);
1413   emit_byte(0xB7);
1414   emit_operand(dst, src);
1415 }
1416 
1417 void Assembler::movzwl(Register dst, Register src) {
1418   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1419   emit_byte(0x0F);
1420   emit_byte(0xB7);
1421   emit_byte(0xC0 | encode);
1422 }
1423 
1424 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1425   emit_byte(0xF3);
1426   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1427   emit_byte(0x0F);
1428   emit_byte(0x10);
1429   emit_byte(0xC0 | encode);
1430 }
1431 
1432 void Assembler::movss(XMMRegister dst, Address src) {
1433   InstructionMark im(this);
1434   emit_byte(0xF3);
1435   prefix(src, dst);
1436   emit_byte(0x0F);
1437   emit_byte(0x10);
1438   emit_operand(dst, src);
1439 }
1440 
1441 void Assembler::movss(Address dst, XMMRegister src) {
1442   InstructionMark im(this);
1443   emit_byte(0xF3);
1444   prefix(dst, src);
1445   emit_byte(0x0F);
1446   emit_byte(0x11);
1447   emit_operand(src, dst);
1448 }
1449 
1450 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1451   emit_byte(0xF2);
1452   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1453   emit_byte(0x0F);
1454   emit_byte(0x10);
1455   emit_byte(0xC0 | encode);
1456 }
1457 
1458 void Assembler::movsd(XMMRegister dst, Address src) {
1459   InstructionMark im(this);
1460   emit_byte(0xF2);
1461   prefix(src, dst);
1462   emit_byte(0x0F);
1463   emit_byte(0x10);
1464   emit_operand(dst, src);
1465 }
1466 
1467 void Assembler::movsd(Address dst, XMMRegister src) {
1468   InstructionMark im(this);
1469   emit_byte(0xF2);
1470   prefix(dst, src);
1471   emit_byte(0x0F);
1472   emit_byte(0x11);
1473   emit_operand(src, dst);
1474 }
1475 
1476 // New cpus require to use movsd and movss to avoid partial register stall
1477 // when loading from memory. But for old Opteron use movlpd instead of movsd.
1478 // The selection is done in MacroAssembler::movdbl() and movflt().
1479 void Assembler::movlpd(XMMRegister dst, Address src) {
1480   InstructionMark im(this);
1481   emit_byte(0x66);
1482   prefix(src, dst);
1483   emit_byte(0x0F);
1484   emit_byte(0x12);
1485   emit_operand(dst, src);
1486 }
1487 
1488 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1489   int dstenc = dst->encoding();
1490   int srcenc = src->encoding();
1491   emit_byte(0x66);
1492   if (dstenc < 8) {
1493     if (srcenc >= 8) {
1494       prefix(REX_B);
1495       srcenc -= 8;
1496     }
1497   } else {
1498     if (srcenc < 8) {
1499       prefix(REX_R);
1500     } else {
1501       prefix(REX_RB);
1502       srcenc -= 8;
1503     }
1504     dstenc -= 8;
1505   }
1506   emit_byte(0x0F);
1507   emit_byte(0x28);
1508   emit_byte(0xC0 | dstenc << 3 | srcenc);
1509 }
1510 
1511 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1512   int dstenc = dst->encoding();
1513   int srcenc = src->encoding();
1514   if (dstenc < 8) {
1515     if (srcenc >= 8) {
1516       prefix(REX_B);
1517       srcenc -= 8;
1518     }
1519   } else {
1520     if (srcenc < 8) {
1521       prefix(REX_R);
1522     } else {
1523       prefix(REX_RB);
1524       srcenc -= 8;
1525     }
1526     dstenc -= 8;
1527   }
1528   emit_byte(0x0F);
1529   emit_byte(0x28);
1530   emit_byte(0xC0 | dstenc << 3 | srcenc);
1531 }
1532 
1533 void Assembler::movdl(XMMRegister dst, Register src) {
1534   emit_byte(0x66);
1535   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1536   emit_byte(0x0F);
1537   emit_byte(0x6E);
1538   emit_byte(0xC0 | encode);
1539 }
1540 
1541 void Assembler::movdl(Register dst, XMMRegister src) {
1542   emit_byte(0x66);
1543   // swap src/dst to get correct prefix
1544   int encode = prefix_and_encode(src->encoding(), dst->encoding());
1545   emit_byte(0x0F);
1546   emit_byte(0x7E);
1547   emit_byte(0xC0 | encode);
1548 }
1549 
1550 void Assembler::movdq(XMMRegister dst, Register src) {
1551   emit_byte(0x66);
1552   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1553   emit_byte(0x0F);
1554   emit_byte(0x6E);
1555   emit_byte(0xC0 | encode);
1556 }
1557 
1558 void Assembler::movdq(Register dst, XMMRegister src) {
1559   emit_byte(0x66);
1560   // swap src/dst to get correct prefix
1561   int encode = prefixq_and_encode(src->encoding(), dst->encoding());
1562   emit_byte(0x0F);
1563   emit_byte(0x7E);
1564   emit_byte(0xC0 | encode);
1565 }
1566 
1567 void Assembler::pxor(XMMRegister dst, Address src) {
1568   InstructionMark im(this);
1569   emit_byte(0x66);
1570   prefix(src, dst);
1571   emit_byte(0x0F);
1572   emit_byte(0xEF);
1573   emit_operand(dst, src);
1574 }
1575 
1576 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
1577   InstructionMark im(this);
1578   emit_byte(0x66);
1579   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1580   emit_byte(0x0F);
1581   emit_byte(0xEF);
1582   emit_byte(0xC0 | encode);
1583 }
1584 
1585 void Assembler::movdqa(XMMRegister dst, Address src) {
1586   InstructionMark im(this);
1587   emit_byte(0x66);
1588   prefix(src, dst);
1589   emit_byte(0x0F);
1590   emit_byte(0x6F);
1591   emit_operand(dst, src);
1592 }
1593 
1594 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1595   emit_byte(0x66);
1596   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1597   emit_byte(0x0F);
1598   emit_byte(0x6F);
1599   emit_byte(0xC0 | encode);
1600 }
1601 
1602 void Assembler::movdqa(Address dst, XMMRegister src) {
1603   InstructionMark im(this);
1604   emit_byte(0x66);
1605   prefix(dst, src);
1606   emit_byte(0x0F);
1607   emit_byte(0x7F);
1608   emit_operand(src, dst);
1609 }
1610 
1611 void Assembler::movq(XMMRegister dst, Address src) {
1612   InstructionMark im(this);
1613   emit_byte(0xF3);
1614   prefix(src, dst);
1615   emit_byte(0x0F);
1616   emit_byte(0x7E);
1617   emit_operand(dst, src);
1618 }
1619 
1620 void Assembler::movq(Address dst, XMMRegister src) {
1621   InstructionMark im(this);
1622   emit_byte(0x66);
1623   prefix(dst, src);
1624   emit_byte(0x0F);
1625   emit_byte(0xD6);
1626   emit_operand(src, dst);
1627 }
1628 
1629 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
1630   assert(isByte(mode), "invalid value");
1631   emit_byte(0x66);
1632   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1633   emit_byte(0x0F);
1634   emit_byte(0x70);
1635   emit_byte(0xC0 | encode);
1636   emit_byte(mode & 0xFF);
1637 }
1638 
1639 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
1640   assert(isByte(mode), "invalid value");
1641   InstructionMark im(this);
1642   emit_byte(0x66);
1643   emit_byte(0x0F);
1644   emit_byte(0x70);
1645   emit_operand(dst, src);
1646   emit_byte(mode & 0xFF);
1647 }
1648 
1649 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
1650   assert(isByte(mode), "invalid value");
1651   emit_byte(0xF2);
1652   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1653   emit_byte(0x0F);
1654   emit_byte(0x70);
1655   emit_byte(0xC0 | encode);
1656   emit_byte(mode & 0xFF);
1657 }
1658 
1659 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
1660   assert(isByte(mode), "invalid value");
1661   InstructionMark im(this);
1662   emit_byte(0xF2);
1663   emit_byte(0x0F);
1664   emit_byte(0x70);
1665   emit_operand(dst, src);
1666   emit_byte(mode & 0xFF);
1667 }
1668 
1669 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1670   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1671   emit_byte(0x0F);
1672   emit_byte(0x40 | cc);
1673   emit_byte(0xC0 | encode);
1674 }
1675 
1676 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1677   InstructionMark im(this);
1678   prefix(src, dst);
1679   emit_byte(0x0F);
1680   emit_byte(0x40 | cc);
1681   emit_operand(dst, src);
1682 }
1683 
1684 void Assembler::cmovq(Condition cc, Register dst, Register src) {
1685   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1686   emit_byte(0x0F);
1687   emit_byte(0x40 | cc);
1688   emit_byte(0xC0 | encode);
1689 }
1690 
1691 void Assembler::cmovq(Condition cc, Register dst, Address src) {
1692   InstructionMark im(this);
1693   prefixq(src, dst);
1694   emit_byte(0x0F);
1695   emit_byte(0x40 | cc);
1696   emit_operand(dst, src);
1697 }
1698 
1699 void Assembler::prefetch_prefix(Address src) {
1700   prefix(src);
1701   emit_byte(0x0F);
1702 }
1703 
1704 void Assembler::prefetcht0(Address src) {
1705   InstructionMark im(this);
1706   prefetch_prefix(src);
1707   emit_byte(0x18);
1708   emit_operand(rcx, src); // 1, src
1709 }
1710 
1711 void Assembler::prefetcht1(Address src) {
1712   InstructionMark im(this);
1713   prefetch_prefix(src);
1714   emit_byte(0x18);
1715   emit_operand(rdx, src); // 2, src
1716 }
1717 
1718 void Assembler::prefetcht2(Address src) {
1719   InstructionMark im(this);
1720   prefetch_prefix(src);
1721   emit_byte(0x18);
1722   emit_operand(rbx, src); // 3, src
1723 }
1724 
1725 void Assembler::prefetchnta(Address src) {
1726   InstructionMark im(this);
1727   prefetch_prefix(src);
1728   emit_byte(0x18);
1729   emit_operand(rax, src); // 0, src
1730 }
1731 
1732 void Assembler::prefetchw(Address src) {
1733   InstructionMark im(this);
1734   prefetch_prefix(src);
1735   emit_byte(0x0D);
1736   emit_operand(rcx, src); // 1, src
1737 }
1738 
1739 void Assembler::adcl(Register dst, int imm32) {
1740   prefix(dst);
1741   emit_arith(0x81, 0xD0, dst, imm32);
1742 }
1743 
1744 void Assembler::adcl(Register dst, Address src) {
1745   InstructionMark im(this);
1746   prefix(src, dst);
1747   emit_byte(0x13);
1748   emit_operand(dst, src);
1749 }
1750 
1751 void Assembler::adcl(Register dst, Register src) {
1752   (void) prefix_and_encode(dst->encoding(), src->encoding());
1753   emit_arith(0x13, 0xC0, dst, src);
1754 }
1755 
1756 void Assembler::adcq(Register dst, int imm32) {
1757   (void) prefixq_and_encode(dst->encoding());
1758   emit_arith(0x81, 0xD0, dst, imm32);
1759 }
1760 
1761 void Assembler::adcq(Register dst, Address src) {
1762   InstructionMark im(this);
1763   prefixq(src, dst);
1764   emit_byte(0x13);
1765   emit_operand(dst, src);
1766 }
1767 
1768 void Assembler::adcq(Register dst, Register src) {
1769   (int) prefixq_and_encode(dst->encoding(), src->encoding());
1770   emit_arith(0x13, 0xC0, dst, src);
1771 }
1772 
1773 void Assembler::addl(Address dst, int imm32) {
1774   InstructionMark im(this);
1775   prefix(dst);
1776   emit_arith_operand(0x81, rax, dst,imm32);
1777 }
1778 
1779 void Assembler::addl(Address dst, Register src) {
1780   InstructionMark im(this);
1781   prefix(dst, src);
1782   emit_byte(0x01);
1783   emit_operand(src, dst);
1784 }
1785 
1786 void Assembler::addl(Register dst, int imm32) {
1787   prefix(dst);
1788   emit_arith(0x81, 0xC0, dst, imm32);
1789 }
1790 
1791 void Assembler::addl(Register dst, Address src) {
1792   InstructionMark im(this);
1793   prefix(src, dst);
1794   emit_byte(0x03);
1795   emit_operand(dst, src);
1796 }
1797 
1798 void Assembler::addl(Register dst, Register src) {
1799   (void) prefix_and_encode(dst->encoding(), src->encoding());
1800   emit_arith(0x03, 0xC0, dst, src);
1801 }
1802 
1803 void Assembler::addq(Address dst, int imm32) {
1804   InstructionMark im(this);
1805   prefixq(dst);
1806   emit_arith_operand(0x81, rax, dst,imm32);
1807 }
1808 
1809 void Assembler::addq(Address dst, Register src) {
1810   InstructionMark im(this);
1811   prefixq(dst, src);
1812   emit_byte(0x01);
1813   emit_operand(src, dst);
1814 }
1815 
1816 void Assembler::addq(Register dst, int imm32) {
1817   (void) prefixq_and_encode(dst->encoding());
1818   emit_arith(0x81, 0xC0, dst, imm32);
1819 }
1820 
1821 void Assembler::addq(Register dst, Address src) {
1822   InstructionMark im(this);
1823   prefixq(src, dst);
1824   emit_byte(0x03);
1825   emit_operand(dst, src);
1826 }
1827 
1828 void Assembler::addq(Register dst, Register src) {
1829   (void) prefixq_and_encode(dst->encoding(), src->encoding());
1830   emit_arith(0x03, 0xC0, dst, src);
1831 }
1832 
1833 void Assembler::andl(Register dst, int imm32) {
1834   prefix(dst);
1835   emit_arith(0x81, 0xE0, dst, imm32);
1836 }
1837 
1838 void Assembler::andl(Register dst, Address src) {
1839   InstructionMark im(this);
1840   prefix(src, dst);
1841   emit_byte(0x23);
1842   emit_operand(dst, src);
1843 }
1844 
1845 void Assembler::andl(Register dst, Register src) {
1846   (void) prefix_and_encode(dst->encoding(), src->encoding());
1847   emit_arith(0x23, 0xC0, dst, src);
1848 }
1849 
1850 void Assembler::andq(Register dst, int imm32) {
1851   (void) prefixq_and_encode(dst->encoding());
1852   emit_arith(0x81, 0xE0, dst, imm32);
1853 }
1854 
1855 void Assembler::andq(Register dst, Address src) {
1856   InstructionMark im(this);
1857   prefixq(src, dst);
1858   emit_byte(0x23);
1859   emit_operand(dst, src);
1860 }
1861 
1862 void Assembler::andq(Register dst, Register src) {
1863   (int) prefixq_and_encode(dst->encoding(), src->encoding());
1864   emit_arith(0x23, 0xC0, dst, src);
1865 }
1866 
1867 void Assembler::cmpb(Address dst, int imm8) {
1868   InstructionMark im(this);
1869   prefix(dst);
1870   emit_byte(0x80);
1871   emit_operand(rdi, dst, 1);
1872   emit_byte(imm8);
1873 }
1874 
1875 void Assembler::cmpl(Address dst, int imm32) {
1876   InstructionMark im(this);
1877   prefix(dst);
1878   emit_byte(0x81);
1879   emit_operand(rdi, dst, 4);
1880   emit_long(imm32);
1881 }
1882 
1883 void Assembler::cmpl(Register dst, int imm32) {
1884   prefix(dst);
1885   emit_arith(0x81, 0xF8, dst, imm32);
1886 }
1887 
1888 void Assembler::cmpl(Register dst, Register src) {
1889   (void) prefix_and_encode(dst->encoding(), src->encoding());
1890   emit_arith(0x3B, 0xC0, dst, src);
1891 }
1892 
1893 void Assembler::cmpl(Register dst, Address src) {
1894   InstructionMark im(this);
1895   prefix(src, dst);
1896   emit_byte(0x3B);
1897   emit_operand(dst, src);
1898 }
1899 
1900 void Assembler::cmpq(Address dst, int imm32) {
1901   InstructionMark im(this);
1902   prefixq(dst);
1903   emit_byte(0x81);
1904   emit_operand(rdi, dst, 4);
1905   emit_long(imm32);
1906 }
1907 
1908 void Assembler::cmpq(Register dst, int imm32) {
1909   (void) prefixq_and_encode(dst->encoding());
1910   emit_arith(0x81, 0xF8, dst, imm32);
1911 }
1912 
1913 void Assembler::cmpq(Address dst, Register src) {
1914   prefixq(dst, src);
1915   emit_byte(0x3B);
1916   emit_operand(src, dst);
1917 }
1918 
1919 void Assembler::cmpq(Register dst, Register src) {
1920   (void) prefixq_and_encode(dst->encoding(), src->encoding());
1921   emit_arith(0x3B, 0xC0, dst, src);
1922 }
1923 
1924 void Assembler::cmpq(Register dst, Address  src) {
1925   InstructionMark im(this);
1926   prefixq(src, dst);
1927   emit_byte(0x3B);
1928   emit_operand(dst, src);
1929 }
1930 
1931 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
1932   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1933   emit_byte(0x0F);
1934   emit_byte(0x2E);
1935   emit_byte(0xC0 | encode);
1936 }
1937 
1938 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
1939   emit_byte(0x66);
1940   ucomiss(dst, src);
1941 }
1942 
1943 void Assembler::decl(Register dst) {
1944   // Don't use it directly. Use MacroAssembler::decrementl() instead.
1945   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
1946   int encode = prefix_and_encode(dst->encoding());
1947   emit_byte(0xFF);
1948   emit_byte(0xC8 | encode);
1949 }
1950 
1951 void Assembler::decl(Address dst) {
1952   // Don't use it directly. Use MacroAssembler::decrementl() instead.
1953   InstructionMark im(this);
1954   prefix(dst);
1955   emit_byte(0xFF);
1956   emit_operand(rcx, dst);
1957 }
1958 
1959 void Assembler::decq(Register dst) {
1960   // Don't use it directly. Use MacroAssembler::decrementq() instead.
1961   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
1962   int encode = prefixq_and_encode(dst->encoding());
1963   emit_byte(0xFF);
1964   emit_byte(0xC8 | encode);
1965 }
1966 
1967 void Assembler::decq(Address dst) {
1968   // Don't use it directly. Use MacroAssembler::decrementq() instead.
1969   InstructionMark im(this);
1970   prefixq(dst);
1971   emit_byte(0xFF);
1972   emit_operand(rcx, dst);
1973 }
1974 
1975 void Assembler::idivl(Register src) {
1976   int encode = prefix_and_encode(src->encoding());
1977   emit_byte(0xF7);
1978   emit_byte(0xF8 | encode);
1979 }
1980 
1981 void Assembler::idivq(Register src) {
1982   int encode = prefixq_and_encode(src->encoding());
1983   emit_byte(0xF7);
1984   emit_byte(0xF8 | encode);
1985 }
1986 
1987 void Assembler::cdql() {
1988   emit_byte(0x99);
1989 }
1990 
1991 void Assembler::cdqq() {
1992   prefix(REX_W);
1993   emit_byte(0x99);
1994 }
1995 
1996 void Assembler::imull(Register dst, Register src) {
1997   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1998   emit_byte(0x0F);
1999   emit_byte(0xAF);
2000   emit_byte(0xC0 | encode);
2001 }
2002 
2003 void Assembler::imull(Register dst, Register src, int value) {
2004   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2005   if (is8bit(value)) {
2006     emit_byte(0x6B);
2007     emit_byte(0xC0 | encode);
2008     emit_byte(value);
2009   } else {
2010     emit_byte(0x69);
2011     emit_byte(0xC0 | encode);
2012     emit_long(value);
2013   }
2014 }
2015 
2016 void Assembler::imulq(Register dst, Register src) {
2017   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2018   emit_byte(0x0F);
2019   emit_byte(0xAF);
2020   emit_byte(0xC0 | encode);
2021 }
2022 
2023 void Assembler::imulq(Register dst, Register src, int value) {
2024   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2025   if (is8bit(value)) {
2026     emit_byte(0x6B);
2027     emit_byte(0xC0 | encode);
2028     emit_byte(value);
2029   } else {
2030     emit_byte(0x69);
2031     emit_byte(0xC0 | encode);
2032     emit_long(value);
2033   }
2034 }
2035 
2036 void Assembler::incl(Register dst) {
2037   // Don't use it directly. Use MacroAssembler::incrementl() instead.
2038   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2039   int encode = prefix_and_encode(dst->encoding());
2040   emit_byte(0xFF);
2041   emit_byte(0xC0 | encode);
2042 }
2043 
2044 void Assembler::incl(Address dst) {
2045   // Don't use it directly. Use MacroAssembler::incrementl() instead.
2046   InstructionMark im(this);
2047   prefix(dst);
2048   emit_byte(0xFF);
2049   emit_operand(rax, dst);
2050 }
2051 
2052 void Assembler::incq(Register dst) {
2053   // Don't use it directly. Use MacroAssembler::incrementq() instead.
2054   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2055   int encode = prefixq_and_encode(dst->encoding());
2056   emit_byte(0xFF);
2057   emit_byte(0xC0 | encode);
2058 }
2059 
2060 void Assembler::incq(Address dst) {
2061   // Don't use it directly. Use MacroAssembler::incrementq() instead.
2062   InstructionMark im(this);
2063   prefixq(dst);
2064   emit_byte(0xFF);
2065   emit_operand(rax, dst);
2066 }
2067 
2068 void Assembler::leal(Register dst, Address src) {
2069   InstructionMark im(this);
2070   emit_byte(0x67); // addr32
2071   prefix(src, dst);
2072   emit_byte(0x8D);
2073   emit_operand(dst, src);
2074 }
2075 
2076 void Assembler::leaq(Register dst, Address src) {
2077   InstructionMark im(this);
2078   prefixq(src, dst);
2079   emit_byte(0x8D);
2080   emit_operand(dst, src);
2081 }
2082 
2083 void Assembler::mull(Address src) {
2084   InstructionMark im(this);
2085   // was missing
2086   prefix(src);
2087   emit_byte(0xF7);
2088   emit_operand(rsp, src);
2089 }
2090 
2091 void Assembler::mull(Register src) {
2092   // was missing
2093   int encode = prefix_and_encode(src->encoding());
2094   emit_byte(0xF7);
2095   emit_byte(0xE0 | encode);
2096 }
2097 
2098 void Assembler::negl(Register dst) {
2099   int encode = prefix_and_encode(dst->encoding());
2100   emit_byte(0xF7);
2101   emit_byte(0xD8 | encode);
2102 }
2103 
2104 void Assembler::negq(Register dst) {
2105   int encode = prefixq_and_encode(dst->encoding());
2106   emit_byte(0xF7);
2107   emit_byte(0xD8 | encode);
2108 }
2109 
2110 void Assembler::notl(Register dst) {
2111   int encode = prefix_and_encode(dst->encoding());
2112   emit_byte(0xF7);
2113   emit_byte(0xD0 | encode);
2114 }
2115 
2116 void Assembler::notq(Register dst) {
2117   int encode = prefixq_and_encode(dst->encoding());
2118   emit_byte(0xF7);
2119   emit_byte(0xD0 | encode);
2120 }
2121 
2122 void Assembler::orl(Address dst, int imm32) {
2123   InstructionMark im(this);
2124   prefix(dst);
2125   emit_byte(0x81);
2126   emit_operand(rcx, dst, 4);
2127   emit_long(imm32);
2128 }
2129 
2130 void Assembler::orl(Register dst, int imm32) {
2131   prefix(dst);
2132   emit_arith(0x81, 0xC8, dst, imm32);
2133 }
2134 
2135 void Assembler::orl(Register dst, Address src) {
2136   InstructionMark im(this);
2137   prefix(src, dst);
2138   emit_byte(0x0B);
2139   emit_operand(dst, src);
2140 }
2141 
2142 void Assembler::orl(Register dst, Register src) {
2143   (void) prefix_and_encode(dst->encoding(), src->encoding());
2144   emit_arith(0x0B, 0xC0, dst, src);
2145 }
2146 
2147 void Assembler::orq(Address dst, int imm32) {
2148   InstructionMark im(this);
2149   prefixq(dst);
2150   emit_byte(0x81);
2151   emit_operand(rcx, dst, 4);
2152   emit_long(imm32);
2153 }
2154 
2155 void Assembler::orq(Register dst, int imm32) {
2156   (void) prefixq_and_encode(dst->encoding());
2157   emit_arith(0x81, 0xC8, dst, imm32);
2158 }
2159 
2160 void Assembler::orq(Register dst, Address src) {
2161   InstructionMark im(this);
2162   prefixq(src, dst);
2163   emit_byte(0x0B);
2164   emit_operand(dst, src);
2165 }
2166 
2167 void Assembler::orq(Register dst, Register src) {
2168   (void) prefixq_and_encode(dst->encoding(), src->encoding());
2169   emit_arith(0x0B, 0xC0, dst, src);
2170 }
2171 
2172 void Assembler::rcll(Register dst, int imm8) {
2173   assert(isShiftCount(imm8), "illegal shift count");
2174   int encode = prefix_and_encode(dst->encoding());
2175   if (imm8 == 1) {
2176     emit_byte(0xD1);
2177     emit_byte(0xD0 | encode);
2178   } else {
2179     emit_byte(0xC1);
2180     emit_byte(0xD0 | encode);
2181     emit_byte(imm8);
2182   }
2183 }
2184 
2185 void Assembler::rclq(Register dst, int imm8) {
2186   assert(isShiftCount(imm8 >> 1), "illegal shift count");
2187   int encode = prefixq_and_encode(dst->encoding());
2188   if (imm8 == 1) {
2189     emit_byte(0xD1);
2190     emit_byte(0xD0 | encode);
2191   } else {
2192     emit_byte(0xC1);
2193     emit_byte(0xD0 | encode);
2194     emit_byte(imm8);
2195   }
2196 }
2197 
2198 void Assembler::sarl(Register dst, int imm8) {
2199   int encode = prefix_and_encode(dst->encoding());
2200   assert(isShiftCount(imm8), "illegal shift count");
2201   if (imm8 == 1) {
2202     emit_byte(0xD1);
2203     emit_byte(0xF8 | encode);
2204   } else {
2205     emit_byte(0xC1);
2206     emit_byte(0xF8 | encode);
2207     emit_byte(imm8);
2208   }
2209 }
2210 
2211 void Assembler::sarl(Register dst) {
2212   int encode = prefix_and_encode(dst->encoding());
2213   emit_byte(0xD3);
2214   emit_byte(0xF8 | encode);
2215 }
2216 
2217 void Assembler::sarq(Register dst, int imm8) {
2218   assert(isShiftCount(imm8 >> 1), "illegal shift count");
2219   int encode = prefixq_and_encode(dst->encoding());
2220   if (imm8 == 1) {
2221     emit_byte(0xD1);
2222     emit_byte(0xF8 | encode);
2223   } else {
2224     emit_byte(0xC1);
2225     emit_byte(0xF8 | encode);
2226     emit_byte(imm8);
2227   }
2228 }
2229 
2230 void Assembler::sarq(Register dst) {
2231   int encode = prefixq_and_encode(dst->encoding());
2232   emit_byte(0xD3);
2233   emit_byte(0xF8 | encode);
2234 }
2235 
2236 void Assembler::sbbl(Address dst, int imm32) {
2237   InstructionMark im(this);
2238   prefix(dst);
2239   emit_arith_operand(0x81, rbx, dst, imm32);
2240 }
2241 
2242 void Assembler::sbbl(Register dst, int imm32) {
2243   prefix(dst);
2244   emit_arith(0x81, 0xD8, dst, imm32);
2245 }
2246 
2247 void Assembler::sbbl(Register dst, Address src) {
2248   InstructionMark im(this);
2249   prefix(src, dst);
2250   emit_byte(0x1B);
2251   emit_operand(dst, src);
2252 }
2253 
2254 void Assembler::sbbl(Register dst, Register src) {
2255   (void) prefix_and_encode(dst->encoding(), src->encoding());
2256   emit_arith(0x1B, 0xC0, dst, src);
2257 }
2258 
2259 void Assembler::sbbq(Address dst, int imm32) {
2260   InstructionMark im(this);
2261   prefixq(dst);
2262   emit_arith_operand(0x81, rbx, dst, imm32);
2263 }
2264 
2265 void Assembler::sbbq(Register dst, int imm32) {
2266   (void) prefixq_and_encode(dst->encoding());
2267   emit_arith(0x81, 0xD8, dst, imm32);
2268 }
2269 
2270 void Assembler::sbbq(Register dst, Address src) {
2271   InstructionMark im(this);
2272   prefixq(src, dst);
2273   emit_byte(0x1B);
2274   emit_operand(dst, src);
2275 }
2276 
2277 void Assembler::sbbq(Register dst, Register src) {
2278   (void) prefixq_and_encode(dst->encoding(), src->encoding());
2279   emit_arith(0x1B, 0xC0, dst, src);
2280 }
2281 
2282 void Assembler::shll(Register dst, int imm8) {
2283   assert(isShiftCount(imm8), "illegal shift count");
2284   int encode = prefix_and_encode(dst->encoding());
2285   if (imm8 == 1 ) {
2286     emit_byte(0xD1);
2287     emit_byte(0xE0 | encode);
2288   } else {
2289     emit_byte(0xC1);
2290     emit_byte(0xE0 | encode);
2291     emit_byte(imm8);
2292   }
2293 }
2294 
2295 void Assembler::shll(Register dst) {
2296   int encode = prefix_and_encode(dst->encoding());
2297   emit_byte(0xD3);
2298   emit_byte(0xE0 | encode);
2299 }
2300 
2301 void Assembler::shlq(Register dst, int imm8) {
2302   assert(isShiftCount(imm8 >> 1), "illegal shift count");
2303   int encode = prefixq_and_encode(dst->encoding());
2304   if (imm8 == 1) {
2305     emit_byte(0xD1);
2306     emit_byte(0xE0 | encode);
2307   } else {
2308     emit_byte(0xC1);
2309     emit_byte(0xE0 | encode);
2310     emit_byte(imm8);
2311   }
2312 }
2313 
2314 void Assembler::shlq(Register dst) {
2315   int encode = prefixq_and_encode(dst->encoding());
2316   emit_byte(0xD3);
2317   emit_byte(0xE0 | encode);
2318 }
2319 
2320 void Assembler::shrl(Register dst, int imm8) {
2321   assert(isShiftCount(imm8), "illegal shift count");
2322   int encode = prefix_and_encode(dst->encoding());
2323   emit_byte(0xC1);
2324   emit_byte(0xE8 | encode);
2325   emit_byte(imm8);
2326 }
2327 
2328 void Assembler::shrl(Register dst) {
2329   int encode = prefix_and_encode(dst->encoding());
2330   emit_byte(0xD3);
2331   emit_byte(0xE8 | encode);
2332 }
2333 
2334 void Assembler::shrq(Register dst, int imm8) {
2335   assert(isShiftCount(imm8 >> 1), "illegal shift count");
2336   int encode = prefixq_and_encode(dst->encoding());
2337   emit_byte(0xC1);
2338   emit_byte(0xE8 | encode);
2339   emit_byte(imm8);
2340 }
2341 
2342 void Assembler::shrq(Register dst) {
2343   int encode = prefixq_and_encode(dst->encoding());
2344   emit_byte(0xD3);
2345   emit_byte(0xE8 | encode);
2346 }
2347 
2348 void Assembler::subl(Address dst, int imm32) {
2349   InstructionMark im(this);
2350   prefix(dst);
2351   if (is8bit(imm32)) {
2352     emit_byte(0x83);
2353     emit_operand(rbp, dst, 1);
2354     emit_byte(imm32 & 0xFF);
2355   } else {
2356     emit_byte(0x81);
2357     emit_operand(rbp, dst, 4);
2358     emit_long(imm32);
2359   }
2360 }
2361 
2362 void Assembler::subl(Register dst, int imm32) {
2363   prefix(dst);
2364   emit_arith(0x81, 0xE8, dst, imm32);
2365 }
2366 
2367 void Assembler::subl(Address dst, Register src) {
2368   InstructionMark im(this);
2369   prefix(dst, src);
2370   emit_byte(0x29);
2371   emit_operand(src, dst);
2372 }
2373 
2374 void Assembler::subl(Register dst, Address src) {
2375   InstructionMark im(this);
2376   prefix(src, dst);
2377   emit_byte(0x2B);
2378   emit_operand(dst, src);
2379 }
2380 
2381 void Assembler::subl(Register dst, Register src) {
2382   (void) prefix_and_encode(dst->encoding(), src->encoding());
2383   emit_arith(0x2B, 0xC0, dst, src);
2384 }
2385 
2386 void Assembler::subq(Address dst, int imm32) {
2387   InstructionMark im(this);
2388   prefixq(dst);
2389   if (is8bit(imm32)) {
2390     emit_byte(0x83);
2391     emit_operand(rbp, dst, 1);
2392     emit_byte(imm32 & 0xFF);
2393   } else {
2394     emit_byte(0x81);
2395     emit_operand(rbp, dst, 4);
2396     emit_long(imm32);
2397   }
2398 }
2399 
2400 void Assembler::subq(Register dst, int imm32) {
2401   (void) prefixq_and_encode(dst->encoding());
2402   emit_arith(0x81, 0xE8, dst, imm32);
2403 }
2404 
2405 void Assembler::subq(Address dst, Register src) {
2406   InstructionMark im(this);
2407   prefixq(dst, src);
2408   emit_byte(0x29);
2409   emit_operand(src, dst);
2410 }
2411 
2412 void Assembler::subq(Register dst, Address src) {
2413   InstructionMark im(this);
2414   prefixq(src, dst);
2415   emit_byte(0x2B);
2416   emit_operand(dst, src);
2417 }
2418 
2419 void Assembler::subq(Register dst, Register src) {
2420   (void) prefixq_and_encode(dst->encoding(), src->encoding());
2421   emit_arith(0x2B, 0xC0, dst, src);
2422 }
2423 
2424 void Assembler::testb(Register dst, int imm8) {
2425   (void) prefix_and_encode(dst->encoding(), true);
2426   emit_arith_b(0xF6, 0xC0, dst, imm8);
2427 }
2428 
2429 void Assembler::testl(Register dst, int imm32) {
2430   // not using emit_arith because test
2431   // doesn't support sign-extension of
2432   // 8bit operands
2433   int encode = dst->encoding();
2434   if (encode == 0) {
2435     emit_byte(0xA9);
2436   } else {
2437     encode = prefix_and_encode(encode);
2438     emit_byte(0xF7);
2439     emit_byte(0xC0 | encode);
2440   }
2441   emit_long(imm32);
2442 }
2443 
2444 void Assembler::testl(Register dst, Register src) {
2445   (void) prefix_and_encode(dst->encoding(), src->encoding());
2446   emit_arith(0x85, 0xC0, dst, src);
2447 }
2448 
2449 void Assembler::testq(Register dst, int imm32) {
2450   // not using emit_arith because test
2451   // doesn't support sign-extension of
2452   // 8bit operands
2453   int encode = dst->encoding();
2454   if (encode == 0) {
2455     prefix(REX_W);
2456     emit_byte(0xA9);
2457   } else {
2458     encode = prefixq_and_encode(encode);
2459     emit_byte(0xF7);
2460     emit_byte(0xC0 | encode);
2461   }
2462   emit_long(imm32);
2463 }
2464 
2465 void Assembler::testq(Register dst, Register src) {
2466   (void) prefixq_and_encode(dst->encoding(), src->encoding());
2467   emit_arith(0x85, 0xC0, dst, src);
2468 }
2469 
2470 void Assembler::xaddl(Address dst, Register src) {
2471   InstructionMark im(this);
2472   prefix(dst, src);
2473   emit_byte(0x0F);
2474   emit_byte(0xC1);
2475   emit_operand(src, dst);
2476 }
2477 
2478 void Assembler::xaddq(Address dst, Register src) {
2479   InstructionMark im(this);
2480   prefixq(dst, src);
2481   emit_byte(0x0F);
2482   emit_byte(0xC1);
2483   emit_operand(src, dst);
2484 }
2485 
2486 void Assembler::xorl(Register dst, int imm32) {
2487   prefix(dst);
2488   emit_arith(0x81, 0xF0, dst, imm32);
2489 }
2490 
2491 void Assembler::xorl(Register dst, Register src) {
2492   (void) prefix_and_encode(dst->encoding(), src->encoding());
2493   emit_arith(0x33, 0xC0, dst, src);
2494 }
2495 
2496 void Assembler::xorl(Register dst, Address src) {
2497   InstructionMark im(this);
2498   prefix(src, dst);
2499   emit_byte(0x33);
2500   emit_operand(dst, src);
2501 }
2502 
2503 void Assembler::xorq(Register dst, int imm32) {
2504   (void) prefixq_and_encode(dst->encoding());
2505   emit_arith(0x81, 0xF0, dst, imm32);
2506 }
2507 
2508 void Assembler::xorq(Register dst, Register src) {
2509   (void) prefixq_and_encode(dst->encoding(), src->encoding());
2510   emit_arith(0x33, 0xC0, dst, src);
2511 }
2512 
2513 void Assembler::xorq(Register dst, Address src) {
2514   InstructionMark im(this);
2515   prefixq(src, dst);
2516   emit_byte(0x33);
2517   emit_operand(dst, src);
2518 }
2519 
2520 void Assembler::bswapl(Register reg) {
2521   int encode = prefix_and_encode(reg->encoding());
2522   emit_byte(0x0F);
2523   emit_byte(0xC8 | encode);
2524 }
2525 
2526 void Assembler::bswapq(Register reg) {
2527   int encode = prefixq_and_encode(reg->encoding());
2528   emit_byte(0x0F);
2529   emit_byte(0xC8 | encode);
2530 }
2531 
2532 void Assembler::lock() {
2533   emit_byte(0xF0);
2534 }
2535 
2536 void Assembler::xchgl(Register dst, Address src) {
2537   InstructionMark im(this);
2538   prefix(src, dst);
2539   emit_byte(0x87);
2540   emit_operand(dst, src);
2541 }
2542 
2543 void Assembler::xchgl(Register dst, Register src) {
2544   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2545   emit_byte(0x87);
2546   emit_byte(0xc0 | encode);
2547 }
2548 
2549 void Assembler::xchgq(Register dst, Address src) {
2550   InstructionMark im(this);
2551   prefixq(src, dst);
2552   emit_byte(0x87);
2553   emit_operand(dst, src);
2554 }
2555 
2556 void Assembler::xchgq(Register dst, Register src) {
2557   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2558   emit_byte(0x87);
2559   emit_byte(0xc0 | encode);
2560 }
2561 
2562 void Assembler::cmpxchgl(Register reg, Address adr) {
2563   InstructionMark im(this);
2564   prefix(adr, reg);
2565   emit_byte(0x0F);
2566   emit_byte(0xB1);
2567   emit_operand(reg, adr);
2568 }
2569 
2570 void Assembler::cmpxchgq(Register reg, Address adr) {
2571   InstructionMark im(this);
2572   prefixq(adr, reg);
2573   emit_byte(0x0F);
2574   emit_byte(0xB1);
2575   emit_operand(reg, adr);
2576 }
2577 
2578 void Assembler::hlt() {
2579   emit_byte(0xF4);
2580 }
2581 
2582 
2583 void Assembler::addr_nop_4() {
2584   // 4 bytes: NOP DWORD PTR [EAX+0]
2585   emit_byte(0x0F);
2586   emit_byte(0x1F);
2587   emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
2588   emit_byte(0);    // 8-bits offset (1 byte)
2589 }
2590 
2591 void Assembler::addr_nop_5() {
2592   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
2593   emit_byte(0x0F);
2594   emit_byte(0x1F);
2595   emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
2596   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
2597   emit_byte(0);    // 8-bits offset (1 byte)
2598 }
2599 
2600 void Assembler::addr_nop_7() {
2601   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
2602   emit_byte(0x0F);
2603   emit_byte(0x1F);
2604   emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
2605   emit_long(0);    // 32-bits offset (4 bytes)
2606 }
2607 
2608 void Assembler::addr_nop_8() {
2609   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
2610   emit_byte(0x0F);
2611   emit_byte(0x1F);
2612   emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
2613   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
2614   emit_long(0);    // 32-bits offset (4 bytes)
2615 }
2616 
2617 void Assembler::nop(int i) {
2618   assert(i > 0, " ");
2619   if (UseAddressNop && VM_Version::is_intel()) {
2620     //
2621     // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
2622     //  1: 0x90
2623     //  2: 0x66 0x90
2624     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2625     //  4: 0x0F 0x1F 0x40 0x00
2626     //  5: 0x0F 0x1F 0x44 0x00 0x00
2627     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2628     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2629     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2630     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2631     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2632     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2633 
2634     // The rest coding is Intel specific - don't use consecutive address nops
2635 
2636     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2637     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2638     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2639     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2640 
2641     while(i >= 15) {
2642       // For Intel don't generate consecutive addess nops (mix with regular nops)
2643       i -= 15;
2644       emit_byte(0x66);   // size prefix
2645       emit_byte(0x66);   // size prefix
2646       emit_byte(0x66);   // size prefix
2647       addr_nop_8();
2648       emit_byte(0x66);   // size prefix
2649       emit_byte(0x66);   // size prefix
2650       emit_byte(0x66);   // size prefix
2651       emit_byte(0x90);   // nop
2652     }
2653     switch (i) {
2654       case 14:
2655         emit_byte(0x66); // size prefix
2656       case 13:
2657         emit_byte(0x66); // size prefix
2658       case 12:
2659         addr_nop_8();
2660         emit_byte(0x66); // size prefix
2661         emit_byte(0x66); // size prefix
2662         emit_byte(0x66); // size prefix
2663         emit_byte(0x90); // nop
2664         break;
2665       case 11:
2666         emit_byte(0x66); // size prefix
2667       case 10:
2668         emit_byte(0x66); // size prefix
2669       case 9:
2670         emit_byte(0x66); // size prefix
2671       case 8:
2672         addr_nop_8();
2673         break;
2674       case 7:
2675         addr_nop_7();
2676         break;
2677       case 6:
2678         emit_byte(0x66); // size prefix
2679       case 5:
2680         addr_nop_5();
2681         break;
2682       case 4:
2683         addr_nop_4();
2684         break;
2685       case 3:
2686         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2687         emit_byte(0x66); // size prefix
2688       case 2:
2689         emit_byte(0x66); // size prefix
2690       case 1:
2691         emit_byte(0x90); // nop
2692         break;
2693       default:
2694         assert(i == 0, " ");
2695     }
2696     return;
2697   }
2698   if (UseAddressNop && VM_Version::is_amd()) {
2699     //
2700     // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
2701     //  1: 0x90
2702     //  2: 0x66 0x90
2703     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2704     //  4: 0x0F 0x1F 0x40 0x00
2705     //  5: 0x0F 0x1F 0x44 0x00 0x00
2706     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2707     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2708     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2709     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2710     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2711     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2712 
2713     // The rest coding is AMD specific - use consecutive address nops
2714 
2715     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2716     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2717     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2718     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2719     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2720     //     Size prefixes (0x66) are added for larger sizes
2721 
2722     while(i >= 22) {
2723       i -= 11;
2724       emit_byte(0x66); // size prefix
2725       emit_byte(0x66); // size prefix
2726       emit_byte(0x66); // size prefix
2727       addr_nop_8();
2728     }
2729     // Generate first nop for size between 21-12
2730     switch (i) {
2731       case 21:
2732         i -= 1;
2733         emit_byte(0x66); // size prefix
2734       case 20:
2735       case 19:
2736         i -= 1;
2737         emit_byte(0x66); // size prefix
2738       case 18:
2739       case 17:
2740         i -= 1;
2741         emit_byte(0x66); // size prefix
2742       case 16:
2743       case 15:
2744         i -= 8;
2745         addr_nop_8();
2746         break;
2747       case 14:
2748       case 13:
2749         i -= 7;
2750         addr_nop_7();
2751         break;
2752       case 12:
2753         i -= 6;
2754         emit_byte(0x66); // size prefix
2755         addr_nop_5();
2756         break;
2757       default:
2758         assert(i < 12, " ");
2759     }
2760 
2761     // Generate second nop for size between 11-1
2762     switch (i) {
2763       case 11:
2764         emit_byte(0x66); // size prefix
2765       case 10:
2766         emit_byte(0x66); // size prefix
2767       case 9:
2768         emit_byte(0x66); // size prefix
2769       case 8:
2770         addr_nop_8();
2771         break;
2772       case 7:
2773         addr_nop_7();
2774         break;
2775       case 6:
2776         emit_byte(0x66); // size prefix
2777       case 5:
2778         addr_nop_5();
2779         break;
2780       case 4:
2781         addr_nop_4();
2782         break;
2783       case 3:
2784         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2785         emit_byte(0x66); // size prefix
2786       case 2:
2787         emit_byte(0x66); // size prefix
2788       case 1:
2789         emit_byte(0x90); // nop
2790         break;
2791       default:
2792         assert(i == 0, " ");
2793     }
2794     return;
2795   }
2796 
2797   // Using nops with size prefixes "0x66 0x90".
2798   // From AMD Optimization Guide:
2799   //  1: 0x90
2800   //  2: 0x66 0x90
2801   //  3: 0x66 0x66 0x90
2802   //  4: 0x66 0x66 0x66 0x90
2803   //  5: 0x66 0x66 0x90 0x66 0x90
2804   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
2805   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2806   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2807   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2808   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2809   //
2810   while(i > 12) {
2811     i -= 4;
2812     emit_byte(0x66); // size prefix
2813     emit_byte(0x66);
2814     emit_byte(0x66);
2815     emit_byte(0x90); // nop
2816   }
2817   // 1 - 12 nops
2818   if(i > 8) {
2819     if(i > 9) {
2820       i -= 1;
2821       emit_byte(0x66);
2822     }
2823     i -= 3;
2824     emit_byte(0x66);
2825     emit_byte(0x66);
2826     emit_byte(0x90);
2827   }
2828   // 1 - 8 nops
2829   if(i > 4) {
2830     if(i > 6) {
2831       i -= 1;
2832       emit_byte(0x66);
2833     }
2834     i -= 3;
2835     emit_byte(0x66);
2836     emit_byte(0x66);
2837     emit_byte(0x90);
2838   }
2839   switch (i) {
2840     case 4:
2841       emit_byte(0x66);
2842     case 3:
2843       emit_byte(0x66);
2844     case 2:
2845       emit_byte(0x66);
2846     case 1:
2847       emit_byte(0x90);
2848       break;
2849     default:
2850       assert(i == 0, " ");
2851   }
2852 }
2853 
2854 void Assembler::ret(int imm16) {
2855   if (imm16 == 0) {
2856     emit_byte(0xC3);
2857   } else {
2858     emit_byte(0xC2);
2859     emit_word(imm16);
2860   }
2861 }
2862 
2863 // copies a single word from [esi] to [edi]
2864 void Assembler::smovl() {
2865   emit_byte(0xA5);
2866 }
2867 
2868 // copies data from [rsi] to [rdi] using rcx words (m32)
2869 void Assembler::rep_movl() {
2870   // REP
2871   emit_byte(0xF3);
2872   // MOVSL
2873   emit_byte(0xA5);
2874 }
2875 
2876 // copies data from [rsi] to [rdi] using rcx double words (m64)
2877 void Assembler::rep_movq() {
2878   // REP
2879   emit_byte(0xF3);
2880   // MOVSQ
2881   prefix(REX_W);
2882   emit_byte(0xA5);
2883 }
2884 
2885 // sets rcx double words (m64) with rax value at [rdi]
2886 void Assembler::rep_set() {
2887   // REP
2888   emit_byte(0xF3);
2889   // STOSQ
2890   prefix(REX_W);
2891   emit_byte(0xAB);
2892 }
2893 
2894 // scans rcx double words (m64) at [rdi] for occurance of rax
2895 void Assembler::repne_scanq() {
2896   // REPNE/REPNZ
2897   emit_byte(0xF2);
2898   // SCASQ
2899   prefix(REX_W);
2900   emit_byte(0xAF);
2901 }
2902 
2903 void Assembler::repne_scanl() {
2904   // REPNE/REPNZ
2905   emit_byte(0xF2);
2906   // SCASL
2907   emit_byte(0xAF);
2908 }
2909 
2910 
2911 void Assembler::setb(Condition cc, Register dst) {
2912   assert(0 <= cc && cc < 16, "illegal cc");
2913   int encode = prefix_and_encode(dst->encoding(), true);
2914   emit_byte(0x0F);
2915   emit_byte(0x90 | cc);
2916   emit_byte(0xC0 | encode);
2917 }
2918 
2919 void Assembler::clflush(Address adr) {
2920   prefix(adr);
2921   emit_byte(0x0F);
2922   emit_byte(0xAE);
2923   emit_operand(rdi, adr);
2924 }
2925 
2926 void Assembler::call(Label& L, relocInfo::relocType rtype) {
2927   if (L.is_bound()) {
2928     const int long_size = 5;
2929     int offs = (int)( target(L) - pc() );
2930     assert(offs <= 0, "assembler error");
2931     InstructionMark im(this);
2932     // 1110 1000 #32-bit disp
2933     emit_byte(0xE8);
2934     emit_data(offs - long_size, rtype, disp32_operand);
2935   } else {
2936     InstructionMark im(this);
2937     // 1110 1000 #32-bit disp
2938     L.add_patch_at(code(), locator());
2939 
2940     emit_byte(0xE8);
2941     emit_data(int(0), rtype, disp32_operand);
2942   }
2943 }
2944 
2945 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
2946   assert(entry != NULL, "call most probably wrong");
2947   InstructionMark im(this);
2948   emit_byte(0xE8);
2949   intptr_t disp = entry - (_code_pos + sizeof(int32_t));
2950   assert(is_simm32(disp), "must be 32bit offset (call2)");
2951   // Technically, should use call32_operand, but this format is
2952   // implied by the fact that we're emitting a call instruction.
2953   emit_data((int) disp, rspec, disp32_operand);
2954 }
2955 
2956 
2957 void Assembler::call(Register dst) {
2958   // This was originally using a 32bit register encoding
2959   // and surely we want 64bit!
2960   // this is a 32bit encoding but in 64bit mode the default
2961   // operand size is 64bit so there is no need for the
2962   // wide prefix. So prefix only happens if we use the
2963   // new registers. Much like push/pop.
2964   int encode = prefixq_and_encode(dst->encoding());
2965   emit_byte(0xFF);
2966   emit_byte(0xD0 | encode);
2967 }
2968 
2969 void Assembler::call(Address adr) {
2970   InstructionMark im(this);
2971   prefix(adr);
2972   emit_byte(0xFF);
2973   emit_operand(rdx, adr);
2974 }
2975 
2976 void Assembler::jmp(Register reg) {
2977   int encode = prefix_and_encode(reg->encoding());
2978   emit_byte(0xFF);
2979   emit_byte(0xE0 | encode);
2980 }
2981 
2982 void Assembler::jmp(Address adr) {
2983   InstructionMark im(this);
2984   prefix(adr);
2985   emit_byte(0xFF);
2986   emit_operand(rsp, adr);
2987 }
2988 
2989 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2990   InstructionMark im(this);
2991   emit_byte(0xE9);
2992   assert(dest != NULL, "must have a target");
2993   intptr_t disp = dest - (_code_pos + sizeof(int32_t));
2994   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2995   emit_data(disp, rspec.reloc(), call32_operand);
2996 }
2997 
2998 void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
2999   if (L.is_bound()) {
3000     address entry = target(L);
3001     assert(entry != NULL, "jmp most probably wrong");
3002     InstructionMark im(this);
3003     const int short_size = 2;
3004     const int long_size = 5;
3005     intptr_t offs = entry - _code_pos;
3006     if (rtype == relocInfo::none && is8bit(offs - short_size)) {
3007       emit_byte(0xEB);
3008       emit_byte((offs - short_size) & 0xFF);
3009     } else {
3010       emit_byte(0xE9);
3011       emit_long(offs - long_size);
3012     }
3013   } else {
3014     // By default, forward jumps are always 32-bit displacements, since
3015     // we can't yet know where the label will be bound.  If you're sure that
3016     // the forward jump will not run beyond 256 bytes, use jmpb to
3017     // force an 8-bit displacement.
3018     InstructionMark im(this);
3019     relocate(rtype);
3020     L.add_patch_at(code(), locator());
3021     emit_byte(0xE9);
3022     emit_long(0);
3023   }
3024 }
3025 
3026 void Assembler::jmpb(Label& L) {
3027   if (L.is_bound()) {
3028     const int short_size = 2;
3029     address entry = target(L);
3030     assert(is8bit((entry - _code_pos) + short_size),
3031            "Dispacement too large for a short jmp");
3032     assert(entry != NULL, "jmp most probably wrong");
3033     intptr_t offs = entry - _code_pos;
3034     emit_byte(0xEB);
3035     emit_byte((offs - short_size) & 0xFF);
3036   } else {
3037     InstructionMark im(this);
3038     L.add_patch_at(code(), locator());
3039     emit_byte(0xEB);
3040     emit_byte(0);
3041   }
3042 }
3043 
3044 void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
3045   InstructionMark im(this);
3046   relocate(rtype);
3047   assert((0 <= cc) && (cc < 16), "illegal cc");
3048   if (L.is_bound()) {
3049     address dst = target(L);
3050     assert(dst != NULL, "jcc most probably wrong");
3051 
3052     const int short_size = 2;
3053     const int long_size = 6;
3054     intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
3055     if (rtype == relocInfo::none && is8bit(offs - short_size)) {
3056       // 0111 tttn #8-bit disp
3057       emit_byte(0x70 | cc);
3058       emit_byte((offs - short_size) & 0xFF);
3059     } else {
3060       // 0000 1111 1000 tttn #32-bit disp
3061       assert(is_simm32(offs - long_size),
3062              "must be 32bit offset (call4)");
3063       emit_byte(0x0F);
3064       emit_byte(0x80 | cc);
3065       emit_long(offs - long_size);
3066     }
3067   } else {
3068     // Note: could eliminate cond. jumps to this jump if condition
3069     //       is the same however, seems to be rather unlikely case.
3070     // Note: use jccb() if label to be bound is very close to get
3071     //       an 8-bit displacement
3072     L.add_patch_at(code(), locator());
3073     emit_byte(0x0F);
3074     emit_byte(0x80 | cc);
3075     emit_long(0);
3076   }
3077 }
3078 
3079 void Assembler::jccb(Condition cc, Label& L) {
3080   if (L.is_bound()) {
3081     const int short_size = 2;
3082     const int long_size = 6;
3083     address entry = target(L);
3084     assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
3085            "Dispacement too large for a short jmp");
3086     intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
3087     // 0111 tttn #8-bit disp
3088     emit_byte(0x70 | cc);
3089     emit_byte((offs - short_size) & 0xFF);
3090   } else {
3091     InstructionMark im(this);
3092     L.add_patch_at(code(), locator());
3093     emit_byte(0x70 | cc);
3094     emit_byte(0);
3095   }
3096 }
3097 
3098 // FP instructions
3099 
3100 void Assembler::fxsave(Address dst) {
3101   prefixq(dst);
3102   emit_byte(0x0F);
3103   emit_byte(0xAE);
3104   emit_operand(as_Register(0), dst);
3105 }
3106 
3107 void Assembler::fxrstor(Address src) {
3108   prefixq(src);
3109   emit_byte(0x0F);
3110   emit_byte(0xAE);
3111   emit_operand(as_Register(1), src);
3112 }
3113 
3114 void Assembler::ldmxcsr(Address src) {
3115   InstructionMark im(this);
3116   prefix(src);
3117   emit_byte(0x0F);
3118   emit_byte(0xAE);
3119   emit_operand(as_Register(2), src);
3120 }
3121 
3122 void Assembler::stmxcsr(Address dst) {
3123   InstructionMark im(this);
3124   prefix(dst);
3125   emit_byte(0x0F);
3126   emit_byte(0xAE);
3127   emit_operand(as_Register(3), dst);
3128 }
3129 
3130 void Assembler::addss(XMMRegister dst, XMMRegister src) {
3131   emit_byte(0xF3);
3132   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3133   emit_byte(0x0F);
3134   emit_byte(0x58);
3135   emit_byte(0xC0 | encode);
3136 }
3137 
3138 void Assembler::addss(XMMRegister dst, Address src) {
3139   InstructionMark im(this);
3140   emit_byte(0xF3);
3141   prefix(src, dst);
3142   emit_byte(0x0F);
3143   emit_byte(0x58);
3144   emit_operand(dst, src);
3145 }
3146 
3147 void Assembler::subss(XMMRegister dst, XMMRegister src) {
3148   emit_byte(0xF3);
3149   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3150   emit_byte(0x0F);
3151   emit_byte(0x5C);
3152   emit_byte(0xC0 | encode);
3153 }
3154 
3155 void Assembler::subss(XMMRegister dst, Address src) {
3156   InstructionMark im(this);
3157   emit_byte(0xF3);
3158   prefix(src, dst);
3159   emit_byte(0x0F);
3160   emit_byte(0x5C);
3161   emit_operand(dst, src);
3162 }
3163 
3164 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
3165   emit_byte(0xF3);
3166   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3167   emit_byte(0x0F);
3168   emit_byte(0x59);
3169   emit_byte(0xC0 | encode);
3170 }
3171 
3172 void Assembler::mulss(XMMRegister dst, Address src) {
3173   InstructionMark im(this);
3174   emit_byte(0xF3);
3175   prefix(src, dst);
3176   emit_byte(0x0F);
3177   emit_byte(0x59);
3178   emit_operand(dst, src);
3179 }
3180 
3181 void Assembler::divss(XMMRegister dst, XMMRegister src) {
3182   emit_byte(0xF3);
3183   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3184   emit_byte(0x0F);
3185   emit_byte(0x5E);
3186   emit_byte(0xC0 | encode);
3187 }
3188 
3189 void Assembler::divss(XMMRegister dst, Address src) {
3190   InstructionMark im(this);
3191   emit_byte(0xF3);
3192   prefix(src, dst);
3193   emit_byte(0x0F);
3194   emit_byte(0x5E);
3195   emit_operand(dst, src);
3196 }
3197 
3198 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
3199   emit_byte(0xF2);
3200   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3201   emit_byte(0x0F);
3202   emit_byte(0x58);
3203   emit_byte(0xC0 | encode);
3204 }
3205 
3206 void Assembler::addsd(XMMRegister dst, Address src) {
3207   InstructionMark im(this);
3208   emit_byte(0xF2);
3209   prefix(src, dst);
3210   emit_byte(0x0F);
3211   emit_byte(0x58);
3212   emit_operand(dst, src);
3213 }
3214 
3215 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
3216   emit_byte(0xF2);
3217   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3218   emit_byte(0x0F);
3219   emit_byte(0x5C);
3220   emit_byte(0xC0 | encode);
3221 }
3222 
3223 void Assembler::subsd(XMMRegister dst, Address src) {
3224   InstructionMark im(this);
3225   emit_byte(0xF2);
3226   prefix(src, dst);
3227   emit_byte(0x0F);
3228   emit_byte(0x5C);
3229   emit_operand(dst, src);
3230 }
3231 
3232 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
3233   emit_byte(0xF2);
3234   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3235   emit_byte(0x0F);
3236   emit_byte(0x59);
3237   emit_byte(0xC0 | encode);
3238 }
3239 
3240 void Assembler::mulsd(XMMRegister dst, Address src) {
3241   InstructionMark im(this);
3242   emit_byte(0xF2);
3243   prefix(src, dst);
3244   emit_byte(0x0F);
3245   emit_byte(0x59);
3246   emit_operand(dst, src);
3247 }
3248 
3249 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
3250   emit_byte(0xF2);
3251   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3252   emit_byte(0x0F);
3253   emit_byte(0x5E);
3254   emit_byte(0xC0 | encode);
3255 }
3256 
3257 void Assembler::divsd(XMMRegister dst, Address src) {
3258   InstructionMark im(this);
3259   emit_byte(0xF2);
3260   prefix(src, dst);
3261   emit_byte(0x0F);
3262   emit_byte(0x5E);
3263   emit_operand(dst, src);
3264 }
3265 
3266 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
3267   emit_byte(0xF2);
3268   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3269   emit_byte(0x0F);
3270   emit_byte(0x51);
3271   emit_byte(0xC0 | encode);
3272 }
3273 
3274 void Assembler::sqrtsd(XMMRegister dst, Address src) {
3275   InstructionMark im(this);
3276   emit_byte(0xF2);
3277   prefix(src, dst);
3278   emit_byte(0x0F);
3279   emit_byte(0x51);
3280   emit_operand(dst, src);
3281 }
3282 
3283 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
3284   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3285   emit_byte(0x0F);
3286   emit_byte(0x57);
3287   emit_byte(0xC0 | encode);
3288 }
3289 
3290 void Assembler::xorps(XMMRegister dst, Address src) {
3291   InstructionMark im(this);
3292   prefix(src, dst);
3293   emit_byte(0x0F);
3294   emit_byte(0x57);
3295   emit_operand(dst, src);
3296 }
3297 
3298 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
3299   emit_byte(0x66);
3300   xorps(dst, src);
3301 }
3302 
3303 void Assembler::xorpd(XMMRegister dst, Address src) {
3304   InstructionMark im(this);
3305   emit_byte(0x66);
3306   prefix(src, dst);
3307   emit_byte(0x0F);
3308   emit_byte(0x57);
3309   emit_operand(dst, src);
3310 }
3311 
3312 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
3313   emit_byte(0xF3);
3314   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3315   emit_byte(0x0F);
3316   emit_byte(0x2A);
3317   emit_byte(0xC0 | encode);
3318 }
3319 
3320 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
3321   emit_byte(0xF3);
3322   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3323   emit_byte(0x0F);
3324   emit_byte(0x2A);
3325   emit_byte(0xC0 | encode);
3326 }
3327 
3328 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
3329   emit_byte(0xF2);
3330   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3331   emit_byte(0x0F);
3332   emit_byte(0x2A);
3333   emit_byte(0xC0 | encode);
3334 }
3335 
3336 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
3337   emit_byte(0xF2);
3338   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3339   emit_byte(0x0F);
3340   emit_byte(0x2A);
3341   emit_byte(0xC0 | encode);
3342 }
3343 
3344 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
3345   emit_byte(0xF3);
3346   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3347   emit_byte(0x0F);
3348   emit_byte(0x2C);
3349   emit_byte(0xC0 | encode);
3350 }
3351 
3352 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
3353   emit_byte(0xF3);
3354   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3355   emit_byte(0x0F);
3356   emit_byte(0x2C);
3357   emit_byte(0xC0 | encode);
3358 }
3359 
3360 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
3361   emit_byte(0xF2);
3362   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3363   emit_byte(0x0F);
3364   emit_byte(0x2C);
3365   emit_byte(0xC0 | encode);
3366 }
3367 
3368 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
3369   emit_byte(0xF2);
3370   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3371   emit_byte(0x0F);
3372   emit_byte(0x2C);
3373   emit_byte(0xC0 | encode);
3374 }
3375 
3376 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
3377   emit_byte(0xF3);
3378   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3379   emit_byte(0x0F);
3380   emit_byte(0x5A);
3381   emit_byte(0xC0 | encode);
3382 }
3383 
3384 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
3385   emit_byte(0xF3);
3386   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3387   emit_byte(0x0F);
3388   emit_byte(0xE6);
3389   emit_byte(0xC0 | encode);
3390 }
3391 
3392 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
3393   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3394   emit_byte(0x0F);
3395   emit_byte(0x5B);
3396   emit_byte(0xC0 | encode);
3397 }
3398 
3399 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
3400   emit_byte(0xF2);
3401   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3402   emit_byte(0x0F);
3403   emit_byte(0x5A);
3404   emit_byte(0xC0 | encode);
3405 }
3406 
3407 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
3408   emit_byte(0x66);
3409   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3410   emit_byte(0x0F);
3411   emit_byte(0x60);
3412   emit_byte(0xC0 | encode);
3413 }
3414 
3415 // Implementation of MacroAssembler
3416 
3417 // On 32 bit it returns a vanilla displacement on 64 bit is a rip relative displacement
3418 Address MacroAssembler::as_Address(AddressLiteral adr) {
3419   assert(!adr.is_lval(), "must be rval");
3420   assert(reachable(adr), "must be");
3421   return Address((int)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
3422 }
3423 
3424 Address MacroAssembler::as_Address(ArrayAddress adr) {
3425 #ifdef _LP64
3426   AddressLiteral base = adr.base();
3427   lea(rscratch1, base);
3428   Address index = adr.index();
3429   assert(index._disp == 0, "must not have disp"); // maybe it can?
3430   Address array(rscratch1, index._index, index._scale, index._disp);
3431   return array;
3432 #else
3433   return Address::make_array(adr);
3434 #endif // _LP64
3435 
3436 }
3437 
3438 void MacroAssembler::fat_nop() {
3439   // A 5 byte nop that is safe for patching (see patch_verified_entry)
3440   // Recommened sequence from 'Software Optimization Guide for the AMD
3441   // Hammer Processor'
3442   emit_byte(0x66);
3443   emit_byte(0x66);
3444   emit_byte(0x90);
3445   emit_byte(0x66);
3446   emit_byte(0x90);
3447 }
3448 
3449 static Assembler::Condition reverse[] = {
3450     Assembler::noOverflow     /* overflow      = 0x0 */ ,
3451     Assembler::overflow       /* noOverflow    = 0x1 */ ,
3452     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
3453     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
3454     Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
3455     Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
3456     Assembler::above          /* belowEqual    = 0x6 */ ,
3457     Assembler::belowEqual     /* above         = 0x7 */ ,
3458     Assembler::positive       /* negative      = 0x8 */ ,
3459     Assembler::negative       /* positive      = 0x9 */ ,
3460     Assembler::noParity       /* parity        = 0xa */ ,
3461     Assembler::parity         /* noParity      = 0xb */ ,
3462     Assembler::greaterEqual   /* less          = 0xc */ ,
3463     Assembler::less           /* greaterEqual  = 0xd */ ,
3464     Assembler::greater        /* lessEqual     = 0xe */ ,
3465     Assembler::lessEqual      /* greater       = 0xf, */
3466 
3467 };
3468 
3469 // 32bit can do a case table jump in one instruction but we no longer allow the base
3470 // to be installed in the Address class
3471 void MacroAssembler::jump(ArrayAddress entry) {
3472 #ifdef _LP64
3473   lea(rscratch1, entry.base());
3474   Address dispatch = entry.index();
3475   assert(dispatch._base == noreg, "must be");
3476   dispatch._base = rscratch1;
3477   jmp(dispatch);
3478 #else
3479   jmp(as_Address(entry));
3480 #endif // _LP64
3481 }
3482 
3483 void MacroAssembler::jump(AddressLiteral dst) {
3484   if (reachable(dst)) {
3485     jmp_literal(dst.target(), dst.rspec());
3486   } else {
3487     lea(rscratch1, dst);
3488     jmp(rscratch1);
3489   }
3490 }
3491 
3492 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
3493   if (reachable(dst)) {
3494     InstructionMark im(this);
3495     relocate(dst.reloc());
3496     const int short_size = 2;
3497     const int long_size = 6;
3498     int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
3499     if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
3500       // 0111 tttn #8-bit disp
3501       emit_byte(0x70 | cc);
3502       emit_byte((offs - short_size) & 0xFF);
3503     } else {
3504       // 0000 1111 1000 tttn #32-bit disp
3505       emit_byte(0x0F);
3506       emit_byte(0x80 | cc);
3507       emit_long(offs - long_size);
3508     }
3509   } else {
3510 #ifdef ASSERT
3511     warning("reversing conditional branch");
3512 #endif /* ASSERT */
3513     Label skip;
3514     jccb(reverse[cc], skip);
3515     lea(rscratch1, dst);
3516     Assembler::jmp(rscratch1);
3517     bind(skip);
3518   }
3519 }
3520 
3521 // Wouldn't need if AddressLiteral version had new name
3522 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
3523   Assembler::call(L, rtype);
3524 }
3525 
3526 // Wouldn't need if AddressLiteral version had new name
3527 void MacroAssembler::call(Register entry) {
3528   Assembler::call(entry);
3529 }
3530 
3531 void MacroAssembler::call(AddressLiteral entry) {
3532   if (reachable(entry)) {
3533     Assembler::call_literal(entry.target(), entry.rspec());
3534   } else {
3535     lea(rscratch1, entry);
3536     Assembler::call(rscratch1);
3537   }
3538 }
3539 
3540 void MacroAssembler::cmp8(AddressLiteral src1, int8_t src2) {
3541   if (reachable(src1)) {
3542     cmpb(as_Address(src1), src2);
3543   } else {
3544     lea(rscratch1, src1);
3545     cmpb(Address(rscratch1, 0), src2);
3546   }
3547 }
3548 
3549 void MacroAssembler::cmp32(AddressLiteral src1, int32_t src2) {
3550   if (reachable(src1)) {
3551     cmpl(as_Address(src1), src2);
3552   } else {
3553     lea(rscratch1, src1);
3554     cmpl(Address(rscratch1, 0), src2);
3555   }
3556 }
3557 
3558 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
3559   if (reachable(src2)) {
3560     cmpl(src1, as_Address(src2));
3561   } else {
3562     lea(rscratch1, src2);
3563     cmpl(src1, Address(rscratch1, 0));
3564   }
3565 }
3566 
3567 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
3568 #ifdef _LP64
3569   if (src2.is_lval()) {
3570     movptr(rscratch1, src2);
3571     Assembler::cmpq(src1, rscratch1);
3572   } else if (reachable(src2)) {
3573     cmpq(src1, as_Address(src2));
3574   } else {
3575     lea(rscratch1, src2);
3576     Assembler::cmpq(src1, Address(rscratch1, 0));
3577   }
3578 #else
3579   if (src2.is_lval()) {
3580     cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
3581   } else {
3582     cmpl(src1, as_Address(src2));
3583   }
3584 #endif // _LP64
3585 }
3586 
3587 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
3588   assert(src2.is_lval(), "not a mem-mem compare");
3589 #ifdef _LP64
3590   // moves src2's literal address
3591   movptr(rscratch1, src2);
3592   Assembler::cmpq(src1, rscratch1);
3593 #else
3594   cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
3595 #endif // _LP64
3596 }
3597 
3598 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
3599   assert(!src2.is_lval(), "should use cmpptr");
3600 
3601   if (reachable(src2)) {
3602 #ifdef _LP64
3603     cmpq(src1, as_Address(src2));
3604 #else
3605     ShouldNotReachHere();
3606 #endif // _LP64
3607   } else {
3608     lea(rscratch1, src2);
3609     Assembler::cmpq(src1, Address(rscratch1, 0));
3610   }
3611 }
3612 
3613 void MacroAssembler::cmpxchgptr(Register reg, AddressLiteral adr) {
3614   if (reachable(adr)) {
3615 #ifdef _LP64
3616     cmpxchgq(reg, as_Address(adr));
3617 #else
3618     cmpxchgl(reg, as_Address(adr));
3619 #endif // _LP64
3620   } else {
3621     lea(rscratch1, adr);
3622     cmpxchgq(reg, Address(rscratch1, 0));
3623   }
3624 }
3625 
3626 void MacroAssembler::incrementl(AddressLiteral dst) {
3627   if (reachable(dst)) {
3628     incrementl(as_Address(dst));
3629   } else {
3630     lea(rscratch1, dst);
3631     incrementl(Address(rscratch1, 0));
3632   }
3633 }
3634 
3635 void MacroAssembler::incrementl(ArrayAddress dst) {
3636   incrementl(as_Address(dst));
3637 }
3638 
3639 void MacroAssembler::lea(Register dst, Address src) {
3640 #ifdef _LP64
3641   leaq(dst, src);
3642 #else
3643   leal(dst, src);
3644 #endif // _LP64
3645 }
3646 
3647 void MacroAssembler::lea(Register dst, AddressLiteral src) {
3648 #ifdef _LP64
3649     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
3650 #else
3651     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
3652 #endif // _LP64
3653 }
3654 
3655 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
3656   if (reachable(dst)) {
3657     movl(as_Address(dst), src);
3658   } else {
3659     lea(rscratch1, dst);
3660     movl(Address(rscratch1, 0), src);
3661   }
3662 }
3663 
3664 void MacroAssembler::mov32(Register dst, AddressLiteral src) {
3665   if (reachable(src)) {
3666     movl(dst, as_Address(src));
3667   } else {
3668     lea(rscratch1, src);
3669     movl(dst, Address(rscratch1, 0));
3670   }
3671 }
3672 
3673 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
3674   if (reachable(src)) {
3675     if (UseXmmLoadAndClearUpper) {
3676       movsd (dst, as_Address(src));
3677     } else {
3678       movlpd(dst, as_Address(src));
3679     }
3680   } else {
3681     lea(rscratch1, src);
3682     if (UseXmmLoadAndClearUpper) {
3683       movsd (dst, Address(rscratch1, 0));
3684     } else {
3685       movlpd(dst, Address(rscratch1, 0));
3686     }
3687   }
3688 }
3689 
3690 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
3691   if (reachable(src)) {
3692     movss(dst, as_Address(src));
3693   } else {
3694     lea(rscratch1, src);
3695     movss(dst, Address(rscratch1, 0));
3696   }
3697 }
3698 
3699 void MacroAssembler::movoop(Register dst, jobject obj) {
3700   mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
3701 }
3702 
3703 void MacroAssembler::movoop(Address dst, jobject obj) {
3704   mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
3705   movq(dst, rscratch1);
3706 }
3707 
3708 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
3709 #ifdef _LP64
3710   if (src.is_lval()) {
3711     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
3712   } else {
3713     if (reachable(src)) {
3714       movq(dst, as_Address(src));
3715     } else {
3716       lea(rscratch1, src);
3717       movq(dst, Address(rscratch1,0));
3718     }
3719   }
3720 #else
3721   if (src.is_lval()) {
3722     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
3723   } else {
3724     movl(dst, as_Address(src));
3725   }
3726 #endif // LP64
3727 }
3728 
3729 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
3730 #ifdef _LP64
3731   movq(as_Address(dst), src);
3732 #else
3733   movl(as_Address(dst), src);
3734 #endif // _LP64
3735 }
3736 
3737 void MacroAssembler::pushoop(jobject obj) {
3738 #ifdef _LP64
3739   movoop(rscratch1, obj);
3740   pushq(rscratch1);
3741 #else
3742   push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
3743 #endif // _LP64
3744 }
3745 
3746 void MacroAssembler::pushptr(AddressLiteral src) {
3747 #ifdef _LP64
3748   lea(rscratch1, src);
3749   if (src.is_lval()) {
3750     pushq(rscratch1);
3751   } else {
3752     pushq(Address(rscratch1, 0));
3753   }
3754 #else
3755   if (src.is_lval()) {
3756     push_literal((int32_t)src.target(), src.rspec());
3757   else {
3758     pushl(as_Address(src));
3759   }
3760 #endif // _LP64
3761 }
3762 
3763 void MacroAssembler::ldmxcsr(AddressLiteral src) {
3764   if (reachable(src)) {
3765     Assembler::ldmxcsr(as_Address(src));
3766   } else {
3767     lea(rscratch1, src);
3768     Assembler::ldmxcsr(Address(rscratch1, 0));
3769   }
3770 }
3771 
3772 void MacroAssembler::movlpd(XMMRegister dst, AddressLiteral src) {
3773   if (reachable(src)) {
3774     movlpd(dst, as_Address(src));
3775   } else {
3776     lea(rscratch1, src);
3777     movlpd(dst, Address(rscratch1, 0));
3778   }
3779 }
3780 
3781 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
3782   if (reachable(src)) {
3783     movss(dst, as_Address(src));
3784   } else {
3785     lea(rscratch1, src);
3786     movss(dst, Address(rscratch1, 0));
3787   }
3788 }
3789 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
3790   if (reachable(src)) {
3791     xorpd(dst, as_Address(src));
3792   } else {
3793     lea(rscratch1, src);
3794     xorpd(dst, Address(rscratch1, 0));
3795   }
3796 }
3797 
3798 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
3799   if (reachable(src)) {
3800     xorps(dst, as_Address(src));
3801   } else {
3802     lea(rscratch1, src);
3803     xorps(dst, Address(rscratch1, 0));
3804   }
3805 }
3806 
3807 void MacroAssembler::null_check(Register reg, int offset) {
3808   if (needs_explicit_null_check(offset)) {
3809     // provoke OS NULL exception if reg = NULL by
3810     // accessing M[reg] w/o changing any (non-CC) registers
3811     cmpq(rax, Address(reg, 0));
3812     // Note: should probably use testl(rax, Address(reg, 0));
3813     //       may be shorter code (however, this version of
3814     //       testl needs to be implemented first)
3815   } else {
3816     // nothing to do, (later) access of M[reg + offset]
3817     // will provoke OS NULL exception if reg = NULL
3818   }
3819 }
3820 
3821 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
3822   int off = offset();
3823   movzbl(dst, src);
3824   return off;
3825 }
3826 
3827 int MacroAssembler::load_unsigned_word(Register dst, Address src) {
3828   int off = offset();
3829   movzwl(dst, src);
3830   return off;
3831 }
3832 
3833 int MacroAssembler::load_signed_byte(Register dst, Address src) {
3834   int off = offset();
3835   movsbl(dst, src);
3836   return off;
3837 }
3838 
3839 int MacroAssembler::load_signed_word(Register dst, Address src) {
3840   int off = offset();
3841   movswl(dst, src);
3842   return off;
3843 }
3844 
3845 void MacroAssembler::incrementl(Register reg, int value) {
3846   if (value == min_jint) { addl(reg, value); return; }
3847   if (value <  0) { decrementl(reg, -value); return; }
3848   if (value == 0) {                        ; return; }
3849   if (value == 1 && UseIncDec) { incl(reg) ; return; }
3850   /* else */      { addl(reg, value)       ; return; }
3851 }
3852 
3853 void MacroAssembler::decrementl(Register reg, int value) {
3854   if (value == min_jint) { subl(reg, value); return; }
3855   if (value <  0) { incrementl(reg, -value); return; }
3856   if (value == 0) {                        ; return; }
3857   if (value == 1 && UseIncDec) { decl(reg) ; return; }
3858   /* else */      { subl(reg, value)       ; return; }
3859 }
3860 
3861 void MacroAssembler::incrementq(Register reg, int value) {
3862   if (value == min_jint) { addq(reg, value); return; }
3863   if (value <  0) { decrementq(reg, -value); return; }
3864   if (value == 0) {                        ; return; }
3865   if (value == 1 && UseIncDec) { incq(reg) ; return; }
3866   /* else */      { addq(reg, value)       ; return; }
3867 }
3868 
3869 void MacroAssembler::decrementq(Register reg, int value) {
3870   if (value == min_jint) { subq(reg, value); return; }
3871   if (value <  0) { incrementq(reg, -value); return; }
3872   if (value == 0) {                        ; return; }
3873   if (value == 1 && UseIncDec) { decq(reg) ; return; }
3874   /* else */      { subq(reg, value)       ; return; }
3875 }
3876 
3877 void MacroAssembler::incrementl(Address dst, int value) {
3878   if (value == min_jint) { addl(dst, value); return; }
3879   if (value <  0) { decrementl(dst, -value); return; }
3880   if (value == 0) {                        ; return; }
3881   if (value == 1 && UseIncDec) { incl(dst) ; return; }
3882   /* else */      { addl(dst, value)       ; return; }
3883 }
3884 
3885 void MacroAssembler::decrementl(Address dst, int value) {
3886   if (value == min_jint) { subl(dst, value); return; }
3887   if (value <  0) { incrementl(dst, -value); return; }
3888   if (value == 0) {                        ; return; }
3889   if (value == 1 && UseIncDec) { decl(dst) ; return; }
3890   /* else */      { subl(dst, value)       ; return; }
3891 }
3892 
3893 void MacroAssembler::incrementq(Address dst, int value) {
3894   if (value == min_jint) { addq(dst, value); return; }
3895   if (value <  0) { decrementq(dst, -value); return; }
3896   if (value == 0) {                        ; return; }
3897   if (value == 1 && UseIncDec) { incq(dst) ; return; }
3898   /* else */      { addq(dst, value)       ; return; }
3899 }
3900 
3901 void MacroAssembler::decrementq(Address dst, int value) {
3902   if (value == min_jint) { subq(dst, value); return; }
3903   if (value <  0) { incrementq(dst, -value); return; }
3904   if (value == 0) {                        ; return; }
3905   if (value == 1 && UseIncDec) { decq(dst) ; return; }
3906   /* else */      { subq(dst, value)       ; return; }
3907 }
3908 
3909 void MacroAssembler::align(int modulus) {
3910   if (offset() % modulus != 0) {
3911     nop(modulus - (offset() % modulus));
3912   }
3913 }
3914 
3915 void MacroAssembler::enter() {
3916   pushq(rbp);
3917   movq(rbp, rsp);
3918 }
3919 
3920 void MacroAssembler::leave() {
3921   emit_byte(0xC9); // LEAVE
3922 }
3923 
3924 // C++ bool manipulation
3925 
3926 void MacroAssembler::movbool(Register dst, Address src) {
3927   if(sizeof(bool) == 1)
3928     movb(dst, src);
3929   else if(sizeof(bool) == 2)
3930     movw(dst, src);
3931   else if(sizeof(bool) == 4)
3932     movl(dst, src);
3933   else {
3934     // unsupported
3935     ShouldNotReachHere();
3936   }
3937 }
3938 
3939 void MacroAssembler::movbool(Address dst, bool boolconst) {
3940   if(sizeof(bool) == 1)
3941     movb(dst, (int) boolconst);
3942   else if(sizeof(bool) == 2)
3943     movw(dst, (int) boolconst);
3944   else if(sizeof(bool) == 4)
3945     movl(dst, (int) boolconst);
3946   else {
3947     // unsupported
3948     ShouldNotReachHere();
3949   }
3950 }
3951 
3952 void MacroAssembler::movbool(Address dst, Register src) {
3953   if(sizeof(bool) == 1)
3954     movb(dst, src);
3955   else if(sizeof(bool) == 2)
3956     movw(dst, src);
3957   else if(sizeof(bool) == 4)
3958     movl(dst, src);
3959   else {
3960     // unsupported
3961     ShouldNotReachHere();
3962   }
3963 }
3964 
3965 void MacroAssembler::testbool(Register dst) {
3966   if(sizeof(bool) == 1)
3967     testb(dst, (int) 0xff);
3968   else if(sizeof(bool) == 2) {
3969     // need testw impl
3970     ShouldNotReachHere();
3971   } else if(sizeof(bool) == 4)
3972     testl(dst, dst);
3973   else {
3974     // unsupported
3975     ShouldNotReachHere();
3976   }
3977 }
3978 
3979 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
3980                                          Register last_java_fp,
3981                                          address  last_java_pc) {
3982   // determine last_java_sp register
3983   if (!last_java_sp->is_valid()) {
3984     last_java_sp = rsp;
3985   }
3986 
3987   // last_java_fp is optional
3988   if (last_java_fp->is_valid()) {
3989     movq(Address(r15_thread, JavaThread::last_Java_fp_offset()),
3990          last_java_fp);
3991   }
3992 
3993   // last_java_pc is optional
3994   if (last_java_pc != NULL) {
3995     Address java_pc(r15_thread,
3996                     JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
3997     lea(rscratch1, InternalAddress(last_java_pc));
3998     movq(java_pc, rscratch1);
3999   }
4000 
4001   movq(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
4002 }
4003 
4004 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
4005                                            bool clear_pc) {
4006   // we must set sp to zero to clear frame
4007   movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
4008   // must clear fp, so that compiled frames are not confused; it is
4009   // possible that we need it only for debugging
4010   if (clear_fp) {
4011     movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
4012   }
4013 
4014   if (clear_pc) {
4015     movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
4016   }
4017 }
4018 
4019 
4020 // Implementation of call_VM versions
4021 
4022 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
4023   Label L, E;
4024 
4025 #ifdef _WIN64
4026   // Windows always allocates space for it's register args
4027   assert(num_args <= 4, "only register arguments supported");
4028   subq(rsp,  frame::arg_reg_save_area_bytes);
4029 #endif
4030 
4031   // Align stack if necessary
4032   testl(rsp, 15);
4033   jcc(Assembler::zero, L);
4034 
4035   subq(rsp, 8);
4036   {
4037     call(RuntimeAddress(entry_point));
4038   }
4039   addq(rsp, 8);
4040   jmp(E);
4041 
4042   bind(L);
4043   {
4044     call(RuntimeAddress(entry_point));
4045   }
4046 
4047   bind(E);
4048 
4049 #ifdef _WIN64
4050   // restore stack pointer
4051   addq(rsp, frame::arg_reg_save_area_bytes);
4052 #endif
4053 
4054 }
4055 
4056 
// Common implementation behind the call_VM entry points. Emits code that
// records the last Java frame in the thread, calls entry_point (padding rsp
// by 8 bytes around the call when (rsp & 15) != 0), resets the last Java
// frame, optionally forwards a pending exception and optionally fetches an
// oop result from the thread.
//
//   oop_result       - if valid, receives JavaThread::vm_result, which is
//                      then cleared in the thread
//   java_thread      - not read in this body; r15_thread is used throughout
//   last_java_sp     - sp to record for the last Java frame; rsp is used
//                      when this register is not valid
//   entry_point      - address of the C function to call
//   num_args         - only checked by asserts here
//   check_exceptions - if true, jump to StubRoutines::forward_exception_entry()
//                      when the thread's pending_exception field is non-zero
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int num_args,
                                  bool check_exceptions) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // debugging support
  assert(num_args >= 0, "cannot have negative number of arguments");
  assert(r15_thread != oop_result,
         "cannot use the same register for java_thread & oop_result");
  assert(r15_thread != last_java_sp,
         "cannot use the same register for java_thread & last_java_sp");

  // set last Java frame before call

  // This sets last_Java_fp which is only needed from interpreted frames
  // and should really be done only from the interp_masm version before
  // calling the underlying call_VM. That doesn't happen yet so we set
  // last_Java_fp here even though some callers don't need it and
  // also clear it below.
  set_last_Java_frame(last_java_sp, rbp, NULL);

  {
    // L: stack was already aligned; E: common exit of both call paths
    Label L, E;

    // Align stack if necessary
#ifdef _WIN64
    assert(num_args <= 4, "only register arguments supported");
    // Windows always allocates space for its register args
    subq(rsp, frame::arg_reg_save_area_bytes);
#endif
    testl(rsp, 15);
    jcc(Assembler::zero, L);

    // misaligned: pad by one word for the duration of the call
    subq(rsp, 8);
    {
      call(RuntimeAddress(entry_point));
    }
    addq(rsp, 8);
    jmp(E);


    // aligned: call directly
    bind(L);
    {
      call(RuntimeAddress(entry_point));
    }

    bind(E);

#ifdef _WIN64
    // restore stack pointer
    addq(rsp, frame::arg_reg_save_area_bytes);
#endif
  }

#ifdef ASSERT
  // Debug builds: check that r15_thread still equals the value get_thread
  // produces after the call; rax is saved and restored around the check.
  pushq(rax);
  {
    Label L;
    get_thread(rax);
    cmpq(r15_thread, rax);
    jcc(Assembler::equal, L);
    stop("MacroAssembler::call_VM_base: register not callee saved?");
    bind(L);
  }
  popq(rax);
#endif

  // reset last Java frame
  // This really shouldn't have to clear fp; see the note above at the
  // call to set_last_Java_frame
  // (arguments are presumably clear_fp = true, clear_pc = false)
  reset_last_Java_frame(true, false);

  check_and_handle_popframe(noreg);
  check_and_handle_earlyret(noreg);

  if (check_exceptions) {
    cmpq(Address(r15_thread, Thread::pending_exception_offset()), (int) NULL);
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach
    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movq(oop_result, Address(r15_thread, JavaThread::vm_result_offset()));
    movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}
4156 
// Hook invoked by call_VM_base after the VM call; this implementation is
// empty and emits nothing.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {}
// Hook invoked by call_VM_base after the VM call; this implementation is
// empty and emits nothing.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
4159 
4160 void MacroAssembler::call_VM_helper(Register oop_result,
4161                                     address entry_point,
4162                                     int num_args,
4163                                     bool check_exceptions) {
4164   // Java thread becomes first argument of C function
4165   movq(c_rarg0, r15_thread);
4166 
4167   // We've pushed one address, correct last_Java_sp
4168   leaq(rax, Address(rsp, wordSize));
4169 
4170   call_VM_base(oop_result, noreg, rax, entry_point, num_args,
4171                check_exceptions);
4172 }
4173 
4174 
4175 void MacroAssembler::call_VM(Register oop_result,
4176                              address entry_point,
4177                              bool check_exceptions) {
4178   Label C, E;
4179   Assembler::call(C, relocInfo::none);
4180   jmp(E);
4181 
4182   bind(C);
4183   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
4184   ret(0);
4185 
4186   bind(E);
4187 }
4188 
4189 
4190 void MacroAssembler::call_VM(Register oop_result,
4191                              address entry_point,
4192                              Register arg_1,
4193                              bool check_exceptions) {
4194   assert(rax != arg_1, "smashed argument");
4195   assert(c_rarg0 != arg_1, "smashed argument");
4196 
4197   Label C, E;
4198   Assembler::call(C, relocInfo::none);
4199   jmp(E);
4200 
4201   bind(C);
4202   // c_rarg0 is reserved for thread
4203   if (c_rarg1 != arg_1) {
4204     movq(c_rarg1, arg_1);
4205   }
4206   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
4207   ret(0);
4208 
4209   bind(E);
4210 }
4211 
4212 void MacroAssembler::call_VM(Register oop_result,
4213                              address entry_point,
4214                              Register arg_1,
4215                              Register arg_2,
4216                              bool check_exceptions) {
4217   assert(rax != arg_1, "smashed argument");
4218   assert(rax != arg_2, "smashed argument");
4219   assert(c_rarg0 != arg_1, "smashed argument");
4220   assert(c_rarg0 != arg_2, "smashed argument");
4221   assert(c_rarg1 != arg_2, "smashed argument");
4222   assert(c_rarg2 != arg_1, "smashed argument");
4223 
4224   Label C, E;
4225   Assembler::call(C, relocInfo::none);
4226   jmp(E);
4227 
4228   bind(C);
4229   // c_rarg0 is reserved for thread
4230   if (c_rarg1 != arg_1) {
4231     movq(c_rarg1, arg_1);
4232   }
4233   if (c_rarg2 != arg_2) {
4234     movq(c_rarg2, arg_2);
4235   }
4236   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
4237   ret(0);
4238 
4239   bind(E);
4240 }
4241 
4242 
4243 void MacroAssembler::call_VM(Register oop_result,
4244                              address entry_point,
4245                              Register arg_1,
4246                              Register arg_2,
4247                              Register arg_3,
4248                              bool check_exceptions) {
4249   assert(rax != arg_1, "smashed argument");
4250   assert(rax != arg_2, "smashed argument");
4251   assert(rax != arg_3, "smashed argument");
4252   assert(c_rarg0 != arg_1, "smashed argument");
4253   assert(c_rarg0 != arg_2, "smashed argument");
4254   assert(c_rarg0 != arg_3, "smashed argument");
4255   assert(c_rarg1 != arg_2, "smashed argument");
4256   assert(c_rarg1 != arg_3, "smashed argument");
4257   assert(c_rarg2 != arg_1, "smashed argument");
4258   assert(c_rarg2 != arg_3, "smashed argument");
4259   assert(c_rarg3 != arg_1, "smashed argument");
4260   assert(c_rarg3 != arg_2, "smashed argument");
4261 
4262   Label C, E;
4263   Assembler::call(C, relocInfo::none);
4264   jmp(E);
4265 
4266   bind(C);
4267   // c_rarg0 is reserved for thread
4268   if (c_rarg1 != arg_1) {
4269     movq(c_rarg1, arg_1);
4270   }
4271   if (c_rarg2 != arg_2) {
4272     movq(c_rarg2, arg_2);
4273   }
4274   if (c_rarg3 != arg_3) {
4275     movq(c_rarg3, arg_3);
4276   }
4277   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
4278   ret(0);
4279 
4280   bind(E);
4281 }
4282 
// call_VM variant taking an explicit last_java_sp and an argument count.
// Forwards directly to call_VM_base with noreg as java_thread; no argument
// registers (including c_rarg0) are set up here.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int num_args,
                             bool check_exceptions) {
  call_VM_base(oop_result, noreg, last_java_sp, entry_point, num_args,
               check_exceptions);
}
4291 
4292 void MacroAssembler::call_VM(Register oop_result,
4293                              Register last_java_sp,
4294                              address entry_point,
4295                              Register arg_1,
4296                              bool check_exceptions) {
4297   assert(c_rarg0 != arg_1, "smashed argument");
4298   assert(c_rarg1 != last_java_sp, "smashed argument");
4299   // c_rarg0 is reserved for thread
4300   if (c_rarg1 != arg_1) {
4301     movq(c_rarg1, arg_1);
4302   }
4303   call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
4304 }
4305 
4306 void MacroAssembler::call_VM(Register oop_result,
4307                              Register last_java_sp,
4308                              address entry_point,
4309                              Register arg_1,
4310                              Register arg_2,
4311                              bool check_exceptions) {
4312   assert(c_rarg0 != arg_1, "smashed argument");
4313   assert(c_rarg0 != arg_2, "smashed argument");
4314   assert(c_rarg1 != arg_2, "smashed argument");
4315   assert(c_rarg1 != last_java_sp, "smashed argument");
4316   assert(c_rarg2 != arg_1, "smashed argument");
4317   assert(c_rarg2 != last_java_sp, "smashed argument");
4318   // c_rarg0 is reserved for thread
4319   if (c_rarg1 != arg_1) {
4320     movq(c_rarg1, arg_1);
4321   }
4322   if (c_rarg2 != arg_2) {
4323     movq(c_rarg2, arg_2);
4324   }
4325   call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
4326 }
4327 
4328 
4329 void MacroAssembler::call_VM(Register oop_result,
4330                              Register last_java_sp,
4331                              address entry_point,
4332                              Register arg_1,
4333                              Register arg_2,
4334                              Register arg_3,
4335                              bool check_exceptions) {
4336   assert(c_rarg0 != arg_1, "smashed argument");
4337   assert(c_rarg0 != arg_2, "smashed argument");
4338   assert(c_rarg0 != arg_3, "smashed argument");
4339   assert(c_rarg1 != arg_2, "smashed argument");
4340   assert(c_rarg1 != arg_3, "smashed argument");
4341   assert(c_rarg1 != last_java_sp, "smashed argument");
4342   assert(c_rarg2 != arg_1, "smashed argument");
4343   assert(c_rarg2 != arg_3, "smashed argument");
4344   assert(c_rarg2 != last_java_sp, "smashed argument");
4345   assert(c_rarg3 != arg_1, "smashed argument");
4346   assert(c_rarg3 != arg_2, "smashed argument");
4347   assert(c_rarg3 != last_java_sp, "smashed argument");
4348   // c_rarg0 is reserved for thread
4349   if (c_rarg1 != arg_1) {
4350     movq(c_rarg1, arg_1);
4351   }
4352   if (c_rarg2 != arg_2) {
4353     movq(c_rarg2, arg_2);
4354   }
4355   if (c_rarg3 != arg_3) {
4356     movq(c_rarg2, arg_3);
4357   }
4358   call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
4359 }
4360 
// Leaf call with the arguments already in place: forwards to
// call_VM_leaf_base unchanged.
void MacroAssembler::call_VM_leaf(address entry_point, int num_args) {
  call_VM_leaf_base(entry_point, num_args);
}
4364 
4365 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
4366   if (c_rarg0 != arg_1) {
4367     movq(c_rarg0, arg_1);
4368   }
4369   call_VM_leaf(entry_point, 1);
4370 }
4371 
4372 void MacroAssembler::call_VM_leaf(address entry_point,
4373                                   Register arg_1,
4374                                   Register arg_2) {
4375   assert(c_rarg0 != arg_2, "smashed argument");
4376   assert(c_rarg1 != arg_1, "smashed argument");
4377   if (c_rarg0 != arg_1) {
4378     movq(c_rarg0, arg_1);
4379   }
4380   if (c_rarg1 != arg_2) {
4381     movq(c_rarg1, arg_2);
4382   }
4383   call_VM_leaf(entry_point, 2);
4384 }
4385 
4386 void MacroAssembler::call_VM_leaf(address entry_point,
4387                                   Register arg_1,
4388                                   Register arg_2,
4389                                   Register arg_3) {
4390   assert(c_rarg0 != arg_2, "smashed argument");
4391   assert(c_rarg0 != arg_3, "smashed argument");
4392   assert(c_rarg1 != arg_1, "smashed argument");
4393   assert(c_rarg1 != arg_3, "smashed argument");
4394   assert(c_rarg2 != arg_1, "smashed argument");
4395   assert(c_rarg2 != arg_2, "smashed argument");
4396   if (c_rarg0 != arg_1) {
4397     movq(c_rarg0, arg_1);
4398   }
4399   if (c_rarg1 != arg_2) {
4400     movq(c_rarg1, arg_2);
4401   }
4402   if (c_rarg2 != arg_3) {
4403     movq(c_rarg2, arg_3);
4404   }
4405   call_VM_leaf(entry_point, 3);
4406 }
4407 
4408 
// Calls to C land
//
// When entering C land, the rbp & rsp of the last Java frame have to
// be recorded in the (thread-local) JavaThread object. When leaving C
// land, the last Java fp has to be reset to 0. This is required to
// allow proper stack traversal.
//
// (This note applies to the call_VM routines; the store check routines
// that follow do not touch the last Java frame.)

// Emits the complete store check for the oop in register obj by running
// part 1 and part 2 back to back.
void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}
4421 
// Overload taking the destination address of the store; dst is not used
// here and the call is forwarded to store_check(obj).
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}
4425 
// split the store check operation so that other instructions can be
// scheduled in between
4428 void MacroAssembler::store_check_part_1(Register obj) {
4429   BarrierSet* bs = Universe::heap()->barrier_set();
4430   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
4431   shrq(obj, CardTableModRefBS::card_shift);
4432 }
4433 
4434 void MacroAssembler::store_check_part_2(Register obj) {
4435   BarrierSet* bs = Universe::heap()->barrier_set();
4436   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
4437   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
4438   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
4439 
4440   // The calculation for byte_map_base is as follows:
4441   // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
4442   // So this essentially converts an address to a displacement and
4443   // it will never need to be relocated. On 64bit however the value may be too
4444   // large for a 32bit displacement
4445 
4446   intptr_t disp = (intptr_t) ct->byte_map_base;
4447   if (is_simm32(disp)) {
4448     Address cardtable(noreg, obj, Address::times_1, disp);
4449     movb(cardtable, 0);
4450   } else {
4451     // By doing it as an ExternalAddress disp could be converted to a rip-relative
4452     // displacement and done in a single instruction given favorable mapping and
4453     // a smarter version of as_Address. Worst case it is two instructions which
4454     // is no worse off then loading disp into a register and doing as a simple
4455     // Address() as above.
4456     // We can't do as ExternalAddress as the only style since if disp == 0 we'll
4457     // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
4458     // in some cases we'll get a single instruction version.
4459 
4460     ExternalAddress cardtable((address)disp);
4461     Address index(noreg, obj, Address::times_1);
4462     movb(as_Address(ArrayAddress(cardtable, index)), 0);
4463   }
4464 
4465 }
4466 
// Normalizes a C-style boolean held in x to 0 or 1.
void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);                  // keep the low byte only; sets the flags
  setb(Assembler::notZero, x);    // set from the flags of the andl above
}
4475 
4476 int MacroAssembler::corrected_idivl(Register reg) {
4477   // Full implementation of Java idiv and irem; checks for special
4478   // case as described in JVM spec., p.243 & p.271.  The function
4479   // returns the (pc) offset of the idivl instruction - may be needed
4480   // for implicit exceptions.
4481   //
4482   //         normal case                           special case
4483   //
4484   // input : eax: dividend                         min_int
4485   //         reg: divisor   (may not be eax/edx)   -1
4486   //
4487   // output: eax: quotient  (= eax idiv reg)       min_int
4488   //         edx: remainder (= eax irem reg)       0
4489   assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
4490   const int min_int = 0x80000000;
4491   Label normal_case, special_case;
4492 
4493   // check for special case
4494   cmpl(rax, min_int);
4495   jcc(Assembler::notEqual, normal_case);
4496   xorl(rdx, rdx); // prepare edx for possible special case (where
4497                   // remainder = 0)
4498   cmpl(reg, -1);
4499   jcc(Assembler::equal, special_case);
4500 
4501   // handle normal case
4502   bind(normal_case);
4503   cdql();
4504   int idivl_offset = offset();
4505   idivl(reg);
4506 
4507   // normal and special case exit
4508   bind(special_case);
4509 
4510   return idivl_offset;
4511 }
4512 
int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271.  The function
  // returns the (pc) offset of the idivq instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be rax/rdx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  // kept in static storage because the emitted compare reads it from
  // memory through its address
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}
4549 
// Saves the flags and the integer registers on the stack; undone by
// pop_IU_state.
void MacroAssembler::push_IU_state() {
  pushfq();     // Push flags first because pushaq kills them
  subq(rsp, 8); // Make sure rsp stays 16-byte aligned
  pushaq();
}
4555 
// Restores what push_IU_state saved, in reverse order.
void MacroAssembler::pop_IU_state() {
  popaq();
  addq(rsp, 8);   // drop the 8-byte padding inserted by push_IU_state
  popfq();
}
4561 
// Reserves FPUStateSizeInWords words on the stack and fxsave's the
// floating point state into them.
void MacroAssembler::push_FPU_state() {
  subq(rsp, FPUStateSizeInWords * wordSize);
  fxsave(Address(rsp, 0));
}
4566 
// Reloads the floating point state from the top of the stack with fxrstor
// and releases the FPUStateSizeInWords words reserved by push_FPU_state.
void MacroAssembler::pop_FPU_state() {
  fxrstor(Address(rsp, 0));
  addq(rsp, FPUStateSizeInWords * wordSize);
}
4571 
4572 // Save Integer and Float state
4573 // Warning: Stack must be 16 byte aligned
// Saves the integer state first, then the FPU state on top of it.
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}
4578 
// Restores the state saved by push_CPU_state, in reverse order.
void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}
4583 
// Sign-extends the low 16 bits of reg in place (movswl reg, reg).
void MacroAssembler::sign_extend_short(Register reg) {
  movswl(reg, reg);
}
4587 
// Sign-extends the low 8 bits of reg in place (movsbl reg, reg).
void MacroAssembler::sign_extend_byte(Register reg) {
  movsbl(reg, reg);
}
4591 
4592 void MacroAssembler::division_with_shift(Register reg, int shift_value) {
4593   assert (shift_value > 0, "illegal shift value");
4594   Label _is_positive;
4595   testl (reg, reg);
4596   jcc (Assembler::positive, _is_positive);
4597   int offset = (1 << shift_value) - 1 ;
4598 
4599   if (offset == 1) {
4600     incrementl(reg);
4601   } else {
4602     addl(reg, offset);
4603   }
4604 
4605   bind (_is_positive);
4606   sarl(reg, shift_value);
4607 }
4608 
// 32-bit: reg = (reg + modulus - 1) & -modulus.
// Rounds reg up to a multiple of modulus, assuming modulus is a power of
// two (not checked here).
void MacroAssembler::round_to_l(Register reg, int modulus) {
  addl(reg, modulus - 1);
  andl(reg, -modulus);
}
4613 
// 64-bit: reg = (reg + modulus - 1) & -modulus.
// Rounds reg up to a multiple of modulus, assuming modulus is a power of
// two (not checked here).
void MacroAssembler::round_to_q(Register reg, int modulus) {
  addq(reg, modulus - 1);
  andq(reg, -modulus);
}
4618 
4619 void MacroAssembler::verify_oop(Register reg, const char* s) {
4620   if (!VerifyOops) {
4621     return;
4622   }
4623 
4624   // Pass register number to verify_oop_subroutine
4625   char* b = new char[strlen(s) + 50];
4626   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
4627 
4628   pushq(rax); // save rax, restored by receiver
4629 
4630   // pass args on stack, only touch rax
4631   pushq(reg);
4632   // avoid using pushptr, as it modifies scratch registers
4633   // and our contract is not to modify anything
4634   ExternalAddress buffer((address)b);
4635   movptr(rax, buffer.addr());
4636   pushq(rax);
4637 
4638   // call indirectly to solve generation ordering problem
4639   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4640   call(rax); // no alignment requirement
4641   // everything popped by receiver
4642 }
4643 
4644 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
4645   if (!VerifyOops) return;
4646   // Pass register number to verify_oop_subroutine
4647   char* b = new char[strlen(s) + 50];
4648   sprintf(b, "verify_oop_addr: %s", s);
4649   pushq(rax);                          // save rax
4650   movq(addr, rax);
4651   pushq(rax);                          // pass register argument
4652 
4653 
4654   // avoid using pushptr, as it modifies scratch registers
4655   // and our contract is not to modify anything
4656   ExternalAddress buffer((address)b);
4657   movptr(rax, buffer.addr());
4658   pushq(rax);
4659 
4660   // call indirectly to solve generation ordering problem
4661   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4662   call(rax); // no alignment requirement
4663   // everything popped by receiver
4664 }
4665 
4666 
4667 void MacroAssembler::stop(const char* msg) {
4668   address rip = pc();
4669   pushaq(); // get regs on stack
4670   lea(c_rarg0, ExternalAddress((address) msg));
4671   lea(c_rarg1, InternalAddress(rip));
4672   movq(c_rarg2, rsp); // pass pointer to regs array
4673   andq(rsp, -16); // align stack as required by ABI
4674   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug)));
4675   hlt();
4676 }
4677 
// Emits a call to warning(msg) that saves and restores the CPU state
// around the call. r12 holds the original rsp while the stack is aligned.
void MacroAssembler::warn(const char* msg) {
  pushq(r12);         // r12 is used as a temporary below
  movq(r12, rsp);     // remember rsp so it can be restored after alignment
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();

  movq(rsp, r12);     // undo the alignment
  popq(r12);
}
4691 
4692 #ifndef PRODUCT
4693 extern "C" void findpc(intptr_t x);
4694 #endif
4695 
// Runtime target of the call emitted by stop(). msg is the stop message, pc
// the code address passed by stop() and regs points at the register save
// area stop() created with pushaq. With ShowMessageBoxOnError set, a message
// box is shown and the registers are optionally printed; otherwise only the
// message is printed to tty.
void MacroAssembler::debug(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      // NOTE(review): %lx is used with int64_t values throughout; this
      // assumes long is 64 bits -- confirm for _WIN64 builds.
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      // The indices below presumably mirror the slot order in which pushaq
      // stores the registers (pushaq is not visible here -- confirm).
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8  = 0x%016lx", regs[7]);
      tty->print_cr("r9  = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    // leave the faked in_VM state again
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
  }
}
4744 
// Emits a call to os::breakpoint.
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os::breakpoint for
  // better debuggability
  // This shouldn't need alignment, it's an empty function
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
4751 
4752 // Write serialization page so VM thread can do a pseudo remote membar.
4753 // We use the current thread pointer to calculate a thread specific
4754 // offset to write to within the page. This minimizes bus traffic
4755 // due to cache line collision.
// Emits a store into the memory serialize page at an offset derived from
// the thread pointer. tmp is overwritten.
void MacroAssembler::serialize_memory(Register thread,
                                      Register tmp) {

  // offset = (low 32 bits of thread >> shift) & (page_size - sizeof(int))
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // NOTE(review): the mask allows offsets up to page_size - sizeof(int);
  // movptr presumably stores a pointer-sized word, which could extend past
  // the end of the page at the highest offset -- confirm the store width.
  movptr(ArrayAddress(page, index), tmp);
}
4768 
4769 void MacroAssembler::verify_tlab() {
4770 #ifdef ASSERT
4771   if (UseTLAB) {
4772     Label next, ok;
4773     Register t1 = rsi;
4774 
4775     pushq(t1);
4776 
4777     movq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
4778     cmpq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_start_offset())));
4779     jcc(Assembler::aboveEqual, next);
4780     stop("assert(top >= start)");
4781     should_not_reach_here();
4782 
4783     bind(next);
4784     movq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_end_offset())));
4785     cmpq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
4786     jcc(Assembler::aboveEqual, ok);
4787     stop("assert(top <= end)");
4788     should_not_reach_here();
4789 
4790     bind(ok);
4791 
4792     popq(t1);
4793   }
4794 #endif
4795 }
4796 
4797 // Defines obj, preserves var_size_in_bytes
4798 void MacroAssembler::eden_allocate(Register obj,
4799                                    Register var_size_in_bytes,
4800                                    int con_size_in_bytes,
4801                                    Register t1,
4802                                    Label& slow_case) {
4803   assert(obj == rax, "obj must be in rax for cmpxchg");
4804   assert_different_registers(obj, var_size_in_bytes, t1);
4805   Register end = t1;
4806   Label retry;
4807   bind(retry);
4808   ExternalAddress heap_top((address) Universe::heap()->top_addr());
4809   movptr(obj, heap_top);
4810   if (var_size_in_bytes == noreg) {
4811     leaq(end, Address(obj, con_size_in_bytes));
4812   } else {
4813     leaq(end, Address(obj, var_size_in_bytes, Address::times_1));
4814   }
4815   // if end < obj then we wrapped around => object too long => slow case
4816   cmpq(end, obj);
4817   jcc(Assembler::below, slow_case);
4818   cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
4819 
4820   jcc(Assembler::above, slow_case);
4821   // Compare obj with the top addr, and if still equal, store the new
4822   // top addr in end at the address of the top addr pointer. Sets ZF
4823   // if was equal, and clears it otherwise. Use lock prefix for
4824   // atomicity on MPs.
4825   if (os::is_MP()) {
4826     lock();
4827   }
4828   cmpxchgptr(end, heap_top);
4829   // if someone beat us on the allocation, try again, otherwise continue
4830   jcc(Assembler::notEqual, retry);
4831 }
4832 
4833 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
4834 void MacroAssembler::tlab_allocate(Register obj,
4835                                    Register var_size_in_bytes,
4836                                    int con_size_in_bytes,
4837                                    Register t1,
4838                                    Register t2,
4839                                    Label& slow_case) {
4840   assert_different_registers(obj, t1, t2);
4841   assert_different_registers(obj, var_size_in_bytes, t1);
4842   Register end = t2;
4843 
4844   verify_tlab();
4845 
4846   movq(obj, Address(r15_thread, JavaThread::tlab_top_offset()));
4847   if (var_size_in_bytes == noreg) {
4848     leaq(end, Address(obj, con_size_in_bytes));
4849   } else {
4850     leaq(end, Address(obj, var_size_in_bytes, Address::times_1));
4851   }
4852   cmpq(end, Address(r15_thread, JavaThread::tlab_end_offset()));
4853   jcc(Assembler::above, slow_case);
4854 
4855   // update the tlab top pointer
4856   movq(Address(r15_thread, JavaThread::tlab_top_offset()), end);
4857 
4858   // recover var_size_in_bytes if necessary
4859   if (var_size_in_bytes == end) {
4860     subq(var_size_in_bytes, obj);
4861   }
4862   verify_tlab();
4863 }
4864 
4865 // Preserves rbx and rdx.
// Emits the TLAB refill path. If the free space left in the current TLAB is
// above the thread's refill waste limit, the TLAB is retained, the limit is
// raised and control goes to try_eden. Otherwise the remainder of the TLAB
// is overwritten with an int array header, a new TLAB is allocated with
// eden_allocate, the thread's start/top/end fields are updated and control
// goes to retry. slow_case is taken when inline allocation is not possible
// or the eden allocation fails.
// Uses rax, rcx, rsi and r10 as temporaries.
void MacroAssembler::tlab_refill(Label& retry,
                                 Label& try_eden,
                                 Label& slow_case) {
  Register top = rax;
  Register t1 = rcx;
  Register t2 = rsi;
  Register t3 = r10;
  Register thread_reg = r15_thread;
  assert_different_registers(top, thread_reg, t1, t2, t3,
                             /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  movq(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movq(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space (in heap words after the shift)
  subq(t1, top);
  shrq(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpq(t1, Address(thread_reg, // size_t
                   in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain: raise the waste limit so that repeated failures eventually
  // discard the tlab
  mov64(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addq(Address(thread_reg,  // size_t
               in_bytes(JavaThread::tlab_refill_waste_limit_offset())),
       t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, // unsigned int
                 in_bytes(JavaThread::tlab_slow_allocations_offset())),
         1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, // unsigned int
                 in_bytes(JavaThread::tlab_number_of_refills_offset())),
         1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, // unsigned int
                 in_bytes(JavaThread::tlab_fast_refill_waste_offset())),
         t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testq(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  mov64(t3, (int64_t) markOopDesc::prototype()->copy_set_hash(0x2));
  movq(Address(top, oopDesc::mark_offset_in_bytes()), t3);
  // set the length to the remaining space: free words, minus the int array
  // header, plus the alignment reserve, converted from heap words to jints
  subq(t1, typeArrayOopDesc::header_size(T_INT));
  addq(t1, (int)ThreadLocalAllocBuffer::alignment_reserve());
  shlq(t1, log2_intptr(HeapWordSize / sizeof(jint)));
  movq(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
  store_klass(top, t1);

  // refill the tlab with an eden allocation; t1 = tlab size in bytes
  bind(do_refill);
  movq(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlq(t1, LogHeapWordSize);
  // add object_size ??
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    pushq(tsize);
    // recompute the size in bytes and compare it with t1
    movq(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlq(tsize, LogHeapWordSize);
    cmpq(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    popq(tsize);
  }
#endif
  // install the new tlab: start = top = new block, end = block + size
  // minus the alignment reserve
  movq(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movq(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addq(top, t1);
  subq(top, (int)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movq(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);
}
4971 
4972 
4973 int MacroAssembler::biased_locking_enter(Register lock_reg, Register obj_reg, Register swap_reg, Register tmp_reg,
4974                                          bool swap_reg_contains_mark,
4975                                          Label& done, Label* slow_case,
4976                                          BiasedLockingCounters* counters) {
4977   assert(UseBiasedLocking, "why call this otherwise?");
4978   assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
4979   assert(tmp_reg != noreg, "tmp_reg must be supplied");
4980   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4981   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4982   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
4983   Address saved_mark_addr(lock_reg, 0);
4984 
4985   if (PrintBiasedLockingStatistics && counters == NULL)
4986     counters = BiasedLocking::counters();
4987 
4988   // Biased locking
4989   // See whether the lock is currently biased toward our thread and
4990   // whether the epoch is still valid
4991   // Note that the runtime guarantees sufficient alignment of JavaThread
4992   // pointers to allow age to be placed into low bits
4993   // First check to see whether biasing is even enabled for this object
4994   Label cas_label;
4995   int null_check_offset = -1;
4996   if (!swap_reg_contains_mark) {
4997     null_check_offset = offset();
4998     movq(swap_reg, mark_addr);
4999   }
5000   movq(tmp_reg, swap_reg);
5001   andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5002   cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
5003   jcc(Assembler::notEqual, cas_label);
5004   // The bias pattern is present in the object's header. Need to check
5005   // whether the bias owner and the epoch are both still current.
5006   load_klass(tmp_reg, obj_reg);
5007   movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
5008   orq(tmp_reg, r15_thread);
5009   xorq(tmp_reg, swap_reg);
5010   andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
5011   if (counters != NULL) {
5012     cond_inc32(Assembler::zero,
5013                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5014   }
5015   jcc(Assembler::equal, done);
5016 
5017   Label try_revoke_bias;
5018   Label try_rebias;
5019 
5020   // At this point we know that the header has the bias pattern and
5021   // that we are not the bias owner in the current epoch. We need to
5022   // figure out more details about the state of the header in order to
5023   // know what operations can be legally performed on the object's
5024   // header.
5025 
5026   // If the low three bits in the xor result aren't clear, that means
5027   // the prototype header is no longer biased and we have to revoke
5028   // the bias on this object.
5029   testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5030   jcc(Assembler::notZero, try_revoke_bias);
5031 
5032   // Biasing is still enabled for this data type. See whether the
5033   // epoch of the current bias is still valid, meaning that the epoch
5034   // bits of the mark word are equal to the epoch bits of the
5035   // prototype header. (Note that the prototype header's epoch bits
5036   // only change at a safepoint.) If not, attempt to rebias the object
5037   // toward the current thread. Note that we must be absolutely sure
5038   // that the current epoch is invalid in order to do this because
5039   // otherwise the manipulations it performs on the mark word are
5040   // illegal.
5041   testq(tmp_reg, markOopDesc::epoch_mask_in_place);
5042   jcc(Assembler::notZero, try_rebias);
5043 
5044   // The epoch of the current bias is still valid but we know nothing
5045   // about the owner; it might be set or it might be clear. Try to
5046   // acquire the bias of the object using an atomic operation. If this
5047   // fails we will go in to the runtime to revoke the object's bias.
5048   // Note that we first construct the presumed unbiased header so we
5049   // don't accidentally blow away another thread's valid bias.
5050   andq(swap_reg,
5051        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
5052   movq(tmp_reg, swap_reg);
5053   orq(tmp_reg, r15_thread);
5054   if (os::is_MP()) {
5055     lock();
5056   }
5057   cmpxchgq(tmp_reg, Address(obj_reg, 0));
5058   // If the biasing toward our thread failed, this means that
5059   // another thread succeeded in biasing it toward itself and we
5060   // need to revoke that bias. The revocation will occur in the
5061   // interpreter runtime in the slow case.
5062   if (counters != NULL) {
5063     cond_inc32(Assembler::zero,
5064                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5065   }
5066   if (slow_case != NULL) {
5067     jcc(Assembler::notZero, *slow_case);
5068   }
5069   jmp(done);
5070 
5071   bind(try_rebias);
5072   // At this point we know the epoch has expired, meaning that the
5073   // current "bias owner", if any, is actually invalid. Under these
5074   // circumstances _only_, we are allowed to use the current header's
5075   // value as the comparison value when doing the cas to acquire the
5076   // bias in the current epoch. In other words, we allow transfer of
5077   // the bias from one thread to another directly in this situation.
5078   //
5079   // FIXME: due to a lack of registers we currently blow away the age
5080   // bits in this situation. Should attempt to preserve them.
5081   load_klass(tmp_reg, obj_reg);
5082   movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
5083   orq(tmp_reg, r15_thread);
5084   if (os::is_MP()) {
5085     lock();
5086   }
5087   cmpxchgq(tmp_reg, Address(obj_reg, 0));
5088   // If the biasing toward our thread failed, then another thread
5089   // succeeded in biasing it toward itself and we need to revoke that
5090   // bias. The revocation will occur in the runtime in the slow case.
5091   if (counters != NULL) {
5092     cond_inc32(Assembler::zero,
5093                ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5094   }
5095   if (slow_case != NULL) {
5096     jcc(Assembler::notZero, *slow_case);
5097   }
5098   jmp(done);
5099 
5100   bind(try_revoke_bias);
5101   // The prototype mark in the klass doesn't have the bias bit set any
5102   // more, indicating that objects of this data type are not supposed
5103   // to be biased any more. We are going to try to reset the mark of
5104   // this object to the prototype value and fall through to the
5105   // CAS-based locking scheme. Note that if our CAS fails, it means
5106   // that another thread raced us for the privilege of revoking the
5107   // bias of this particular object, so it's okay to continue in the
5108   // normal locking code.
5109   //
5110   // FIXME: due to a lack of registers we currently blow away the age
5111   // bits in this situation. Should attempt to preserve them.
5112   load_klass(tmp_reg, obj_reg);
5113   movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
5114   if (os::is_MP()) {
5115     lock();
5116   }
5117   cmpxchgq(tmp_reg, Address(obj_reg, 0));
5118   // Fall through to the normal CAS-based lock, because no matter what
5119   // the result of the above CAS, some thread must have succeeded in
5120   // removing the bias bit from the object's header.
5121   if (counters != NULL) {
5122     cond_inc32(Assembler::zero,
5123                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5124   }
5125 
5126   bind(cas_label);
5127 
5128   return null_check_offset;
5129 }
5130 
5131 
5132 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
5133   assert(UseBiasedLocking, "why call this otherwise?");
5134 
5135   // Check for biased locking unlock case, which is a no-op
5136   // Note: we do not have to check the thread ID for two reasons.
5137   // First, the interpreter checks for IllegalMonitorStateException at
5138   // a higher level. Second, if the bias was revoked while we held the
5139   // lock, the object could not be rebiased toward another thread, so
5140   // the bias bit would be clear.
5141   movq(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
5142   andq(temp_reg, markOopDesc::biased_lock_mask_in_place);
5143   cmpq(temp_reg, markOopDesc::biased_lock_pattern);
5144   jcc(Assembler::equal, done);
5145 }
5146 
5147 
5148 void MacroAssembler::load_klass(Register dst, Register src) {
5149   if (UseCompressedOops) {
5150     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5151     decode_heap_oop_not_null(dst);
5152   } else {
5153     movq(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5154   }
5155 }
5156 
5157 void MacroAssembler::store_klass(Register dst, Register src) {
5158   if (UseCompressedOops) {
5159     encode_heap_oop_not_null(src);
5160     // zero the entire klass field first as the gap needs to be zeroed too.
5161     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), NULL_WORD);
5162     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5163   } else {
5164     movq(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5165   }
5166 }
5167 
5168 void MacroAssembler::load_heap_oop(Register dst, Address src) {
5169   if (UseCompressedOops) {
5170     movl(dst, src);
5171     decode_heap_oop(dst);
5172   } else {
5173     movq(dst, src);
5174   }
5175 }
5176 
5177 void MacroAssembler::store_heap_oop(Address dst, Register src) {
5178   if (UseCompressedOops) {
5179     assert(!dst.uses(src), "not enough registers");
5180     encode_heap_oop(src);
5181     movl(dst, src);
5182   } else {
5183     movq(dst, src);
5184   }
5185 }
5186 
5187 // Algorithm must match oop.inline.hpp encode_heap_oop.
5188 void MacroAssembler::encode_heap_oop(Register r) {
5189   assert (UseCompressedOops, "should be compressed");
5190 #ifdef ASSERT
5191   Label ok;
5192   pushq(rscratch1); // cmpptr trashes rscratch1
5193   cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
5194   jcc(Assembler::equal, ok);
5195   stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
5196   bind(ok);
5197   popq(rscratch1);
5198 #endif
5199   verify_oop(r, "broken oop in encode_heap_oop");
5200   testq(r, r);
5201   cmovq(Assembler::equal, r, r12_heapbase);
5202   subq(r, r12_heapbase);
5203   shrq(r, LogMinObjAlignmentInBytes);
5204 }
5205 
5206 void MacroAssembler::encode_heap_oop_not_null(Register r) {
5207   assert (UseCompressedOops, "should be compressed");
5208 #ifdef ASSERT
5209   Label ok;
5210   testq(r, r);
5211   jcc(Assembler::notEqual, ok);
5212   stop("null oop passed to encode_heap_oop_not_null");
5213   bind(ok);
5214 #endif
5215   verify_oop(r, "broken oop in encode_heap_oop_not_null");
5216   subq(r, r12_heapbase);
5217   shrq(r, LogMinObjAlignmentInBytes);
5218 }
5219 
5220 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
5221   assert (UseCompressedOops, "should be compressed");
5222 #ifdef ASSERT
5223   Label ok;
5224   testq(src, src);
5225   jcc(Assembler::notEqual, ok);
5226   stop("null oop passed to encode_heap_oop_not_null2");
5227   bind(ok);
5228 #endif
5229   verify_oop(src, "broken oop in encode_heap_oop_not_null2");
5230   if (dst != src) {
5231     movq(dst, src);
5232   }
5233   subq(dst, r12_heapbase);
5234   shrq(dst, LogMinObjAlignmentInBytes);
5235 }
5236 
5237 void  MacroAssembler::decode_heap_oop(Register r) {
5238   assert (UseCompressedOops, "should be compressed");
5239 #ifdef ASSERT
5240   Label ok;
5241   pushq(rscratch1);
5242   cmpptr(r12_heapbase,
5243          ExternalAddress((address)Universe::heap_base_addr()));
5244   jcc(Assembler::equal, ok);
5245   stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
5246   bind(ok);
5247   popq(rscratch1);
5248 #endif
5249 
5250   Label done;
5251   shlq(r, LogMinObjAlignmentInBytes);
5252   jccb(Assembler::equal, done);
5253   addq(r, r12_heapbase);
5254 #if 0
5255    // alternate decoding probably a wash.
5256    testq(r, r);
5257    jccb(Assembler::equal, done);
5258    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
5259 #endif
5260   bind(done);
5261   verify_oop(r, "broken oop in decode_heap_oop");
5262 }
5263 
5264 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
5265   assert (UseCompressedOops, "should only be used for compressed headers");
5266   // Cannot assert, unverified entry point counts instructions (see .ad file)
5267   // vtableStubs also counts instructions in pd_code_size_limit.
5268   // Also do not verify_oop as this is called by verify_oop.
5269   assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
5270   leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
5271 }
5272 
5273 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
5274   assert (UseCompressedOops, "should only be used for compressed headers");
5275   // Cannot assert, unverified entry point counts instructions (see .ad file)
5276   // vtableStubs also counts instructions in pd_code_size_limit.
5277   // Also do not verify_oop as this is called by verify_oop.
5278   assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
5279   leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
5280 }
5281 
5282 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
5283   switch (cond) {
5284     // Note some conditions are synonyms for others
5285     case Assembler::zero:         return Assembler::notZero;
5286     case Assembler::notZero:      return Assembler::zero;
5287     case Assembler::less:         return Assembler::greaterEqual;
5288     case Assembler::lessEqual:    return Assembler::greater;
5289     case Assembler::greater:      return Assembler::lessEqual;
5290     case Assembler::greaterEqual: return Assembler::less;
5291     case Assembler::below:        return Assembler::aboveEqual;
5292     case Assembler::belowEqual:   return Assembler::above;
5293     case Assembler::above:        return Assembler::belowEqual;
5294     case Assembler::aboveEqual:   return Assembler::below;
5295     case Assembler::overflow:     return Assembler::noOverflow;
5296     case Assembler::noOverflow:   return Assembler::overflow;
5297     case Assembler::negative:     return Assembler::positive;
5298     case Assembler::positive:     return Assembler::negative;
5299     case Assembler::parity:       return Assembler::noParity;
5300     case Assembler::noParity:     return Assembler::parity;
5301   }
5302   ShouldNotReachHere(); return Assembler::overflow;
5303 }
5304 
5305 
5306 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
5307   Condition negated_cond = negate_condition(cond);
5308   Label L;
5309   jcc(negated_cond, L);
5310   atomic_incl(counter_addr);
5311   bind(L);
5312 }
5313 
5314 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
5315   pushfq();
5316   if (os::is_MP())
5317     lock();
5318   incrementl(counter_addr);
5319   popfq();
5320 }
5321 
5322 SkipIfEqual::SkipIfEqual(
5323     MacroAssembler* masm, const bool* flag_addr, bool value) {
5324   _masm = masm;
5325   _masm->cmp8(ExternalAddress((address)flag_addr), value);
5326   _masm->jcc(Assembler::equal, _label);
5327 }
5328 
5329 SkipIfEqual::~SkipIfEqual() {
5330   _masm->bind(_label);
5331 }
5332 
5333 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
5334   movq(tmp, rsp);
5335   // Bang stack for total size given plus shadow page size.
5336   // Bang one page at a time because large size can bang beyond yellow and
5337   // red zones.
5338   Label loop;
5339   bind(loop);
5340   movl(Address(tmp, (-os::vm_page_size())), size );
5341   subq(tmp, os::vm_page_size());
5342   subl(size, os::vm_page_size());
5343   jcc(Assembler::greater, loop);
5344 
5345   // Bang down shadow pages too.
5346   // The -1 because we already subtracted 1 page.
5347   for (int i = 0; i< StackShadowPages-1; i++) {
5348     movq(Address(tmp, (-i*os::vm_page_size())), size );
5349   }
5350 }
5351 
5352 void MacroAssembler::reinit_heapbase() {
5353   if (UseCompressedOops) {
5354     movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
5355   }
5356 }