1 /*
   2  * Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20  * CA 95054 USA or visit www.sun.com if you need additional information or
  21  * have any questions.
  22  *
  23  */
  24 
  25 #include "incls/_precompiled.incl"
  26 #include "incls/_assembler_x86_64.cpp.incl"
  27 
  28 // Implementation of AddressLiteral
  29 
  30 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  31   _is_lval = false;
  32   _target = target;
  33   switch (rtype) {
  34   case relocInfo::oop_type:
  35     // Oops are a special case. Normally they would be their own section
  36     // but in cases like icBuffer they are literals in the code stream that
  37     // we don't have a section for. We use none so that we get a literal address
  38     // which is always patchable.
  39     break;
  40   case relocInfo::external_word_type:
  41     _rspec = external_word_Relocation::spec(target);
  42     break;
  43   case relocInfo::internal_word_type:
  44     _rspec = internal_word_Relocation::spec(target);
  45     break;
  46   case relocInfo::opt_virtual_call_type:
  47     _rspec = opt_virtual_call_Relocation::spec();
  48     break;
  49   case relocInfo::static_call_type:
  50     _rspec = static_call_Relocation::spec();
  51     break;
  52   case relocInfo::runtime_call_type:
  53     _rspec = runtime_call_Relocation::spec();
  54     break;
  55   case relocInfo::none:
  56     break;
  57   default:
  58     ShouldNotReachHere();
  59     break;
  60   }
  61 }
  62 
  63 // Implementation of Address
  64 
  65 Address Address::make_array(ArrayAddress adr) {
  66 #ifdef _LP64
  67   // Not implementable on 64bit machines
  68   // Should have been handled higher up the call chain.
  69   ShouldNotReachHere();
  70   return Address();
  71 #else
  72   AddressLiteral base = adr.base();
  73   Address index = adr.index();
  74   assert(index._disp == 0, "must not have disp"); // maybe it can?
  75   Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  76   array._rspec = base._rspec;
  77   return array;
  78 #endif // _LP64
  79 }
  80 
  81 // exceedingly dangerous constructor
  82 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  83   _base  = noreg;
  84   _index = noreg;
  85   _scale = no_scale;
  86   _disp  = disp;
  87   switch (rtype) {
  88     case relocInfo::external_word_type:
  89       _rspec = external_word_Relocation::spec(loc);
  90       break;
  91     case relocInfo::internal_word_type:
  92       _rspec = internal_word_Relocation::spec(loc);
  93       break;
  94     case relocInfo::runtime_call_type:
  95       // HMM
  96       _rspec = runtime_call_Relocation::spec();
  97       break;
  98     case relocInfo::none:
  99       break;
 100     default:
 101       ShouldNotReachHere();
 102   }
 103 }
 104 
 105 // Convert the raw encoding form into the form expected by the constructor for
 106 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 107 // that to noreg for the Address constructor.
 108 Address Address::make_raw(int base, int index, int scale, int disp) {
 109   bool valid_index = index != rsp->encoding();
 110   if (valid_index) {
 111     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 112     return madr;
 113   } else {
 114     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 115     return madr;
 116   }
 117 }
 118 
 119 
 120 // Implementation of Assembler
 121 int AbstractAssembler::code_fill_byte() {
 122   return (u_char)'\xF4'; // hlt
 123 }
 124 
 125 // This should only be used by 64bit instructions that can use rip-relative
 126 // it cannot be used by instructions that want an immediate value.
 127 
 128 bool Assembler::reachable(AddressLiteral adr) {
 129   int64_t disp;
 130   // None will force a 64bit literal to the code stream. Likely a placeholder
 131   // for something that will be patched later and we need to certain it will
 132   // always be reachable.
 133   if (adr.reloc() == relocInfo::none) {
 134     return false;
 135   }
 136   if (adr.reloc() == relocInfo::internal_word_type) {
 137     // This should be rip relative and easily reachable.
 138     return true;
 139   }
 140   if (adr.reloc() != relocInfo::external_word_type &&
 141       adr.reloc() != relocInfo::runtime_call_type ) {
 142     return false;
 143   }
 144 
 145   // Stress the correction code
 146   if (ForceUnreachable) {
 147     // Must be runtimecall reloc, see if it is in the codecache
 148     // Flipping stuff in the codecache to be unreachable causes issues
 149     // with things like inline caches where the additional instructions
 150     // are not handled.
 151     if (CodeCache::find_blob(adr._target) == NULL) {
 152       return false;
 153     }
 154   }
 155   // For external_word_type/runtime_call_type if it is reachable from where we
 156   // are now (possibly a temp buffer) and where we might end up
 157   // anywhere in the codeCache then we are always reachable.
 158   // This would have to change if we ever save/restore shared code
 159   // to be more pessimistic.
 160 
 161   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
 162   if (!is_simm32(disp)) return false;
 163   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
 164   if (!is_simm32(disp)) return false;
 165 
 166   disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));
 167 
 168   // Because rip relative is a disp + address_of_next_instruction and we
 169   // don't know the value of address_of_next_instruction we apply a fudge factor
 170   // to make sure we will be ok no matter the size of the instruction we get placed into.
 171   // We don't have to fudge the checks above here because they are already worst case.
 172 
 173   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
 174   // + 4 because better safe than sorry.
 175   const int fudge = 12 + 4;
 176   if (disp < 0) {
 177     disp -= fudge;
 178   } else {
 179     disp += fudge;
 180   }
 181   return is_simm32(disp);
 182 }
 183 
 184 
 185 // make this go away eventually
 186 void Assembler::emit_data(jint data,
 187                           relocInfo::relocType rtype,
 188                           int format) {
 189   if (rtype == relocInfo::none) {
 190     emit_long(data);
 191   } else {
 192     emit_data(data, Relocation::spec_simple(rtype), format);
 193   }
 194 }
 195 
 196 void Assembler::emit_data(jint data,
 197                           RelocationHolder const& rspec,
 198                           int format) {
 199   assert(imm64_operand == 0, "default format must be imm64 in this file");
 200   assert(imm64_operand != format, "must not be imm64");
 201   assert(inst_mark() != NULL, "must be inside InstructionMark");
 202   if (rspec.type() !=  relocInfo::none) {
 203     #ifdef ASSERT
 204       check_relocation(rspec, format);
 205     #endif
 206     // Do not use AbstractAssembler::relocate, which is not intended for
 207     // embedded words.  Instead, relocate to the enclosing instruction.
 208 
 209     // hack. call32 is too wide for mask so use disp32
 210     if (format == call32_operand)
 211       code_section()->relocate(inst_mark(), rspec, disp32_operand);
 212     else
 213       code_section()->relocate(inst_mark(), rspec, format);
 214   }
 215   emit_long(data);
 216 }
 217 
 218 void Assembler::emit_data64(jlong data,
 219                             relocInfo::relocType rtype,
 220                             int format) {
 221   if (rtype == relocInfo::none) {
 222     emit_long64(data);
 223   } else {
 224     emit_data64(data, Relocation::spec_simple(rtype), format);
 225   }
 226 }
 227 
 228 void Assembler::emit_data64(jlong data,
 229                             RelocationHolder const& rspec,
 230                             int format) {
 231   assert(imm64_operand == 0, "default format must be imm64 in this file");
 232   assert(imm64_operand == format, "must be imm64");
 233   assert(inst_mark() != NULL, "must be inside InstructionMark");
 234   // Do not use AbstractAssembler::relocate, which is not intended for
 235   // embedded words.  Instead, relocate to the enclosing instruction.
 236   code_section()->relocate(inst_mark(), rspec, format);
 237 #ifdef ASSERT
 238   check_relocation(rspec, format);
 239 #endif
 240   emit_long64(data);
 241 }
 242 
 243 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
 244   assert(isByte(op1) && isByte(op2), "wrong opcode");
 245   assert(isByte(imm8), "not a byte");
 246   assert((op1 & 0x01) == 0, "should be 8bit operation");
 247   int dstenc = dst->encoding();
 248   if (dstenc >= 8) {
 249     dstenc -= 8;
 250   }
 251   emit_byte(op1);
 252   emit_byte(op2 | dstenc);
 253   emit_byte(imm8);
 254 }
 255 
 256 void Assembler::emit_arith(int op1, int op2, Register dst, int imm32) {
 257   assert(isByte(op1) && isByte(op2), "wrong opcode");
 258   assert((op1 & 0x01) == 1, "should be 32bit operation");
 259   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 260   int dstenc = dst->encoding();
 261   if (dstenc >= 8) {
 262     dstenc -= 8;
 263   }
 264   if (is8bit(imm32)) {
 265     emit_byte(op1 | 0x02); // set sign bit
 266     emit_byte(op2 | dstenc);
 267     emit_byte(imm32 & 0xFF);
 268   } else {
 269     emit_byte(op1);
 270     emit_byte(op2 | dstenc);
 271     emit_long(imm32);
 272   }
 273 }
 274 
 275 // immediate-to-memory forms
 276 void Assembler::emit_arith_operand(int op1,
 277                                    Register rm, Address adr,
 278                                    int imm32) {
 279   assert((op1 & 0x01) == 1, "should be 32bit operation");
 280   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 281   if (is8bit(imm32)) {
 282     emit_byte(op1 | 0x02); // set sign bit
 283     emit_operand(rm, adr, 1);
 284     emit_byte(imm32 & 0xFF);
 285   } else {
 286     emit_byte(op1);
 287     emit_operand(rm, adr, 4);
 288     emit_long(imm32);
 289   }
 290 }
 291 
 292 
 293 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 294   assert(isByte(op1) && isByte(op2), "wrong opcode");
 295   int dstenc = dst->encoding();
 296   int srcenc = src->encoding();
 297   if (dstenc >= 8) {
 298     dstenc -= 8;
 299   }
 300   if (srcenc >= 8) {
 301     srcenc -= 8;
 302   }
 303   emit_byte(op1);
 304   emit_byte(op2 | dstenc << 3 | srcenc);
 305 }
 306 
// Emits the addressing-mode bytes for a memory operand of the form
// [base + index*scale + disp]: the ModRM byte (with `reg` in its reg field),
// a SIB byte when one is required, and the displacement, if any.
//
//   reg    - register whose encoding goes into bits 3..5 of the ModRM byte
//   base   - base register, or an invalid register for "no base"
//   index  - index register, or an invalid register for "no index"
//   scale  - scale factor for the index; must not be no_scale if index is valid
//   disp   - displacement
//   rspec  - relocation for the displacement; whenever its type is not none,
//            the disp32 form is used and emitted through emit_data
//   rip_relative_correction - extra bytes added to the next-instruction
//            address in the RIP-relative case (_LP64 only)
//
// Only the low three bits of each register encoding are written here
// (encodings >= 8 have 8 subtracted); the extension bits are presumably
// supplied by a REX prefix emitted by the caller -- confirm against callers.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
  int regenc = reg->encoding();
  if (regenc >= 8) {
    regenc -= 8;
  }
  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      int indexenc = index->encoding();
      if (indexenc >= 8) {
        indexenc -= 8;
      }
      int baseenc = base->encoding();
      if (baseenc >= 8) {
        baseenc -= 8;
      }
      // [base + index*scale + disp]
      // The displacement-free form is not used for rbp/r13 as base;
      // presumably because a base field of 101 with mod 00 selects the
      // disp32-only form shown further down.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp && base != r13) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp || base == r12) {
      // [rsp + disp]
      // rsp/r12 as base always get a SIB byte (0x24: no index, base 100).
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc << 3);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc << 3);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc << 3);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp && base != r12, "illegal addressing mode");
      int baseenc = base->encoding();
      if (baseenc >= 8) {
        baseenc -= 8;
      }
      // As above, rbp/r13 never use the displacement-free form.
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp && base != r13) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc << 3 | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc << 3 | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc << 3 | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      int indexenc = index->encoding();
      if (indexenc >= 8) {
        indexenc -= 8;
      }
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc << 3);
      emit_byte(scale << 6 | indexenc << 3 | 0x05);
      emit_data(disp, rspec, disp32_operand);
#ifdef _LP64
    } else if (rtype != relocInfo::none ) {
      // [disp] RIP-RELATIVE
      // [00 reg 101] disp32

      emit_byte(0x05 | regenc << 3);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      // next_ip: current pc, plus the 4-byte disp about to be emitted, plus
      // whatever the caller says follows it (rip_relative_correction).
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      // Rebase disp from "relative to the instruction mark" to
      // "relative to next_ip".
      int64_t adjusted = (int64_t) disp -  (next_ip - inst_mark());
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int) adjusted, rspec, disp32_operand);

#endif // _LP64
    } else {
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc << 3);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
 436 
// XMM-register variant of emit_operand: emits the addressing-mode bytes for
// a memory operand of the form [base + index*scale + disp] -- the ModRM byte
// (with the XMM register `reg` in its reg field), a SIB byte when one is
// required, and the displacement, if any.
//
//   reg    - XMM register whose encoding goes into bits 3..5 of the ModRM byte
//   base   - base register, or an invalid register for "no base"
//   index  - index register, or an invalid register for "no index"
//   scale  - scale factor for the index; must not be no_scale if index is valid
//   disp   - displacement
//   rspec  - relocation for the displacement; whenever its type is not none,
//            the disp32 form is used and emitted through emit_data
//   rip_relative_correction - extra bytes added to the next-instruction
//            address in the RIP-relative case (_LP64 only)
//
// Only the low three bits of each register encoding are written here
// (encodings >= 8 have 8 subtracted); the extension bits are presumably
// supplied by a REX prefix emitted by the caller -- confirm against callers.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
  int regenc = reg->encoding();
  if (regenc >= 8) {
    regenc -= 8;
  }
  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      int indexenc = index->encoding();
      if (indexenc >= 8) {
        indexenc -= 8;
      }
      int baseenc = base->encoding();
      if (baseenc >= 8) {
        baseenc -= 8;
      }
      // [base + index*scale + disp]
      // The displacement-free form is not used for rbp/r13 as base;
      // presumably because a base field of 101 with mod 00 selects the
      // disp32-only form shown further down.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp && base != r13) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + disp8]
        // [01 reg 100][ss index base] disp8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc << 3);
        emit_byte(scale << 6 | indexenc << 3 | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp || base == r12) {
      // [rsp + disp]
      // rsp/r12 as base always get a SIB byte (0x24: no index, base 100).
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc << 3);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc << 3);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc << 3);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp && base != r12, "illegal addressing mode");
      int baseenc = base->encoding();
      if (baseenc >= 8) {
        baseenc -= 8;
      }
      // As above, rbp/r13 never use the displacement-free form.
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp && base != r13) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc << 3 | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + imm8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc << 3 | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + imm32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc << 3 | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      int indexenc = index->encoding();
      if (indexenc >= 8) {
        indexenc -= 8;
      }
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc << 3);
      emit_byte(scale << 6 | indexenc << 3 | 0x05);
      emit_data(disp, rspec, disp32_operand);
#ifdef _LP64
    } else if ( rtype != relocInfo::none ) {
      // [disp] RIP-RELATIVE
      // [00 reg 101] disp32
      emit_byte(0x05 | regenc << 3);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;

      assert(inst_mark() != NULL, "must be inside InstructionMark");
      // next_ip: current pc, plus the 4-byte disp about to be emitted, plus
      // whatever the caller says follows it (rip_relative_correction).
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;

      // Rebase disp from "relative to the instruction mark" to
      // "relative to next_ip".
      int64_t adjusted = (int64_t) disp -  (next_ip - inst_mark());
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int) adjusted, rspec, disp32_operand);
#endif // _LP64
    } else {
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc << 3);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
 566 
 567 // Secret local extension to Assembler::WhichOperand:
 568 #define end_pc_operand (_WhichOperand_limit)
 569 
 570 address Assembler::locate_operand(address inst, WhichOperand which) {
 571   // Decode the given instruction, and return the address of
 572   // an embedded 32-bit operand word.
 573 
 574   // If "which" is disp32_operand, selects the displacement portion
 575   // of an effective address specifier.
 576   // If "which" is imm64_operand, selects the trailing immediate constant.
 577   // If "which" is call32_operand, selects the displacement of a call or jump.
 578   // Caller is responsible for ensuring that there is such an operand,
 579   // and that it is 32/64 bits wide.
 580 
 581   // If "which" is end_pc_operand, find the end of the instruction.
 582 
 583   address ip = inst;
 584   bool is_64bit = false;
 585 
 586   debug_only(bool has_disp32 = false);
 587   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
 588 
 589   again_after_prefix:
 590   switch (0xFF & *ip++) {
 591 
 592   // These convenience macros generate groups of "case" labels for the switch.
 593 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
 594 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
 595              case (x)+4: case (x)+5: case (x)+6: case (x)+7
 596 #define REP16(x) REP8((x)+0): \
 597               case REP8((x)+8)
 598 
 599   case CS_segment:
 600   case SS_segment:
 601   case DS_segment:
 602   case ES_segment:
 603   case FS_segment:
 604   case GS_segment:
 605     assert(0, "shouldn't have that prefix");
 606     assert(ip == inst + 1 || ip == inst + 2, "only two prefixes allowed");
 607     goto again_after_prefix;
 608 
 609   case 0x67:
 610   case REX:
 611   case REX_B:
 612   case REX_X:
 613   case REX_XB:
 614   case REX_R:
 615   case REX_RB:
 616   case REX_RX:
 617   case REX_RXB:
 618 //     assert(ip == inst + 1, "only one prefix allowed");
 619     goto again_after_prefix;
 620 
 621   case REX_W:
 622   case REX_WB:
 623   case REX_WX:
 624   case REX_WXB:
 625   case REX_WR:
 626   case REX_WRB:
 627   case REX_WRX:
 628   case REX_WRXB:
 629     is_64bit = true;
 630 //     assert(ip == inst + 1, "only one prefix allowed");
 631     goto again_after_prefix;
 632 
 633   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
 634   case 0x88: // movb a, r
 635   case 0x89: // movl a, r
 636   case 0x8A: // movb r, a
 637   case 0x8B: // movl r, a
 638   case 0x8F: // popl a
 639     debug_only(has_disp32 = true);
 640     break;
 641 
 642   case 0x68: // pushq #32
 643     if (which == end_pc_operand) {
 644       return ip + 4;
 645     }
 646     assert(0, "pushq has no disp32 or imm64");
 647     ShouldNotReachHere();
 648 
 649   case 0x66: // movw ... (size prefix)
 650     again_after_size_prefix2:
 651     switch (0xFF & *ip++) {
 652     case REX:
 653     case REX_B:
 654     case REX_X:
 655     case REX_XB:
 656     case REX_R:
 657     case REX_RB:
 658     case REX_RX:
 659     case REX_RXB:
 660     case REX_W:
 661     case REX_WB:
 662     case REX_WX:
 663     case REX_WXB:
 664     case REX_WR:
 665     case REX_WRB:
 666     case REX_WRX:
 667     case REX_WRXB:
 668       goto again_after_size_prefix2;
 669     case 0x8B: // movw r, a
 670     case 0x89: // movw a, r
 671       break;
 672     case 0xC7: // movw a, #16
 673       tail_size = 2;  // the imm16
 674       break;
 675     case 0x0F: // several SSE/SSE2 variants
 676       ip--;    // reparse the 0x0F
 677       goto again_after_prefix;
 678     default:
 679       ShouldNotReachHere();
 680     }
 681     break;
 682 
 683   case REP8(0xB8): // movl/q r, #32/#64(oop?)
 684     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
 685     assert((which == call32_operand || which == imm64_operand) && is_64bit, "");
 686     return ip;
 687 
 688   case 0x69: // imul r, a, #32
 689   case 0xC7: // movl a, #32(oop?)
 690     tail_size = 4;
 691     debug_only(has_disp32 = true); // has both kinds of operands!
 692     break;
 693 
 694   case 0x0F: // movx..., etc.
 695     switch (0xFF & *ip++) {
 696     case 0x12: // movlps
 697     case 0x28: // movaps
 698     case 0x2E: // ucomiss
 699     case 0x2F: // comiss
 700     case 0x54: // andps
 701     case 0x57: // xorps
 702     case 0x6E: // movd
 703     case 0x7E: // movd
 704     case 0xAE: // ldmxcsr   a
 705       debug_only(has_disp32 = true); // has both kinds of operands!
 706       break;
 707     case 0xAD: // shrd r, a, %cl
 708     case 0xAF: // imul r, a
 709     case 0xBE: // movsbl r, a
 710     case 0xBF: // movswl r, a
 711     case 0xB6: // movzbl r, a
 712     case 0xB7: // movzwl r, a
 713     case REP16(0x40): // cmovl cc, r, a
 714     case 0xB0: // cmpxchgb
 715     case 0xB1: // cmpxchg
 716     case 0xC1: // xaddl
 717     case 0xC7: // cmpxchg8
 718     case REP16(0x90): // setcc a
 719       debug_only(has_disp32 = true);
 720       // fall out of the switch to decode the address
 721       break;
 722     case 0xAC: // shrd r, a, #8
 723       debug_only(has_disp32 = true);
 724       tail_size = 1;  // the imm8
 725       break;
 726     case REP16(0x80): // jcc rdisp32
 727       if (which == end_pc_operand)  return ip + 4;
 728       assert(which == call32_operand, "jcc has no disp32 or imm64");
 729       return ip;
 730     default:
 731       ShouldNotReachHere();
 732     }
 733     break;
 734 
 735   case 0x81: // addl a, #32; addl r, #32
 736     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 737     tail_size = 4;
 738     debug_only(has_disp32 = true); // has both kinds of operands!
 739     break;
 740 
 741   case 0x83: // addl a, #8; addl r, #8
 742     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 743     debug_only(has_disp32 = true); // has both kinds of operands!
 744     tail_size = 1;
 745     break;
 746 
 747   case 0x9B:
 748     switch (0xFF & *ip++) {
 749     case 0xD9: // fnstcw a
 750       debug_only(has_disp32 = true);
 751       break;
 752     default:
 753       ShouldNotReachHere();
 754     }
 755     break;
 756 
 757   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
 758   case REP4(0x10): // adc...
 759   case REP4(0x20): // and...
 760   case REP4(0x30): // xor...
 761   case REP4(0x08): // or...
 762   case REP4(0x18): // sbb...
 763   case REP4(0x28): // sub...
 764   case 0xF7: // mull a
 765   case 0x87: // xchg r, a
 766     debug_only(has_disp32 = true);
 767     break;
 768   case REP4(0x38): // cmp...
 769   case 0x8D: // lea r, a
 770   case 0x85: // test r, a
 771     debug_only(has_disp32 = true); // has both kinds of operands!
 772     break;
 773 
 774   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
 775   case 0xC6: // movb a, #8
 776   case 0x80: // cmpb a, #8
 777   case 0x6B: // imul r, a, #8
 778     debug_only(has_disp32 = true); // has both kinds of operands!
 779     tail_size = 1; // the imm8
 780     break;
 781 
 782   case 0xE8: // call rdisp32
 783   case 0xE9: // jmp  rdisp32
 784     if (which == end_pc_operand)  return ip + 4;
 785     assert(which == call32_operand, "call has no disp32 or imm32");
 786     return ip;
 787 
 788   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 789   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 790   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 791   case 0xDD: // fld_d a; fst_d a; fstp_d a
 792   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 793   case 0xDF: // fild_d a; fistp_d a
 794   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 795   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 796   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 797     debug_only(has_disp32 = true);
 798     break;
 799 
 800   case 0xF3:                    // For SSE
 801   case 0xF2:                    // For SSE2
 802     switch (0xFF & *ip++) {
 803     case REX:
 804     case REX_B:
 805     case REX_X:
 806     case REX_XB:
 807     case REX_R:
 808     case REX_RB:
 809     case REX_RX:
 810     case REX_RXB:
 811     case REX_W:
 812     case REX_WB:
 813     case REX_WX:
 814     case REX_WXB:
 815     case REX_WR:
 816     case REX_WRB:
 817     case REX_WRX:
 818     case REX_WRXB:
 819       ip++;
 820     default:
 821       ip++;
 822     }
 823     debug_only(has_disp32 = true); // has both kinds of operands!
 824     break;
 825 
 826   default:
 827     ShouldNotReachHere();
 828 
 829 #undef REP8
 830 #undef REP16
 831   }
 832 
 833   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
 834   assert(which != imm64_operand, "instruction is not a movq reg, imm64");
 835   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
 836 
 837   // parse the output of emit_operand
 838   int op2 = 0xFF & *ip++;
 839   int base = op2 & 0x07;
 840   int op3 = -1;
 841   const int b100 = 4;
 842   const int b101 = 5;
 843   if (base == b100 && (op2 >> 6) != 3) {
 844     op3 = 0xFF & *ip++;
 845     base = op3 & 0x07;   // refetch the base
 846   }
 847   // now ip points at the disp (if any)
 848 
 849   switch (op2 >> 6) {
 850   case 0:
 851     // [00 reg  100][ss index base]
 852     // [00 reg  100][00   100  esp]
 853     // [00 reg base]
 854     // [00 reg  100][ss index  101][disp32]
 855     // [00 reg  101]               [disp32]
 856 
 857     if (base == b101) {
 858       if (which == disp32_operand)
 859         return ip;              // caller wants the disp32
 860       ip += 4;                  // skip the disp32
 861     }
 862     break;
 863 
 864   case 1:
 865     // [01 reg  100][ss index base][disp8]
 866     // [01 reg  100][00   100  esp][disp8]
 867     // [01 reg base]               [disp8]
 868     ip += 1;                    // skip the disp8
 869     break;
 870 
 871   case 2:
 872     // [10 reg  100][ss index base][disp32]
 873     // [10 reg  100][00   100  esp][disp32]
 874     // [10 reg base]               [disp32]
 875     if (which == disp32_operand)
 876       return ip;                // caller wants the disp32
 877     ip += 4;                    // skip the disp32
 878     break;
 879 
 880   case 3:
 881     // [11 reg base]  (not a memory addressing mode)
 882     break;
 883   }
 884 
 885   if (which == end_pc_operand) {
 886     return ip + tail_size;
 887   }
 888 
 889   assert(0, "fix locate_operand");
 890   return ip;
 891 }
 892 
 893 address Assembler::locate_next_instruction(address inst) {
 894   // Secretly share code with locate_operand:
 895   return locate_operand(inst, end_pc_operand);
 896 }
 897 
 898 #ifdef ASSERT
 899 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
 900   address inst = inst_mark();
 901   assert(inst != NULL && inst < pc(),
 902          "must point to beginning of instruction");
 903   address opnd;
 904 
 905   Relocation* r = rspec.reloc();
 906   if (r->type() == relocInfo::none) {
 907     return;
 908   } else if (r->is_call() || format == call32_operand) {
 909     opnd = locate_operand(inst, call32_operand);
 910   } else if (r->is_data()) {
 911     assert(format == imm64_operand || format == disp32_operand, "format ok");
 912     opnd = locate_operand(inst, (WhichOperand) format);
 913   } else {
 914     assert(format == 0, "cannot specify a format");
 915     return;
 916   }
 917   assert(opnd == pc(), "must put operand where relocs can find it");
 918 }
 919 #endif
 920 
 921 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
 922   if (reg_enc >= 8) {
 923     prefix(REX_B);
 924     reg_enc -= 8;
 925   } else if (byteinst && reg_enc >= 4) {
 926     prefix(REX);
 927   }
 928   return reg_enc;
 929 }
 930 
 931 int Assembler::prefixq_and_encode(int reg_enc) {
 932   if (reg_enc < 8) {
 933     prefix(REX_W);
 934   } else {
 935     prefix(REX_WB);
 936     reg_enc -= 8;
 937   }
 938   return reg_enc;
 939 }
 940 
 941 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
 942   if (dst_enc < 8) {
 943     if (src_enc >= 8) {
 944       prefix(REX_B);
 945       src_enc -= 8;
 946     } else if (byteinst && src_enc >= 4) {
 947       prefix(REX);
 948     }
 949   } else {
 950     if (src_enc < 8) {
 951       prefix(REX_R);
 952     } else {
 953       prefix(REX_RB);
 954       src_enc -= 8;
 955     }
 956     dst_enc -= 8;
 957   }
 958   return dst_enc << 3 | src_enc;
 959 }
 960 
 961 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
 962   if (dst_enc < 8) {
 963     if (src_enc < 8) {
 964       prefix(REX_W);
 965     } else {
 966       prefix(REX_WB);
 967       src_enc -= 8;
 968     }
 969   } else {
 970     if (src_enc < 8) {
 971       prefix(REX_WR);
 972     } else {
 973       prefix(REX_WRB);
 974       src_enc -= 8;
 975     }
 976     dst_enc -= 8;
 977   }
 978   return dst_enc << 3 | src_enc;
 979 }
 980 
 981 void Assembler::prefix(Register reg) {
 982   if (reg->encoding() >= 8) {
 983     prefix(REX_B);
 984   }
 985 }
 986 
 987 void Assembler::prefix(Address adr) {
 988   if (adr.base_needs_rex()) {
 989     if (adr.index_needs_rex()) {
 990       prefix(REX_XB);
 991     } else {
 992       prefix(REX_B);
 993     }
 994   } else {
 995     if (adr.index_needs_rex()) {
 996       prefix(REX_X);
 997     }
 998   }
 999 }
1000 
1001 void Assembler::prefixq(Address adr) {
1002   if (adr.base_needs_rex()) {
1003     if (adr.index_needs_rex()) {
1004       prefix(REX_WXB);
1005     } else {
1006       prefix(REX_WB);
1007     }
1008   } else {
1009     if (adr.index_needs_rex()) {
1010       prefix(REX_WX);
1011     } else {
1012       prefix(REX_W);
1013     }
1014   }
1015 }
1016 
1017 
1018 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
1019   if (reg->encoding() < 8) {
1020     if (adr.base_needs_rex()) {
1021       if (adr.index_needs_rex()) {
1022         prefix(REX_XB);
1023       } else {
1024         prefix(REX_B);
1025       }
1026     } else {
1027       if (adr.index_needs_rex()) {
1028         prefix(REX_X);
1029       } else if (reg->encoding() >= 4 ) {
1030         prefix(REX);
1031       }
1032     }
1033   } else {
1034     if (adr.base_needs_rex()) {
1035       if (adr.index_needs_rex()) {
1036         prefix(REX_RXB);
1037       } else {
1038         prefix(REX_RB);
1039       }
1040     } else {
1041       if (adr.index_needs_rex()) {
1042         prefix(REX_RX);
1043       } else {
1044         prefix(REX_R);
1045       }
1046     }
1047   }
1048 }
1049 
1050 void Assembler::prefixq(Address adr, Register src) {
1051   if (src->encoding() < 8) {
1052     if (adr.base_needs_rex()) {
1053       if (adr.index_needs_rex()) {
1054         prefix(REX_WXB);
1055       } else {
1056         prefix(REX_WB);
1057       }
1058     } else {
1059       if (adr.index_needs_rex()) {
1060         prefix(REX_WX);
1061       } else {
1062         prefix(REX_W);
1063       }
1064     }
1065   } else {
1066     if (adr.base_needs_rex()) {
1067       if (adr.index_needs_rex()) {
1068         prefix(REX_WRXB);
1069       } else {
1070         prefix(REX_WRB);
1071       }
1072     } else {
1073       if (adr.index_needs_rex()) {
1074         prefix(REX_WRX);
1075       } else {
1076         prefix(REX_WR);
1077       }
1078     }
1079   }
1080 }
1081 
1082 void Assembler::prefix(Address adr, XMMRegister reg) {
1083   if (reg->encoding() < 8) {
1084     if (adr.base_needs_rex()) {
1085       if (adr.index_needs_rex()) {
1086         prefix(REX_XB);
1087       } else {
1088         prefix(REX_B);
1089       }
1090     } else {
1091       if (adr.index_needs_rex()) {
1092         prefix(REX_X);
1093       }
1094     }
1095   } else {
1096     if (adr.base_needs_rex()) {
1097       if (adr.index_needs_rex()) {
1098         prefix(REX_RXB);
1099       } else {
1100         prefix(REX_RB);
1101       }
1102     } else {
1103       if (adr.index_needs_rex()) {
1104         prefix(REX_RX);
1105       } else {
1106         prefix(REX_R);
1107       }
1108     }
1109   }
1110 }
1111 
1112 void Assembler::emit_operand(Register reg, Address adr,
1113                              int rip_relative_correction) {
1114   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1115                adr._rspec,
1116                rip_relative_correction);
1117 }
1118 
1119 void Assembler::emit_operand(XMMRegister reg, Address adr,
1120                              int rip_relative_correction) {
1121   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1122                adr._rspec,
1123                rip_relative_correction);
1124 }
1125 
1126 void Assembler::emit_farith(int b1, int b2, int i) {
1127   assert(isByte(b1) && isByte(b2), "wrong opcode");
1128   assert(0 <= i &&  i < 8, "illegal stack offset");
1129   emit_byte(b1);
1130   emit_byte(b2 + i);
1131 }
1132 
1133 // pushad is invalid, use this instead.
1134 // NOTE: Kills flags!!
1135 void Assembler::pushaq() {
1136   // we have to store original rsp.  ABI says that 128 bytes
1137   // below rsp are local scratch.
1138   movq(Address(rsp, -5 * wordSize), rsp);
1139 
1140   subq(rsp, 16 * wordSize);
1141 
1142   movq(Address(rsp, 15 * wordSize), rax);
1143   movq(Address(rsp, 14 * wordSize), rcx);
1144   movq(Address(rsp, 13 * wordSize), rdx);
1145   movq(Address(rsp, 12 * wordSize), rbx);
1146   // skip rsp
1147   movq(Address(rsp, 10 * wordSize), rbp);
1148   movq(Address(rsp, 9 * wordSize), rsi);
1149   movq(Address(rsp, 8 * wordSize), rdi);
1150   movq(Address(rsp, 7 * wordSize), r8);
1151   movq(Address(rsp, 6 * wordSize), r9);
1152   movq(Address(rsp, 5 * wordSize), r10);
1153   movq(Address(rsp, 4 * wordSize), r11);
1154   movq(Address(rsp, 3 * wordSize), r12);
1155   movq(Address(rsp, 2 * wordSize), r13);
1156   movq(Address(rsp, wordSize), r14);
1157   movq(Address(rsp, 0), r15);
1158 }
1159 
1160 // popad is invalid, use this instead
1161 // NOTE: Kills flags!!
1162 void Assembler::popaq() {
1163   movq(r15, Address(rsp, 0));
1164   movq(r14, Address(rsp, wordSize));
1165   movq(r13, Address(rsp, 2 * wordSize));
1166   movq(r12, Address(rsp, 3 * wordSize));
1167   movq(r11, Address(rsp, 4 * wordSize));
1168   movq(r10, Address(rsp, 5 * wordSize));
1169   movq(r9,  Address(rsp, 6 * wordSize));
1170   movq(r8,  Address(rsp, 7 * wordSize));
1171   movq(rdi, Address(rsp, 8 * wordSize));
1172   movq(rsi, Address(rsp, 9 * wordSize));
1173   movq(rbp, Address(rsp, 10 * wordSize));
1174   // skip rsp
1175   movq(rbx, Address(rsp, 12 * wordSize));
1176   movq(rdx, Address(rsp, 13 * wordSize));
1177   movq(rcx, Address(rsp, 14 * wordSize));
1178   movq(rax, Address(rsp, 15 * wordSize));
1179 
1180   addq(rsp, 16 * wordSize);
1181 }
1182 
1183 void Assembler::pushfq() {
1184   emit_byte(0x9C);
1185 }
1186 
1187 void Assembler::popfq() {
1188   emit_byte(0x9D);
1189 }
1190 
1191 void Assembler::pushq(int imm32) {
1192   emit_byte(0x68);
1193   emit_long(imm32);
1194 }
1195 
1196 void Assembler::pushq(Register src) {
1197   int encode = prefix_and_encode(src->encoding());
1198 
1199   emit_byte(0x50 | encode);
1200 }
1201 
1202 void Assembler::pushq(Address src) {
1203   InstructionMark im(this);
1204   prefix(src);
1205   emit_byte(0xFF);
1206   emit_operand(rsi, src);
1207 }
1208 
1209 void Assembler::popq(Register dst) {
1210   int encode = prefix_and_encode(dst->encoding());
1211   emit_byte(0x58 | encode);
1212 }
1213 
1214 void Assembler::popq(Address dst) {
1215   InstructionMark im(this);
1216   prefix(dst);
1217   emit_byte(0x8F);
1218   emit_operand(rax, dst);
1219 }
1220 
1221 void Assembler::prefix(Prefix p) {
1222   a_byte(p);
1223 }
1224 
1225 void Assembler::movb(Register dst, Address src) {
1226   InstructionMark im(this);
1227   prefix(src, dst, true);
1228   emit_byte(0x8A);
1229   emit_operand(dst, src);
1230 }
1231 
1232 void Assembler::movb(Address dst, int imm8) {
1233   InstructionMark im(this);
1234   prefix(dst);
1235   emit_byte(0xC6);
1236   emit_operand(rax, dst, 1);
1237   emit_byte(imm8);
1238 }
1239 
1240 void Assembler::movb(Address dst, Register src) {
1241   InstructionMark im(this);
1242   prefix(dst, src, true);
1243   emit_byte(0x88);
1244   emit_operand(src, dst);
1245 }
1246 
1247 void Assembler::movw(Address dst, int imm16) {
1248   InstructionMark im(this);
1249   emit_byte(0x66); // switch to 16-bit mode
1250   prefix(dst);
1251   emit_byte(0xC7);
1252   emit_operand(rax, dst, 2);
1253   emit_word(imm16);
1254 }
1255 
1256 void Assembler::movw(Register dst, Address src) {
1257   InstructionMark im(this);
1258   emit_byte(0x66);
1259   prefix(src, dst);
1260   emit_byte(0x8B);
1261   emit_operand(dst, src);
1262 }
1263 
1264 void Assembler::movw(Address dst, Register src) {
1265   InstructionMark im(this);
1266   emit_byte(0x66);
1267   prefix(dst, src);
1268   emit_byte(0x89);
1269   emit_operand(src, dst);
1270 }
1271 
1272 // Uses zero extension.
1273 void Assembler::movl(Register dst, int imm32) {
1274   int encode = prefix_and_encode(dst->encoding());
1275   emit_byte(0xB8 | encode);
1276   emit_long(imm32);
1277 }
1278 
1279 void Assembler::movl(Register dst, Register src) {
1280   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1281   emit_byte(0x8B);
1282   emit_byte(0xC0 | encode);
1283 }
1284 
1285 void Assembler::movl(Register dst, Address src) {
1286   InstructionMark im(this);
1287   prefix(src, dst);
1288   emit_byte(0x8B);
1289   emit_operand(dst, src);
1290 }
1291 
1292 void Assembler::movl(Address dst, int imm32) {
1293   InstructionMark im(this);
1294   prefix(dst);
1295   emit_byte(0xC7);
1296   emit_operand(rax, dst, 4);
1297   emit_long(imm32);
1298 }
1299 
1300 void Assembler::movl(Address dst, Register src) {
1301   InstructionMark im(this);
1302   prefix(dst, src);
1303   emit_byte(0x89);
1304   emit_operand(src, dst);
1305 }
1306 
1307 void Assembler::mov64(Register dst, intptr_t imm64) {
1308   InstructionMark im(this);
1309   int encode = prefixq_and_encode(dst->encoding());
1310   emit_byte(0xB8 | encode);
1311   emit_long64(imm64);
1312 }
1313 
1314 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
1315   InstructionMark im(this);
1316   int encode = prefixq_and_encode(dst->encoding());
1317   emit_byte(0xB8 | encode);
1318   emit_data64(imm64, rspec);
1319 }
1320 
1321 void Assembler::movq(Register dst, Register src) {
1322   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1323   emit_byte(0x8B);
1324   emit_byte(0xC0 | encode);
1325 }
1326 
1327 void Assembler::movq(Register dst, Address src) {
1328   InstructionMark im(this);
1329   prefixq(src, dst);
1330   emit_byte(0x8B);
1331   emit_operand(dst, src);
1332 }
1333 
1334 void Assembler::mov64(Address dst, intptr_t imm32) {
1335   assert(is_simm32(imm32), "lost bits");
1336   InstructionMark im(this);
1337   prefixq(dst);
1338   emit_byte(0xC7);
1339   emit_operand(rax, dst, 4);
1340   emit_long(imm32);
1341 }
1342 
1343 void Assembler::movq(Address dst, Register src) {
1344   InstructionMark im(this);
1345   prefixq(dst, src);
1346   emit_byte(0x89);
1347   emit_operand(src, dst);
1348 }
1349 
1350 void Assembler::movsbl(Register dst, Address src) {
1351   InstructionMark im(this);
1352   prefix(src, dst);
1353   emit_byte(0x0F);
1354   emit_byte(0xBE);
1355   emit_operand(dst, src);
1356 }
1357 
1358 void Assembler::movsbl(Register dst, Register src) {
1359   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1360   emit_byte(0x0F);
1361   emit_byte(0xBE);
1362   emit_byte(0xC0 | encode);
1363 }
1364 
1365 void Assembler::movswl(Register dst, Address src) {
1366   InstructionMark im(this);
1367   prefix(src, dst);
1368   emit_byte(0x0F);
1369   emit_byte(0xBF);
1370   emit_operand(dst, src);
1371 }
1372 
1373 void Assembler::movswl(Register dst, Register src) {
1374   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1375   emit_byte(0x0F);
1376   emit_byte(0xBF);
1377   emit_byte(0xC0 | encode);
1378 }
1379 
1380 void Assembler::movslq(Register dst, Address src) {
1381   InstructionMark im(this);
1382   prefixq(src, dst);
1383   emit_byte(0x63);
1384   emit_operand(dst, src);
1385 }
1386 
1387 void Assembler::movslq(Register dst, Register src) {
1388   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1389   emit_byte(0x63);
1390   emit_byte(0xC0 | encode);
1391 }
1392 
1393 void Assembler::movzbl(Register dst, Address src) {
1394   InstructionMark im(this);
1395   prefix(src, dst);
1396   emit_byte(0x0F);
1397   emit_byte(0xB6);
1398   emit_operand(dst, src);
1399 }
1400 
1401 void Assembler::movzbl(Register dst, Register src) {
1402   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1403   emit_byte(0x0F);
1404   emit_byte(0xB6);
1405   emit_byte(0xC0 | encode);
1406 }
1407 
1408 void Assembler::movzwl(Register dst, Address src) {
1409   InstructionMark im(this);
1410   prefix(src, dst);
1411   emit_byte(0x0F);
1412   emit_byte(0xB7);
1413   emit_operand(dst, src);
1414 }
1415 
1416 void Assembler::movzwl(Register dst, Register src) {
1417   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1418   emit_byte(0x0F);
1419   emit_byte(0xB7);
1420   emit_byte(0xC0 | encode);
1421 }
1422 
1423 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1424   emit_byte(0xF3);
1425   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1426   emit_byte(0x0F);
1427   emit_byte(0x10);
1428   emit_byte(0xC0 | encode);
1429 }
1430 
1431 void Assembler::movss(XMMRegister dst, Address src) {
1432   InstructionMark im(this);
1433   emit_byte(0xF3);
1434   prefix(src, dst);
1435   emit_byte(0x0F);
1436   emit_byte(0x10);
1437   emit_operand(dst, src);
1438 }
1439 
1440 void Assembler::movss(Address dst, XMMRegister src) {
1441   InstructionMark im(this);
1442   emit_byte(0xF3);
1443   prefix(dst, src);
1444   emit_byte(0x0F);
1445   emit_byte(0x11);
1446   emit_operand(src, dst);
1447 }
1448 
1449 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1450   emit_byte(0xF2);
1451   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1452   emit_byte(0x0F);
1453   emit_byte(0x10);
1454   emit_byte(0xC0 | encode);
1455 }
1456 
1457 void Assembler::movsd(XMMRegister dst, Address src) {
1458   InstructionMark im(this);
1459   emit_byte(0xF2);
1460   prefix(src, dst);
1461   emit_byte(0x0F);
1462   emit_byte(0x10);
1463   emit_operand(dst, src);
1464 }
1465 
1466 void Assembler::movsd(Address dst, XMMRegister src) {
1467   InstructionMark im(this);
1468   emit_byte(0xF2);
1469   prefix(dst, src);
1470   emit_byte(0x0F);
1471   emit_byte(0x11);
1472   emit_operand(src, dst);
1473 }
1474 
1475 // New cpus require to use movsd and movss to avoid partial register stall
1476 // when loading from memory. But for old Opteron use movlpd instead of movsd.
1477 // The selection is done in MacroAssembler::movdbl() and movflt().
1478 void Assembler::movlpd(XMMRegister dst, Address src) {
1479   InstructionMark im(this);
1480   emit_byte(0x66);
1481   prefix(src, dst);
1482   emit_byte(0x0F);
1483   emit_byte(0x12);
1484   emit_operand(dst, src);
1485 }
1486 
1487 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1488   int dstenc = dst->encoding();
1489   int srcenc = src->encoding();
1490   emit_byte(0x66);
1491   if (dstenc < 8) {
1492     if (srcenc >= 8) {
1493       prefix(REX_B);
1494       srcenc -= 8;
1495     }
1496   } else {
1497     if (srcenc < 8) {
1498       prefix(REX_R);
1499     } else {
1500       prefix(REX_RB);
1501       srcenc -= 8;
1502     }
1503     dstenc -= 8;
1504   }
1505   emit_byte(0x0F);
1506   emit_byte(0x28);
1507   emit_byte(0xC0 | dstenc << 3 | srcenc);
1508 }
1509 
1510 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1511   int dstenc = dst->encoding();
1512   int srcenc = src->encoding();
1513   if (dstenc < 8) {
1514     if (srcenc >= 8) {
1515       prefix(REX_B);
1516       srcenc -= 8;
1517     }
1518   } else {
1519     if (srcenc < 8) {
1520       prefix(REX_R);
1521     } else {
1522       prefix(REX_RB);
1523       srcenc -= 8;
1524     }
1525     dstenc -= 8;
1526   }
1527   emit_byte(0x0F);
1528   emit_byte(0x28);
1529   emit_byte(0xC0 | dstenc << 3 | srcenc);
1530 }
1531 
1532 void Assembler::movdl(XMMRegister dst, Register src) {
1533   emit_byte(0x66);
1534   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1535   emit_byte(0x0F);
1536   emit_byte(0x6E);
1537   emit_byte(0xC0 | encode);
1538 }
1539 
1540 void Assembler::movdl(Register dst, XMMRegister src) {
1541   emit_byte(0x66);
1542   // swap src/dst to get correct prefix
1543   int encode = prefix_and_encode(src->encoding(), dst->encoding());
1544   emit_byte(0x0F);
1545   emit_byte(0x7E);
1546   emit_byte(0xC0 | encode);
1547 }
1548 
1549 void Assembler::movdq(XMMRegister dst, Register src) {
1550   emit_byte(0x66);
1551   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1552   emit_byte(0x0F);
1553   emit_byte(0x6E);
1554   emit_byte(0xC0 | encode);
1555 }
1556 
1557 void Assembler::movdq(Register dst, XMMRegister src) {
1558   emit_byte(0x66);
1559   // swap src/dst to get correct prefix
1560   int encode = prefixq_and_encode(src->encoding(), dst->encoding());
1561   emit_byte(0x0F);
1562   emit_byte(0x7E);
1563   emit_byte(0xC0 | encode);
1564 }
1565 
1566 void Assembler::pxor(XMMRegister dst, Address src) {
1567   InstructionMark im(this);
1568   emit_byte(0x66);
1569   prefix(src, dst);
1570   emit_byte(0x0F);
1571   emit_byte(0xEF);
1572   emit_operand(dst, src);
1573 }
1574 
1575 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
1576   InstructionMark im(this);
1577   emit_byte(0x66);
1578   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1579   emit_byte(0x0F);
1580   emit_byte(0xEF);
1581   emit_byte(0xC0 | encode);
1582 }
1583 
1584 void Assembler::movdqa(XMMRegister dst, Address src) {
1585   InstructionMark im(this);
1586   emit_byte(0x66);
1587   prefix(src, dst);
1588   emit_byte(0x0F);
1589   emit_byte(0x6F);
1590   emit_operand(dst, src);
1591 }
1592 
1593 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1594   emit_byte(0x66);
1595   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1596   emit_byte(0x0F);
1597   emit_byte(0x6F);
1598   emit_byte(0xC0 | encode);
1599 }
1600 
1601 void Assembler::movdqa(Address dst, XMMRegister src) {
1602   InstructionMark im(this);
1603   emit_byte(0x66);
1604   prefix(dst, src);
1605   emit_byte(0x0F);
1606   emit_byte(0x7F);
1607   emit_operand(src, dst);
1608 }
1609 
1610 void Assembler::movq(XMMRegister dst, Address src) {
1611   InstructionMark im(this);
1612   emit_byte(0xF3);
1613   prefix(src, dst);
1614   emit_byte(0x0F);
1615   emit_byte(0x7E);
1616   emit_operand(dst, src);
1617 }
1618 
1619 void Assembler::movq(Address dst, XMMRegister src) {
1620   InstructionMark im(this);
1621   emit_byte(0x66);
1622   prefix(dst, src);
1623   emit_byte(0x0F);
1624   emit_byte(0xD6);
1625   emit_operand(src, dst);
1626 }
1627 
1628 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
1629   assert(isByte(mode), "invalid value");
1630   emit_byte(0x66);
1631   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1632   emit_byte(0x0F);
1633   emit_byte(0x70);
1634   emit_byte(0xC0 | encode);
1635   emit_byte(mode & 0xFF);
1636 }
1637 
1638 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
1639   assert(isByte(mode), "invalid value");
1640   InstructionMark im(this);
1641   emit_byte(0x66);
1642   emit_byte(0x0F);
1643   emit_byte(0x70);
1644   emit_operand(dst, src);
1645   emit_byte(mode & 0xFF);
1646 }
1647 
1648 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
1649   assert(isByte(mode), "invalid value");
1650   emit_byte(0xF2);
1651   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1652   emit_byte(0x0F);
1653   emit_byte(0x70);
1654   emit_byte(0xC0 | encode);
1655   emit_byte(mode & 0xFF);
1656 }
1657 
1658 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
1659   assert(isByte(mode), "invalid value");
1660   InstructionMark im(this);
1661   emit_byte(0xF2);
1662   emit_byte(0x0F);
1663   emit_byte(0x70);
1664   emit_operand(dst, src);
1665   emit_byte(mode & 0xFF);
1666 }
1667 
1668 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1669   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1670   emit_byte(0x0F);
1671   emit_byte(0x40 | cc);
1672   emit_byte(0xC0 | encode);
1673 }
1674 
1675 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1676   InstructionMark im(this);
1677   prefix(src, dst);
1678   emit_byte(0x0F);
1679   emit_byte(0x40 | cc);
1680   emit_operand(dst, src);
1681 }
1682 
1683 void Assembler::cmovq(Condition cc, Register dst, Register src) {
1684   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1685   emit_byte(0x0F);
1686   emit_byte(0x40 | cc);
1687   emit_byte(0xC0 | encode);
1688 }
1689 
1690 void Assembler::cmovq(Condition cc, Register dst, Address src) {
1691   InstructionMark im(this);
1692   prefixq(src, dst);
1693   emit_byte(0x0F);
1694   emit_byte(0x40 | cc);
1695   emit_operand(dst, src);
1696 }
1697 
1698 void Assembler::prefetch_prefix(Address src) {
1699   prefix(src);
1700   emit_byte(0x0F);
1701 }
1702 
1703 void Assembler::prefetcht0(Address src) {
1704   InstructionMark im(this);
1705   prefetch_prefix(src);
1706   emit_byte(0x18);
1707   emit_operand(rcx, src); // 1, src
1708 }
1709 
1710 void Assembler::prefetcht1(Address src) {
1711   InstructionMark im(this);
1712   prefetch_prefix(src);
1713   emit_byte(0x18);
1714   emit_operand(rdx, src); // 2, src
1715 }
1716 
1717 void Assembler::prefetcht2(Address src) {
1718   InstructionMark im(this);
1719   prefetch_prefix(src);
1720   emit_byte(0x18);
1721   emit_operand(rbx, src); // 3, src
1722 }
1723 
1724 void Assembler::prefetchnta(Address src) {
1725   InstructionMark im(this);
1726   prefetch_prefix(src);
1727   emit_byte(0x18);
1728   emit_operand(rax, src); // 0, src
1729 }
1730 
1731 void Assembler::prefetchw(Address src) {
1732   InstructionMark im(this);
1733   prefetch_prefix(src);
1734   emit_byte(0x0D);
1735   emit_operand(rcx, src); // 1, src
1736 }
1737 
1738 void Assembler::adcl(Register dst, int imm32) {
1739   prefix(dst);
1740   emit_arith(0x81, 0xD0, dst, imm32);
1741 }
1742 
1743 void Assembler::adcl(Register dst, Address src) {
1744   InstructionMark im(this);
1745   prefix(src, dst);
1746   emit_byte(0x13);
1747   emit_operand(dst, src);
1748 }
1749 
1750 void Assembler::adcl(Register dst, Register src) {
1751   (void) prefix_and_encode(dst->encoding(), src->encoding());
1752   emit_arith(0x13, 0xC0, dst, src);
1753 }
1754 
1755 void Assembler::adcq(Register dst, int imm32) {
1756   (void) prefixq_and_encode(dst->encoding());
1757   emit_arith(0x81, 0xD0, dst, imm32);
1758 }
1759 
1760 void Assembler::adcq(Register dst, Address src) {
1761   InstructionMark im(this);
1762   prefixq(src, dst);
1763   emit_byte(0x13);
1764   emit_operand(dst, src);
1765 }
1766 
1767 void Assembler::adcq(Register dst, Register src) {
1768   (int) prefixq_and_encode(dst->encoding(), src->encoding());
1769   emit_arith(0x13, 0xC0, dst, src);
1770 }
1771 
1772 void Assembler::addl(Address dst, int imm32) {
1773   InstructionMark im(this);
1774   prefix(dst);
1775   emit_arith_operand(0x81, rax, dst,imm32);
1776 }
1777 
1778 void Assembler::addl(Address dst, Register src) {
1779   InstructionMark im(this);
1780   prefix(dst, src);
1781   emit_byte(0x01);
1782   emit_operand(src, dst);
1783 }
1784 
1785 void Assembler::addl(Register dst, int imm32) {
1786   prefix(dst);
1787   emit_arith(0x81, 0xC0, dst, imm32);
1788 }
1789 
1790 void Assembler::addl(Register dst, Address src) {
1791   InstructionMark im(this);
1792   prefix(src, dst);
1793   emit_byte(0x03);
1794   emit_operand(dst, src);
1795 }
1796 
1797 void Assembler::addl(Register dst, Register src) {
1798   (void) prefix_and_encode(dst->encoding(), src->encoding());
1799   emit_arith(0x03, 0xC0, dst, src);
1800 }
1801 
1802 void Assembler::addq(Address dst, int imm32) {
1803   InstructionMark im(this);
1804   prefixq(dst);
1805   emit_arith_operand(0x81, rax, dst,imm32);
1806 }
1807 
1808 void Assembler::addq(Address dst, Register src) {
1809   InstructionMark im(this);
1810   prefixq(dst, src);
1811   emit_byte(0x01);
1812   emit_operand(src, dst);
1813 }
1814 
1815 void Assembler::addq(Register dst, int imm32) {
1816   (void) prefixq_and_encode(dst->encoding());
1817   emit_arith(0x81, 0xC0, dst, imm32);
1818 }
1819 
1820 void Assembler::addq(Register dst, Address src) {
1821   InstructionMark im(this);
1822   prefixq(src, dst);
1823   emit_byte(0x03);
1824   emit_operand(dst, src);
1825 }
1826 
1827 void Assembler::addq(Register dst, Register src) {
1828   (void) prefixq_and_encode(dst->encoding(), src->encoding());
1829   emit_arith(0x03, 0xC0, dst, src);
1830 }
1831 
1832 void Assembler::andl(Register dst, int imm32) {
1833   prefix(dst);
1834   emit_arith(0x81, 0xE0, dst, imm32);
1835 }
1836 
1837 void Assembler::andl(Register dst, Address src) {
1838   InstructionMark im(this);
1839   prefix(src, dst);
1840   emit_byte(0x23);
1841   emit_operand(dst, src);
1842 }
1843 
1844 void Assembler::andl(Register dst, Register src) {
1845   (void) prefix_and_encode(dst->encoding(), src->encoding());
1846   emit_arith(0x23, 0xC0, dst, src);
1847 }
1848 
1849 void Assembler::andq(Register dst, int imm32) {
1850   (void) prefixq_and_encode(dst->encoding());
1851   emit_arith(0x81, 0xE0, dst, imm32);
1852 }
1853 
1854 void Assembler::andq(Register dst, Address src) {
1855   InstructionMark im(this);
1856   prefixq(src, dst);
1857   emit_byte(0x23);
1858   emit_operand(dst, src);
1859 }
1860 
1861 void Assembler::andq(Register dst, Register src) {
1862   (int) prefixq_and_encode(dst->encoding(), src->encoding());
1863   emit_arith(0x23, 0xC0, dst, src);
1864 }
1865 
1866 void Assembler::cmpb(Address dst, int imm8) {
1867   InstructionMark im(this);
1868   prefix(dst);
1869   emit_byte(0x80);
1870   emit_operand(rdi, dst, 1);
1871   emit_byte(imm8);
1872 }
1873 
1874 void Assembler::cmpl(Address dst, int imm32) {
1875   InstructionMark im(this);
1876   prefix(dst);
1877   emit_byte(0x81);
1878   emit_operand(rdi, dst, 4);
1879   emit_long(imm32);
1880 }
1881 
1882 void Assembler::cmpl(Register dst, int imm32) {
1883   prefix(dst);
1884   emit_arith(0x81, 0xF8, dst, imm32);
1885 }
1886 
1887 void Assembler::cmpl(Register dst, Register src) {
1888   (void) prefix_and_encode(dst->encoding(), src->encoding());
1889   emit_arith(0x3B, 0xC0, dst, src);
1890 }
1891 
1892 void Assembler::cmpl(Register dst, Address src) {
1893   InstructionMark im(this);
1894   prefix(src, dst);
1895   emit_byte(0x3B);
1896   emit_operand(dst, src);
1897 }
1898 
1899 void Assembler::cmpq(Address dst, int imm32) {
1900   InstructionMark im(this);
1901   prefixq(dst);
1902   emit_byte(0x81);
1903   emit_operand(rdi, dst, 4);
1904   emit_long(imm32);
1905 }
1906 
1907 void Assembler::cmpq(Register dst, int imm32) {
1908   (void) prefixq_and_encode(dst->encoding());
1909   emit_arith(0x81, 0xF8, dst, imm32);
1910 }
1911 
1912 void Assembler::cmpq(Address dst, Register src) {
1913   prefixq(dst, src);
1914   emit_byte(0x3B);
1915   emit_operand(src, dst);
1916 }
1917 
1918 void Assembler::cmpq(Register dst, Register src) {
1919   (void) prefixq_and_encode(dst->encoding(), src->encoding());
1920   emit_arith(0x3B, 0xC0, dst, src);
1921 }
1922 
1923 void Assembler::cmpq(Register dst, Address  src) {
1924   InstructionMark im(this);
1925   prefixq(src, dst);
1926   emit_byte(0x3B);
1927   emit_operand(dst, src);
1928 }
1929 
1930 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
1931   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1932   emit_byte(0x0F);
1933   emit_byte(0x2E);
1934   emit_byte(0xC0 | encode);
1935 }
1936 
1937 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
1938   emit_byte(0x66);
1939   ucomiss(dst, src);
1940 }
1941 
1942 void Assembler::decl(Register dst) {
1943   // Don't use it directly. Use MacroAssembler::decrementl() instead.
1944   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
1945   int encode = prefix_and_encode(dst->encoding());
1946   emit_byte(0xFF);
1947   emit_byte(0xC8 | encode);
1948 }
1949 
1950 void Assembler::decl(Address dst) {
1951   // Don't use it directly. Use MacroAssembler::decrementl() instead.
1952   InstructionMark im(this);
1953   prefix(dst);
1954   emit_byte(0xFF);
1955   emit_operand(rcx, dst);
1956 }
1957 
1958 void Assembler::decq(Register dst) {
1959   // Don't use it directly. Use MacroAssembler::decrementq() instead.
1960   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
1961   int encode = prefixq_and_encode(dst->encoding());
1962   emit_byte(0xFF);
1963   emit_byte(0xC8 | encode);
1964 }
1965 
1966 void Assembler::decq(Address dst) {
1967   // Don't use it directly. Use MacroAssembler::decrementq() instead.
1968   InstructionMark im(this);
1969   prefixq(dst);
1970   emit_byte(0xFF);
1971   emit_operand(rcx, dst);
1972 }
1973 
1974 void Assembler::idivl(Register src) {
1975   int encode = prefix_and_encode(src->encoding());
1976   emit_byte(0xF7);
1977   emit_byte(0xF8 | encode);
1978 }
1979 
1980 void Assembler::idivq(Register src) {
1981   int encode = prefixq_and_encode(src->encoding());
1982   emit_byte(0xF7);
1983   emit_byte(0xF8 | encode);
1984 }
1985 
1986 void Assembler::cdql() {
1987   emit_byte(0x99);
1988 }
1989 
1990 void Assembler::cdqq() {
1991   prefix(REX_W);
1992   emit_byte(0x99);
1993 }
1994 
1995 void Assembler::imull(Register dst, Register src) {
1996   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1997   emit_byte(0x0F);
1998   emit_byte(0xAF);
1999   emit_byte(0xC0 | encode);
2000 }
2001 
2002 void Assembler::imull(Register dst, Register src, int value) {
2003   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2004   if (is8bit(value)) {
2005     emit_byte(0x6B);
2006     emit_byte(0xC0 | encode);
2007     emit_byte(value);
2008   } else {
2009     emit_byte(0x69);
2010     emit_byte(0xC0 | encode);
2011     emit_long(value);
2012   }
2013 }
2014 
2015 void Assembler::imulq(Register dst, Register src) {
2016   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2017   emit_byte(0x0F);
2018   emit_byte(0xAF);
2019   emit_byte(0xC0 | encode);
2020 }
2021 
2022 void Assembler::imulq(Register dst, Register src, int value) {
2023   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2024   if (is8bit(value)) {
2025     emit_byte(0x6B);
2026     emit_byte(0xC0 | encode);
2027     emit_byte(value);
2028   } else {
2029     emit_byte(0x69);
2030     emit_byte(0xC0 | encode);
2031     emit_long(value);
2032   }
2033 }
2034 
2035 void Assembler::incl(Register dst) {
2036   // Don't use it directly. Use MacroAssembler::incrementl() instead.
2037   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2038   int encode = prefix_and_encode(dst->encoding());
2039   emit_byte(0xFF);
2040   emit_byte(0xC0 | encode);
2041 }
2042 
2043 void Assembler::incl(Address dst) {
2044   // Don't use it directly. Use MacroAssembler::incrementl() instead.
2045   InstructionMark im(this);
2046   prefix(dst);
2047   emit_byte(0xFF);
2048   emit_operand(rax, dst);
2049 }
2050 
2051 void Assembler::incq(Register dst) {
2052   // Don't use it directly. Use MacroAssembler::incrementq() instead.
2053   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2054   int encode = prefixq_and_encode(dst->encoding());
2055   emit_byte(0xFF);
2056   emit_byte(0xC0 | encode);
2057 }
2058 
2059 void Assembler::incq(Address dst) {
2060   // Don't use it directly. Use MacroAssembler::incrementq() instead.
2061   InstructionMark im(this);
2062   prefixq(dst);
2063   emit_byte(0xFF);
2064   emit_operand(rax, dst);
2065 }
2066 
2067 void Assembler::leal(Register dst, Address src) {
2068   InstructionMark im(this);
2069   emit_byte(0x67); // addr32
2070   prefix(src, dst);
2071   emit_byte(0x8D);
2072   emit_operand(dst, src);
2073 }
2074 
2075 void Assembler::leaq(Register dst, Address src) {
2076   InstructionMark im(this);
2077   prefixq(src, dst);
2078   emit_byte(0x8D);
2079   emit_operand(dst, src);
2080 }
2081 
2082 void Assembler::mull(Address src) {
2083   InstructionMark im(this);
2084   // was missing
2085   prefix(src);
2086   emit_byte(0xF7);
2087   emit_operand(rsp, src);
2088 }
2089 
2090 void Assembler::mull(Register src) {
2091   // was missing
2092   int encode = prefix_and_encode(src->encoding());
2093   emit_byte(0xF7);
2094   emit_byte(0xE0 | encode);
2095 }
2096 
2097 void Assembler::negl(Register dst) {
2098   int encode = prefix_and_encode(dst->encoding());
2099   emit_byte(0xF7);
2100   emit_byte(0xD8 | encode);
2101 }
2102 
2103 void Assembler::negq(Register dst) {
2104   int encode = prefixq_and_encode(dst->encoding());
2105   emit_byte(0xF7);
2106   emit_byte(0xD8 | encode);
2107 }
2108 
2109 void Assembler::notl(Register dst) {
2110   int encode = prefix_and_encode(dst->encoding());
2111   emit_byte(0xF7);
2112   emit_byte(0xD0 | encode);
2113 }
2114 
2115 void Assembler::notq(Register dst) {
2116   int encode = prefixq_and_encode(dst->encoding());
2117   emit_byte(0xF7);
2118   emit_byte(0xD0 | encode);
2119 }
2120 
2121 void Assembler::orl(Address dst, int imm32) {
2122   InstructionMark im(this);
2123   prefix(dst);
2124   emit_byte(0x81);
2125   emit_operand(rcx, dst, 4);
2126   emit_long(imm32);
2127 }
2128 
2129 void Assembler::orl(Register dst, int imm32) {
2130   prefix(dst);
2131   emit_arith(0x81, 0xC8, dst, imm32);
2132 }
2133 
2134 void Assembler::orl(Register dst, Address src) {
2135   InstructionMark im(this);
2136   prefix(src, dst);
2137   emit_byte(0x0B);
2138   emit_operand(dst, src);
2139 }
2140 
2141 void Assembler::orl(Register dst, Register src) {
2142   (void) prefix_and_encode(dst->encoding(), src->encoding());
2143   emit_arith(0x0B, 0xC0, dst, src);
2144 }
2145 
2146 void Assembler::orq(Address dst, int imm32) {
2147   InstructionMark im(this);
2148   prefixq(dst);
2149   emit_byte(0x81);
2150   emit_operand(rcx, dst, 4);
2151   emit_long(imm32);
2152 }
2153 
2154 void Assembler::orq(Register dst, int imm32) {
2155   (void) prefixq_and_encode(dst->encoding());
2156   emit_arith(0x81, 0xC8, dst, imm32);
2157 }
2158 
2159 void Assembler::orq(Register dst, Address src) {
2160   InstructionMark im(this);
2161   prefixq(src, dst);
2162   emit_byte(0x0B);
2163   emit_operand(dst, src);
2164 }
2165 
2166 void Assembler::orq(Register dst, Register src) {
2167   (void) prefixq_and_encode(dst->encoding(), src->encoding());
2168   emit_arith(0x0B, 0xC0, dst, src);
2169 }
2170 
2171 void Assembler::rcll(Register dst, int imm8) {
2172   assert(isShiftCount(imm8), "illegal shift count");
2173   int encode = prefix_and_encode(dst->encoding());
2174   if (imm8 == 1) {
2175     emit_byte(0xD1);
2176     emit_byte(0xD0 | encode);
2177   } else {
2178     emit_byte(0xC1);
2179     emit_byte(0xD0 | encode);
2180     emit_byte(imm8);
2181   }
2182 }
2183 
2184 void Assembler::rclq(Register dst, int imm8) {
2185   assert(isShiftCount(imm8 >> 1), "illegal shift count");
2186   int encode = prefixq_and_encode(dst->encoding());
2187   if (imm8 == 1) {
2188     emit_byte(0xD1);
2189     emit_byte(0xD0 | encode);
2190   } else {
2191     emit_byte(0xC1);
2192     emit_byte(0xD0 | encode);
2193     emit_byte(imm8);
2194   }
2195 }
2196 
2197 void Assembler::sarl(Register dst, int imm8) {
2198   int encode = prefix_and_encode(dst->encoding());
2199   assert(isShiftCount(imm8), "illegal shift count");
2200   if (imm8 == 1) {
2201     emit_byte(0xD1);
2202     emit_byte(0xF8 | encode);
2203   } else {
2204     emit_byte(0xC1);
2205     emit_byte(0xF8 | encode);
2206     emit_byte(imm8);
2207   }
2208 }
2209 
2210 void Assembler::sarl(Register dst) {
2211   int encode = prefix_and_encode(dst->encoding());
2212   emit_byte(0xD3);
2213   emit_byte(0xF8 | encode);
2214 }
2215 
2216 void Assembler::sarq(Register dst, int imm8) {
2217   assert(isShiftCount(imm8 >> 1), "illegal shift count");
2218   int encode = prefixq_and_encode(dst->encoding());
2219   if (imm8 == 1) {
2220     emit_byte(0xD1);
2221     emit_byte(0xF8 | encode);
2222   } else {
2223     emit_byte(0xC1);
2224     emit_byte(0xF8 | encode);
2225     emit_byte(imm8);
2226   }
2227 }
2228 
2229 void Assembler::sarq(Register dst) {
2230   int encode = prefixq_and_encode(dst->encoding());
2231   emit_byte(0xD3);
2232   emit_byte(0xF8 | encode);
2233 }
2234 
2235 void Assembler::sbbl(Address dst, int imm32) {
2236   InstructionMark im(this);
2237   prefix(dst);
2238   emit_arith_operand(0x81, rbx, dst, imm32);
2239 }
2240 
2241 void Assembler::sbbl(Register dst, int imm32) {
2242   prefix(dst);
2243   emit_arith(0x81, 0xD8, dst, imm32);
2244 }
2245 
2246 void Assembler::sbbl(Register dst, Address src) {
2247   InstructionMark im(this);
2248   prefix(src, dst);
2249   emit_byte(0x1B);
2250   emit_operand(dst, src);
2251 }
2252 
2253 void Assembler::sbbl(Register dst, Register src) {
2254   (void) prefix_and_encode(dst->encoding(), src->encoding());
2255   emit_arith(0x1B, 0xC0, dst, src);
2256 }
2257 
2258 void Assembler::sbbq(Address dst, int imm32) {
2259   InstructionMark im(this);
2260   prefixq(dst);
2261   emit_arith_operand(0x81, rbx, dst, imm32);
2262 }
2263 
2264 void Assembler::sbbq(Register dst, int imm32) {
2265   (void) prefixq_and_encode(dst->encoding());
2266   emit_arith(0x81, 0xD8, dst, imm32);
2267 }
2268 
2269 void Assembler::sbbq(Register dst, Address src) {
2270   InstructionMark im(this);
2271   prefixq(src, dst);
2272   emit_byte(0x1B);
2273   emit_operand(dst, src);
2274 }
2275 
2276 void Assembler::sbbq(Register dst, Register src) {
2277   (void) prefixq_and_encode(dst->encoding(), src->encoding());
2278   emit_arith(0x1B, 0xC0, dst, src);
2279 }
2280 
2281 void Assembler::shll(Register dst, int imm8) {
2282   assert(isShiftCount(imm8), "illegal shift count");
2283   int encode = prefix_and_encode(dst->encoding());
2284   if (imm8 == 1 ) {
2285     emit_byte(0xD1);
2286     emit_byte(0xE0 | encode);
2287   } else {
2288     emit_byte(0xC1);
2289     emit_byte(0xE0 | encode);
2290     emit_byte(imm8);
2291   }
2292 }
2293 
2294 void Assembler::shll(Register dst) {
2295   int encode = prefix_and_encode(dst->encoding());
2296   emit_byte(0xD3);
2297   emit_byte(0xE0 | encode);
2298 }
2299 
2300 void Assembler::shlq(Register dst, int imm8) {
2301   assert(isShiftCount(imm8 >> 1), "illegal shift count");
2302   int encode = prefixq_and_encode(dst->encoding());
2303   if (imm8 == 1) {
2304     emit_byte(0xD1);
2305     emit_byte(0xE0 | encode);
2306   } else {
2307     emit_byte(0xC1);
2308     emit_byte(0xE0 | encode);
2309     emit_byte(imm8);
2310   }
2311 }
2312 
2313 void Assembler::shlq(Register dst) {
2314   int encode = prefixq_and_encode(dst->encoding());
2315   emit_byte(0xD3);
2316   emit_byte(0xE0 | encode);
2317 }
2318 
2319 void Assembler::shrl(Register dst, int imm8) {
2320   assert(isShiftCount(imm8), "illegal shift count");
2321   int encode = prefix_and_encode(dst->encoding());
2322   emit_byte(0xC1);
2323   emit_byte(0xE8 | encode);
2324   emit_byte(imm8);
2325 }
2326 
2327 void Assembler::shrl(Register dst) {
2328   int encode = prefix_and_encode(dst->encoding());
2329   emit_byte(0xD3);
2330   emit_byte(0xE8 | encode);
2331 }
2332 
2333 void Assembler::shrq(Register dst, int imm8) {
2334   assert(isShiftCount(imm8 >> 1), "illegal shift count");
2335   int encode = prefixq_and_encode(dst->encoding());
2336   emit_byte(0xC1);
2337   emit_byte(0xE8 | encode);
2338   emit_byte(imm8);
2339 }
2340 
2341 void Assembler::shrq(Register dst) {
2342   int encode = prefixq_and_encode(dst->encoding());
2343   emit_byte(0xD3);
2344   emit_byte(0xE8 | encode);
2345 }
2346 
2347 void Assembler::subl(Address dst, int imm32) {
2348   InstructionMark im(this);
2349   prefix(dst);
2350   if (is8bit(imm32)) {
2351     emit_byte(0x83);
2352     emit_operand(rbp, dst, 1);
2353     emit_byte(imm32 & 0xFF);
2354   } else {
2355     emit_byte(0x81);
2356     emit_operand(rbp, dst, 4);
2357     emit_long(imm32);
2358   }
2359 }
2360 
2361 void Assembler::subl(Register dst, int imm32) {
2362   prefix(dst);
2363   emit_arith(0x81, 0xE8, dst, imm32);
2364 }
2365 
2366 void Assembler::subl(Address dst, Register src) {
2367   InstructionMark im(this);
2368   prefix(dst, src);
2369   emit_byte(0x29);
2370   emit_operand(src, dst);
2371 }
2372 
2373 void Assembler::subl(Register dst, Address src) {
2374   InstructionMark im(this);
2375   prefix(src, dst);
2376   emit_byte(0x2B);
2377   emit_operand(dst, src);
2378 }
2379 
2380 void Assembler::subl(Register dst, Register src) {
2381   (void) prefix_and_encode(dst->encoding(), src->encoding());
2382   emit_arith(0x2B, 0xC0, dst, src);
2383 }
2384 
2385 void Assembler::subq(Address dst, int imm32) {
2386   InstructionMark im(this);
2387   prefixq(dst);
2388   if (is8bit(imm32)) {
2389     emit_byte(0x83);
2390     emit_operand(rbp, dst, 1);
2391     emit_byte(imm32 & 0xFF);
2392   } else {
2393     emit_byte(0x81);
2394     emit_operand(rbp, dst, 4);
2395     emit_long(imm32);
2396   }
2397 }
2398 
2399 void Assembler::subq(Register dst, int imm32) {
2400   (void) prefixq_and_encode(dst->encoding());
2401   emit_arith(0x81, 0xE8, dst, imm32);
2402 }
2403 
2404 void Assembler::subq(Address dst, Register src) {
2405   InstructionMark im(this);
2406   prefixq(dst, src);
2407   emit_byte(0x29);
2408   emit_operand(src, dst);
2409 }
2410 
2411 void Assembler::subq(Register dst, Address src) {
2412   InstructionMark im(this);
2413   prefixq(src, dst);
2414   emit_byte(0x2B);
2415   emit_operand(dst, src);
2416 }
2417 
2418 void Assembler::subq(Register dst, Register src) {
2419   (void) prefixq_and_encode(dst->encoding(), src->encoding());
2420   emit_arith(0x2B, 0xC0, dst, src);
2421 }
2422 
2423 void Assembler::testb(Register dst, int imm8) {
2424   (void) prefix_and_encode(dst->encoding(), true);
2425   emit_arith_b(0xF6, 0xC0, dst, imm8);
2426 }
2427 
2428 void Assembler::testl(Register dst, int imm32) {
2429   // not using emit_arith because test
2430   // doesn't support sign-extension of
2431   // 8bit operands
2432   int encode = dst->encoding();
2433   if (encode == 0) {
2434     emit_byte(0xA9);
2435   } else {
2436     encode = prefix_and_encode(encode);
2437     emit_byte(0xF7);
2438     emit_byte(0xC0 | encode);
2439   }
2440   emit_long(imm32);
2441 }
2442 
2443 void Assembler::testl(Register dst, Register src) {
2444   (void) prefix_and_encode(dst->encoding(), src->encoding());
2445   emit_arith(0x85, 0xC0, dst, src);
2446 }
2447 
2448 void Assembler::testq(Register dst, int imm32) {
2449   // not using emit_arith because test
2450   // doesn't support sign-extension of
2451   // 8bit operands
2452   int encode = dst->encoding();
2453   if (encode == 0) {
2454     prefix(REX_W);
2455     emit_byte(0xA9);
2456   } else {
2457     encode = prefixq_and_encode(encode);
2458     emit_byte(0xF7);
2459     emit_byte(0xC0 | encode);
2460   }
2461   emit_long(imm32);
2462 }
2463 
2464 void Assembler::testq(Register dst, Register src) {
2465   (void) prefixq_and_encode(dst->encoding(), src->encoding());
2466   emit_arith(0x85, 0xC0, dst, src);
2467 }
2468 
2469 void Assembler::xaddl(Address dst, Register src) {
2470   InstructionMark im(this);
2471   prefix(dst, src);
2472   emit_byte(0x0F);
2473   emit_byte(0xC1);
2474   emit_operand(src, dst);
2475 }
2476 
2477 void Assembler::xaddq(Address dst, Register src) {
2478   InstructionMark im(this);
2479   prefixq(dst, src);
2480   emit_byte(0x0F);
2481   emit_byte(0xC1);
2482   emit_operand(src, dst);
2483 }
2484 
2485 void Assembler::xorl(Register dst, int imm32) {
2486   prefix(dst);
2487   emit_arith(0x81, 0xF0, dst, imm32);
2488 }
2489 
2490 void Assembler::xorl(Register dst, Register src) {
2491   (void) prefix_and_encode(dst->encoding(), src->encoding());
2492   emit_arith(0x33, 0xC0, dst, src);
2493 }
2494 
2495 void Assembler::xorl(Register dst, Address src) {
2496   InstructionMark im(this);
2497   prefix(src, dst);
2498   emit_byte(0x33);
2499   emit_operand(dst, src);
2500 }
2501 
2502 void Assembler::xorq(Register dst, int imm32) {
2503   (void) prefixq_and_encode(dst->encoding());
2504   emit_arith(0x81, 0xF0, dst, imm32);
2505 }
2506 
2507 void Assembler::xorq(Register dst, Register src) {
2508   (void) prefixq_and_encode(dst->encoding(), src->encoding());
2509   emit_arith(0x33, 0xC0, dst, src);
2510 }
2511 
2512 void Assembler::xorq(Register dst, Address src) {
2513   InstructionMark im(this);
2514   prefixq(src, dst);
2515   emit_byte(0x33);
2516   emit_operand(dst, src);
2517 }
2518 
2519 void Assembler::bswapl(Register reg) {
2520   int encode = prefix_and_encode(reg->encoding());
2521   emit_byte(0x0F);
2522   emit_byte(0xC8 | encode);
2523 }
2524 
2525 void Assembler::bswapq(Register reg) {
2526   int encode = prefixq_and_encode(reg->encoding());
2527   emit_byte(0x0F);
2528   emit_byte(0xC8 | encode);
2529 }
2530 
2531 void Assembler::lock() {
2532   emit_byte(0xF0);
2533 }
2534 
2535 void Assembler::xchgl(Register dst, Address src) {
2536   InstructionMark im(this);
2537   prefix(src, dst);
2538   emit_byte(0x87);
2539   emit_operand(dst, src);
2540 }
2541 
2542 void Assembler::xchgl(Register dst, Register src) {
2543   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2544   emit_byte(0x87);
2545   emit_byte(0xc0 | encode);
2546 }
2547 
2548 void Assembler::xchgq(Register dst, Address src) {
2549   InstructionMark im(this);
2550   prefixq(src, dst);
2551   emit_byte(0x87);
2552   emit_operand(dst, src);
2553 }
2554 
2555 void Assembler::xchgq(Register dst, Register src) {
2556   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2557   emit_byte(0x87);
2558   emit_byte(0xc0 | encode);
2559 }
2560 
2561 void Assembler::cmpxchgl(Register reg, Address adr) {
2562   InstructionMark im(this);
2563   prefix(adr, reg);
2564   emit_byte(0x0F);
2565   emit_byte(0xB1);
2566   emit_operand(reg, adr);
2567 }
2568 
2569 void Assembler::cmpxchgq(Register reg, Address adr) {
2570   InstructionMark im(this);
2571   prefixq(adr, reg);
2572   emit_byte(0x0F);
2573   emit_byte(0xB1);
2574   emit_operand(reg, adr);
2575 }
2576 
2577 void Assembler::hlt() {
2578   emit_byte(0xF4);
2579 }
2580 
2581 
2582 void Assembler::addr_nop_4() {
2583   // 4 bytes: NOP DWORD PTR [EAX+0]
2584   emit_byte(0x0F);
2585   emit_byte(0x1F);
2586   emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
2587   emit_byte(0);    // 8-bits offset (1 byte)
2588 }
2589 
2590 void Assembler::addr_nop_5() {
2591   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
2592   emit_byte(0x0F);
2593   emit_byte(0x1F);
2594   emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
2595   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
2596   emit_byte(0);    // 8-bits offset (1 byte)
2597 }
2598 
2599 void Assembler::addr_nop_7() {
2600   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
2601   emit_byte(0x0F);
2602   emit_byte(0x1F);
2603   emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
2604   emit_long(0);    // 32-bits offset (4 bytes)
2605 }
2606 
2607 void Assembler::addr_nop_8() {
2608   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
2609   emit_byte(0x0F);
2610   emit_byte(0x1F);
2611   emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
2612   emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
2613   emit_long(0);    // 32-bits offset (4 bytes)
2614 }
2615 
// Emits i bytes of no-op padding (i must be > 0).
//
// Three strategies, chosen in this order:
//   1. UseAddressNop on Intel: multi-byte address nops, never two of them
//      back to back (they are interleaved with 0x66-prefixed 0x90 nops).
//   2. UseAddressNop on AMD: multi-byte address nops, emitted consecutively.
//   3. Otherwise: plain 0x90 nops with up to three 0x66 size prefixes each.
//
// The switch statements below rely on deliberate case fall-through to stack
// up 0x66 prefixes; every case without a `break` falls into the next one.
void Assembler::nop(int i) {
  assert(i > 0, " ");
  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The remaining encodings are Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    // Consume the request in 15-byte chunks (3 prefixes + 8-byte address nop
    // + 4-byte prefixed nop) until fewer than 15 bytes remain.
    while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    // Remaining 0..14 bytes, emitted per the table above.
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix
        // fall through
      case 13:
        emit_byte(0x66); // size prefix
        // fall through
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix
        // fall through
      case 10:
        emit_byte(0x66); // size prefix
        // fall through
      case 9:
        emit_byte(0x66); // size prefix
        // fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
        // fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
        // fall through
      case 2:
        emit_byte(0x66); // size prefix
        // fall through
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        // Only reachable when the loop above consumed everything.
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The remaining encodings are AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    // Consume the request in 11-byte chunks (3 prefixes + 8-byte address nop)
    // until fewer than 22 bytes remain.
    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    // (i is decremented by exactly the number of bytes emitted, so the
    // second switch below sees what is left: 0..11 bytes.)
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix
        // fall through
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix
        // fall through
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix
        // fall through
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix
        // fall through
      case 10:
        emit_byte(0x66); // size prefix
        // fall through
      case 9:
        emit_byte(0x66); // size prefix
        // fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
        // fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
        // fall through
      case 2:
        emit_byte(0x66); // size prefix
        // fall through
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  // Emit 4-byte prefixed nops until at most 12 bytes remain.
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  // For 9..12 remaining: emit one nop of 3 bytes (4 bytes when i > 9).
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  // For 5..8 remaining: emit one nop of 3 bytes (4 bytes when i > 6).
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // Final nop of 1..4 bytes: (i - 1) size prefixes followed by 0x90.
  switch (i) {
    case 4:
      emit_byte(0x66);
      // fall through
    case 3:
      emit_byte(0x66);
      // fall through
    case 2:
      emit_byte(0x66);
      // fall through
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
2852 
2853 void Assembler::ret(int imm16) {
2854   if (imm16 == 0) {
2855     emit_byte(0xC3);
2856   } else {
2857     emit_byte(0xC2);
2858     emit_word(imm16);
2859   }
2860 }
2861 
2862 // copies a single word from [esi] to [edi]
2863 void Assembler::smovl() {
2864   emit_byte(0xA5);
2865 }
2866 
2867 // copies data from [rsi] to [rdi] using rcx words (m32)
2868 void Assembler::rep_movl() {
2869   // REP
2870   emit_byte(0xF3);
2871   // MOVSL
2872   emit_byte(0xA5);
2873 }
2874 
2875 // copies data from [rsi] to [rdi] using rcx double words (m64)
2876 void Assembler::rep_movq() {
2877   // REP
2878   emit_byte(0xF3);
2879   // MOVSQ
2880   prefix(REX_W);
2881   emit_byte(0xA5);
2882 }
2883 
2884 // sets rcx double words (m64) with rax value at [rdi]
2885 void Assembler::rep_set() {
2886   // REP
2887   emit_byte(0xF3);
2888   // STOSQ
2889   prefix(REX_W);
2890   emit_byte(0xAB);
2891 }
2892 
2893 // scans rcx double words (m64) at [rdi] for occurance of rax
2894 void Assembler::repne_scan() {
2895   // REPNE/REPNZ
2896   emit_byte(0xF2);
2897   // SCASQ
2898   prefix(REX_W);
2899   emit_byte(0xAF);
2900 }
2901 
2902 void Assembler::setb(Condition cc, Register dst) {
2903   assert(0 <= cc && cc < 16, "illegal cc");
2904   int encode = prefix_and_encode(dst->encoding(), true);
2905   emit_byte(0x0F);
2906   emit_byte(0x90 | cc);
2907   emit_byte(0xC0 | encode);
2908 }
2909 
2910 void Assembler::clflush(Address adr) {
2911   prefix(adr);
2912   emit_byte(0x0F);
2913   emit_byte(0xAE);
2914   emit_operand(rdi, adr);
2915 }
2916 
2917 void Assembler::call(Label& L, relocInfo::relocType rtype) {
2918   if (L.is_bound()) {
2919     const int long_size = 5;
2920     int offs = (int)( target(L) - pc() );
2921     assert(offs <= 0, "assembler error");
2922     InstructionMark im(this);
2923     // 1110 1000 #32-bit disp
2924     emit_byte(0xE8);
2925     emit_data(offs - long_size, rtype, disp32_operand);
2926   } else {
2927     InstructionMark im(this);
2928     // 1110 1000 #32-bit disp
2929     L.add_patch_at(code(), locator());
2930 
2931     emit_byte(0xE8);
2932     emit_data(int(0), rtype, disp32_operand);
2933   }
2934 }
2935 
2936 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
2937   assert(entry != NULL, "call most probably wrong");
2938   InstructionMark im(this);
2939   emit_byte(0xE8);
2940   intptr_t disp = entry - (_code_pos + sizeof(int32_t));
2941   assert(is_simm32(disp), "must be 32bit offset (call2)");
2942   // Technically, should use call32_operand, but this format is
2943   // implied by the fact that we're emitting a call instruction.
2944   emit_data((int) disp, rspec, disp32_operand);
2945 }
2946 
2947 
2948 void Assembler::call(Register dst) {
2949   // This was originally using a 32bit register encoding
2950   // and surely we want 64bit!
2951   // this is a 32bit encoding but in 64bit mode the default
2952   // operand size is 64bit so there is no need for the
2953   // wide prefix. So prefix only happens if we use the
2954   // new registers. Much like push/pop.
2955   int encode = prefixq_and_encode(dst->encoding());
2956   emit_byte(0xFF);
2957   emit_byte(0xD0 | encode);
2958 }
2959 
2960 void Assembler::call(Address adr) {
2961   InstructionMark im(this);
2962   prefix(adr);
2963   emit_byte(0xFF);
2964   emit_operand(rdx, adr);
2965 }
2966 
2967 void Assembler::jmp(Register reg) {
2968   int encode = prefix_and_encode(reg->encoding());
2969   emit_byte(0xFF);
2970   emit_byte(0xE0 | encode);
2971 }
2972 
2973 void Assembler::jmp(Address adr) {
2974   InstructionMark im(this);
2975   prefix(adr);
2976   emit_byte(0xFF);
2977   emit_operand(rsp, adr);
2978 }
2979 
2980 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2981   InstructionMark im(this);
2982   emit_byte(0xE9);
2983   assert(dest != NULL, "must have a target");
2984   intptr_t disp = dest - (_code_pos + sizeof(int32_t));
2985   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2986   emit_data(disp, rspec.reloc(), call32_operand);
2987 }
2988 
2989 void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
2990   if (L.is_bound()) {
2991     address entry = target(L);
2992     assert(entry != NULL, "jmp most probably wrong");
2993     InstructionMark im(this);
2994     const int short_size = 2;
2995     const int long_size = 5;
2996     intptr_t offs = entry - _code_pos;
2997     if (rtype == relocInfo::none && is8bit(offs - short_size)) {
2998       emit_byte(0xEB);
2999       emit_byte((offs - short_size) & 0xFF);
3000     } else {
3001       emit_byte(0xE9);
3002       emit_long(offs - long_size);
3003     }
3004   } else {
3005     // By default, forward jumps are always 32-bit displacements, since
3006     // we can't yet know where the label will be bound.  If you're sure that
3007     // the forward jump will not run beyond 256 bytes, use jmpb to
3008     // force an 8-bit displacement.
3009     InstructionMark im(this);
3010     relocate(rtype);
3011     L.add_patch_at(code(), locator());
3012     emit_byte(0xE9);
3013     emit_long(0);
3014   }
3015 }
3016 
3017 void Assembler::jmpb(Label& L) {
3018   if (L.is_bound()) {
3019     const int short_size = 2;
3020     address entry = target(L);
3021     assert(is8bit((entry - _code_pos) + short_size),
3022            "Dispacement too large for a short jmp");
3023     assert(entry != NULL, "jmp most probably wrong");
3024     intptr_t offs = entry - _code_pos;
3025     emit_byte(0xEB);
3026     emit_byte((offs - short_size) & 0xFF);
3027   } else {
3028     InstructionMark im(this);
3029     L.add_patch_at(code(), locator());
3030     emit_byte(0xEB);
3031     emit_byte(0);
3032   }
3033 }
3034 
3035 void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
3036   InstructionMark im(this);
3037   relocate(rtype);
3038   assert((0 <= cc) && (cc < 16), "illegal cc");
3039   if (L.is_bound()) {
3040     address dst = target(L);
3041     assert(dst != NULL, "jcc most probably wrong");
3042 
3043     const int short_size = 2;
3044     const int long_size = 6;
3045     intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
3046     if (rtype == relocInfo::none && is8bit(offs - short_size)) {
3047       // 0111 tttn #8-bit disp
3048       emit_byte(0x70 | cc);
3049       emit_byte((offs - short_size) & 0xFF);
3050     } else {
3051       // 0000 1111 1000 tttn #32-bit disp
3052       assert(is_simm32(offs - long_size),
3053              "must be 32bit offset (call4)");
3054       emit_byte(0x0F);
3055       emit_byte(0x80 | cc);
3056       emit_long(offs - long_size);
3057     }
3058   } else {
3059     // Note: could eliminate cond. jumps to this jump if condition
3060     //       is the same however, seems to be rather unlikely case.
3061     // Note: use jccb() if label to be bound is very close to get
3062     //       an 8-bit displacement
3063     L.add_patch_at(code(), locator());
3064     emit_byte(0x0F);
3065     emit_byte(0x80 | cc);
3066     emit_long(0);
3067   }
3068 }
3069 
3070 void Assembler::jccb(Condition cc, Label& L) {
3071   if (L.is_bound()) {
3072     const int short_size = 2;
3073     const int long_size = 6;
3074     address entry = target(L);
3075     assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
3076            "Dispacement too large for a short jmp");
3077     intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
3078     // 0111 tttn #8-bit disp
3079     emit_byte(0x70 | cc);
3080     emit_byte((offs - short_size) & 0xFF);
3081   } else {
3082     InstructionMark im(this);
3083     L.add_patch_at(code(), locator());
3084     emit_byte(0x70 | cc);
3085     emit_byte(0);
3086   }
3087 }
3088 
3089 // FP instructions
3090 
3091 void Assembler::fxsave(Address dst) {
3092   prefixq(dst);
3093   emit_byte(0x0F);
3094   emit_byte(0xAE);
3095   emit_operand(as_Register(0), dst);
3096 }
3097 
3098 void Assembler::fxrstor(Address src) {
3099   prefixq(src);
3100   emit_byte(0x0F);
3101   emit_byte(0xAE);
3102   emit_operand(as_Register(1), src);
3103 }
3104 
3105 void Assembler::ldmxcsr(Address src) {
3106   InstructionMark im(this);
3107   prefix(src);
3108   emit_byte(0x0F);
3109   emit_byte(0xAE);
3110   emit_operand(as_Register(2), src);
3111 }
3112 
3113 void Assembler::stmxcsr(Address dst) {
3114   InstructionMark im(this);
3115   prefix(dst);
3116   emit_byte(0x0F);
3117   emit_byte(0xAE);
3118   emit_operand(as_Register(3), dst);
3119 }
3120 
3121 void Assembler::addss(XMMRegister dst, XMMRegister src) {
3122   emit_byte(0xF3);
3123   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3124   emit_byte(0x0F);
3125   emit_byte(0x58);
3126   emit_byte(0xC0 | encode);
3127 }
3128 
3129 void Assembler::addss(XMMRegister dst, Address src) {
3130   InstructionMark im(this);
3131   emit_byte(0xF3);
3132   prefix(src, dst);
3133   emit_byte(0x0F);
3134   emit_byte(0x58);
3135   emit_operand(dst, src);
3136 }
3137 
3138 void Assembler::subss(XMMRegister dst, XMMRegister src) {
3139   emit_byte(0xF3);
3140   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3141   emit_byte(0x0F);
3142   emit_byte(0x5C);
3143   emit_byte(0xC0 | encode);
3144 }
3145 
3146 void Assembler::subss(XMMRegister dst, Address src) {
3147   InstructionMark im(this);
3148   emit_byte(0xF3);
3149   prefix(src, dst);
3150   emit_byte(0x0F);
3151   emit_byte(0x5C);
3152   emit_operand(dst, src);
3153 }
3154 
3155 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
3156   emit_byte(0xF3);
3157   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3158   emit_byte(0x0F);
3159   emit_byte(0x59);
3160   emit_byte(0xC0 | encode);
3161 }
3162 
3163 void Assembler::mulss(XMMRegister dst, Address src) {
3164   InstructionMark im(this);
3165   emit_byte(0xF3);
3166   prefix(src, dst);
3167   emit_byte(0x0F);
3168   emit_byte(0x59);
3169   emit_operand(dst, src);
3170 }
3171 
3172 void Assembler::divss(XMMRegister dst, XMMRegister src) {
3173   emit_byte(0xF3);
3174   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3175   emit_byte(0x0F);
3176   emit_byte(0x5E);
3177   emit_byte(0xC0 | encode);
3178 }
3179 
3180 void Assembler::divss(XMMRegister dst, Address src) {
3181   InstructionMark im(this);
3182   emit_byte(0xF3);
3183   prefix(src, dst);
3184   emit_byte(0x0F);
3185   emit_byte(0x5E);
3186   emit_operand(dst, src);
3187 }
3188 
3189 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
3190   emit_byte(0xF2);
3191   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3192   emit_byte(0x0F);
3193   emit_byte(0x58);
3194   emit_byte(0xC0 | encode);
3195 }
3196 
3197 void Assembler::addsd(XMMRegister dst, Address src) {
3198   InstructionMark im(this);
3199   emit_byte(0xF2);
3200   prefix(src, dst);
3201   emit_byte(0x0F);
3202   emit_byte(0x58);
3203   emit_operand(dst, src);
3204 }
3205 
3206 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
3207   emit_byte(0xF2);
3208   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3209   emit_byte(0x0F);
3210   emit_byte(0x5C);
3211   emit_byte(0xC0 | encode);
3212 }
3213 
3214 void Assembler::subsd(XMMRegister dst, Address src) {
3215   InstructionMark im(this);
3216   emit_byte(0xF2);
3217   prefix(src, dst);
3218   emit_byte(0x0F);
3219   emit_byte(0x5C);
3220   emit_operand(dst, src);
3221 }
3222 
3223 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
3224   emit_byte(0xF2);
3225   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3226   emit_byte(0x0F);
3227   emit_byte(0x59);
3228   emit_byte(0xC0 | encode);
3229 }
3230 
3231 void Assembler::mulsd(XMMRegister dst, Address src) {
3232   InstructionMark im(this);
3233   emit_byte(0xF2);
3234   prefix(src, dst);
3235   emit_byte(0x0F);
3236   emit_byte(0x59);
3237   emit_operand(dst, src);
3238 }
3239 
3240 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
3241   emit_byte(0xF2);
3242   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3243   emit_byte(0x0F);
3244   emit_byte(0x5E);
3245   emit_byte(0xC0 | encode);
3246 }
3247 
3248 void Assembler::divsd(XMMRegister dst, Address src) {
3249   InstructionMark im(this);
3250   emit_byte(0xF2);
3251   prefix(src, dst);
3252   emit_byte(0x0F);
3253   emit_byte(0x5E);
3254   emit_operand(dst, src);
3255 }
3256 
3257 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
3258   emit_byte(0xF2);
3259   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3260   emit_byte(0x0F);
3261   emit_byte(0x51);
3262   emit_byte(0xC0 | encode);
3263 }
3264 
3265 void Assembler::sqrtsd(XMMRegister dst, Address src) {
3266   InstructionMark im(this);
3267   emit_byte(0xF2);
3268   prefix(src, dst);
3269   emit_byte(0x0F);
3270   emit_byte(0x51);
3271   emit_operand(dst, src);
3272 }
3273 
3274 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
3275   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3276   emit_byte(0x0F);
3277   emit_byte(0x57);
3278   emit_byte(0xC0 | encode);
3279 }
3280 
3281 void Assembler::xorps(XMMRegister dst, Address src) {
3282   InstructionMark im(this);
3283   prefix(src, dst);
3284   emit_byte(0x0F);
3285   emit_byte(0x57);
3286   emit_operand(dst, src);
3287 }
3288 
3289 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
3290   emit_byte(0x66);
3291   xorps(dst, src);
3292 }
3293 
3294 void Assembler::xorpd(XMMRegister dst, Address src) {
3295   InstructionMark im(this);
3296   emit_byte(0x66);
3297   prefix(src, dst);
3298   emit_byte(0x0F);
3299   emit_byte(0x57);
3300   emit_operand(dst, src);
3301 }
3302 
3303 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
3304   emit_byte(0xF3);
3305   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3306   emit_byte(0x0F);
3307   emit_byte(0x2A);
3308   emit_byte(0xC0 | encode);
3309 }
3310 
3311 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
3312   emit_byte(0xF3);
3313   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3314   emit_byte(0x0F);
3315   emit_byte(0x2A);
3316   emit_byte(0xC0 | encode);
3317 }
3318 
3319 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
3320   emit_byte(0xF2);
3321   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3322   emit_byte(0x0F);
3323   emit_byte(0x2A);
3324   emit_byte(0xC0 | encode);
3325 }
3326 
3327 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
3328   emit_byte(0xF2);
3329   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3330   emit_byte(0x0F);
3331   emit_byte(0x2A);
3332   emit_byte(0xC0 | encode);
3333 }
3334 
3335 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
3336   emit_byte(0xF3);
3337   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3338   emit_byte(0x0F);
3339   emit_byte(0x2C);
3340   emit_byte(0xC0 | encode);
3341 }
3342 
3343 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
3344   emit_byte(0xF3);
3345   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3346   emit_byte(0x0F);
3347   emit_byte(0x2C);
3348   emit_byte(0xC0 | encode);
3349 }
3350 
3351 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
3352   emit_byte(0xF2);
3353   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3354   emit_byte(0x0F);
3355   emit_byte(0x2C);
3356   emit_byte(0xC0 | encode);
3357 }
3358 
3359 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
3360   emit_byte(0xF2);
3361   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3362   emit_byte(0x0F);
3363   emit_byte(0x2C);
3364   emit_byte(0xC0 | encode);
3365 }
3366 
3367 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
3368   emit_byte(0xF3);
3369   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3370   emit_byte(0x0F);
3371   emit_byte(0x5A);
3372   emit_byte(0xC0 | encode);
3373 }
3374 
3375 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
3376   emit_byte(0xF3);
3377   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3378   emit_byte(0x0F);
3379   emit_byte(0xE6);
3380   emit_byte(0xC0 | encode);
3381 }
3382 
3383 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
3384   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3385   emit_byte(0x0F);
3386   emit_byte(0x5B);
3387   emit_byte(0xC0 | encode);
3388 }
3389 
3390 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
3391   emit_byte(0xF2);
3392   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3393   emit_byte(0x0F);
3394   emit_byte(0x5A);
3395   emit_byte(0xC0 | encode);
3396 }
3397 
3398 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
3399   emit_byte(0x66);
3400   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3401   emit_byte(0x0F);
3402   emit_byte(0x60);
3403   emit_byte(0xC0 | encode);
3404 }
3405 
3406 // Implementation of MacroAssembler
3407 
3408 // On 32 bit it returns a vanilla displacement on 64 bit is a rip relative displacement
3409 Address MacroAssembler::as_Address(AddressLiteral adr) {
3410   assert(!adr.is_lval(), "must be rval");
3411   assert(reachable(adr), "must be");
3412   return Address((int)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
3413 }
3414 
3415 Address MacroAssembler::as_Address(ArrayAddress adr) {
3416 #ifdef _LP64
3417   AddressLiteral base = adr.base();
3418   lea(rscratch1, base);
3419   Address index = adr.index();
3420   assert(index._disp == 0, "must not have disp"); // maybe it can?
3421   Address array(rscratch1, index._index, index._scale, index._disp);
3422   return array;
3423 #else
3424   return Address::make_array(adr);
3425 #endif // _LP64
3426 
3427 }
3428 
3429 void MacroAssembler::fat_nop() {
3430   // A 5 byte nop that is safe for patching (see patch_verified_entry)
3431   // Recommened sequence from 'Software Optimization Guide for the AMD
3432   // Hammer Processor'
3433   emit_byte(0x66);
3434   emit_byte(0x66);
3435   emit_byte(0x90);
3436   emit_byte(0x66);
3437   emit_byte(0x90);
3438 }
3439 
3440 static Assembler::Condition reverse[] = {
3441     Assembler::noOverflow     /* overflow      = 0x0 */ ,
3442     Assembler::overflow       /* noOverflow    = 0x1 */ ,
3443     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
3444     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
3445     Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
3446     Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
3447     Assembler::above          /* belowEqual    = 0x6 */ ,
3448     Assembler::belowEqual     /* above         = 0x7 */ ,
3449     Assembler::positive       /* negative      = 0x8 */ ,
3450     Assembler::negative       /* positive      = 0x9 */ ,
3451     Assembler::noParity       /* parity        = 0xa */ ,
3452     Assembler::parity         /* noParity      = 0xb */ ,
3453     Assembler::greaterEqual   /* less          = 0xc */ ,
3454     Assembler::less           /* greaterEqual  = 0xd */ ,
3455     Assembler::greater        /* lessEqual     = 0xe */ ,
3456     Assembler::lessEqual      /* greater       = 0xf, */
3457 
3458 };
3459 
3460 // 32bit can do a case table jump in one instruction but we no longer allow the base
3461 // to be installed in the Address class
3462 void MacroAssembler::jump(ArrayAddress entry) {
3463 #ifdef _LP64
3464   lea(rscratch1, entry.base());
3465   Address dispatch = entry.index();
3466   assert(dispatch._base == noreg, "must be");
3467   dispatch._base = rscratch1;
3468   jmp(dispatch);
3469 #else
3470   jmp(as_Address(entry));
3471 #endif // _LP64
3472 }
3473 
3474 void MacroAssembler::jump(AddressLiteral dst) {
3475   if (reachable(dst)) {
3476     jmp_literal(dst.target(), dst.rspec());
3477   } else {
3478     lea(rscratch1, dst);
3479     jmp(rscratch1);
3480   }
3481 }
3482 
3483 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
3484   if (reachable(dst)) {
3485     InstructionMark im(this);
3486     relocate(dst.reloc());
3487     const int short_size = 2;
3488     const int long_size = 6;
3489     int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
3490     if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
3491       // 0111 tttn #8-bit disp
3492       emit_byte(0x70 | cc);
3493       emit_byte((offs - short_size) & 0xFF);
3494     } else {
3495       // 0000 1111 1000 tttn #32-bit disp
3496       emit_byte(0x0F);
3497       emit_byte(0x80 | cc);
3498       emit_long(offs - long_size);
3499     }
3500   } else {
3501 #ifdef ASSERT
3502     warning("reversing conditional branch");
3503 #endif /* ASSERT */
3504     Label skip;
3505     jccb(reverse[cc], skip);
3506     lea(rscratch1, dst);
3507     Assembler::jmp(rscratch1);
3508     bind(skip);
3509   }
3510 }
3511 
3512 // Wouldn't need if AddressLiteral version had new name
3513 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
3514   Assembler::call(L, rtype);
3515 }
3516 
3517 // Wouldn't need if AddressLiteral version had new name
3518 void MacroAssembler::call(Register entry) {
3519   Assembler::call(entry);
3520 }
3521 
3522 void MacroAssembler::call(AddressLiteral entry) {
3523   if (reachable(entry)) {
3524     Assembler::call_literal(entry.target(), entry.rspec());
3525   } else {
3526     lea(rscratch1, entry);
3527     Assembler::call(rscratch1);
3528   }
3529 }
3530 
3531 void MacroAssembler::cmp8(AddressLiteral src1, int8_t src2) {
3532   if (reachable(src1)) {
3533     cmpb(as_Address(src1), src2);
3534   } else {
3535     lea(rscratch1, src1);
3536     cmpb(Address(rscratch1, 0), src2);
3537   }
3538 }
3539 
3540 void MacroAssembler::cmp32(AddressLiteral src1, int32_t src2) {
3541   if (reachable(src1)) {
3542     cmpl(as_Address(src1), src2);
3543   } else {
3544     lea(rscratch1, src1);
3545     cmpl(Address(rscratch1, 0), src2);
3546   }
3547 }
3548 
3549 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
3550   if (reachable(src2)) {
3551     cmpl(src1, as_Address(src2));
3552   } else {
3553     lea(rscratch1, src2);
3554     cmpl(src1, Address(rscratch1, 0));
3555   }
3556 }
3557 
3558 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
3559 #ifdef _LP64
3560   if (src2.is_lval()) {
3561     movptr(rscratch1, src2);
3562     Assembler::cmpq(src1, rscratch1);
3563   } else if (reachable(src2)) {
3564     cmpq(src1, as_Address(src2));
3565   } else {
3566     lea(rscratch1, src2);
3567     Assembler::cmpq(src1, Address(rscratch1, 0));
3568   }
3569 #else
3570   if (src2.is_lval()) {
3571     cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
3572   } else {
3573     cmpl(src1, as_Address(src2));
3574   }
3575 #endif // _LP64
3576 }
3577 
3578 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
3579   assert(src2.is_lval(), "not a mem-mem compare");
3580 #ifdef _LP64
3581   // moves src2's literal address
3582   movptr(rscratch1, src2);
3583   Assembler::cmpq(src1, rscratch1);
3584 #else
3585   cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
3586 #endif // _LP64
3587 }
3588 
3589 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
3590   assert(!src2.is_lval(), "should use cmpptr");
3591 
3592   if (reachable(src2)) {
3593 #ifdef _LP64
3594     cmpq(src1, as_Address(src2));
3595 #else
3596     ShouldNotReachHere();
3597 #endif // _LP64
3598   } else {
3599     lea(rscratch1, src2);
3600     Assembler::cmpq(src1, Address(rscratch1, 0));
3601   }
3602 }
3603 
3604 void MacroAssembler::cmpxchgptr(Register reg, AddressLiteral adr) {
3605   if (reachable(adr)) {
3606 #ifdef _LP64
3607     cmpxchgq(reg, as_Address(adr));
3608 #else
3609     cmpxchgl(reg, as_Address(adr));
3610 #endif // _LP64
3611   } else {
3612     lea(rscratch1, adr);
3613     cmpxchgq(reg, Address(rscratch1, 0));
3614   }
3615 }
3616 
3617 void MacroAssembler::incrementl(AddressLiteral dst) {
3618   if (reachable(dst)) {
3619     incrementl(as_Address(dst));
3620   } else {
3621     lea(rscratch1, dst);
3622     incrementl(Address(rscratch1, 0));
3623   }
3624 }
3625 
3626 void MacroAssembler::incrementl(ArrayAddress dst) {
3627   incrementl(as_Address(dst));
3628 }
3629 
3630 void MacroAssembler::lea(Register dst, Address src) {
3631 #ifdef _LP64
3632   leaq(dst, src);
3633 #else
3634   leal(dst, src);
3635 #endif // _LP64
3636 }
3637 
3638 void MacroAssembler::lea(Register dst, AddressLiteral src) {
3639 #ifdef _LP64
3640     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
3641 #else
3642     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
3643 #endif // _LP64
3644 }
3645 
3646 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
3647   if (reachable(dst)) {
3648     movl(as_Address(dst), src);
3649   } else {
3650     lea(rscratch1, dst);
3651     movl(Address(rscratch1, 0), src);
3652   }
3653 }
3654 
3655 void MacroAssembler::mov32(Register dst, AddressLiteral src) {
3656   if (reachable(src)) {
3657     movl(dst, as_Address(src));
3658   } else {
3659     lea(rscratch1, src);
3660     movl(dst, Address(rscratch1, 0));
3661   }
3662 }
3663 
3664 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
3665   if (reachable(src)) {
3666     if (UseXmmLoadAndClearUpper) {
3667       movsd (dst, as_Address(src));
3668     } else {
3669       movlpd(dst, as_Address(src));
3670     }
3671   } else {
3672     lea(rscratch1, src);
3673     if (UseXmmLoadAndClearUpper) {
3674       movsd (dst, Address(rscratch1, 0));
3675     } else {
3676       movlpd(dst, Address(rscratch1, 0));
3677     }
3678   }
3679 }
3680 
3681 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
3682   if (reachable(src)) {
3683     movss(dst, as_Address(src));
3684   } else {
3685     lea(rscratch1, src);
3686     movss(dst, Address(rscratch1, 0));
3687   }
3688 }
3689 
3690 void MacroAssembler::movoop(Register dst, jobject obj) {
3691   mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
3692 }
3693 
3694 void MacroAssembler::movoop(Address dst, jobject obj) {
3695   mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
3696   movq(dst, rscratch1);
3697 }
3698 
3699 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
3700 #ifdef _LP64
3701   if (src.is_lval()) {
3702     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
3703   } else {
3704     if (reachable(src)) {
3705       movq(dst, as_Address(src));
3706     } else {
3707       lea(rscratch1, src);
3708       movq(dst, Address(rscratch1,0));
3709     }
3710   }
3711 #else
3712   if (src.is_lval()) {
3713     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
3714   } else {
3715     movl(dst, as_Address(src));
3716   }
3717 #endif // LP64
3718 }
3719 
3720 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
3721 #ifdef _LP64
3722   movq(as_Address(dst), src);
3723 #else
3724   movl(as_Address(dst), src);
3725 #endif // _LP64
3726 }
3727 
3728 void MacroAssembler::pushoop(jobject obj) {
3729 #ifdef _LP64
3730   movoop(rscratch1, obj);
3731   pushq(rscratch1);
3732 #else
3733   push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
3734 #endif // _LP64
3735 }
3736 
3737 void MacroAssembler::pushptr(AddressLiteral src) {
3738 #ifdef _LP64
3739   lea(rscratch1, src);
3740   if (src.is_lval()) {
3741     pushq(rscratch1);
3742   } else {
3743     pushq(Address(rscratch1, 0));
3744   }
3745 #else
3746   if (src.is_lval()) {
3747     push_literal((int32_t)src.target(), src.rspec());
3748   else {
3749     pushl(as_Address(src));
3750   }
3751 #endif // _LP64
3752 }
3753 
3754 void MacroAssembler::ldmxcsr(AddressLiteral src) {
3755   if (reachable(src)) {
3756     Assembler::ldmxcsr(as_Address(src));
3757   } else {
3758     lea(rscratch1, src);
3759     Assembler::ldmxcsr(Address(rscratch1, 0));
3760   }
3761 }
3762 
3763 void MacroAssembler::movlpd(XMMRegister dst, AddressLiteral src) {
3764   if (reachable(src)) {
3765     movlpd(dst, as_Address(src));
3766   } else {
3767     lea(rscratch1, src);
3768     movlpd(dst, Address(rscratch1, 0));
3769   }
3770 }
3771 
3772 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
3773   if (reachable(src)) {
3774     movss(dst, as_Address(src));
3775   } else {
3776     lea(rscratch1, src);
3777     movss(dst, Address(rscratch1, 0));
3778   }
3779 }
3780 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
3781   if (reachable(src)) {
3782     xorpd(dst, as_Address(src));
3783   } else {
3784     lea(rscratch1, src);
3785     xorpd(dst, Address(rscratch1, 0));
3786   }
3787 }
3788 
3789 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
3790   if (reachable(src)) {
3791     xorps(dst, as_Address(src));
3792   } else {
3793     lea(rscratch1, src);
3794     xorps(dst, Address(rscratch1, 0));
3795   }
3796 }
3797 
3798 void MacroAssembler::null_check(Register reg, int offset) {
3799   if (needs_explicit_null_check(offset)) {
3800     // provoke OS NULL exception if reg = NULL by
3801     // accessing M[reg] w/o changing any (non-CC) registers
3802     cmpq(rax, Address(reg, 0));
3803     // Note: should probably use testl(rax, Address(reg, 0));
3804     //       may be shorter code (however, this version of
3805     //       testl needs to be implemented first)
3806   } else {
3807     // nothing to do, (later) access of M[reg + offset]
3808     // will provoke OS NULL exception if reg = NULL
3809   }
3810 }
3811 
3812 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
3813   int off = offset();
3814   movzbl(dst, src);
3815   return off;
3816 }
3817 
3818 int MacroAssembler::load_unsigned_word(Register dst, Address src) {
3819   int off = offset();
3820   movzwl(dst, src);
3821   return off;
3822 }
3823 
3824 int MacroAssembler::load_signed_byte(Register dst, Address src) {
3825   int off = offset();
3826   movsbl(dst, src);
3827   return off;
3828 }
3829 
3830 int MacroAssembler::load_signed_word(Register dst, Address src) {
3831   int off = offset();
3832   movswl(dst, src);
3833   return off;
3834 }
3835 
3836 void MacroAssembler::incrementl(Register reg, int value) {
3837   if (value == min_jint) { addl(reg, value); return; }
3838   if (value <  0) { decrementl(reg, -value); return; }
3839   if (value == 0) {                        ; return; }
3840   if (value == 1 && UseIncDec) { incl(reg) ; return; }
3841   /* else */      { addl(reg, value)       ; return; }
3842 }
3843 
3844 void MacroAssembler::decrementl(Register reg, int value) {
3845   if (value == min_jint) { subl(reg, value); return; }
3846   if (value <  0) { incrementl(reg, -value); return; }
3847   if (value == 0) {                        ; return; }
3848   if (value == 1 && UseIncDec) { decl(reg) ; return; }
3849   /* else */      { subl(reg, value)       ; return; }
3850 }
3851 
3852 void MacroAssembler::incrementq(Register reg, int value) {
3853   if (value == min_jint) { addq(reg, value); return; }
3854   if (value <  0) { decrementq(reg, -value); return; }
3855   if (value == 0) {                        ; return; }
3856   if (value == 1 && UseIncDec) { incq(reg) ; return; }
3857   /* else */      { addq(reg, value)       ; return; }
3858 }
3859 
3860 void MacroAssembler::decrementq(Register reg, int value) {
3861   if (value == min_jint) { subq(reg, value); return; }
3862   if (value <  0) { incrementq(reg, -value); return; }
3863   if (value == 0) {                        ; return; }
3864   if (value == 1 && UseIncDec) { decq(reg) ; return; }
3865   /* else */      { subq(reg, value)       ; return; }
3866 }
3867 
3868 void MacroAssembler::incrementl(Address dst, int value) {
3869   if (value == min_jint) { addl(dst, value); return; }
3870   if (value <  0) { decrementl(dst, -value); return; }
3871   if (value == 0) {                        ; return; }
3872   if (value == 1 && UseIncDec) { incl(dst) ; return; }
3873   /* else */      { addl(dst, value)       ; return; }
3874 }
3875 
3876 void MacroAssembler::decrementl(Address dst, int value) {
3877   if (value == min_jint) { subl(dst, value); return; }
3878   if (value <  0) { incrementl(dst, -value); return; }
3879   if (value == 0) {                        ; return; }
3880   if (value == 1 && UseIncDec) { decl(dst) ; return; }
3881   /* else */      { subl(dst, value)       ; return; }
3882 }
3883 
3884 void MacroAssembler::incrementq(Address dst, int value) {
3885   if (value == min_jint) { addq(dst, value); return; }
3886   if (value <  0) { decrementq(dst, -value); return; }
3887   if (value == 0) {                        ; return; }
3888   if (value == 1 && UseIncDec) { incq(dst) ; return; }
3889   /* else */      { addq(dst, value)       ; return; }
3890 }
3891 
3892 void MacroAssembler::decrementq(Address dst, int value) {
3893   if (value == min_jint) { subq(dst, value); return; }
3894   if (value <  0) { incrementq(dst, -value); return; }
3895   if (value == 0) {                        ; return; }
3896   if (value == 1 && UseIncDec) { decq(dst) ; return; }
3897   /* else */      { subq(dst, value)       ; return; }
3898 }
3899 
3900 void MacroAssembler::align(int modulus) {
3901   if (offset() % modulus != 0) {
3902     nop(modulus - (offset() % modulus));
3903   }
3904 }
3905 
3906 void MacroAssembler::enter() {
3907   pushq(rbp);
3908   movq(rbp, rsp);
3909 }
3910 
3911 void MacroAssembler::leave() {
3912   emit_byte(0xC9); // LEAVE
3913 }
3914 
3915 // C++ bool manipulation
3916 
3917 void MacroAssembler::movbool(Register dst, Address src) {
3918   if(sizeof(bool) == 1)
3919     movb(dst, src);
3920   else if(sizeof(bool) == 2)
3921     movw(dst, src);
3922   else if(sizeof(bool) == 4)
3923     movl(dst, src);
3924   else {
3925     // unsupported
3926     ShouldNotReachHere();
3927   }
3928 }
3929 
3930 void MacroAssembler::movbool(Address dst, bool boolconst) {
3931   if(sizeof(bool) == 1)
3932     movb(dst, (int) boolconst);
3933   else if(sizeof(bool) == 2)
3934     movw(dst, (int) boolconst);
3935   else if(sizeof(bool) == 4)
3936     movl(dst, (int) boolconst);
3937   else {
3938     // unsupported
3939     ShouldNotReachHere();
3940   }
3941 }
3942 
3943 void MacroAssembler::movbool(Address dst, Register src) {
3944   if(sizeof(bool) == 1)
3945     movb(dst, src);
3946   else if(sizeof(bool) == 2)
3947     movw(dst, src);
3948   else if(sizeof(bool) == 4)
3949     movl(dst, src);
3950   else {
3951     // unsupported
3952     ShouldNotReachHere();
3953   }
3954 }
3955 
3956 void MacroAssembler::testbool(Register dst) {
3957   if(sizeof(bool) == 1)
3958     testb(dst, (int) 0xff);
3959   else if(sizeof(bool) == 2) {
3960     // need testw impl
3961     ShouldNotReachHere();
3962   } else if(sizeof(bool) == 4)
3963     testl(dst, dst);
3964   else {
3965     // unsupported
3966     ShouldNotReachHere();
3967   }
3968 }
3969 
3970 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
3971                                          Register last_java_fp,
3972                                          address  last_java_pc) {
3973   // determine last_java_sp register
3974   if (!last_java_sp->is_valid()) {
3975     last_java_sp = rsp;
3976   }
3977 
3978   // last_java_fp is optional
3979   if (last_java_fp->is_valid()) {
3980     movq(Address(r15_thread, JavaThread::last_Java_fp_offset()),
3981          last_java_fp);
3982   }
3983 
3984   // last_java_pc is optional
3985   if (last_java_pc != NULL) {
3986     Address java_pc(r15_thread,
3987                     JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
3988     lea(rscratch1, InternalAddress(last_java_pc));
3989     movq(java_pc, rscratch1);
3990   }
3991 
3992   movq(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
3993 }
3994 
3995 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
3996                                            bool clear_pc) {
3997   // we must set sp to zero to clear frame
3998   movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
3999   // must clear fp, so that compiled frames are not confused; it is
4000   // possible that we need it only for debugging
4001   if (clear_fp) {
4002     movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
4003   }
4004 
4005   if (clear_pc) {
4006     movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
4007   }
4008 }
4009 
4010 
4011 // Implementation of call_VM versions
4012 
4013 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
4014   Label L, E;
4015 
4016 #ifdef _WIN64
4017   // Windows always allocates space for it's register args
4018   assert(num_args <= 4, "only register arguments supported");
4019   subq(rsp,  frame::arg_reg_save_area_bytes);
4020 #endif
4021 
4022   // Align stack if necessary
4023   testl(rsp, 15);
4024   jcc(Assembler::zero, L);
4025 
4026   subq(rsp, 8);
4027   {
4028     call(RuntimeAddress(entry_point));
4029   }
4030   addq(rsp, 8);
4031   jmp(E);
4032 
4033   bind(L);
4034   {
4035     call(RuntimeAddress(entry_point));
4036   }
4037 
4038   bind(E);
4039 
4040 #ifdef _WIN64
4041   // restore stack pointer
4042   addq(rsp, frame::arg_reg_save_area_bytes);
4043 #endif
4044 
4045 }
4046 
4047 
4048 void MacroAssembler::call_VM_base(Register oop_result,
4049                                   Register java_thread,
4050                                   Register last_java_sp,
4051                                   address entry_point,
4052                                   int num_args,
4053                                   bool check_exceptions) {
4054   // determine last_java_sp register
4055   if (!last_java_sp->is_valid()) {
4056     last_java_sp = rsp;
4057   }
4058 
4059   // debugging support
4060   assert(num_args >= 0, "cannot have negative number of arguments");
4061   assert(r15_thread != oop_result,
4062          "cannot use the same register for java_thread & oop_result");
4063   assert(r15_thread != last_java_sp,
4064          "cannot use the same register for java_thread & last_java_sp");
4065 
4066   // set last Java frame before call
4067 
4068   // This sets last_Java_fp which is only needed from interpreted frames
4069   // and should really be done only from the interp_masm version before
4070   // calling the underlying call_VM. That doesn't happen yet so we set
4071   // last_Java_fp here even though some callers don't need it and
4072   // also clear it below.
4073   set_last_Java_frame(last_java_sp, rbp, NULL);
4074 
4075   {
4076     Label L, E;
4077 
4078     // Align stack if necessary
4079 #ifdef _WIN64
4080     assert(num_args <= 4, "only register arguments supported");
4081     // Windows always allocates space for it's register args
4082     subq(rsp, frame::arg_reg_save_area_bytes);
4083 #endif
4084     testl(rsp, 15);
4085     jcc(Assembler::zero, L);
4086 
4087     subq(rsp, 8);
4088     {
4089       call(RuntimeAddress(entry_point));
4090     }
4091     addq(rsp, 8);
4092     jmp(E);
4093 
4094 
4095     bind(L);
4096     {
4097       call(RuntimeAddress(entry_point));
4098     }
4099 
4100     bind(E);
4101 
4102 #ifdef _WIN64
4103     // restore stack pointer
4104     addq(rsp, frame::arg_reg_save_area_bytes);
4105 #endif
4106   }
4107 
4108 #ifdef ASSERT
4109   pushq(rax);
4110   {
4111     Label L;
4112     get_thread(rax);
4113     cmpq(r15_thread, rax);
4114     jcc(Assembler::equal, L);
4115     stop("MacroAssembler::call_VM_base: register not callee saved?");
4116     bind(L);
4117   }
4118   popq(rax);
4119 #endif
4120 
4121   // reset last Java frame
4122   // This really shouldn't have to clear fp set note above at the
4123   // call to set_last_Java_frame
4124   reset_last_Java_frame(true, false);
4125 
4126   check_and_handle_popframe(noreg);
4127   check_and_handle_earlyret(noreg);
4128 
4129   if (check_exceptions) {
4130     cmpq(Address(r15_thread, Thread::pending_exception_offset()), (int) NULL);
4131     // This used to conditionally jump to forward_exception however it is
4132     // possible if we relocate that the branch will not reach. So we must jump
4133     // around so we can always reach
4134     Label ok;
4135     jcc(Assembler::equal, ok);
4136     jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
4137     bind(ok);
4138   }
4139 
4140   // get oop result if there is one and reset the value in the thread
4141   if (oop_result->is_valid()) {
4142     movq(oop_result, Address(r15_thread, JavaThread::vm_result_offset()));
4143     movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD);
4144     verify_oop(oop_result);
4145   }
4146 }
4147 
4148 void MacroAssembler::check_and_handle_popframe(Register java_thread) {}
4149 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
4150 
4151 void MacroAssembler::call_VM_helper(Register oop_result,
4152                                     address entry_point,
4153                                     int num_args,
4154                                     bool check_exceptions) {
4155   // Java thread becomes first argument of C function
4156   movq(c_rarg0, r15_thread);
4157 
4158   // We've pushed one address, correct last_Java_sp
4159   leaq(rax, Address(rsp, wordSize));
4160 
4161   call_VM_base(oop_result, noreg, rax, entry_point, num_args,
4162                check_exceptions);
4163 }
4164 
4165 
4166 void MacroAssembler::call_VM(Register oop_result,
4167                              address entry_point,
4168                              bool check_exceptions) {
4169   Label C, E;
4170   Assembler::call(C, relocInfo::none);
4171   jmp(E);
4172 
4173   bind(C);
4174   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
4175   ret(0);
4176 
4177   bind(E);
4178 }
4179 
4180 
4181 void MacroAssembler::call_VM(Register oop_result,
4182                              address entry_point,
4183                              Register arg_1,
4184                              bool check_exceptions) {
4185   assert(rax != arg_1, "smashed argument");
4186   assert(c_rarg0 != arg_1, "smashed argument");
4187 
4188   Label C, E;
4189   Assembler::call(C, relocInfo::none);
4190   jmp(E);
4191 
4192   bind(C);
4193   // c_rarg0 is reserved for thread
4194   if (c_rarg1 != arg_1) {
4195     movq(c_rarg1, arg_1);
4196   }
4197   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
4198   ret(0);
4199 
4200   bind(E);
4201 }
4202 
4203 void MacroAssembler::call_VM(Register oop_result,
4204                              address entry_point,
4205                              Register arg_1,
4206                              Register arg_2,
4207                              bool check_exceptions) {
4208   assert(rax != arg_1, "smashed argument");
4209   assert(rax != arg_2, "smashed argument");
4210   assert(c_rarg0 != arg_1, "smashed argument");
4211   assert(c_rarg0 != arg_2, "smashed argument");
4212   assert(c_rarg1 != arg_2, "smashed argument");
4213   assert(c_rarg2 != arg_1, "smashed argument");
4214 
4215   Label C, E;
4216   Assembler::call(C, relocInfo::none);
4217   jmp(E);
4218 
4219   bind(C);
4220   // c_rarg0 is reserved for thread
4221   if (c_rarg1 != arg_1) {
4222     movq(c_rarg1, arg_1);
4223   }
4224   if (c_rarg2 != arg_2) {
4225     movq(c_rarg2, arg_2);
4226   }
4227   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
4228   ret(0);
4229 
4230   bind(E);
4231 }
4232 
4233 
4234 void MacroAssembler::call_VM(Register oop_result,
4235                              address entry_point,
4236                              Register arg_1,
4237                              Register arg_2,
4238                              Register arg_3,
4239                              bool check_exceptions) {
4240   assert(rax != arg_1, "smashed argument");
4241   assert(rax != arg_2, "smashed argument");
4242   assert(rax != arg_3, "smashed argument");
4243   assert(c_rarg0 != arg_1, "smashed argument");
4244   assert(c_rarg0 != arg_2, "smashed argument");
4245   assert(c_rarg0 != arg_3, "smashed argument");
4246   assert(c_rarg1 != arg_2, "smashed argument");
4247   assert(c_rarg1 != arg_3, "smashed argument");
4248   assert(c_rarg2 != arg_1, "smashed argument");
4249   assert(c_rarg2 != arg_3, "smashed argument");
4250   assert(c_rarg3 != arg_1, "smashed argument");
4251   assert(c_rarg3 != arg_2, "smashed argument");
4252 
4253   Label C, E;
4254   Assembler::call(C, relocInfo::none);
4255   jmp(E);
4256 
4257   bind(C);
4258   // c_rarg0 is reserved for thread
4259   if (c_rarg1 != arg_1) {
4260     movq(c_rarg1, arg_1);
4261   }
4262   if (c_rarg2 != arg_2) {
4263     movq(c_rarg2, arg_2);
4264   }
4265   if (c_rarg3 != arg_3) {
4266     movq(c_rarg3, arg_3);
4267   }
4268   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
4269   ret(0);
4270 
4271   bind(E);
4272 }
4273 
4274 void MacroAssembler::call_VM(Register oop_result,
4275                              Register last_java_sp,
4276                              address entry_point,
4277                              int num_args,
4278                              bool check_exceptions) {
4279   call_VM_base(oop_result, noreg, last_java_sp, entry_point, num_args,
4280                check_exceptions);
4281 }
4282 
4283 void MacroAssembler::call_VM(Register oop_result,
4284                              Register last_java_sp,
4285                              address entry_point,
4286                              Register arg_1,
4287                              bool check_exceptions) {
4288   assert(c_rarg0 != arg_1, "smashed argument");
4289   assert(c_rarg1 != last_java_sp, "smashed argument");
4290   // c_rarg0 is reserved for thread
4291   if (c_rarg1 != arg_1) {
4292     movq(c_rarg1, arg_1);
4293   }
4294   call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
4295 }
4296 
4297 void MacroAssembler::call_VM(Register oop_result,
4298                              Register last_java_sp,
4299                              address entry_point,
4300                              Register arg_1,
4301                              Register arg_2,
4302                              bool check_exceptions) {
4303   assert(c_rarg0 != arg_1, "smashed argument");
4304   assert(c_rarg0 != arg_2, "smashed argument");
4305   assert(c_rarg1 != arg_2, "smashed argument");
4306   assert(c_rarg1 != last_java_sp, "smashed argument");
4307   assert(c_rarg2 != arg_1, "smashed argument");
4308   assert(c_rarg2 != last_java_sp, "smashed argument");
4309   // c_rarg0 is reserved for thread
4310   if (c_rarg1 != arg_1) {
4311     movq(c_rarg1, arg_1);
4312   }
4313   if (c_rarg2 != arg_2) {
4314     movq(c_rarg2, arg_2);
4315   }
4316   call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
4317 }
4318 
4319 
4320 void MacroAssembler::call_VM(Register oop_result,
4321                              Register last_java_sp,
4322                              address entry_point,
4323                              Register arg_1,
4324                              Register arg_2,
4325                              Register arg_3,
4326                              bool check_exceptions) {
4327   assert(c_rarg0 != arg_1, "smashed argument");
4328   assert(c_rarg0 != arg_2, "smashed argument");
4329   assert(c_rarg0 != arg_3, "smashed argument");
4330   assert(c_rarg1 != arg_2, "smashed argument");
4331   assert(c_rarg1 != arg_3, "smashed argument");
4332   assert(c_rarg1 != last_java_sp, "smashed argument");
4333   assert(c_rarg2 != arg_1, "smashed argument");
4334   assert(c_rarg2 != arg_3, "smashed argument");
4335   assert(c_rarg2 != last_java_sp, "smashed argument");
4336   assert(c_rarg3 != arg_1, "smashed argument");
4337   assert(c_rarg3 != arg_2, "smashed argument");
4338   assert(c_rarg3 != last_java_sp, "smashed argument");
4339   // c_rarg0 is reserved for thread
4340   if (c_rarg1 != arg_1) {
4341     movq(c_rarg1, arg_1);
4342   }
4343   if (c_rarg2 != arg_2) {
4344     movq(c_rarg2, arg_2);
4345   }
4346   if (c_rarg3 != arg_3) {
4347     movq(c_rarg2, arg_3);
4348   }
4349   call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
4350 }
4351 
4352 void MacroAssembler::call_VM_leaf(address entry_point, int num_args) {
4353   call_VM_leaf_base(entry_point, num_args);
4354 }
4355 
4356 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
4357   if (c_rarg0 != arg_1) {
4358     movq(c_rarg0, arg_1);
4359   }
4360   call_VM_leaf(entry_point, 1);
4361 }
4362 
4363 void MacroAssembler::call_VM_leaf(address entry_point,
4364                                   Register arg_1,
4365                                   Register arg_2) {
4366   assert(c_rarg0 != arg_2, "smashed argument");
4367   assert(c_rarg1 != arg_1, "smashed argument");
4368   if (c_rarg0 != arg_1) {
4369     movq(c_rarg0, arg_1);
4370   }
4371   if (c_rarg1 != arg_2) {
4372     movq(c_rarg1, arg_2);
4373   }
4374   call_VM_leaf(entry_point, 2);
4375 }
4376 
4377 void MacroAssembler::call_VM_leaf(address entry_point,
4378                                   Register arg_1,
4379                                   Register arg_2,
4380                                   Register arg_3) {
4381   assert(c_rarg0 != arg_2, "smashed argument");
4382   assert(c_rarg0 != arg_3, "smashed argument");
4383   assert(c_rarg1 != arg_1, "smashed argument");
4384   assert(c_rarg1 != arg_3, "smashed argument");
4385   assert(c_rarg2 != arg_1, "smashed argument");
4386   assert(c_rarg2 != arg_2, "smashed argument");
4387   if (c_rarg0 != arg_1) {
4388     movq(c_rarg0, arg_1);
4389   }
4390   if (c_rarg1 != arg_2) {
4391     movq(c_rarg1, arg_2);
4392   }
4393   if (c_rarg2 != arg_3) {
4394     movq(c_rarg2, arg_3);
4395   }
4396   call_VM_leaf(entry_point, 3);
4397 }
4398 
4399 
4400 // Calls to C land
4401 //
4402 // When entering C land, the rbp & rsp of the last Java frame have to
4403 // be recorded in the (thread-local) JavaThread object. When leaving C
4404 // land, the last Java fp has to be reset to 0. This is required to
4405 // allow proper stack traversal.
4406 void MacroAssembler::store_check(Register obj) {
4407   // Does a store check for the oop in register obj. The content of
4408   // register obj is destroyed afterwards.
4409   store_check_part_1(obj);
4410   store_check_part_2(obj);
4411 }
4412 
4413 void MacroAssembler::store_check(Register obj, Address dst) {
4414   store_check(obj);
4415 }
4416 
4417 // split the store check operation so that other instructions can be
4418 // scheduled inbetween
4419 void MacroAssembler::store_check_part_1(Register obj) {
4420   BarrierSet* bs = Universe::heap()->barrier_set();
4421   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
4422   shrq(obj, CardTableModRefBS::card_shift);
4423 }
4424 
4425 void MacroAssembler::store_check_part_2(Register obj) {
4426   BarrierSet* bs = Universe::heap()->barrier_set();
4427   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
4428   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
4429   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
4430 
4431   // The calculation for byte_map_base is as follows:
4432   // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
4433   // So this essentially converts an address to a displacement and
4434   // it will never need to be relocated. On 64bit however the value may be too
4435   // large for a 32bit displacement
4436 
4437   intptr_t disp = (intptr_t) ct->byte_map_base;
4438   if (is_simm32(disp)) {
4439     Address cardtable(noreg, obj, Address::times_1, disp);
4440     movb(cardtable, 0);
4441   } else {
4442     // By doing it as an ExternalAddress disp could be converted to a rip-relative
4443     // displacement and done in a single instruction given favorable mapping and
4444     // a smarter version of as_Address. Worst case it is two instructions which
4445     // is no worse off then loading disp into a register and doing as a simple
4446     // Address() as above.
4447     // We can't do as ExternalAddress as the only style since if disp == 0 we'll
4448     // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
4449     // in some cases we'll get a single instruction version.
4450 
4451     ExternalAddress cardtable((address)disp);
4452     Address index(noreg, obj, Address::times_1);
4453     movb(as_Address(ArrayAddress(cardtable, index)), 0);
4454   }
4455 
4456 }
4457 
4458 void MacroAssembler::c2bool(Register x) {
4459   // implements x == 0 ? 0 : 1
4460   // note: must only look at least-significant byte of x
4461   //       since C-style booleans are stored in one byte
4462   //       only! (was bug)
4463   andl(x, 0xFF);
4464   setb(Assembler::notZero, x);
4465 }
4466 
4467 int MacroAssembler::corrected_idivl(Register reg) {
4468   // Full implementation of Java idiv and irem; checks for special
4469   // case as described in JVM spec., p.243 & p.271.  The function
4470   // returns the (pc) offset of the idivl instruction - may be needed
4471   // for implicit exceptions.
4472   //
4473   //         normal case                           special case
4474   //
4475   // input : eax: dividend                         min_int
4476   //         reg: divisor   (may not be eax/edx)   -1
4477   //
4478   // output: eax: quotient  (= eax idiv reg)       min_int
4479   //         edx: remainder (= eax irem reg)       0
4480   assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
4481   const int min_int = 0x80000000;
4482   Label normal_case, special_case;
4483 
4484   // check for special case
4485   cmpl(rax, min_int);
4486   jcc(Assembler::notEqual, normal_case);
4487   xorl(rdx, rdx); // prepare edx for possible special case (where
4488                   // remainder = 0)
4489   cmpl(reg, -1);
4490   jcc(Assembler::equal, special_case);
4491 
4492   // handle normal case
4493   bind(normal_case);
4494   cdql();
4495   int idivl_offset = offset();
4496   idivl(reg);
4497 
4498   // normal and special case exit
4499   bind(special_case);
4500 
4501   return idivl_offset;
4502 }
4503 
4504 int MacroAssembler::corrected_idivq(Register reg) {
4505   // Full implementation of Java ldiv and lrem; checks for special
4506   // case as described in JVM spec., p.243 & p.271.  The function
4507   // returns the (pc) offset of the idivl instruction - may be needed
4508   // for implicit exceptions.
4509   //
4510   //         normal case                           special case
4511   //
4512   // input : rax: dividend                         min_long
4513   //         reg: divisor   (may not be eax/edx)   -1
4514   //
4515   // output: rax: quotient  (= rax idiv reg)       min_long
4516   //         rdx: remainder (= rax irem reg)       0
4517   assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
4518   static const int64_t min_long = 0x8000000000000000;
4519   Label normal_case, special_case;
4520 
4521   // check for special case
4522   cmp64(rax, ExternalAddress((address) &min_long));
4523   jcc(Assembler::notEqual, normal_case);
4524   xorl(rdx, rdx); // prepare rdx for possible special case (where
4525                   // remainder = 0)
4526   cmpq(reg, -1);
4527   jcc(Assembler::equal, special_case);
4528 
4529   // handle normal case
4530   bind(normal_case);
4531   cdqq();
4532   int idivq_offset = offset();
4533   idivq(reg);
4534 
4535   // normal and special case exit
4536   bind(special_case);
4537 
4538   return idivq_offset;
4539 }
4540 
4541 void MacroAssembler::push_IU_state() {
4542   pushfq();     // Push flags first because pushaq kills them
4543   subq(rsp, 8); // Make sure rsp stays 16-byte aligned
4544   pushaq();
4545 }
4546 
4547 void MacroAssembler::pop_IU_state() {
4548   popaq();
4549   addq(rsp, 8);
4550   popfq();
4551 }
4552 
4553 void MacroAssembler::push_FPU_state() {
4554   subq(rsp, FPUStateSizeInWords * wordSize);
4555   fxsave(Address(rsp, 0));
4556 }
4557 
4558 void MacroAssembler::pop_FPU_state() {
4559   fxrstor(Address(rsp, 0));
4560   addq(rsp, FPUStateSizeInWords * wordSize);
4561 }
4562 
4563 // Save Integer and Float state
4564 // Warning: Stack must be 16 byte aligned
4565 void MacroAssembler::push_CPU_state() {
4566   push_IU_state();
4567   push_FPU_state();
4568 }
4569 
4570 void MacroAssembler::pop_CPU_state() {
4571   pop_FPU_state();
4572   pop_IU_state();
4573 }
4574 
4575 void MacroAssembler::sign_extend_short(Register reg) {
4576   movswl(reg, reg);
4577 }
4578 
4579 void MacroAssembler::sign_extend_byte(Register reg) {
4580   movsbl(reg, reg);
4581 }
4582 
4583 void MacroAssembler::division_with_shift(Register reg, int shift_value) {
4584   assert (shift_value > 0, "illegal shift value");
4585   Label _is_positive;
4586   testl (reg, reg);
4587   jcc (Assembler::positive, _is_positive);
4588   int offset = (1 << shift_value) - 1 ;
4589 
4590   if (offset == 1) {
4591     incrementl(reg);
4592   } else {
4593     addl(reg, offset);
4594   }
4595 
4596   bind (_is_positive);
4597   sarl(reg, shift_value);
4598 }
4599 
4600 void MacroAssembler::round_to_l(Register reg, int modulus) {
4601   addl(reg, modulus - 1);
4602   andl(reg, -modulus);
4603 }
4604 
4605 void MacroAssembler::round_to_q(Register reg, int modulus) {
4606   addq(reg, modulus - 1);
4607   andq(reg, -modulus);
4608 }
4609 
4610 void MacroAssembler::verify_oop(Register reg, const char* s) {
4611   if (!VerifyOops) {
4612     return;
4613   }
4614 
4615   // Pass register number to verify_oop_subroutine
4616   char* b = new char[strlen(s) + 50];
4617   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
4618 
4619   pushq(rax); // save rax, restored by receiver
4620 
4621   // pass args on stack, only touch rax
4622   pushq(reg);
4623 
4624   // avoid using pushptr, as it modifies scratch registers
4625   // and our contract is not to modify anything
4626   ExternalAddress buffer((address)b);
4627   movptr(rax, buffer.addr());
4628   pushq(rax);
4629 
4630   // call indirectly to solve generation ordering problem
4631   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4632   call(rax); // no alignment requirement
4633   // everything popped by receiver
4634 }
4635 
4636 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
4637   if (!VerifyOops) return;
4638   // Pass register number to verify_oop_subroutine
4639   char* b = new char[strlen(s) + 50];
4640   sprintf(b, "verify_oop_addr: %s", s);
4641   pushq(rax);                          // save rax
4642   movq(addr, rax);
4643   pushq(rax);                          // pass register argument
4644 
4645 
4646   // avoid using pushptr, as it modifies scratch registers
4647   // and our contract is not to modify anything
4648   ExternalAddress buffer((address)b);
4649   movptr(rax, buffer.addr());
4650   pushq(rax);
4651 
4652   // call indirectly to solve generation ordering problem
4653   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4654   call(rax); // no alignment requirement
4655   // everything popped by receiver
4656 }
4657 
4658 
4659 void MacroAssembler::stop(const char* msg) {
4660   address rip = pc();
4661   pushaq(); // get regs on stack
4662   lea(c_rarg0, ExternalAddress((address) msg));
4663   lea(c_rarg1, InternalAddress(rip));
4664   movq(c_rarg2, rsp); // pass pointer to regs array
4665   andq(rsp, -16); // align stack as required by ABI
4666   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug)));
4667   hlt();
4668 }
4669 
4670 void MacroAssembler::warn(const char* msg) {
4671   pushq(r12);
4672   movq(r12, rsp);
4673   andq(rsp, -16);     // align stack as required by push_CPU_state and call
4674 
4675   push_CPU_state();   // keeps alignment at 16 bytes
4676   lea(c_rarg0, ExternalAddress((address) msg));
4677   call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
4678   pop_CPU_state();
4679 
4680   movq(rsp, r12);
4681   popq(r12);
4682 }
4683 
4684 void MacroAssembler::debug(char* msg, int64_t pc, int64_t regs[]) {
4685   // In order to get locks to work, we need to fake a in_VM state
4686   if (ShowMessageBoxOnError ) {
4687     JavaThread* thread = JavaThread::current();
4688     JavaThreadState saved_state = thread->thread_state();
4689     thread->set_thread_state(_thread_in_vm);
4690     ttyLocker ttyl;
4691 #ifndef PRODUCT
4692     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
4693       BytecodeCounter::print();
4694     }
4695 #endif
4696     // To see where a verify_oop failed, get $ebx+40/X for this frame.
4697     // XXX correct this offset for amd64
4698     // This is the value of eip which points to where verify_oop will return.
4699     if (os::message_box(msg, "Execution stopped, print registers?")) {
4700       tty->print_cr("rip = 0x%016lx", pc);
4701       tty->print_cr("rax = 0x%016lx", regs[15]);
4702       tty->print_cr("rbx = 0x%016lx", regs[12]);
4703       tty->print_cr("rcx = 0x%016lx", regs[14]);
4704       tty->print_cr("rdx = 0x%016lx", regs[13]);
4705       tty->print_cr("rdi = 0x%016lx", regs[8]);
4706       tty->print_cr("rsi = 0x%016lx", regs[9]);
4707       tty->print_cr("rbp = 0x%016lx", regs[10]);
4708       tty->print_cr("rsp = 0x%016lx", regs[11]);
4709       tty->print_cr("r8  = 0x%016lx", regs[7]);
4710       tty->print_cr("r9  = 0x%016lx", regs[6]);
4711       tty->print_cr("r10 = 0x%016lx", regs[5]);
4712       tty->print_cr("r11 = 0x%016lx", regs[4]);
4713       tty->print_cr("r12 = 0x%016lx", regs[3]);
4714       tty->print_cr("r13 = 0x%016lx", regs[2]);
4715       tty->print_cr("r14 = 0x%016lx", regs[1]);
4716       tty->print_cr("r15 = 0x%016lx", regs[0]);
4717       BREAKPOINT;
4718     }
4719     ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
4720   } else {
4721     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
4722                     msg);
4723   }
4724 }
4725 
4726 void MacroAssembler::os_breakpoint() {
4727   // instead of directly emitting a breakpoint, call os:breakpoint for
4728   // better debugability
4729   // This shouldn't need alignment, it's an empty function
4730   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
4731 }
4732 
4733 // Write serialization page so VM thread can do a pseudo remote membar.
4734 // We use the current thread pointer to calculate a thread specific
4735 // offset to write to within the page. This minimizes bus traffic
4736 // due to cache line collision.
4737 void MacroAssembler::serialize_memory(Register thread,
4738                                       Register tmp) {
4739 
4740   movl(tmp, thread);
4741   shrl(tmp, os::get_serialize_page_shift_count());
4742   andl(tmp, (os::vm_page_size() - sizeof(int)));
4743 
4744   Address index(noreg, tmp, Address::times_1);
4745   ExternalAddress page(os::get_memory_serialize_page());
4746 
4747   movptr(ArrayAddress(page, index), tmp);
4748 }
4749 
4750 void MacroAssembler::verify_tlab() {
4751 #ifdef ASSERT
4752   if (UseTLAB) {
4753     Label next, ok;
4754     Register t1 = rsi;
4755 
4756     pushq(t1);
4757 
4758     movq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
4759     cmpq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_start_offset())));
4760     jcc(Assembler::aboveEqual, next);
4761     stop("assert(top >= start)");
4762     should_not_reach_here();
4763 
4764     bind(next);
4765     movq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_end_offset())));
4766     cmpq(t1, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
4767     jcc(Assembler::aboveEqual, ok);
4768     stop("assert(top <= end)");
4769     should_not_reach_here();
4770 
4771     bind(ok);
4772 
4773     popq(t1);
4774   }
4775 #endif
4776 }
4777 
4778 // Defines obj, preserves var_size_in_bytes
4779 void MacroAssembler::eden_allocate(Register obj,
4780                                    Register var_size_in_bytes,
4781                                    int con_size_in_bytes,
4782                                    Register t1,
4783                                    Label& slow_case) {
4784   assert(obj == rax, "obj must be in rax for cmpxchg");
4785   assert_different_registers(obj, var_size_in_bytes, t1);
4786   Register end = t1;
4787   Label retry;
4788   bind(retry);
4789   ExternalAddress heap_top((address) Universe::heap()->top_addr());
4790   movptr(obj, heap_top);
4791   if (var_size_in_bytes == noreg) {
4792     leaq(end, Address(obj, con_size_in_bytes));
4793   } else {
4794     leaq(end, Address(obj, var_size_in_bytes, Address::times_1));
4795   }
4796   // if end < obj then we wrapped around => object too long => slow case
4797   cmpq(end, obj);
4798   jcc(Assembler::below, slow_case);
4799   cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
4800 
4801   jcc(Assembler::above, slow_case);
4802   // Compare obj with the top addr, and if still equal, store the new
4803   // top addr in end at the address of the top addr pointer. Sets ZF
4804   // if was equal, and clears it otherwise. Use lock prefix for
4805   // atomicity on MPs.
4806   if (os::is_MP()) {
4807     lock();
4808   }
4809   cmpxchgptr(end, heap_top);
4810   // if someone beat us on the allocation, try again, otherwise continue
4811   jcc(Assembler::notEqual, retry);
4812 }
4813 
4814 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
4815 void MacroAssembler::tlab_allocate(Register obj,
4816                                    Register var_size_in_bytes,
4817                                    int con_size_in_bytes,
4818                                    Register t1,
4819                                    Register t2,
4820                                    Label& slow_case) {
4821   assert_different_registers(obj, t1, t2);
4822   assert_different_registers(obj, var_size_in_bytes, t1);
4823   Register end = t2;
4824 
4825   verify_tlab();
4826 
4827   movq(obj, Address(r15_thread, JavaThread::tlab_top_offset()));
4828   if (var_size_in_bytes == noreg) {
4829     leaq(end, Address(obj, con_size_in_bytes));
4830   } else {
4831     leaq(end, Address(obj, var_size_in_bytes, Address::times_1));
4832   }
4833   cmpq(end, Address(r15_thread, JavaThread::tlab_end_offset()));
4834   jcc(Assembler::above, slow_case);
4835 
4836   // update the tlab top pointer
4837   movq(Address(r15_thread, JavaThread::tlab_top_offset()), end);
4838 
4839   // recover var_size_in_bytes if necessary
4840   if (var_size_in_bytes == end) {
4841     subq(var_size_in_bytes, obj);
4842   }
4843   verify_tlab();
4844 }
4845 
// Emits the slow path taken when a TLAB allocation did not fit.
// Preserves rbx and rdx; uses rax, rcx, rsi and r10 as temporaries.
//
// Either the current TLAB is retained (its free space exceeds the thread's
// refill waste limit) and control goes to try_eden, or the TLAB is
// discarded, a new one is allocated with eden_allocate, and control goes
// back to retry. slow_case is taken when inline allocation in the shared
// eden is not possible or eden_allocate fails.
void MacroAssembler::tlab_refill(Label& retry,
                                 Label& try_eden,
                                 Label& slow_case) {
  Register top = rax;
  Register t1 = rcx;
  Register t2 = rsi;
  Register t3 = r10;
  Register thread_reg = r15_thread;
  assert_different_registers(top, thread_reg, t1, t2, t3,
                             /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    // (unconditional jump; the code emitted below is then never reached)
    jmp(slow_case);
  }

  movq(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movq(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  // (end - top, converted from bytes to heap words)
  subq(t1, top);
  shrq(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpq(t1, Address(thread_reg, // size_t
                   in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // Raise the thread's refill waste limit by the configured increment
  // before handing over to try_eden.
  mov64(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addq(Address(thread_reg,  // size_t
               in_bytes(JavaThread::tlab_refill_waste_limit_offset())),
       t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, // unsigned int
                 in_bytes(JavaThread::tlab_slow_allocations_offset())),
         1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, // unsigned int
                 in_bytes(JavaThread::tlab_number_of_refills_offset())),
         1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, // unsigned int
                 in_bytes(JavaThread::tlab_fast_refill_waste_offset())),
         t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testq(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  mov64(t3, (int64_t) markOopDesc::prototype()->copy_set_hash(0x2));
  movq(Address(top, oopDesc::mark_offset_in_bytes()), t3);
  // set the length to the remaining space
  // t1 holds the free space in heap words: drop the int-array header, add
  // back the alignment reserve, then scale heap words to a jint count
  subq(t1, typeArrayOopDesc::header_size(T_INT));
  addq(t1, (int)ThreadLocalAllocBuffer::alignment_reserve());
  shlq(t1, log2_intptr(HeapWordSize / sizeof(jint)));
  movq(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
  movq(Address(top, oopDesc::klass_offset_in_bytes()), t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  // t1 = tlab size in bytes (the thread field is shifted by LogHeapWordSize)
  movq(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlq(t1, LogHeapWordSize);
  // add object_size ??
  // top receives the start of the newly allocated space; t2 is scratch
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    pushq(tsize);
    // recompute the tlab size in bytes and compare it with t1
    movq(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlq(tsize, LogHeapWordSize);
    cmpq(t1, tsize);
    jcc(Assembler::equal, ok);
    // reached only when t1 differs from the tlab size
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    popq(tsize);
  }
#endif
  // install the new tlab: start = top = new space,
  // end = new space + size - alignment reserve (in bytes)
  movq(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movq(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addq(top, t1);
  subq(top, (int)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movq(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);
}
4952 
4953 
4954 int MacroAssembler::biased_locking_enter(Register lock_reg, Register obj_reg, Register swap_reg, Register tmp_reg,
4955                                          bool swap_reg_contains_mark,
4956                                          Label& done, Label* slow_case,
4957                                          BiasedLockingCounters* counters) {
4958   assert(UseBiasedLocking, "why call this otherwise?");
4959   assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
4960   assert(tmp_reg != noreg, "tmp_reg must be supplied");
4961   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4962   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4963   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
4964   Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
4965   Address saved_mark_addr(lock_reg, 0);
4966 
4967   if (PrintBiasedLockingStatistics && counters == NULL)
4968     counters = BiasedLocking::counters();
4969 
4970   // Biased locking
4971   // See whether the lock is currently biased toward our thread and
4972   // whether the epoch is still valid
4973   // Note that the runtime guarantees sufficient alignment of JavaThread
4974   // pointers to allow age to be placed into low bits
4975   // First check to see whether biasing is even enabled for this object
4976   Label cas_label;
4977   int null_check_offset = -1;
4978   if (!swap_reg_contains_mark) {
4979     null_check_offset = offset();
4980     movq(swap_reg, mark_addr);
4981   }
4982   movq(tmp_reg, swap_reg);
4983   andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4984   cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
4985   jcc(Assembler::notEqual, cas_label);
4986   // The bias pattern is present in the object's header. Need to check
4987   // whether the bias owner and the epoch are both still current.
4988   movq(tmp_reg, klass_addr);
4989   movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4990   orq(tmp_reg, r15_thread);
4991   xorq(tmp_reg, swap_reg);
4992   andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
4993   if (counters != NULL) {
4994     cond_inc32(Assembler::zero,
4995                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
4996   }
4997   jcc(Assembler::equal, done);
4998 
4999   Label try_revoke_bias;
5000   Label try_rebias;
5001 
5002   // At this point we know that the header has the bias pattern and
5003   // that we are not the bias owner in the current epoch. We need to
5004   // figure out more details about the state of the header in order to
5005   // know what operations can be legally performed on the object's
5006   // header.
5007 
5008   // If the low three bits in the xor result aren't clear, that means
5009   // the prototype header is no longer biased and we have to revoke
5010   // the bias on this object.
5011   testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5012   jcc(Assembler::notZero, try_revoke_bias);
5013 
5014   // Biasing is still enabled for this data type. See whether the
5015   // epoch of the current bias is still valid, meaning that the epoch
5016   // bits of the mark word are equal to the epoch bits of the
5017   // prototype header. (Note that the prototype header's epoch bits
5018   // only change at a safepoint.) If not, attempt to rebias the object
5019   // toward the current thread. Note that we must be absolutely sure
5020   // that the current epoch is invalid in order to do this because
5021   // otherwise the manipulations it performs on the mark word are
5022   // illegal.
5023   testq(tmp_reg, markOopDesc::epoch_mask_in_place);
5024   jcc(Assembler::notZero, try_rebias);
5025 
5026   // The epoch of the current bias is still valid but we know nothing
5027   // about the owner; it might be set or it might be clear. Try to
5028   // acquire the bias of the object using an atomic operation. If this
5029   // fails we will go in to the runtime to revoke the object's bias.
5030   // Note that we first construct the presumed unbiased header so we
5031   // don't accidentally blow away another thread's valid bias.
5032   andq(swap_reg,
5033        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
5034   movq(tmp_reg, swap_reg);
5035   orq(tmp_reg, r15_thread);
5036   if (os::is_MP()) {
5037     lock();
5038   }
5039   cmpxchgq(tmp_reg, Address(obj_reg, 0));
5040   // If the biasing toward our thread failed, this means that
5041   // another thread succeeded in biasing it toward itself and we
5042   // need to revoke that bias. The revocation will occur in the
5043   // interpreter runtime in the slow case.
5044   if (counters != NULL) {
5045     cond_inc32(Assembler::zero,
5046                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5047   }
5048   if (slow_case != NULL) {
5049     jcc(Assembler::notZero, *slow_case);
5050   }
5051   jmp(done);
5052 
5053   bind(try_rebias);
5054   // At this point we know the epoch has expired, meaning that the
5055   // current "bias owner", if any, is actually invalid. Under these
5056   // circumstances _only_, we are allowed to use the current header's
5057   // value as the comparison value when doing the cas to acquire the
5058   // bias in the current epoch. In other words, we allow transfer of
5059   // the bias from one thread to another directly in this situation.
5060   //
5061   // FIXME: due to a lack of registers we currently blow away the age
5062   // bits in this situation. Should attempt to preserve them.
5063   movq(tmp_reg, klass_addr);
5064   movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
5065   orq(tmp_reg, r15_thread);
5066   if (os::is_MP()) {
5067     lock();
5068   }
5069   cmpxchgq(tmp_reg, Address(obj_reg, 0));
5070   // If the biasing toward our thread failed, then another thread
5071   // succeeded in biasing it toward itself and we need to revoke that
5072   // bias. The revocation will occur in the runtime in the slow case.
5073   if (counters != NULL) {
5074     cond_inc32(Assembler::zero,
5075                ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5076   }
5077   if (slow_case != NULL) {
5078     jcc(Assembler::notZero, *slow_case);
5079   }
5080   jmp(done);
5081 
5082   bind(try_revoke_bias);
5083   // The prototype mark in the klass doesn't have the bias bit set any
5084   // more, indicating that objects of this data type are not supposed
5085   // to be biased any more. We are going to try to reset the mark of
5086   // this object to the prototype value and fall through to the
5087   // CAS-based locking scheme. Note that if our CAS fails, it means
5088   // that another thread raced us for the privilege of revoking the
5089   // bias of this particular object, so it's okay to continue in the
5090   // normal locking code.
5091   //
5092   // FIXME: due to a lack of registers we currently blow away the age
5093   // bits in this situation. Should attempt to preserve them.
5094   movq(tmp_reg, klass_addr);
5095   movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
5096   if (os::is_MP()) {
5097     lock();
5098   }
5099   cmpxchgq(tmp_reg, Address(obj_reg, 0));
5100   // Fall through to the normal CAS-based lock, because no matter what
5101   // the result of the above CAS, some thread must have succeeded in
5102   // removing the bias bit from the object's header.
5103   if (counters != NULL) {
5104     cond_inc32(Assembler::zero,
5105                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5106   }
5107 
5108   bind(cas_label);
5109 
5110   return null_check_offset;
5111 }
5112 
5113 
5114 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
5115   assert(UseBiasedLocking, "why call this otherwise?");
5116 
5117   // Check for biased locking unlock case, which is a no-op
5118   // Note: we do not have to check the thread ID for two reasons.
5119   // First, the interpreter checks for IllegalMonitorStateException at
5120   // a higher level. Second, if the bias was revoked while we held the
5121   // lock, the object could not be rebiased toward another thread, so
5122   // the bias bit would be clear.
5123   movq(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
5124   andq(temp_reg, markOopDesc::biased_lock_mask_in_place);
5125   cmpq(temp_reg, markOopDesc::biased_lock_pattern);
5126   jcc(Assembler::equal, done);
5127 }
5128 
5129 
5130 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
5131   switch (cond) {
5132     // Note some conditions are synonyms for others
5133     case Assembler::zero:         return Assembler::notZero;
5134     case Assembler::notZero:      return Assembler::zero;
5135     case Assembler::less:         return Assembler::greaterEqual;
5136     case Assembler::lessEqual:    return Assembler::greater;
5137     case Assembler::greater:      return Assembler::lessEqual;
5138     case Assembler::greaterEqual: return Assembler::less;
5139     case Assembler::below:        return Assembler::aboveEqual;
5140     case Assembler::belowEqual:   return Assembler::above;
5141     case Assembler::above:        return Assembler::belowEqual;
5142     case Assembler::aboveEqual:   return Assembler::below;
5143     case Assembler::overflow:     return Assembler::noOverflow;
5144     case Assembler::noOverflow:   return Assembler::overflow;
5145     case Assembler::negative:     return Assembler::positive;
5146     case Assembler::positive:     return Assembler::negative;
5147     case Assembler::parity:       return Assembler::noParity;
5148     case Assembler::noParity:     return Assembler::parity;
5149   }
5150   ShouldNotReachHere(); return Assembler::overflow;
5151 }
5152 
5153 
5154 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
5155   Condition negated_cond = negate_condition(cond);
5156   Label L;
5157   jcc(negated_cond, L);
5158   atomic_incl(counter_addr);
5159   bind(L);
5160 }
5161 
5162 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
5163   pushfq();
5164   if (os::is_MP())
5165     lock();
5166   incrementl(counter_addr);
5167   popfq();
5168 }
5169 
5170 SkipIfEqual::SkipIfEqual(
5171     MacroAssembler* masm, const bool* flag_addr, bool value) {
5172   _masm = masm;
5173   _masm->cmp8(ExternalAddress((address)flag_addr), value);
5174   _masm->jcc(Assembler::equal, _label);
5175 }
5176 
5177 SkipIfEqual::~SkipIfEqual() {
5178   _masm->bind(_label);
5179 }
5180 
5181 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
5182   movq(tmp, rsp);
5183   // Bang stack for total size given plus shadow page size.
5184   // Bang one page at a time because large size can bang beyond yellow and
5185   // red zones.
5186   Label loop;
5187   bind(loop);
5188   movl(Address(tmp, (-os::vm_page_size())), size );
5189   subq(tmp, os::vm_page_size());
5190   subl(size, os::vm_page_size());
5191   jcc(Assembler::greater, loop);
5192 
5193   // Bang down shadow pages too.
5194   // The -1 because we already subtracted 1 page.
5195   for (int i = 0; i< StackShadowPages-1; i++) {
5196     movq(Address(tmp, (-i*os::vm_page_size())), size );
5197   }
5198 }