1 /*
   2  * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20  * CA 95054 USA or visit www.sun.com if you need additional information or
  21  * have any questions.
  22  *
  23  */
  24 
  25 #include "incls/_precompiled.incl"
  26 #include "incls/_templateTable_x86_32.cpp.incl"
  27 
  28 #ifndef CC_INTERP
  29 #define __ _masm->
  30 
  31 //----------------------------------------------------------------------------------------------------
  32 // Platform-dependent initialization
  33 
// Platform-dependent initialization hook for the template table.
// The body is intentionally empty: this platform needs no extra setup.
void TemplateTable::pd_initialize() {
  // No i486 specific initialization
}
  37 
  38 //----------------------------------------------------------------------------------------------------
  39 // Address computation
  40 
  41 // local variables
  42 static inline Address iaddress(int n)            {
  43   return Address(rdi, Interpreter::local_offset_in_bytes(n));
  44 }
  45 
  46 static inline Address laddress(int n)            { return iaddress(n + 1); }
  47 static inline Address haddress(int n)            { return iaddress(n + 0); }
  48 static inline Address faddress(int n)            { return iaddress(n); }
  49 static inline Address daddress(int n)            { return laddress(n); }
  50 static inline Address aaddress(int n)            { return iaddress(n); }
  51 
  52 static inline Address iaddress(Register r)       {
  53   return Address(rdi, r, Interpreter::stackElementScale(), Interpreter::value_offset_in_bytes());
  54 }
  55 static inline Address laddress(Register r)       {
  56   return Address(rdi, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(1));
  57 }
  58 static inline Address haddress(Register r)       {
  59   return Address(rdi, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(0));
  60 }
  61 
  62 static inline Address faddress(Register r)       { return iaddress(r); };
  63 static inline Address daddress(Register r)       {
  64   assert(!TaggedStackInterpreter, "This doesn't work");
  65   return laddress(r);
  66 };
  67 static inline Address aaddress(Register r)       { return iaddress(r); };
  68 
  69 // expression stack
  70 // (Note: Must not use symmetric equivalents at_rsp_m1/2 since they store
  71 // data beyond the rsp which is potentially unsafe in an MT environment;
  72 // an interrupt may overwrite that data.)
  73 static inline Address at_rsp   () {
  74   return Address(rsp, 0);
  75 }
  76 
  77 // At top of Java expression stack which may be different than rsp().  It
  78 // isn't for category 1 objects.
  79 static inline Address at_tos   () {
  80   Address tos = Address(rsp,  Interpreter::expr_offset_in_bytes(0));
  81   return tos;
  82 }
  83 
  84 static inline Address at_tos_p1() {
  85   return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
  86 }
  87 
  88 static inline Address at_tos_p2() {
  89   return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
  90 }
  91 
  92 // Condition conversion
  93 static Assembler::Condition j_not(TemplateTable::Condition cc) {
  94   switch (cc) {
  95     case TemplateTable::equal        : return Assembler::notEqual;
  96     case TemplateTable::not_equal    : return Assembler::equal;
  97     case TemplateTable::less         : return Assembler::greaterEqual;
  98     case TemplateTable::less_equal   : return Assembler::greater;
  99     case TemplateTable::greater      : return Assembler::lessEqual;
 100     case TemplateTable::greater_equal: return Assembler::less;
 101   }
 102   ShouldNotReachHere();
 103   return Assembler::zero;
 104 }
 105 
 106 
 107 //----------------------------------------------------------------------------------------------------
// Miscellaneous helper routines
 109 
 110 Address TemplateTable::at_bcp(int offset) {
 111   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
 112   return Address(rsi, offset);
 113 }
 114 
 115 
 116 void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc,
 117                                    Register scratch,
 118                                    bool load_bc_into_scratch/*=true*/) {
 119 
 120   if (!RewriteBytecodes) return;
 121   // the pair bytecodes have already done the load.
 122   if (load_bc_into_scratch) __ movl(bc, bytecode);
 123   Label patch_done;
 124   if (JvmtiExport::can_post_breakpoint()) {
 125     Label fast_patch;
 126     // if a breakpoint is present we can't rewrite the stream directly
 127     __ movzxb(scratch, at_bcp(0));
 128     __ cmpl(scratch, Bytecodes::_breakpoint);
 129     __ jcc(Assembler::notEqual, fast_patch);
 130     __ get_method(scratch);
 131     // Let breakpoint table handling rewrite to quicker bytecode
 132     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, rsi, bc);
 133 #ifndef ASSERT
 134     __ jmpb(patch_done);
 135     __ bind(fast_patch);
 136   }
 137 #else
 138     __ jmp(patch_done);
 139     __ bind(fast_patch);
 140   }
 141   Label okay;
 142   __ load_unsigned_byte(scratch, at_bcp(0));
 143   __ cmpl(scratch, (int)Bytecodes::java_code(bytecode));
 144   __ jccb(Assembler::equal, okay);
 145   __ cmpl(scratch, bc);
 146   __ jcc(Assembler::equal, okay);
 147   __ stop("patching the wrong bytecode");
 148   __ bind(okay);
 149 #endif
 150   // patch bytecode
 151   __ movb(at_bcp(0), bc);
 152   __ bind(patch_done);
 153 }
 154 
 155 //----------------------------------------------------------------------------------------------------
 156 // Individual instructions
 157 
// Template for the nop bytecode: declares a vtos -> vtos transition and emits
// no instructions.
void TemplateTable::nop() {
  transition(vtos, vtos);
  // nothing to do
}
 162 
// Template for a bytecode that must never execute: the generated code stops
// with a diagnostic message.
void TemplateTable::shouldnotreachhere() {
  transition(vtos, vtos);
  __ stop("shouldnotreachhere bytecode");
}
 167 
 168 
 169 
// aconst_null: vtos -> atos. Produces the null reference by zeroing rax.
void TemplateTable::aconst_null() {
  transition(vtos, atos);
  __ xorl(rax, rax);  // rax = 0 (null)
}
 174 
 175 
 176 void TemplateTable::iconst(int value) {
 177   transition(vtos, itos);
 178   if (value == 0) {
 179     __ xorl(rax, rax);
 180   } else {
 181     __ movl(rax, value);
 182   }
 183 }
 184 
 185 
 186 void TemplateTable::lconst(int value) {
 187   transition(vtos, ltos);
 188   if (value == 0) {
 189     __ xorl(rax, rax);
 190   } else {
 191     __ movl(rax, value);
 192   }
 193   assert(value >= 0, "check this code");
 194   __ xorl(rdx, rdx);
 195 }
 196 
 197 
 198 void TemplateTable::fconst(int value) {
 199   transition(vtos, ftos);
 200          if (value == 0) { __ fldz();
 201   } else if (value == 1) { __ fld1();
 202   } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here
 203   } else                 { ShouldNotReachHere();
 204   }
 205 }
 206 
 207 
 208 void TemplateTable::dconst(int value) {
 209   transition(vtos, dtos);
 210          if (value == 0) { __ fldz();
 211   } else if (value == 1) { __ fld1();
 212   } else                 { ShouldNotReachHere();
 213   }
 214 }
 215 
 216 
 217 void TemplateTable::bipush() {
 218   transition(vtos, itos);
 219   __ load_signed_byte(rax, at_bcp(1));
 220 }
 221 
 222 
 223 void TemplateTable::sipush() {
 224   transition(vtos, itos);
 225   __ load_unsigned_word(rax, at_bcp(1));
 226   __ bswap(rax);
 227   __ sarl(rax, 16);
 228 }
 229 
// ldc / ldc_w: vtos -> vtos (the result is pushed explicitly).
//   wide - true for the two-byte constant pool index form, false for the
//          one-byte form.
// Dispatches on the constant pool tag: unresolved strings/classes and resolved
// classes go through InterpreterRuntime::ldc; floats, ints and strings are read
// straight from the constant pool and pushed with the matching tos state.
void TemplateTable::ldc(bool wide) {
  transition(vtos, vtos);
  Label call_ldc, notFloat, notClass, Done;

  // rbx = constant pool index taken from the bytecode stream
  if (wide) {
    __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
  } else {
    __ load_unsigned_byte(rbx, at_bcp(1));
  }
  // rcx = constant pool, rax = tags array
  __ get_cpool_and_tags(rcx, rax);
  const int base_offset = constantPoolOopDesc::header_size() * wordSize;
  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;

  // get type
  // (rdx is cleared first because movb only writes its low byte)
  __ xorl(rdx, rdx);
  __ movb(rdx, Address(rax, rbx, Address::times_1, tags_offset));

  // unresolved string - get the resolved string
  __ cmpl(rdx, JVM_CONSTANT_UnresolvedString);
  __ jccb(Assembler::equal, call_ldc);

  // unresolved class - get the resolved class
  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
  __ jccb(Assembler::equal, call_ldc);

  // unresolved class in error (resolution failed) - call into runtime
  // so that the same error from first resolution attempt is thrown.
  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
  __ jccb(Assembler::equal, call_ldc);

  // resolved class - need to call vm to get java mirror of the class
  __ cmpl(rdx, JVM_CONSTANT_Class);
  __ jcc(Assembler::notEqual, notClass);

  // Runtime path: pass the wide flag in rcx, result comes back in rax.
  __ bind(call_ldc);
  __ movl(rcx, wide);
  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), rcx);
  __ push(atos);
  __ jmp(Done);

  __ bind(notClass);
  __ cmpl(rdx, JVM_CONSTANT_Float);
  __ jccb(Assembler::notEqual, notFloat);
  // ftos
  __ fld_s(    Address(rcx, rbx, Address::times_4, base_offset));
  __ push(ftos);
  __ jmp(Done);

  __ bind(notFloat);
#ifdef ASSERT
  // Debug-only: the only tags left at this point are Integer and String.
  { Label L;
    __ cmpl(rdx, JVM_CONSTANT_Integer);
    __ jcc(Assembler::equal, L);
    __ cmpl(rdx, JVM_CONSTANT_String);
    __ jcc(Assembler::equal, L);
    __ stop("unexpected tag type in ldc");
    __ bind(L);
  }
#endif
  Label isOop;
  // atos and itos
  __ movl(rax, Address(rcx, rbx, Address::times_4, base_offset));
  // String is only oop type we will see here
  __ cmpl(rdx, JVM_CONSTANT_String);
  __ jccb(Assembler::equal, isOop);
  __ push(itos);
  __ jmp(Done);
  __ bind(isOop);
  __ push(atos);

  if (VerifyOops) {
    __ verify_oop(rax);
  }
  __ bind(Done);
}
 305 
// ldc2_w: vtos -> vtos (the result is pushed explicitly).
// Reads a two-word constant from the constant pool: a double when the tag is
// JVM_CONSTANT_Double, otherwise it is treated as a long.
void TemplateTable::ldc2_w() {
  transition(vtos, vtos);
  Label Long, Done;
  // rbx = two-byte constant pool index from the bytecode stream
  __ get_unsigned_2_byte_index_at_bcp(rbx, 1);

  // rcx = constant pool, rax = tags array
  __ get_cpool_and_tags(rcx, rax);
  const int base_offset = constantPoolOopDesc::header_size() * wordSize;
  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;

  // get type
  __ cmpb(Address(rax, rbx, Address::times_1, tags_offset), JVM_CONSTANT_Double);
  __ jccb(Assembler::notEqual, Long);
  // dtos
  __ fld_d(    Address(rcx, rbx, Address::times_4, base_offset));
  __ push(dtos);
  __ jmpb(Done);

  __ bind(Long);
  // ltos
  // word 0 of the entry goes to rax, word 1 to rdx
  __ movl(rax, Address(rcx, rbx, Address::times_4, base_offset + 0 * wordSize));
  __ movl(rdx, Address(rcx, rbx, Address::times_4, base_offset + 1 * wordSize));

  __ push(ltos);

  __ bind(Done);
}
 332 
 333 
 334 void TemplateTable::locals_index(Register reg, int offset) {
 335   __ load_unsigned_byte(reg, at_bcp(offset));
 336   __ negl(reg);
 337 }
 338 
 339 
// iload: vtos -> itos. Loads an int local into rax.
// With RewriteFrequentPairs the template first inspects the following bytecode
// and patches this one to a faster variant:
//   next is _iload       -> no rewrite yet (leave it for later)
//   next is _fast_iload  -> rewrite to _fast_iload2
//   next is _caload      -> rewrite to _fast_icaload
//   anything else        -> rewrite to _fast_iload
void TemplateTable::iload() {
  transition(vtos, itos);
  if (RewriteFrequentPairs) {
    Label rewrite, done;

    // get next byte
    __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
    // If the next bytecode is still a plain _iload, postpone the rewrite:
    // only the last two iloads of a run should become a pair. Once the next
    // one has itself been rewritten to _fast_iload (so it is followed by
    // neither an iload nor a caload), this one can become _fast_iload2.
    __ cmpl(rbx, Bytecodes::_iload);
    __ jcc(Assembler::equal, done);

    // movl does not alter the flags, so the jccb below still tests the cmpl.
    __ cmpl(rbx, Bytecodes::_fast_iload);
    __ movl(rcx, Bytecodes::_fast_iload2);
    __ jccb(Assembler::equal, rewrite);

    // if _caload, rewrite to fast_icaload
    __ cmpl(rbx, Bytecodes::_caload);
    __ movl(rcx, Bytecodes::_fast_icaload);
    __ jccb(Assembler::equal, rewrite);

    // rewrite so iload doesn't check again.
    __ movl(rcx, Bytecodes::_fast_iload);

    // rewrite
    // rcx: fast bytecode
    __ bind(rewrite);
    patch_bytecode(Bytecodes::_iload, rcx, rbx, false);
    __ bind(done);
  }

  // Get the local value into tos
  locals_index(rbx);
  __ movl(rax, iaddress(rbx));
  debug_only(__ verify_local_tag(frame::TagValue, rbx));
}
 378 
 379 
 380 void TemplateTable::fast_iload2() {
 381   transition(vtos, itos);
 382   locals_index(rbx);
 383   __ movl(rax, iaddress(rbx));
 384   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 385   __ push(itos);
 386   locals_index(rbx, 3);
 387   __ movl(rax, iaddress(rbx));
 388   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 389 }
 390 
 391 void TemplateTable::fast_iload() {
 392   transition(vtos, itos);
 393   locals_index(rbx);
 394   __ movl(rax, iaddress(rbx));
 395   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 396 }
 397 
 398 
 399 void TemplateTable::lload() {
 400   transition(vtos, ltos);
 401   locals_index(rbx);
 402   __ movl(rax, laddress(rbx));
 403   __ movl(rdx, haddress(rbx));
 404   debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
 405 }
 406 
 407 
 408 void TemplateTable::fload() {
 409   transition(vtos, ftos);
 410   locals_index(rbx);
 411   __ fld_s(faddress(rbx));
 412   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 413 }
 414 
 415 
 416 void TemplateTable::dload() {
 417   transition(vtos, dtos);
 418   locals_index(rbx);
 419   if (TaggedStackInterpreter) {
 420     // Get double out of locals array, onto temp stack and load with
 421     // float instruction into ST0
 422     __ movl(rax, laddress(rbx));
 423     __ movl(rdx, haddress(rbx));
 424     __ pushl(rdx);  // push hi first
 425     __ pushl(rax);
 426     __ fld_d(Address(rsp, 0));
 427     __ addl(rsp, 2*wordSize);
 428     debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
 429   } else {
 430     __ fld_d(daddress(rbx));
 431   }
 432 }
 433 
 434 
 435 void TemplateTable::aload() {
 436   transition(vtos, atos);
 437   locals_index(rbx);
 438   __ movl(rax, iaddress(rbx));
 439   debug_only(__ verify_local_tag(frame::TagReference, rbx));
 440 }
 441 
 442 
// Loads the two-byte local index used by wide bytecodes into reg, negated.
// A 32-bit word is read at bcp + 2; bswap followed by a logical shift right
// by 16 leaves the first two bytes, in big-endian order, as an unsigned value.
void TemplateTable::locals_index_wide(Register reg) {
  __ movl(reg, at_bcp(2));
  __ bswap(reg);
  __ shrl(reg, 16);
  __ negl(reg);
}
 449 
 450 
 451 void TemplateTable::wide_iload() {
 452   transition(vtos, itos);
 453   locals_index_wide(rbx);
 454   __ movl(rax, iaddress(rbx));
 455   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 456 }
 457 
 458 
 459 void TemplateTable::wide_lload() {
 460   transition(vtos, ltos);
 461   locals_index_wide(rbx);
 462   __ movl(rax, laddress(rbx));
 463   __ movl(rdx, haddress(rbx));
 464   debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
 465 }
 466 
 467 
 468 void TemplateTable::wide_fload() {
 469   transition(vtos, ftos);
 470   locals_index_wide(rbx);
 471   __ fld_s(faddress(rbx));
 472   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 473 }
 474 
 475 
 476 void TemplateTable::wide_dload() {
 477   transition(vtos, dtos);
 478   locals_index_wide(rbx);
 479   if (TaggedStackInterpreter) {
 480     // Get double out of locals array, onto temp stack and load with
 481     // float instruction into ST0
 482     __ movl(rax, laddress(rbx));
 483     __ movl(rdx, haddress(rbx));
 484     __ pushl(rdx);  // push hi first
 485     __ pushl(rax);
 486     __ fld_d(Address(rsp, 0));
 487     __ addl(rsp, 2*wordSize);
 488     debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
 489   } else {
 490     __ fld_d(daddress(rbx));
 491   }
 492 }
 493 
 494 
 495 void TemplateTable::wide_aload() {
 496   transition(vtos, atos);
 497   locals_index_wide(rbx);
 498   __ movl(rax, iaddress(rbx));
 499   debug_only(__ verify_local_tag(frame::TagReference, rbx));
 500 }
 501 
// Pops the array reference from the expression stack into `array`, then emits
// the null check and bounds check of `index` against it
// (see index_check_without_pop, which also clobbers rbx).
void TemplateTable::index_check(Register array, Register index) {
  // Pop ptr into array
  __ pop_ptr(array);
  index_check_without_pop(array, index);
}
 507 
// Emits a null check on `array` and an unsigned bounds check of `index`
// against the array length. On failure control jumps to the interpreter's
// ArrayIndexOutOfBoundsException entry with the offending index in rbx.
// Clobbers rbx whenever index is a different register.
void TemplateTable::index_check_without_pop(Register array, Register index) {
  // destroys rbx,
  // check array
  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
  // check index
  __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
  if (index != rbx) {
    // ??? convention: move aberrant index into rbx, for exception message
    // (movl leaves the flags from the cmpl above untouched)
    assert(rbx != array, "different registers");
    __ movl(rbx, index);
  }
  // aboveEqual is an unsigned comparison, so a negative index fails as well.
  __ jump_cc(Assembler::aboveEqual,
             ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
}
 522 
 523 
 524 void TemplateTable::iaload() {
 525   transition(itos, itos);
 526   // rdx: array
 527   index_check(rdx, rax);  // kills rbx,
 528   // rax,: index
 529   __ movl(rax, Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_INT)));
 530 }
 531 
 532 
 533 void TemplateTable::laload() {
 534   transition(itos, ltos);
 535   // rax,: index
 536   // rdx: array
 537   index_check(rdx, rax);
 538   __ movl(rbx, rax);
 539   // rbx,: index
 540   __ movl(rax, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize));
 541   __ movl(rdx, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize));
 542 }
 543 
 544 
 545 void TemplateTable::faload() {
 546   transition(itos, ftos);
 547   // rdx: array
 548   index_check(rdx, rax);  // kills rbx,
 549   // rax,: index
 550   __ fld_s(Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
 551 }
 552 
 553 
 554 void TemplateTable::daload() {
 555   transition(itos, dtos);
 556   // rdx: array
 557   index_check(rdx, rax);  // kills rbx,
 558   // rax,: index
 559   __ fld_d(Address(rdx, rax, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
 560 }
 561 
 562 
 563 void TemplateTable::aaload() {
 564   transition(itos, atos);
 565   // rdx: array
 566   index_check(rdx, rax);  // kills rbx,
 567   // rax,: index
 568   __ movl(rax, Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
 569 }
 570 
 571 
 572 void TemplateTable::baload() {
 573   transition(itos, itos);
 574   // rdx: array
 575   index_check(rdx, rax);  // kills rbx,
 576   // rax,: index
 577   // can do better code for P5 - fix this at some point
 578   __ load_signed_byte(rbx, Address(rdx, rax, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)));
 579   __ movl(rax, rbx);
 580 }
 581 
 582 
 583 void TemplateTable::caload() {
 584   transition(itos, itos);
 585   // rdx: array
 586   index_check(rdx, rax);  // kills rbx,
 587   // rax,: index
 588   // can do better code for P5 - may want to improve this at some point
 589   __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 590   __ movl(rax, rbx);
 591 }
 592 
 593 // iload followed by caload frequent pair
 594 void TemplateTable::fast_icaload() {
 595   transition(vtos, itos);
 596   // load index out of locals
 597   locals_index(rbx);
 598   __ movl(rax, iaddress(rbx));
 599   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 600 
 601   // rdx: array
 602   index_check(rdx, rax);
 603   // rax,: index
 604   __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 605   __ movl(rax, rbx);
 606 }
 607 
 608 void TemplateTable::saload() {
 609   transition(itos, itos);
 610   // rdx: array
 611   index_check(rdx, rax);  // kills rbx,
 612   // rax,: index
 613   // can do better code for P5 - may want to improve this at some point
 614   __ load_signed_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
 615   __ movl(rax, rbx);
 616 }
 617 
 618 
 619 void TemplateTable::iload(int n) {
 620   transition(vtos, itos);
 621   __ movl(rax, iaddress(n));
 622   debug_only(__ verify_local_tag(frame::TagValue, n));
 623 }
 624 
 625 
 626 void TemplateTable::lload(int n) {
 627   transition(vtos, ltos);
 628   __ movl(rax, laddress(n));
 629   __ movl(rdx, haddress(n));
 630   debug_only(__ verify_local_tag(frame::TagCategory2, n));
 631 }
 632 
 633 
 634 void TemplateTable::fload(int n) {
 635   transition(vtos, ftos);
 636   __ fld_s(faddress(n));
 637   debug_only(__ verify_local_tag(frame::TagValue, n));
 638 }
 639 
 640 
 641 void TemplateTable::dload(int n) {
 642   transition(vtos, dtos);
 643   if (TaggedStackInterpreter) {
 644     // Get double out of locals array, onto temp stack and load with
 645     // float instruction into ST0
 646     __ movl(rax, laddress(n));
 647     __ movl(rdx, haddress(n));
 648     __ pushl(rdx);  // push hi first
 649     __ pushl(rax);
 650     __ fld_d(Address(rsp, 0));
 651     __ addl(rsp, 2*wordSize);  // reset rsp
 652     debug_only(__ verify_local_tag(frame::TagCategory2, n));
 653   } else {
 654     __ fld_d(daddress(n));
 655   }
 656 }
 657 
 658 
 659 void TemplateTable::aload(int n) {
 660   transition(vtos, atos);
 661   __ movl(rax, aaddress(n));
 662   debug_only(__ verify_local_tag(frame::TagReference, n));
 663 }
 664 
 665 
// aload_0: vtos -> atos. Loads local 0 and, when RewriteFrequentPairs is on,
// patches itself into a pair bytecode or into _fast_aload_0 depending on the
// bytecode that follows.
void TemplateTable::aload_0() {
  transition(vtos, atos);
  // According to bytecode histograms, the pairs:
  //
  // _aload_0, _fast_igetfield
  // _aload_0, _fast_agetfield
  // _aload_0, _fast_fgetfield
  //
  // occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0
  // bytecode checks if the next bytecode is either _fast_igetfield,
  // _fast_agetfield or _fast_fgetfield and then rewrites the
  // current bytecode into a pair bytecode; otherwise it rewrites the current
  // bytecode into _fast_aload_0 that doesn't do the pair check anymore.
  //
  // Note: If the next bytecode is _getfield, the rewrite must be delayed,
  //       otherwise we may miss an opportunity for a pair.
  //
  // Also rewrite frequent pairs
  //   aload_0, aload_1
  //   aload_0, iload_1
  // These bytecodes with a small amount of code are most profitable to rewrite
  if (RewriteFrequentPairs) {
    Label rewrite, done;
    // get next byte
    __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));

    // do actual aload_0
    aload(0);

    // if _getfield then wait with rewrite
    __ cmpl(rbx, Bytecodes::_getfield);
    __ jcc(Assembler::equal, done);

    // if _fast_igetfield then rewrite to _fast_iaccess_0
    // (in each case below the movl sits between cmpl and jccb; it does not
    // alter the flags)
    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
    __ cmpl(rbx, Bytecodes::_fast_igetfield);
    __ movl(rcx, Bytecodes::_fast_iaccess_0);
    __ jccb(Assembler::equal, rewrite);

    // if _fast_agetfield then rewrite to _fast_aaccess_0
    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
    __ cmpl(rbx, Bytecodes::_fast_agetfield);
    __ movl(rcx, Bytecodes::_fast_aaccess_0);
    __ jccb(Assembler::equal, rewrite);

    // if _fast_fgetfield then rewrite to _fast_faccess_0
    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
    __ cmpl(rbx, Bytecodes::_fast_fgetfield);
    __ movl(rcx, Bytecodes::_fast_faccess_0);
    __ jccb(Assembler::equal, rewrite);

    // else rewrite to _fast_aload_0
    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
    __ movl(rcx, Bytecodes::_fast_aload_0);

    // rewrite
    // rcx: fast bytecode
    __ bind(rewrite);
    patch_bytecode(Bytecodes::_aload_0, rcx, rbx, false);

    __ bind(done);
  } else {
    aload(0);
  }
}
 731 
 732 void TemplateTable::istore() {
 733   transition(itos, vtos);
 734   locals_index(rbx);
 735   __ movl(iaddress(rbx), rax);
 736   __ tag_local(frame::TagValue, rbx);
 737 }
 738 
 739 
 740 void TemplateTable::lstore() {
 741   transition(ltos, vtos);
 742   locals_index(rbx);
 743   __ movl(laddress(rbx), rax);
 744   __ movl(haddress(rbx), rdx);
 745   __ tag_local(frame::TagCategory2, rbx);
 746 }
 747 
 748 
 749 void TemplateTable::fstore() {
 750   transition(ftos, vtos);
 751   locals_index(rbx);
 752   __ fstp_s(faddress(rbx));
 753   __ tag_local(frame::TagValue, rbx);
 754 }
 755 
 756 
 757 void TemplateTable::dstore() {
 758   transition(dtos, vtos);
 759   locals_index(rbx);
 760   if (TaggedStackInterpreter) {
 761     // Store double on stack and reload into locals nonadjacently
 762     __ subl(rsp, 2 * wordSize);
 763     __ fstp_d(Address(rsp, 0));
 764     __ popl(rax);
 765     __ popl(rdx);
 766     __ movl(laddress(rbx), rax);
 767     __ movl(haddress(rbx), rdx);
 768     __ tag_local(frame::TagCategory2, rbx);
 769   } else {
 770     __ fstp_d(daddress(rbx));
 771   }
 772 }
 773 
 774 
 775 void TemplateTable::astore() {
 776   transition(vtos, vtos);
 777   __ pop_ptr(rax, rdx);   // will need to pop tag too
 778   locals_index(rbx);
 779   __ movl(aaddress(rbx), rax);
 780   __ tag_local(rdx, rbx);    // need to store same tag in local may be returnAddr
 781 }
 782 
 783 
 784 void TemplateTable::wide_istore() {
 785   transition(vtos, vtos);
 786   __ pop_i(rax);
 787   locals_index_wide(rbx);
 788   __ movl(iaddress(rbx), rax);
 789   __ tag_local(frame::TagValue, rbx);
 790 }
 791 
 792 
 793 void TemplateTable::wide_lstore() {
 794   transition(vtos, vtos);
 795   __ pop_l(rax, rdx);
 796   locals_index_wide(rbx);
 797   __ movl(laddress(rbx), rax);
 798   __ movl(haddress(rbx), rdx);
 799   __ tag_local(frame::TagCategory2, rbx);
 800 }
 801 
 802 
// wide fstore: reuses the wide_istore template unchanged.
void TemplateTable::wide_fstore() {
  wide_istore();
}
 806 
 807 
// wide dstore: reuses the wide_lstore template unchanged.
void TemplateTable::wide_dstore() {
  wide_lstore();
}
 811 
 812 
 813 void TemplateTable::wide_astore() {
 814   transition(vtos, vtos);
 815   __ pop_ptr(rax, rdx);
 816   locals_index_wide(rbx);
 817   __ movl(aaddress(rbx), rax);
 818   __ tag_local(rdx, rbx);
 819 }
 820 
 821 
 822 void TemplateTable::iastore() {
 823   transition(itos, vtos);
 824   __ pop_i(rbx);
 825   // rax,: value
 826   // rdx: array
 827   index_check(rdx, rbx);  // prefer index in rbx,
 828   // rbx,: index
 829   __ movl(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_INT)), rax);
 830 }
 831 
 832 
 833 void TemplateTable::lastore() {
 834   transition(ltos, vtos);
 835   __ pop_i(rbx);
 836   // rax,: low(value)
 837   // rcx: array
 838   // rdx: high(value)
 839   index_check(rcx, rbx);  // prefer index in rbx,
 840   // rbx,: index
 841   __ movl(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize), rax);
 842   __ movl(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize), rdx);
 843 }
 844 
 845 
 846 void TemplateTable::fastore() {
 847   transition(ftos, vtos);
 848   __ pop_i(rbx);
 849   // rdx: array
 850   // st0: value
 851   index_check(rdx, rbx);  // prefer index in rbx,
 852   // rbx,: index
 853   __ fstp_s(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
 854 }
 855 
 856 
 857 void TemplateTable::dastore() {
 858   transition(dtos, vtos);
 859   __ pop_i(rbx);
 860   // rdx: array
 861   // st0: value
 862   index_check(rdx, rbx);  // prefer index in rbx,
 863   // rbx,: index
 864   __ fstp_d(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
 865 }
 866 
 867 
// aastore: vtos -> vtos. Stores a reference into an object array.
// The three operands are read in place from the expression stack and only
// removed at the very end, after the bounds check and (for non-null values)
// the array store subtype check have been emitted.
void TemplateTable::aastore() {
  Label is_null, ok_is_subtype, done;
  transition(vtos, vtos);
  // stack: ..., array, index, value
  __ movl(rax, at_tos());     // Value
  __ movl(rcx, at_tos_p1());  // Index
  __ movl(rdx, at_tos_p2());  // Array
  index_check_without_pop(rdx, rcx);      // kills rbx,
  // do array store check - check for NULL value first
  __ testl(rax, rax);
  __ jcc(Assembler::zero, is_null);

  // Move subklass into EBX
  __ movl(rbx, Address(rax, oopDesc::klass_offset_in_bytes()));
  // Move superklass into EAX
  // (the array's klass is loaded first, then its element klass)
  __ movl(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
  __ movl(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
  // Compress array+index*4+12 into a single register.  Frees ECX.
  __ leal(rdx, Address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));

  // Generate subtype check.  Blows ECX.  Resets EDI to locals.
  // Superklass in EAX.  Subklass in EBX.
  __ gen_subtype_check( rbx, ok_is_subtype );

  // Come here on failure
  // object is at TOS
  __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));

  // Come here on success
  __ bind(ok_is_subtype);
  // Reload the value: EAX was overwritten with the superklass above.
  // NOTE(review): this reads at_rsp() while the initial load used at_tos();
  // presumably both name the same word here - confirm.
  __ movl(rax, at_rsp());     // Value
  __ movl(Address(rdx, 0), rax);   // EDX already holds the element address
  __ store_check(rdx);
  __ jmpb(done);

  // Have a NULL in EAX, EDX=array, ECX=index.  Store NULL at ary[idx]
  __ bind(is_null);
  __ profile_null_seen(rbx);
  __ movl(Address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax);

  // Pop stack arguments
  __ bind(done);
  __ addl(rsp, 3 * Interpreter::stackElementSize());
}
 912 
 913 
 914 void TemplateTable::bastore() {
 915   transition(itos, vtos);
 916   __ pop_i(rbx);
 917   // rax,: value
 918   // rdx: array
 919   index_check(rdx, rbx);  // prefer index in rbx,
 920   // rbx,: index
 921   __ movb(Address(rdx, rbx, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)), rax);
 922 }
 923 
 924 
 925 void TemplateTable::castore() {
 926   transition(itos, vtos);
 927   __ pop_i(rbx);
 928   // rax,: value
 929   // rdx: array
 930   index_check(rdx, rbx);  // prefer index in rbx,
 931   // rbx,: index
 932   __ movw(Address(rdx, rbx, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)), rax);
 933 }
 934 
 935 
// sastore: reuses the castore template unchanged.
void TemplateTable::sastore() {
  castore();
}
 939 
 940 
 941 void TemplateTable::istore(int n) {
 942   transition(itos, vtos);
 943   __ movl(iaddress(n), rax);
 944   __ tag_local(frame::TagValue, n);
 945 }
 946 
 947 
 948 void TemplateTable::lstore(int n) {
 949   transition(ltos, vtos);
 950   __ movl(laddress(n), rax);
 951   __ movl(haddress(n), rdx);
 952   __ tag_local(frame::TagCategory2, n);
 953 }
 954 
 955 
 956 void TemplateTable::fstore(int n) {
 957   transition(ftos, vtos);
 958   __ fstp_s(faddress(n));
 959   __ tag_local(frame::TagValue, n);
 960 }
 961 
 962 
 963 void TemplateTable::dstore(int n) {
 964   transition(dtos, vtos);
 965   if (TaggedStackInterpreter) {
 966     __ subl(rsp, 2 * wordSize);
 967     __ fstp_d(Address(rsp, 0));
 968     __ popl(rax);
 969     __ popl(rdx);
 970     __ movl(laddress(n), rax);
 971     __ movl(haddress(n), rdx);
 972     __ tag_local(frame::TagCategory2, n);
 973   } else {
 974     __ fstp_d(daddress(n));
 975   }
 976 }
 977 
 978 
// astore template for a fixed local index n (vtos -> vtos): pops the top
// stack element with pop_ptr(rax, rdx), stores rax at aaddress(n) and passes
// rdx to tag_local as the tag for that local.
void TemplateTable::astore(int n) {
  transition(vtos, vtos);
  __ pop_ptr(rax, rdx);
  __ movl(aaddress(n), rax);
  __ tag_local(rdx, n);
}
 985 
 986 
 987 void TemplateTable::pop() {
 988   transition(vtos, vtos);
 989   __ addl(rsp, Interpreter::stackElementSize());
 990 }
 991 
 992 
 993 void TemplateTable::pop2() {
 994   transition(vtos, vtos);
 995   __ addl(rsp, 2*Interpreter::stackElementSize());
 996 }
 997 
 998 
// dup template: duplicates the top expression-stack element.
// The element is read with load_ptr_and_tag into rax/rdx and pushed again
// with push_ptr using the same register pair.
void TemplateTable::dup() {
  transition(vtos, vtos);
  // stack: ..., a
  __ load_ptr_and_tag(0, rax, rdx);  // load a (slot 0 is the top of stack)
  __ push_ptr(rax, rdx);             // push a
  // stack: ..., a, a
}
1006 
1007 
// dup_x1 template: ..., a, b  ->  ..., b, a, b
// The two top elements are exchanged in place and the original top (b) is
// pushed again. Uses rax/rdx for b and rcx/rbx for a.
void TemplateTable::dup_x1() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr_and_tag(0, rax, rdx);  // load b
  __ load_ptr_and_tag(1, rcx, rbx);  // load a
  __ store_ptr_and_tag(1, rax, rdx); // store b
  __ store_ptr_and_tag(0, rcx, rbx); // store a
  // stack: ..., b, a
  __ push_ptr(rax, rdx);             // push b
  // stack: ..., b, a, b
}
1018 
1019 
// dup_x2 template: ..., a, b, c  ->  ..., c, a, b, c
// Slot indices passed to load/store_ptr_and_tag count from the current top
// of stack, so they shift by one after the push_ptr below.
void TemplateTable::dup_x2() {
  transition(vtos, vtos);
  // stack: ..., a, b, c
  __ load_ptr_and_tag(0, rax, rdx);  // load c
  __ load_ptr_and_tag(2, rcx, rbx);  // load a
  __ store_ptr_and_tag(2, rax, rdx); // store c in a
  __ push_ptr(rax, rdx);             // push c
  // stack: ..., c, b, c, c
  __ load_ptr_and_tag(2, rax, rdx);  // load b
  __ store_ptr_and_tag(2, rcx, rbx); // store a in b
  // stack: ..., c, a, c, c
  __ store_ptr_and_tag(1, rax, rdx); // store b in c
  // stack: ..., c, a, b, c
}
1034 
1035 
// dup2 template: ..., a, b  ->  ..., a, b, a, b
// Both loads use slot 1: after a has been pushed, b (previously slot 0) has
// moved to slot 1.
void TemplateTable::dup2() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr_and_tag(1, rax, rdx);  // load a
  __ push_ptr(rax, rdx);             // push a
  // stack: ..., a, b, a
  __ load_ptr_and_tag(1, rax, rdx);  // load b
  __ push_ptr(rax, rdx);             // push b
  // stack: ..., a, b, a, b
}
1045 
1046 
// dup2_x1 template: ..., a, b, c  ->  ..., b, c, a, b, c
// b and c are pushed first; the three older slots are then rewritten in
// place. rax/rdx keeps b throughout, rcx/rbx holds c and later a.
void TemplateTable::dup2_x1() {
  transition(vtos, vtos);
  // stack: ..., a, b, c
  __ load_ptr_and_tag(0, rcx, rbx);  // load c
  __ load_ptr_and_tag(1, rax, rdx);  // load b
  __ push_ptr(rax, rdx);             // push b
  __ push_ptr(rcx, rbx);             // push c
  // stack: ..., a, b, c, b, c
  __ store_ptr_and_tag(3, rcx, rbx); // store c in b
  // stack: ..., a, c, c, b, c
  __ load_ptr_and_tag(4, rcx, rbx);  // load a
  __ store_ptr_and_tag(2, rcx, rbx); // store a in 2nd c
  // stack: ..., a, c, a, b, c
  __ store_ptr_and_tag(4, rax, rdx); // store b in a
  // stack: ..., b, c, a, b, c
}
1064 
1065 
// dup2_x2 template: ..., a, b, c, d  ->  ..., c, d, a, b, c, d
// c and d are pushed first; the four older slots are then swapped pairwise
// in place (b <-> d, then a <-> c).
void TemplateTable::dup2_x2() {
  transition(vtos, vtos);
  // stack: ..., a, b, c, d
  __ load_ptr_and_tag(0, rcx, rbx);  // load d
  __ load_ptr_and_tag(1, rax, rdx);  // load c
  __ push_ptr(rax, rdx);             // push c
  __ push_ptr(rcx, rbx);             // push d
  // stack: ..., a, b, c, d, c, d
  __ load_ptr_and_tag(4, rax, rdx);  // load b
  __ store_ptr_and_tag(2, rax, rdx); // store b in d
  __ store_ptr_and_tag(4, rcx, rbx); // store d in b
  // stack: ..., a, d, c, b, c, d
  __ load_ptr_and_tag(5, rcx, rbx);  // load a
  __ load_ptr_and_tag(3, rax, rdx);  // load c
  __ store_ptr_and_tag(3, rcx, rbx); // store a in c
  __ store_ptr_and_tag(5, rax, rdx); // store c in a
  // stack: ..., c, d, a, b, c, d
}
1085 
1086 
// swap template: ..., a, b  ->  ..., b, a
// Both elements are loaded (a into rcx/rbx, b into rax/rdx) before either
// slot is overwritten.
void TemplateTable::swap() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr_and_tag(1, rcx, rbx);  // load a
  __ load_ptr_and_tag(0, rax, rdx);  // load b
  __ store_ptr_and_tag(0, rcx, rbx); // store a in b
  __ store_ptr_and_tag(1, rax, rdx); // store b in a
  // stack: ..., b, a
}
1096 
1097 
1098 void TemplateTable::iop2(Operation op) {
1099   transition(itos, itos);
1100   switch (op) {
1101     case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
1102     case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
1103     case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
1104     case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
1105     case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
1106     case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
1107     case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
1108     case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
1109     case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
1110     default   : ShouldNotReachHere();
1111   }
1112 }
1113 
1114 
1115 void TemplateTable::lop2(Operation op) {
1116   transition(ltos, ltos);
1117   __ pop_l(rbx, rcx);
1118   switch (op) {
1119     case add : __ addl(rax, rbx); __ adcl(rdx, rcx); break;
1120     case sub : __ subl(rbx, rax); __ sbbl(rcx, rdx);
1121                __ movl(rax, rbx); __ movl(rdx, rcx); break;
1122     case _and: __ andl(rax, rbx); __ andl(rdx, rcx); break;
1123     case _or : __ orl (rax, rbx); __ orl (rdx, rcx); break;
1124     case _xor: __ xorl(rax, rbx); __ xorl(rdx, rcx); break;
1125     default : ShouldNotReachHere();
1126   }
1127 }
1128 
1129 
// idiv template (itos -> itos): the tos value (divisor) is moved to rcx, the
// other operand is popped into rax, and corrected_idivl(rcx) is emitted.
void TemplateTable::idiv() {
  transition(itos, itos);
  __ movl(rcx, rax);
  __ pop_i(rax);
  // Note: one could xor rax and rcx and compare with (-1 ^ min_int). If
  //       they are not equal, one could do a normal division (no correction
  //       needed), which may speed up this implementation for the common case.
  //       (see also JVM spec., p.243 & p.271)
  __ corrected_idivl(rcx);
}
1140 
1141 
// irem template (itos -> itos): same sequence as idiv, followed by a move of
// rdx into rax so that the itos result is taken from rdx.
void TemplateTable::irem() {
  transition(itos, itos);
  __ movl(rcx, rax);
  __ pop_i(rax);
  // Note: one could xor rax and rcx and compare with (-1 ^ min_int). If
  //       they are not equal, one could do a normal division (no correction
  //       needed), which may speed up this implementation for the common case.
  //       (see also JVM spec., p.243 & p.271)
  __ corrected_idivl(rcx);
  __ movl(rax, rdx);   // result register for itos is rax
}
1153 
1154 
1155 void TemplateTable::lmul() {
1156   transition(ltos, ltos);
1157   __ pop_l(rbx, rcx);
1158   __ pushl(rcx); __ pushl(rbx);
1159   __ pushl(rdx); __ pushl(rax);
1160   __ lmul(2 * wordSize, 0);
1161   __ addl(rsp, 4 * wordSize);  // take off temporaries
1162 }
1163 
1164 
1165 void TemplateTable::ldiv() {
1166   transition(ltos, ltos);
1167   __ pop_l(rbx, rcx);
1168   __ pushl(rcx); __ pushl(rbx);
1169   __ pushl(rdx); __ pushl(rax);
1170   // check if y = 0
1171   __ orl(rax, rdx);
1172   __ jump_cc(Assembler::zero,
1173              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1174   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv));
1175   __ addl(rsp, 4 * wordSize);  // take off temporaries
1176 }
1177 
1178 
1179 void TemplateTable::lrem() {
1180   transition(ltos, ltos);
1181   __ pop_l(rbx, rcx);
1182   __ pushl(rcx); __ pushl(rbx);
1183   __ pushl(rdx); __ pushl(rax);
1184   // check if y = 0
1185   __ orl(rax, rdx);
1186   __ jump_cc(Assembler::zero,
1187              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1188   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem));
1189   __ addl(rsp, 4 * wordSize);
1190 }
1191 
1192 
// lshl template (itos -> ltos): the int shift count arrives in rax and is
// moved to rcx; the long value is then popped into rdx:rax and the lshl
// macro is emitted on that register pair.
void TemplateTable::lshl() {
  transition(itos, ltos);
  __ movl(rcx, rax);                             // get shift count
  __ pop_l(rax, rdx);                            // get shift value
  __ lshl(rdx, rax);
}
1199 
1200 
// lshr template (itos -> ltos): the int shift count arrives in rax and is
// moved to rcx; the long value is then popped into rdx:rax and the lshr
// macro is emitted with its third argument set to true.
// NOTE(review): the flag presumably selects the sign-propagating
// (arithmetic) shift -- confirm against MacroAssembler::lshr.
void TemplateTable::lshr() {
  transition(itos, ltos);
  __ movl(rcx, rax);                             // get shift count
  __ pop_l(rax, rdx);                            // get shift value
  __ lshr(rdx, rax, true);
}
1207 
1208 
// lushr template (itos -> ltos): same sequence as lshr(), but the lshr macro
// is emitted without the extra 'true' argument.
// NOTE(review): presumably this yields the logical (zero-filling) shift --
// confirm against the default argument of MacroAssembler::lshr.
void TemplateTable::lushr() {
  transition(itos, ltos);
  __ movl(rcx, rax);                             // get shift count
  __ pop_l(rax, rdx);                            // get shift value
  __ lshr(rdx, rax);
}
1215 
1216 
1217 void TemplateTable::fop2(Operation op) {
1218   transition(ftos, ftos);
1219   __ pop_ftos_to_rsp();  // pop ftos into rsp
1220   switch (op) {
1221     case add: __ fadd_s (at_rsp());                break;
1222     case sub: __ fsubr_s(at_rsp());                break;
1223     case mul: __ fmul_s (at_rsp());                break;
1224     case div: __ fdivr_s(at_rsp());                break;
1225     case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
1226     default : ShouldNotReachHere();
1227   }
1228   __ f2ieee();
1229   __ popl(rax);  // pop float thing off
1230 }
1231 
1232 
// Binary double operation template (dtos -> dtos).
// pop_dtos_to_rsp() makes the stacked operand available at at_rsp(); the
// x87 instruction then combines it with the value already on the FPU stack.
// For mul and div the emitted code tests the JVM_ACC_STRICT bit of the
// current method's access flags at run time and takes a separate path that
// additionally scales by the constants at addr_fpu_subnormal_bias1() and
// addr_fpu_subnormal_bias2().
// NOTE(review): the bias scaling is presumably there to obtain
// strictfp-conforming results from the x87 unit -- confirm in StubRoutines.
void TemplateTable::dop2(Operation op) {
  transition(dtos, dtos);
  __ pop_dtos_to_rsp();  // pop dtos into rsp

  switch (op) {
    case add: __ fadd_d (at_rsp());                break;
    case sub: __ fsubr_d(at_rsp());                break;
    case mul: {
      Label L_strict;
      Label L_join;
      const Address access_flags      (rcx, methodOopDesc::access_flags_offset());
      __ get_method(rcx);
      __ movl(rcx, access_flags);         // rcx := method's access flags
      __ testl(rcx, JVM_ACC_STRICT);
      __ jccb(Assembler::notZero, L_strict);
      // non-strict method: plain multiply
      __ fmul_d (at_rsp());
      __ jmpb(L_join);
      __ bind(L_strict);
      // strict method: scale by bias1, multiply, then scale by bias2
      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
      __ fmulp();
      __ fmul_d (at_rsp());
      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
      __ fmulp();
      __ bind(L_join);
      break;
    }
    case div: {
      Label L_strict;
      Label L_join;
      const Address access_flags      (rcx, methodOopDesc::access_flags_offset());
      __ get_method(rcx);
      __ movl(rcx, access_flags);         // rcx := method's access flags
      __ testl(rcx, JVM_ACC_STRICT);
      __ jccb(Assembler::notZero, L_strict);
      // non-strict method: plain (reversed) divide
      __ fdivr_d(at_rsp());
      __ jmp(L_join);
      __ bind(L_strict);
      // strict method: load bias1 scaled by the stacked operand, divide,
      // then scale by bias2
      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
      __ fmul_d (at_rsp());
      __ fdivrp();
      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
      __ fmulp();
      __ bind(L_join);
      break;
    }
    case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
    default : ShouldNotReachHere();
  }
  __ d2ieee();
  // Pop double precision number from rsp.
  __ popl(rax);
  __ popl(rdx);
}
1286 
1287 
// ineg template (itos -> itos): negates the int in rax in place.
void TemplateTable::ineg() {
  transition(itos, itos);
  __ negl(rax);
}
1292 
1293 
// lneg template (ltos -> ltos): emits the lneg macro on the rdx:rax pair
// that holds the long top-of-stack value.
void TemplateTable::lneg() {
  transition(ltos, ltos);
  __ lneg(rdx, rax);
}
1298 
1299 
// fneg template (ftos -> ftos): emits fchs, the x87 change-sign instruction
// acting on the top of the FPU stack.
void TemplateTable::fneg() {
  transition(ftos, ftos);
  __ fchs();
}
1304 
1305 
// dneg template (dtos -> dtos): emits fchs, the x87 change-sign instruction
// acting on the top of the FPU stack (same code as fneg).
void TemplateTable::dneg() {
  transition(dtos, dtos);
  __ fchs();
}
1310 
1311 
// iinc template (vtos -> vtos): adds the signed byte found at bcp + 2 to the
// int local addressed through rbx.
// NOTE(review): locals_index(rbx) presumably loads the local's index from the
// bytecode stream into rbx -- confirm against its definition.
void TemplateTable::iinc() {
  transition(vtos, vtos);
  __ load_signed_byte(rdx, at_bcp(2));           // get constant
  locals_index(rbx);
  __ addl(iaddress(rbx), rdx);                   // local += constant
}
1318 
1319 
// wide iinc template (vtos -> vtos): adds a 16-bit signed constant to the int
// local addressed through rbx.
// The constant is fetched with a 32-bit load at bcp + 4; bswap followed by an
// arithmetic right shift of 16 leaves the first two bytes of that load,
// interpreted as a big-endian signed 16-bit value, in rdx.
// NOTE(review): locals_index_wide(rbx) presumably loads the 16-bit local
// index into rbx -- confirm against its definition.
void TemplateTable::wide_iinc() {
  transition(vtos, vtos);
  __ movl(rdx, at_bcp(4));                       // get constant
  locals_index_wide(rbx);
  __ bswap(rdx);                                 // swap bytes & sign-extend constant
  __ sarl(rdx, 16);
  __ addl(iaddress(rbx), rdx);
  // Note: should probably use only one movl to get both
  //       the index and the constant -> fix this
}
1330 
1331 
// Shared template for the primitive conversion bytecodes (i2l, i2f, ... d2f).
// The bytecode being generated is obtained from bytecode(). In ASSERT builds
// the expected tos-in/tos-out states are derived from it and checked via
// transition(); in all builds the second switch emits the conversion code.
// Integer results live in rax (rdx:rax for long); floating-point values live
// on the x87 stack, so int<->float conversions go through the machine stack.
void TemplateTable::convert() {
  // Checking
#ifdef ASSERT
  { TosState tos_in  = ilgl;
    TosState tos_out = ilgl;
    // tos state expected on entry, keyed by the source type
    switch (bytecode()) {
      case Bytecodes::_i2l: // fall through
      case Bytecodes::_i2f: // fall through
      case Bytecodes::_i2d: // fall through
      case Bytecodes::_i2b: // fall through
      case Bytecodes::_i2c: // fall through
      case Bytecodes::_i2s: tos_in = itos; break;
      case Bytecodes::_l2i: // fall through
      case Bytecodes::_l2f: // fall through
      case Bytecodes::_l2d: tos_in = ltos; break;
      case Bytecodes::_f2i: // fall through
      case Bytecodes::_f2l: // fall through
      case Bytecodes::_f2d: tos_in = ftos; break;
      case Bytecodes::_d2i: // fall through
      case Bytecodes::_d2l: // fall through
      case Bytecodes::_d2f: tos_in = dtos; break;
      default             : ShouldNotReachHere();
    }
    // tos state produced on exit, keyed by the destination type
    switch (bytecode()) {
      case Bytecodes::_l2i: // fall through
      case Bytecodes::_f2i: // fall through
      case Bytecodes::_d2i: // fall through
      case Bytecodes::_i2b: // fall through
      case Bytecodes::_i2c: // fall through
      case Bytecodes::_i2s: tos_out = itos; break;
      case Bytecodes::_i2l: // fall through
      case Bytecodes::_f2l: // fall through
      case Bytecodes::_d2l: tos_out = ltos; break;
      case Bytecodes::_i2f: // fall through
      case Bytecodes::_l2f: // fall through
      case Bytecodes::_d2f: tos_out = ftos; break;
      case Bytecodes::_i2d: // fall through
      case Bytecodes::_l2d: // fall through
      case Bytecodes::_f2d: tos_out = dtos; break;
      default             : ShouldNotReachHere();
    }
    transition(tos_in, tos_out);
  }
#endif // ASSERT

  // Conversion
  // (Note: use pushl(rcx)/popl(rcx) for 1/2-word stack-ptr manipulation)
  switch (bytecode()) {
    case Bytecodes::_i2l:
      __ extend_sign(rdx, rax);
      break;
    case Bytecodes::_i2f:
      __ pushl(rax);         // store int on tos
      __ fild_s(at_rsp());   // load int to ST0
      __ f2ieee();           // truncate to float size
      __ popl(rcx);          // adjust rsp
      break;
    case Bytecodes::_i2d:
      __ pushl(rax);         // add one slot for d2ieee()
      __ pushl(rax);         // store int on tos
      __ fild_s(at_rsp());   // load int to ST0
      __ d2ieee();           // truncate to double size
      __ popl(rcx);          // adjust rsp
      __ popl(rcx);
      break;
    case Bytecodes::_i2b:
      __ shll(rax, 24);      // truncate upper 24 bits
      __ sarl(rax, 24);      // and sign-extend byte
      break;
    case Bytecodes::_i2c:
      __ andl(rax, 0xFFFF);  // truncate upper 16 bits
      break;
    case Bytecodes::_i2s:
      __ shll(rax, 16);      // truncate upper 16 bits
      __ sarl(rax, 16);      // and sign-extend short
      break;
    case Bytecodes::_l2i:
      /* nothing to do: the itos result is the rax word of the long */
      break;
    case Bytecodes::_l2f:
      __ pushl(rdx);         // store long on tos
      __ pushl(rax);
      __ fild_d(at_rsp());   // load long to ST0
      __ f2ieee();           // truncate to float size
      __ popl(rcx);          // adjust rsp
      __ popl(rcx);
      break;
    case Bytecodes::_l2d:
      __ pushl(rdx);         // store long on tos
      __ pushl(rax);
      __ fild_d(at_rsp());   // load long to ST0
      __ d2ieee();           // truncate to double size
      __ popl(rcx);          // adjust rsp
      __ popl(rcx);
      break;
    case Bytecodes::_f2i:
      __ pushl(rcx);         // reserve space for argument
      __ fstp_s(at_rsp());   // pass float argument on stack
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
      break;
    case Bytecodes::_f2l:
      __ pushl(rcx);         // reserve space for argument
      __ fstp_s(at_rsp());   // pass float argument on stack
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
      break;
    case Bytecodes::_f2d:
      /* nothing to do */
      break;
    case Bytecodes::_d2i:
      __ pushl(rcx);         // reserve space for argument
      __ pushl(rcx);
      __ fstp_d(at_rsp());   // pass double argument on stack
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
      break;
    case Bytecodes::_d2l:
      __ pushl(rcx);         // reserve space for argument
      __ pushl(rcx);
      __ fstp_d(at_rsp());   // pass double argument on stack
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
      break;
    case Bytecodes::_d2f:
      __ pushl(rcx);         // reserve space for f2ieee()
      __ f2ieee();           // truncate to float size
      __ popl(rcx);          // adjust rsp
      break;
    default             :
      ShouldNotReachHere();
  }
}
1461 
1462 
// lcmp template (ltos -> itos): compares the long popped from the expression
// stack (x) with the long top-of-stack value (y) and leaves the int result of
// lcmp2int in rax.
void TemplateTable::lcmp() {
  transition(ltos, itos);
  // y = rdx:rax
  __ pop_l(rbx, rcx);             // get x = rcx:rbx
  __ lcmp2int(rcx, rbx, rdx, rax);// rcx := cmp(x, y)
  __ movl(rax, rcx);              // itos result goes in rax
}
1470 
1471 
1472 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
1473   if (is_float) {
1474     __ pop_ftos_to_rsp();
1475     __ fld_s(at_rsp());
1476   } else {
1477     __ pop_dtos_to_rsp();
1478     __ fld_d(at_rsp());
1479     __ popl(rdx);
1480   }
1481   __ popl(rcx);
1482   __ fcmp2int(rax, unordered_result < 0);
1483 }
1484 
1485 
// Generates the code for a taken branch.
// is_jsr  - when true, the return bci is pushed and the generated code ends
//           with a dispatch to the target; nothing below the jsr section is
//           emitted.
// is_wide - when true, the full 32-bit displacement read at bcp + 1 is used;
//           otherwise only its sign-extended upper 16 bits (after bswap).
// For normal branches with UseLoopCounter set, backward branches also bump
// the method's backedge counter and may divert to the out-of-line code
// emitted after the dispatch: creation of a method data oop
// (ProfileInterpreter) and on-stack replacement (UseOnStackReplacement).
void TemplateTable::branch(bool is_jsr, bool is_wide) {
  __ get_method(rcx);           // ECX holds method
  __ profile_taken_branch(rax,rbx); // EAX holds updated MDP, EBX holds bumped taken count

  const ByteSize be_offset = methodOopDesc::backedge_counter_offset() + InvocationCounter::counter_offset();
  const ByteSize inv_offset = methodOopDesc::invocation_counter_offset() + InvocationCounter::counter_offset();
  const int method_offset = frame::interpreter_frame_method_offset * wordSize;

  // Load up EDX with the branch displacement
  __ movl(rdx, at_bcp(1));
  __ bswap(rdx);
  if (!is_wide) __ sarl(rdx, 16);

  // Handle all the JSR stuff here, then exit.
  // It's much shorter and cleaner than intermingling with the
  // non-JSR normal-branch stuff occurring below.
  if (is_jsr) {
    // Pre-load the next target bytecode into EBX
    __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1, 0));

    // compute return address as bci in rax
    // (bcp of the following bytecode minus the start of the code array)
    __ leal(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset())));
    __ subl(rax, Address(rcx, methodOopDesc::const_offset()));
    // Adjust the bcp in ESI by the displacement in EDX
    __ addl(rsi, rdx);
    // Push return address
    __ push_i(rax);
    // jsr returns vtos
    __ dispatch_only_noverify(vtos);
    return;
  }

  // Normal (non-jsr) branch handling

  // Adjust the bcp in ESI by the displacement in EDX
  __ addl(rsi, rdx);

  assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
  Label backedge_counter_overflow;
  Label profile_method;
  Label dispatch;
  if (UseLoopCounter) {
    // increment backedge counter for backward branches
    // rax: MDO
    // rbx: MDO bumped taken-count
    // rcx: method
    // rdx: target offset
    // rsi: target bcp
    // rdi: locals pointer
    __ testl(rdx, rdx);             // check if forward or backward branch
    __ jcc(Assembler::positive, dispatch); // count only if backward branch

    // increment counter
    __ movl(rax, Address(rcx, be_offset));        // load backedge counter
    __ increment(rax, InvocationCounter::count_increment); // increment counter
    __ movl(Address(rcx, be_offset), rax);        // store counter

    __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
    __ andl(rax, InvocationCounter::count_mask_value);     // mask with count_mask_value
    __ addl(rax, Address(rcx, be_offset));        // add both counters

    if (ProfileInterpreter) {
      // Test to see if we should create a method data oop
      __ cmp32(rax,
               ExternalAddress((address) &InvocationCounter::InterpreterProfileLimit));
      __ jcc(Assembler::less, dispatch);

      // if no method data exists, go to profile method
      __ test_method_data_pointer(rax, profile_method);

      if (UseOnStackReplacement) {
        // check for overflow against rbx, which is the MDO taken count
        __ cmp32(rbx,
                 ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
        __ jcc(Assembler::below, dispatch);

        // When ProfileInterpreter is on, the backedge_count comes from the
        // methodDataOop, whose value does not get reset on the call to
        // frequency_counter_overflow().  To avoid excessive calls to the overflow
        // routine while the method is being compiled, add a second test to make
        // sure the overflow function is called only once every overflow_frequency.
        const int overflow_frequency = 1024;
        __ andl(rbx, overflow_frequency-1);
        __ jcc(Assembler::zero, backedge_counter_overflow);

      }
    } else {
      if (UseOnStackReplacement) {
        // check for overflow against rax, which is the sum of the counters
        __ cmp32(rax,
                 ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
        __ jcc(Assembler::aboveEqual, backedge_counter_overflow);

      }
    }
    __ bind(dispatch);
  }

  // Pre-load the next target bytecode into EBX
  __ load_unsigned_byte(rbx, Address(rsi, 0));

  // continue with the bytecode @ target
  // rax: return bci for jsr's, unused otherwise
  // rbx: target bytecode
  // rsi: target bcp
  __ dispatch_only(vtos);

  // Everything below is out-of-line code reached only through the labels
  // profile_method and backedge_counter_overflow.
  if (UseLoopCounter) {
    if (ProfileInterpreter) {
      // Out-of-line code to allocate method data oop.
      __ bind(profile_method);
      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method), rsi);
      __ load_unsigned_byte(rbx, Address(rsi, 0));  // restore target bytecode
      __ movl(rcx, Address(rbp, method_offset));
      __ movl(rcx, Address(rcx, in_bytes(methodOopDesc::method_data_offset())));
      __ movl(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), rcx);
      __ test_method_data_pointer(rcx, dispatch);
      // offset non-null mdp by MDO::data_offset() + IR::profile_method()
      __ addl(rcx, in_bytes(methodDataOopDesc::data_offset()));
      __ addl(rcx, rax);
      __ movl(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), rcx);
      __ jmp(dispatch);
    }

    if (UseOnStackReplacement) {

      // backedge counter overflow
      __ bind(backedge_counter_overflow);
      // rdx still holds the displacement and rsi the target bcp, so
      // rsi - rdx is the bcp of the branch itself.
      __ negl(rdx);
      __ addl(rdx, rsi);        // branch bcp
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), rdx);
      __ load_unsigned_byte(rbx, Address(rsi, 0));  // restore target bytecode

      // rax: osr nmethod (osr ok) or NULL (osr not possible)
      // rbx: target bytecode
      // rdx: scratch
      // rdi: locals pointer
      // rsi: bcp
      __ testl(rax, rax);                        // test result
      __ jcc(Assembler::zero, dispatch);         // no osr if null
      // nmethod may have been invalidated (VM may block upon call_VM return)
      __ movl(rcx, Address(rax, nmethod::entry_bci_offset()));
      __ cmpl(rcx, InvalidOSREntryBci);
      __ jcc(Assembler::equal, dispatch);

      // We have the address of an on stack replacement routine in rax.
      // We need to prepare to execute the OSR method. First we must
      // migrate the locals and monitors off of the stack.

      __ movl(rbx, rax);                             // save the nmethod

      const Register thread = rcx;
      __ get_thread(thread);
      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
      // rax is the OSR buffer, move it to the expected parameter location
      __ movl(rcx, rax);

      // pop the interpreter frame
      __ movl(rdx, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
      __ leave();                                // remove frame anchor
      __ popl(rdi);                              // get return address
      __ movl(rsp, rdx);                         // set sp to sender sp


      Label skip;
      Label chkint;

      // The interpreter frame we have removed may be returning to
      // either the callstub or the interpreter. Since we will
      // now be returning from a compiled (OSR) nmethod we must
      // adjust the return address to one where it can handle compiled
      // results and clean the fpu stack. This is very similar to
      // what an i2c adapter must do.

      // Are we returning to the call stub?

      __ cmp32(rdi, ExternalAddress(StubRoutines::_call_stub_return_address));
      __ jcc(Assembler::notEqual, chkint);

      // yes, adjust to the specialized call stub return.
      assert(StubRoutines::i486::get_call_stub_compiled_return() != NULL, "must be set");
      __ lea(rdi, ExternalAddress(StubRoutines::i486::get_call_stub_compiled_return()));
      __ jmp(skip);

      __ bind(chkint);

      // Are we returning to the interpreter? Look for sentinel

      __ cmpl(Address(rdi, -8), Interpreter::return_sentinel);
      __ jcc(Assembler::notEqual, skip);

      // Adjust to compiled return back to interpreter

      __ movl(rdi, Address(rdi, -4));
      __ bind(skip);

      // Align stack pointer for compiled code (note that caller is
      // responsible for undoing this fixup by remembering the old SP
      // in an rbp-relative location)
      __ andl(rsp, -(StackAlignmentInBytes));

      // push the (possibly adjusted) return address
      __ pushl(rdi);

      // and begin the OSR nmethod
      __ jmp(Address(rbx, nmethod::osr_entry_point_offset()));
    }
  }
}
1695 
1696 
1697 void TemplateTable::if_0cmp(Condition cc) {
1698   transition(itos, vtos);
1699   // assume branch is more often taken than not (loops use backward branches)
1700   Label not_taken;
1701   __ testl(rax, rax);
1702   __ jcc(j_not(cc), not_taken);
1703   branch(false, false);
1704   __ bind(not_taken);
1705   __ profile_not_taken_branch(rax);
1706 }
1707 
1708 
1709 void TemplateTable::if_icmp(Condition cc) {
1710   transition(itos, vtos);
1711   // assume branch is more often taken than not (loops use backward branches)
1712   Label not_taken;
1713   __ pop_i(rdx);
1714   __ cmpl(rdx, rax);
1715   __ jcc(j_not(cc), not_taken);
1716   branch(false, false);
1717   __ bind(not_taken);
1718   __ profile_not_taken_branch(rax);
1719 }
1720 
1721 
1722 void TemplateTable::if_nullcmp(Condition cc) {
1723   transition(atos, vtos);
1724   // assume branch is more often taken than not (loops use backward branches)
1725   Label not_taken;
1726   __ testl(rax, rax);
1727   __ jcc(j_not(cc), not_taken);
1728   branch(false, false);
1729   __ bind(not_taken);
1730   __ profile_not_taken_branch(rax);
1731 }
1732 
1733 
1734 void TemplateTable::if_acmp(Condition cc) {
1735   transition(atos, vtos);
1736   // assume branch is more often taken than not (loops use backward branches)
1737   Label not_taken;
1738   __ pop_ptr(rdx);
1739   __ cmpl(rdx, rax);
1740   __ jcc(j_not(cc), not_taken);
1741   branch(false, false);
1742   __ bind(not_taken);
1743   __ profile_not_taken_branch(rax);
1744 }
1745 
1746 
1747 void TemplateTable::ret() {
1748   transition(vtos, vtos);
1749   locals_index(rbx);
1750   __ movl(rbx, iaddress(rbx));                   // get return bci, compute return bcp
1751   __ profile_ret(rbx, rcx);
1752   __ get_method(rax);
1753   __ movl(rsi, Address(rax, methodOopDesc::const_offset()));
1754   __ leal(rsi, Address(rsi, rbx, Address::times_1,
1755                        constMethodOopDesc::codes_offset()));
1756   __ dispatch_next(vtos);
1757 }
1758 
1759 
1760 void TemplateTable::wide_ret() {
1761   transition(vtos, vtos);
1762   locals_index_wide(rbx);
1763   __ movl(rbx, iaddress(rbx));                   // get return bci, compute return bcp
1764   __ profile_ret(rbx, rcx);
1765   __ get_method(rax);
1766   __ movl(rsi, Address(rax, methodOopDesc::const_offset()));
1767   __ leal(rsi, Address(rsi, rbx, Address::times_1, constMethodOopDesc::codes_offset()));
1768   __ dispatch_next(vtos);
1769 }
1770 
1771 
1772 void TemplateTable::tableswitch() {
1773   Label default_case, continue_execution;
1774   transition(itos, vtos);
1775   // align rsi
1776   __ leal(rbx, at_bcp(wordSize));
1777   __ andl(rbx, -wordSize);
1778   // load lo & hi
1779   __ movl(rcx, Address(rbx, 1 * wordSize));
1780   __ movl(rdx, Address(rbx, 2 * wordSize));
1781   __ bswap(rcx);
1782   __ bswap(rdx);
1783   // check against lo & hi
1784   __ cmpl(rax, rcx);
1785   __ jccb(Assembler::less, default_case);
1786   __ cmpl(rax, rdx);
1787   __ jccb(Assembler::greater, default_case);
1788   // lookup dispatch offset
1789   __ subl(rax, rcx);
1790   __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * wordSize));
1791   __ profile_switch_case(rax, rbx, rcx);
1792   // continue execution
1793   __ bind(continue_execution);
1794   __ bswap(rdx);
1795   __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1));
1796   __ addl(rsi, rdx);
1797   __ dispatch_only(vtos);
1798   // handle default
1799   __ bind(default_case);
1800   __ profile_switch_default(rax);
1801   __ movl(rdx, Address(rbx, 0));
1802   __ jmp(continue_execution);
1803 }
1804 
1805 
// lookupswitch template: only emits a stop() with a diagnostic message,
// because this bytecode is expected to have been rewritten before it is ever
// executed by the interpreter.
void TemplateTable::lookupswitch() {
  transition(itos, itos);
  __ stop("lookupswitch bytecode should have been rewritten");
}
1810 
1811 
1812 void TemplateTable::fast_linearswitch() {
1813   transition(itos, vtos);
1814   Label loop_entry, loop, found, continue_execution;
1815   // bswap rax, so we can avoid bswapping the table entries
1816   __ bswap(rax);
1817   // align rsi
1818   __ leal(rbx, at_bcp(wordSize));                // btw: should be able to get rid of this instruction (change offsets below)
1819   __ andl(rbx, -wordSize);
1820   // set counter
1821   __ movl(rcx, Address(rbx, wordSize));
1822   __ bswap(rcx);
1823   __ jmpb(loop_entry);
1824   // table search
1825   __ bind(loop);
1826   __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * wordSize));
1827   __ jccb(Assembler::equal, found);
1828   __ bind(loop_entry);
1829   __ decrement(rcx);
1830   __ jcc(Assembler::greaterEqual, loop);
1831   // default case
1832   __ profile_switch_default(rax);
1833   __ movl(rdx, Address(rbx, 0));
1834   __ jmpb(continue_execution);
1835   // entry found -> get offset
1836   __ bind(found);
1837   __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * wordSize));
1838   __ profile_switch_case(rcx, rax, rbx);
1839   // continue execution
1840   __ bind(continue_execution);
1841   __ bswap(rdx);
1842   __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1));
1843   __ addl(rsi, rdx);
1844   __ dispatch_only(vtos);
1845 }
1846 
1847 
1848 void TemplateTable::fast_binaryswitch() {
1849   transition(itos, vtos);
1850   // Implementation using the following core algorithm:
1851   //
1852   // int binary_search(int key, LookupswitchPair* array, int n) {
1853   //   // Binary search according to "Methodik des Programmierens" by
1854   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
1855   //   int i = 0;
1856   //   int j = n;
1857   //   while (i+1 < j) {
1858   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
1859   //     // with      Q: for all i: 0 <= i < n: key < a[i]
1860   //     // where a stands for the array and assuming that the (inexisting)
1861   //     // element a[n] is infinitely big.
1862   //     int h = (i + j) >> 1;
1863   //     // i < h < j
1864   //     if (key < array[h].fast_match()) {
1865   //       j = h;
1866   //     } else {
1867   //       i = h;
1868   //     }
1869   //   }
1870   //   // R: a[i] <= key < a[i+1] or Q
1871   //   // (i.e., if key is within array, i is the correct index)
1872   //   return i;
1873   // }
1874 
1875   // register allocation
1876   const Register key   = rax;                    // already set (tosca)
1877   const Register array = rbx;
1878   const Register i     = rcx;
1879   const Register j     = rdx;
1880   const Register h     = rdi;                    // needs to be restored
1881   const Register temp  = rsi;
1882   // setup array
1883   __ save_bcp();
1884 
1885   __ leal(array, at_bcp(3*wordSize));            // btw: should be able to get rid of this instruction (change offsets below)
1886   __ andl(array, -wordSize);
1887   // initialize i & j
1888   __ xorl(i, i);                                 // i = 0;
1889   __ movl(j, Address(array, -wordSize));         // j = length(array);
1890   // Convert j into native byteordering
1891   __ bswap(j);
1892   // and start
1893   Label entry;
1894   __ jmp(entry);
1895 
1896   // binary search loop
1897   { Label loop;
1898     __ bind(loop);
1899     // int h = (i + j) >> 1;
1900     __ leal(h, Address(i, j, Address::times_1)); // h = i + j;
1901     __ sarl(h, 1);                               // h = (i + j) >> 1;
1902     // if (key < array[h].fast_match()) {
1903     //   j = h;
1904     // } else {
1905     //   i = h;
1906     // }
1907     // Convert array[h].match to native byte-ordering before compare
1908     __ movl(temp, Address(array, h, Address::times_8, 0*wordSize));
1909     __ bswap(temp);
1910     __ cmpl(key, temp);
1911     if (VM_Version::supports_cmov()) {
1912       __ cmovl(Assembler::less        , j, h);   // j = h if (key <  array[h].fast_match())
1913       __ cmovl(Assembler::greaterEqual, i, h);   // i = h if (key >= array[h].fast_match())
1914     } else {
1915       Label set_i, end_of_if;
1916       __ jccb(Assembler::greaterEqual, set_i);    // {
1917       __ movl(j, h);                             //   j = h;
1918       __ jmp(end_of_if);                         // }
1919       __ bind(set_i);                            // else {
1920       __ movl(i, h);                             //   i = h;
1921       __ bind(end_of_if);                        // }
1922     }
1923     // while (i+1 < j)
1924     __ bind(entry);
1925     __ leal(h, Address(i, 1));                   // i+1
1926     __ cmpl(h, j);                               // i+1 < j
1927     __ jcc(Assembler::less, loop);
1928   }
1929 
1930   // end of binary search, result index is i (must check again!)
1931   Label default_case;
1932   // Convert array[i].match to native byte-ordering before compare
1933   __ movl(temp, Address(array, i, Address::times_8, 0*wordSize));
1934   __ bswap(temp);
1935   __ cmpl(key, temp);
1936   __ jcc(Assembler::notEqual, default_case);
1937 
1938   // entry found -> j = offset
1939   __ movl(j , Address(array, i, Address::times_8, 1*wordSize));
1940   __ profile_switch_case(i, key, array);
1941   __ bswap(j);
1942   __ restore_bcp();
1943   __ restore_locals();                           // restore rdi
1944   __ load_unsigned_byte(rbx, Address(rsi, j, Address::times_1));
1945 
1946   __ addl(rsi, j);
1947   __ dispatch_only(vtos);
1948 
1949   // default case -> j = default offset
1950   __ bind(default_case);
1951   __ profile_switch_default(i);
1952   __ movl(j, Address(array, -2*wordSize));
1953   __ bswap(j);
1954   __ restore_bcp();
1955   __ restore_locals();                           // restore rdi
1956   __ load_unsigned_byte(rbx, Address(rsi, j, Address::times_1));
1957   __ addl(rsi, j);
1958   __ dispatch_only(vtos);
1959 }
1960 
1961 
1962 void TemplateTable::_return(TosState state) {
1963   transition(state, state);
1964   assert(_desc->calls_vm(), "inconsistent calls_vm information"); // call in remove_activation
1965 
1966   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
1967     assert(state == vtos, "only valid state");
1968     __ movl(rax, aaddress(0));
1969     __ movl(rdi, Address(rax, oopDesc::klass_offset_in_bytes()));
1970     __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
1971     __ testl(rdi, JVM_ACC_HAS_FINALIZER);
1972     Label skip_register_finalizer;
1973     __ jcc(Assembler::zero, skip_register_finalizer);
1974 
1975     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), rax);
1976 
1977     __ bind(skip_register_finalizer);
1978   }
1979 
1980   __ remove_activation(state, rsi);
1981   __ jmp(rsi);
1982 }
1983 
1984 
1985 // ----------------------------------------------------------------------------
1986 // Volatile variables demand their effects be made known to all CPU's in
1987 // order.  Store buffers on most chips allow reads & writes to reorder; the
1988 // JMM's ReadAfterWrite.java test fails in -Xint mode without some kind of
1989 // memory barrier (i.e., it's not sufficient that the interpreter does not
1990 // reorder volatile references, the hardware also must not reorder them).
1991 //
1992 // According to the new Java Memory Model (JMM):
1993 // (1) All volatiles are serialized wrt to each other.
// ALSO reads & writes act as acquire & release, so:
1995 // (2) A read cannot let unrelated NON-volatile memory refs that happen after
1996 // the read float up to before the read.  It's OK for non-volatile memory refs
1997 // that happen before the volatile read to float down below it.
// (3) Similarly, a volatile write cannot let unrelated NON-volatile memory refs
1999 // that happen BEFORE the write float down to after the write.  It's OK for
2000 // non-volatile memory refs that happen after the volatile write to float up
2001 // before it.
2002 //
2003 // We only put in barriers around volatile refs (they are expensive), not
2004 // _between_ memory refs (that would require us to track the flavor of the
2005 // previous memory refs).  Requirements (2) and (3) require some barriers
2006 // before volatile stores and after volatile loads.  These nearly cover
2007 // requirement (1) but miss the volatile-store-volatile-load case.  This final
2008 // case is placed after volatile-stores although it could just as well go
2009 // before volatile-loads.
2010 void TemplateTable::volatile_barrier( ) {
2011   // Helper function to insert a is-volatile test and memory barrier
2012   if( !os::is_MP() ) return;    // Not needed on single CPU
2013   __ membar();
2014 }
2015 
2016 void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index) {
2017   assert(byte_no == 1 || byte_no == 2, "byte_no out of range");
2018 
2019   Register temp = rbx;
2020 
2021   assert_different_registers(Rcache, index, temp);
2022 
2023   const int shift_count = (1 + byte_no)*BitsPerByte;
2024   Label resolved;
2025   __ get_cache_and_index_at_bcp(Rcache, index, 1);
2026   __ movl(temp, Address(Rcache, index, Address::times_4, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
2027   __ shrl(temp, shift_count);
2028   // have we resolved this bytecode?
2029   __ andl(temp, 0xFF);
2030   __ cmpl(temp, (int)bytecode());
2031   __ jcc(Assembler::equal, resolved);
2032 
2033   // resolve first time through
2034   address entry;
2035   switch (bytecode()) {
2036     case Bytecodes::_getstatic      : // fall through
2037     case Bytecodes::_putstatic      : // fall through
2038     case Bytecodes::_getfield       : // fall through
2039     case Bytecodes::_putfield       : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break;
2040     case Bytecodes::_invokevirtual  : // fall through
2041     case Bytecodes::_invokespecial  : // fall through
2042     case Bytecodes::_invokestatic   : // fall through
2043     case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  break;
2044     default                         : ShouldNotReachHere();                                 break;
2045   }
2046   __ movl(temp, (int)bytecode());
2047   __ call_VM(noreg, entry, temp);
2048   // Update registers with resolved info
2049   __ get_cache_and_index_at_bcp(Rcache, index, 1);
2050   __ bind(resolved);
2051 }
2052 
2053 
2054 // The cache and index registers must be set before call
2055 void TemplateTable::load_field_cp_cache_entry(Register obj,
2056                                               Register cache,
2057                                               Register index,
2058                                               Register off,
2059                                               Register flags,
2060                                               bool is_static = false) {
2061   assert_different_registers(cache, index, flags, off);
2062 
2063   ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
2064   // Field offset
2065   __ movl(off, Address(cache, index, Address::times_4,
2066            in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())));
2067   // Flags
2068   __ movl(flags, Address(cache, index, Address::times_4,
2069            in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())));
2070 
2071   // klass     overwrite register
2072   if (is_static) {
2073     __ movl(obj, Address(cache, index, Address::times_4,
2074              in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())));
2075   }
2076 }
2077 
2078 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
2079                                                Register method,
2080                                                Register itable_index,
2081                                                Register flags,
2082                                                bool is_invokevirtual,
2083                                                bool is_invokevfinal /*unused*/) {
2084   // setup registers
2085   const Register cache = rcx;
2086   const Register index = rdx;
2087   assert_different_registers(method, flags);
2088   assert_different_registers(method, cache, index);
2089   assert_different_registers(itable_index, flags);
2090   assert_different_registers(itable_index, cache, index);
2091   // determine constant pool cache field offsets
2092   const int method_offset = in_bytes(
2093     constantPoolCacheOopDesc::base_offset() +
2094       (is_invokevirtual
2095        ? ConstantPoolCacheEntry::f2_offset()
2096        : ConstantPoolCacheEntry::f1_offset()
2097       )
2098     );
2099   const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
2100                                     ConstantPoolCacheEntry::flags_offset());
2101   // access constant pool cache fields
2102   const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
2103                                     ConstantPoolCacheEntry::f2_offset());
2104 
2105   resolve_cache_and_index(byte_no, cache, index);
2106 
2107   assert(wordSize == 4, "adjust code below");
2108   __ movl(method, Address(cache, index, Address::times_4, method_offset));
2109   if (itable_index != noreg) {
2110     __ movl(itable_index, Address(cache, index, Address::times_4, index_offset));
2111   }
2112   __ movl(flags , Address(cache, index, Address::times_4, flags_offset ));
2113 }
2114 
2115 
2116 // The registers cache and index expected to be set before call.
2117 // Correct values of the cache and index registers are preserved.
2118 void TemplateTable::jvmti_post_field_access(Register cache,
2119                                             Register index,
2120                                             bool is_static,
2121                                             bool has_tos) {
2122   if (JvmtiExport::can_post_field_access()) {
2123     // Check to see if a field access watch has been set before we take
2124     // the time to call into the VM.
2125     Label L1;
2126     assert_different_registers(cache, index, rax);
2127     __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2128     __ testl(rax,rax);
2129     __ jcc(Assembler::zero, L1);
2130 
2131     // cache entry pointer
2132     __ addl(cache, in_bytes(constantPoolCacheOopDesc::base_offset()));
2133     __ shll(index, LogBytesPerWord);
2134     __ addl(cache, index);
2135     if (is_static) {
2136       __ movl(rax, 0);      // NULL object reference
2137     } else {
2138       __ pop(atos);         // Get the object
2139       __ verify_oop(rax);
2140       __ push(atos);        // Restore stack state
2141     }
2142     // rax,:   object pointer or NULL
2143     // cache: cache entry pointer
2144     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
2145                rax, cache);
2146     __ get_cache_and_index_at_bcp(cache, index, 1);
2147     __ bind(L1);
2148   }
2149 }
2150 
2151 void TemplateTable::pop_and_check_object(Register r) {
2152   __ pop_ptr(r);
2153   __ null_check(r);  // for field access must check obj.
2154   __ verify_oop(r);
2155 }
2156 
2157 void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
2158   transition(vtos, vtos);
2159 
2160   const Register cache = rcx;
2161   const Register index = rdx;
2162   const Register obj   = rcx;
2163   const Register off   = rbx;
2164   const Register flags = rax;
2165 
2166   resolve_cache_and_index(byte_no, cache, index);
2167   jvmti_post_field_access(cache, index, is_static, false);
2168   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2169 
2170   if (!is_static) pop_and_check_object(obj);
2171 
2172   const Address lo(obj, off, Address::times_1, 0*wordSize);
2173   const Address hi(obj, off, Address::times_1, 1*wordSize);
2174 
2175   Label Done, notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
2176 
2177   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
2178   assert(btos == 0, "change code, btos != 0");
2179   // btos
2180   __ andl(flags, 0x0f);
2181   __ jcc(Assembler::notZero, notByte);
2182 
2183   __ load_signed_byte(rax, lo );
2184   __ push(btos);
2185   // Rewrite bytecode to be faster
2186   if (!is_static) {
2187     patch_bytecode(Bytecodes::_fast_bgetfield, rcx, rbx);
2188   }
2189   __ jmp(Done);
2190 
2191   __ bind(notByte);
2192   // itos
2193   __ cmpl(flags, itos );
2194   __ jcc(Assembler::notEqual, notInt);
2195 
2196   __ movl(rax, lo );
2197   __ push(itos);
2198   // Rewrite bytecode to be faster
2199   if (!is_static) {
2200     patch_bytecode(Bytecodes::_fast_igetfield, rcx, rbx);
2201   }
2202   __ jmp(Done);
2203 
2204   __ bind(notInt);
2205   // atos
2206   __ cmpl(flags, atos );
2207   __ jcc(Assembler::notEqual, notObj);
2208 
2209   __ movl(rax, lo );
2210   __ push(atos);
2211   if (!is_static) {
2212     patch_bytecode(Bytecodes::_fast_agetfield, rcx, rbx);
2213   }
2214   __ jmp(Done);
2215 
2216   __ bind(notObj);
2217   // ctos
2218   __ cmpl(flags, ctos );
2219   __ jcc(Assembler::notEqual, notChar);
2220 
2221   __ load_unsigned_word(rax, lo );
2222   __ push(ctos);
2223   if (!is_static) {
2224     patch_bytecode(Bytecodes::_fast_cgetfield, rcx, rbx);
2225   }
2226   __ jmp(Done);
2227 
2228   __ bind(notChar);
2229   // stos
2230   __ cmpl(flags, stos );
2231   __ jcc(Assembler::notEqual, notShort);
2232 
2233   __ load_signed_word(rax, lo );
2234   __ push(stos);
2235   if (!is_static) {
2236     patch_bytecode(Bytecodes::_fast_sgetfield, rcx, rbx);
2237   }
2238   __ jmp(Done);
2239 
2240   __ bind(notShort);
2241   // ltos
2242   __ cmpl(flags, ltos );
2243   __ jcc(Assembler::notEqual, notLong);
2244 
2245   // Generate code as if volatile.  There just aren't enough registers to
2246   // save that information and this code is faster than the test.
2247   __ fild_d(lo);                // Must load atomically
2248   __ subl(rsp,2*wordSize);      // Make space for store
2249   __ fistp_d(Address(rsp,0));
2250   __ popl(rax);
2251   __ popl(rdx);
2252 
2253   __ push(ltos);
2254   // Don't rewrite to _fast_lgetfield for potential volatile case.
2255   __ jmp(Done);
2256 
2257   __ bind(notLong);
2258   // ftos
2259   __ cmpl(flags, ftos );
2260   __ jcc(Assembler::notEqual, notFloat);
2261 
2262   __ fld_s(lo);
2263   __ push(ftos);
2264   if (!is_static) {
2265     patch_bytecode(Bytecodes::_fast_fgetfield, rcx, rbx);
2266   }
2267   __ jmp(Done);
2268 
2269   __ bind(notFloat);
2270   // dtos
2271   __ cmpl(flags, dtos );
2272   __ jcc(Assembler::notEqual, notDouble);
2273 
2274   __ fld_d(lo);
2275   __ push(dtos);
2276   if (!is_static) {
2277     patch_bytecode(Bytecodes::_fast_dgetfield, rcx, rbx);
2278   }
2279   __ jmpb(Done);
2280 
2281   __ bind(notDouble);
2282 
2283   __ stop("Bad state");
2284 
2285   __ bind(Done);
2286   // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
2287   // volatile_barrier( );
2288 }
2289 
2290 
2291 void TemplateTable::getfield(int byte_no) {
2292   getfield_or_static(byte_no, false);
2293 }
2294 
2295 
2296 void TemplateTable::getstatic(int byte_no) {
2297   getfield_or_static(byte_no, true);
2298 }
2299 
2300 // The registers cache and index expected to be set before call.
2301 // The function may destroy various registers, just not the cache and index registers.
2302 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
2303 
2304   ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
2305 
2306   if (JvmtiExport::can_post_field_modification()) {
2307     // Check to see if a field modification watch has been set before we take
2308     // the time to call into the VM.
2309     Label L1;
2310     assert_different_registers(cache, index, rax);
2311     __ mov32(rax, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2312     __ testl(rax, rax);
2313     __ jcc(Assembler::zero, L1);
2314 
2315     // The cache and index registers have been already set.
2316     // This allows to eliminate this call but the cache and index
2317     // registers have to be correspondingly used after this line.
2318     __ get_cache_and_index_at_bcp(rax, rdx, 1);
2319 
2320     if (is_static) {
2321       // Life is simple.  Null out the object pointer.
2322       __ xorl(rbx, rbx);
2323     } else {
2324       // Life is harder. The stack holds the value on top, followed by the object.
2325       // We don't know the size of the value, though; it could be one or two words
2326       // depending on its type. As a result, we must find the type to determine where
2327       // the object is.
2328       Label two_word, valsize_known;
2329       __ movl(rcx, Address(rax, rdx, Address::times_4, in_bytes(cp_base_offset +
2330                                    ConstantPoolCacheEntry::flags_offset())));
2331       __ movl(rbx, rsp);
2332       __ shrl(rcx, ConstantPoolCacheEntry::tosBits);
2333       // Make sure we don't need to mask rcx for tosBits after the above shift
2334       ConstantPoolCacheEntry::verify_tosBits();
2335       __ cmpl(rcx, ltos);
2336       __ jccb(Assembler::equal, two_word);
2337       __ cmpl(rcx, dtos);
2338       __ jccb(Assembler::equal, two_word);
2339       __ addl(rbx, Interpreter::expr_offset_in_bytes(1)); // one word jvalue (not ltos, dtos)
2340       __ jmpb(valsize_known);
2341 
2342       __ bind(two_word);
2343       __ addl(rbx, Interpreter::expr_offset_in_bytes(2)); // two words jvalue
2344 
2345       __ bind(valsize_known);
2346       // setup object pointer
2347       __ movl(rbx, Address(rbx, 0));
2348     }
2349     // cache entry pointer
2350     __ addl(rax, in_bytes(cp_base_offset));
2351     __ shll(rdx, LogBytesPerWord);
2352     __ addl(rax, rdx);
2353     // object (tos)
2354     __ movl(rcx, rsp);
2355     // rbx,: object pointer set up above (NULL if static)
2356     // rax,: cache entry pointer
2357     // rcx: jvalue object on the stack
2358     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
2359                rbx, rax, rcx);
2360     __ get_cache_and_index_at_bcp(cache, index, 1);
2361     __ bind(L1);
2362   }
2363 }
2364 
2365 
2366 void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
2367   transition(vtos, vtos);
2368 
2369   const Register cache = rcx;
2370   const Register index = rdx;
2371   const Register obj   = rcx;
2372   const Register off   = rbx;
2373   const Register flags = rax;
2374 
2375   resolve_cache_and_index(byte_no, cache, index);
2376   jvmti_post_field_mod(cache, index, is_static);
2377   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2378 
2379   // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
2380   // volatile_barrier( );
2381 
2382   Label notVolatile, Done;
2383   __ movl(rdx, flags);
2384   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
2385   __ andl(rdx, 0x1);
2386 
2387   // field addresses
2388   const Address lo(obj, off, Address::times_1, 0*wordSize);
2389   const Address hi(obj, off, Address::times_1, 1*wordSize);
2390 
2391   Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
2392 
2393   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
2394   assert(btos == 0, "change code, btos != 0");
2395   // btos
2396   __ andl(flags, 0x0f);
2397   __ jcc(Assembler::notZero, notByte);
2398 
2399   __ pop(btos);
2400   if (!is_static) pop_and_check_object(obj);
2401   __ movb(lo, rax );
2402   if (!is_static) {
2403     patch_bytecode(Bytecodes::_fast_bputfield, rcx, rbx);
2404   }
2405   __ jmp(Done);
2406 
2407   __ bind(notByte);
2408   // itos
2409   __ cmpl(flags, itos );
2410   __ jcc(Assembler::notEqual, notInt);
2411 
2412   __ pop(itos);
2413   if (!is_static) pop_and_check_object(obj);
2414 
2415   __ movl(lo, rax );
2416   if (!is_static) {
2417     patch_bytecode(Bytecodes::_fast_iputfield, rcx, rbx);
2418   }
2419   __ jmp(Done);
2420 
2421   __ bind(notInt);
2422   // atos
2423   __ cmpl(flags, atos );
2424   __ jcc(Assembler::notEqual, notObj);
2425 
2426   __ pop(atos);
2427   if (!is_static) pop_and_check_object(obj);
2428 
2429   __ movl(lo, rax );
2430   __ store_check(obj, lo);  // Need to mark card
2431   if (!is_static) {
2432     patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx);
2433   }
2434   __ jmp(Done);
2435 
2436   __ bind(notObj);
2437   // ctos
2438   __ cmpl(flags, ctos );
2439   __ jcc(Assembler::notEqual, notChar);
2440 
2441   __ pop(ctos);
2442   if (!is_static) pop_and_check_object(obj);
2443   __ movw(lo, rax );
2444   if (!is_static) {
2445     patch_bytecode(Bytecodes::_fast_cputfield, rcx, rbx);
2446   }
2447   __ jmp(Done);
2448 
2449   __ bind(notChar);
2450   // stos
2451   __ cmpl(flags, stos );
2452   __ jcc(Assembler::notEqual, notShort);
2453 
2454   __ pop(stos);
2455   if (!is_static) pop_and_check_object(obj);
2456   __ movw(lo, rax );
2457   if (!is_static) {
2458     patch_bytecode(Bytecodes::_fast_sputfield, rcx, rbx);
2459   }
2460   __ jmp(Done);
2461 
2462   __ bind(notShort);
2463   // ltos
2464   __ cmpl(flags, ltos );
2465   __ jcc(Assembler::notEqual, notLong);
2466 
2467   Label notVolatileLong;
2468   __ testl(rdx, rdx);
2469   __ jcc(Assembler::zero, notVolatileLong);
2470 
2471   __ pop(ltos);  // overwrites rdx, do this after testing volatile.
2472   if (!is_static) pop_and_check_object(obj);
2473 
2474   // Replace with real volatile test
2475   __ pushl(rdx);
2476   __ pushl(rax);                // Must update atomically with FIST
2477   __ fild_d(Address(rsp,0));    // So load into FPU register
2478   __ fistp_d(lo);               // and put into memory atomically
2479   __ addl(rsp,2*wordSize);
2480   volatile_barrier();
2481   // Don't rewrite volatile version
2482   __ jmp(notVolatile);
2483 
2484   __ bind(notVolatileLong);
2485 
2486   __ pop(ltos);  // overwrites rdx
2487   if (!is_static) pop_and_check_object(obj);
2488   __ movl(hi, rdx);
2489   __ movl(lo, rax);
2490   if (!is_static) {
2491     patch_bytecode(Bytecodes::_fast_lputfield, rcx, rbx);
2492   }
2493   __ jmp(notVolatile);
2494 
2495   __ bind(notLong);
2496   // ftos
2497   __ cmpl(flags, ftos );
2498   __ jcc(Assembler::notEqual, notFloat);
2499 
2500   __ pop(ftos);
2501   if (!is_static) pop_and_check_object(obj);
2502   __ fstp_s(lo);
2503   if (!is_static) {
2504     patch_bytecode(Bytecodes::_fast_fputfield, rcx, rbx);
2505   }
2506   __ jmp(Done);
2507 
2508   __ bind(notFloat);
2509   // dtos
2510   __ cmpl(flags, dtos );
2511   __ jcc(Assembler::notEqual, notDouble);
2512 
2513   __ pop(dtos);
2514   if (!is_static) pop_and_check_object(obj);
2515   __ fstp_d(lo);
2516   if (!is_static) {
2517     patch_bytecode(Bytecodes::_fast_dputfield, rcx, rbx);
2518   }
2519   __ jmp(Done);
2520 
2521   __ bind(notDouble);
2522 
2523   __ stop("Bad state");
2524 
2525   __ bind(Done);
2526 
2527   // Check for volatile store
2528   __ testl(rdx, rdx);
2529   __ jcc(Assembler::zero, notVolatile);
2530   volatile_barrier( );
2531   __ bind(notVolatile);
2532 }
2533 
2534 
2535 void TemplateTable::putfield(int byte_no) {
2536   putfield_or_static(byte_no, false);
2537 }
2538 
2539 
2540 void TemplateTable::putstatic(int byte_no) {
2541   putfield_or_static(byte_no, true);
2542 }
2543 
2544 void TemplateTable::jvmti_post_fast_field_mod() {
2545   if (JvmtiExport::can_post_field_modification()) {
2546     // Check to see if a field modification watch has been set before we take
2547     // the time to call into the VM.
2548     Label L2;
2549     __ mov32(rcx, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2550     __ testl(rcx,rcx);
2551     __ jcc(Assembler::zero, L2);
2552     __ pop_ptr(rbx);               // copy the object pointer from tos
2553     __ verify_oop(rbx);
2554     __ push_ptr(rbx);              // put the object pointer back on tos
2555     __ subl(rsp, sizeof(jvalue));  // add space for a jvalue object
2556     __ movl(rcx, rsp);
2557     __ push_ptr(rbx);                 // save object pointer so we can steal rbx,
2558     __ movl(rbx, 0);
2559     const Address lo_value(rcx, rbx, Address::times_1, 0*wordSize);
2560     const Address hi_value(rcx, rbx, Address::times_1, 1*wordSize);
2561     switch (bytecode()) {          // load values into the jvalue object
2562     case Bytecodes::_fast_bputfield: __ movb(lo_value, rax); break;
2563     case Bytecodes::_fast_sputfield: __ movw(lo_value, rax); break;
2564     case Bytecodes::_fast_cputfield: __ movw(lo_value, rax); break;
2565     case Bytecodes::_fast_iputfield: __ movl(lo_value, rax);                         break;
2566     case Bytecodes::_fast_lputfield: __ movl(hi_value, rdx); __ movl(lo_value, rax); break;
2567     // need to call fld_s() after fstp_s() to restore the value for below
2568     case Bytecodes::_fast_fputfield: __ fstp_s(lo_value); __ fld_s(lo_value);        break;
2569     // need to call fld_d() after fstp_d() to restore the value for below
2570     case Bytecodes::_fast_dputfield: __ fstp_d(lo_value); __ fld_d(lo_value);        break;
2571     // since rcx is not an object we don't call store_check() here
2572     case Bytecodes::_fast_aputfield: __ movl(lo_value, rax);                         break;
2573     default:  ShouldNotReachHere();
2574     }
2575     __ pop_ptr(rbx);  // restore copy of object pointer
2576 
2577     // Save rax, and sometimes rdx because call_VM() will clobber them,
2578     // then use them for JVM/DI purposes
2579     __ pushl(rax);
2580     if (bytecode() == Bytecodes::_fast_lputfield) __ pushl(rdx);
2581     // access constant pool cache entry
2582     __ get_cache_entry_pointer_at_bcp(rax, rdx, 1);
2583     __ verify_oop(rbx);
2584     // rbx,: object pointer copied above
2585     // rax,: cache entry pointer
2586     // rcx: jvalue object on the stack
2587     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, rax, rcx);
2588     if (bytecode() == Bytecodes::_fast_lputfield) __ popl(rdx);  // restore high value
2589     __ popl(rax);     // restore lower value
2590     __ addl(rsp, sizeof(jvalue));  // release jvalue object space
2591     __ bind(L2);
2592   }
2593 }
2594 
2595 void TemplateTable::fast_storefield(TosState state) {
2596   transition(state, vtos);
2597 
2598   ByteSize base = constantPoolCacheOopDesc::base_offset();
2599 
2600   jvmti_post_fast_field_mod();
2601 
2602   // access constant pool cache
2603   __ get_cache_and_index_at_bcp(rcx, rbx, 1);
2604 
2605   // test for volatile with rdx but rdx is tos register for lputfield.
2606   if (bytecode() == Bytecodes::_fast_lputfield) __ pushl(rdx);
2607   __ movl(rdx, Address(rcx, rbx, Address::times_4, in_bytes(base +
2608                        ConstantPoolCacheEntry::flags_offset())));
2609 
2610   // replace index with field offset from cache entry
2611   __ movl(rbx, Address(rcx, rbx, Address::times_4, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
2612 
2613   // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
2614   // volatile_barrier( );
2615 
2616   Label notVolatile, Done;
2617   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
2618   __ andl(rdx, 0x1);
2619   // Check for volatile store
2620   __ testl(rdx, rdx);
2621   __ jcc(Assembler::zero, notVolatile);
2622 
2623   if (bytecode() == Bytecodes::_fast_lputfield) __ popl(rdx);
2624 
2625   // Get object from stack
2626   pop_and_check_object(rcx);
2627 
2628   // field addresses
2629   const Address lo(rcx, rbx, Address::times_1, 0*wordSize);
2630   const Address hi(rcx, rbx, Address::times_1, 1*wordSize);
2631 
2632   // access field
2633   switch (bytecode()) {
2634     case Bytecodes::_fast_bputfield: __ movb(lo, rax); break;
2635     case Bytecodes::_fast_sputfield: // fall through
2636     case Bytecodes::_fast_cputfield: __ movw(lo, rax); break;
2637     case Bytecodes::_fast_iputfield: __ movl(lo, rax); break;
2638     case Bytecodes::_fast_lputfield: __ movl(hi, rdx); __ movl(lo, rax);        break;
2639     case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
2640     case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
2641     case Bytecodes::_fast_aputfield: __ movl(lo, rax); __ store_check(rcx, lo); break;
2642     default:
2643       ShouldNotReachHere();
2644   }
2645 
2646   Label done;
2647   volatile_barrier( );
2648   __ jmpb(done);
2649 
2650   // Same code as above, but don't need rdx to test for volatile.
2651   __ bind(notVolatile);
2652 
2653   if (bytecode() == Bytecodes::_fast_lputfield) __ popl(rdx);
2654 
2655   // Get object from stack
2656   pop_and_check_object(rcx);
2657 
2658   // access field
2659   switch (bytecode()) {
2660     case Bytecodes::_fast_bputfield: __ movb(lo, rax); break;
2661     case Bytecodes::_fast_sputfield: // fall through
2662     case Bytecodes::_fast_cputfield: __ movw(lo, rax); break;
2663     case Bytecodes::_fast_iputfield: __ movl(lo, rax); break;
2664     case Bytecodes::_fast_lputfield: __ movl(hi, rdx); __ movl(lo, rax);        break;
2665     case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
2666     case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
2667     case Bytecodes::_fast_aputfield: __ movl(lo, rax); __ store_check(rcx, lo); break;
2668     default:
2669       ShouldNotReachHere();
2670   }
2671   __ bind(done);
2672 }
2673 
2674 
2675 void TemplateTable::fast_accessfield(TosState state) {
2676   transition(atos, state);
2677 
2678   // do the JVMTI work here to avoid disturbing the register state below
2679   if (JvmtiExport::can_post_field_access()) {
2680     // Check to see if a field access watch has been set before we take
2681     // the time to call into the VM.
2682     Label L1;
2683     __ mov32(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2684     __ testl(rcx,rcx);
2685     __ jcc(Assembler::zero, L1);
2686     // access constant pool cache entry
2687     __ get_cache_entry_pointer_at_bcp(rcx, rdx, 1);
2688     __ push_ptr(rax);  // save object pointer before call_VM() clobbers it
2689     __ verify_oop(rax);
2690     // rax,: object pointer copied above
2691     // rcx: cache entry pointer
2692     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), rax, rcx);
2693     __ pop_ptr(rax);   // restore object pointer
2694     __ bind(L1);
2695   }
2696 
2697   // access constant pool cache
2698   __ get_cache_and_index_at_bcp(rcx, rbx, 1);
2699   // replace index with field offset from cache entry
2700   __ movl(rbx, Address(rcx, rbx, Address::times_4, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f2_offset())));
2701 
2702 
2703   // rax,: object
2704   __ verify_oop(rax);
2705   __ null_check(rax);
2706   // field addresses
2707   const Address lo = Address(rax, rbx, Address::times_1, 0*wordSize);
2708   const Address hi = Address(rax, rbx, Address::times_1, 1*wordSize);
2709 
2710   // access field
2711   switch (bytecode()) {
2712     case Bytecodes::_fast_bgetfield: __ movsxb(rax, lo );                 break;
2713     case Bytecodes::_fast_sgetfield: __ load_signed_word(rax, lo );       break;
2714     case Bytecodes::_fast_cgetfield: __ load_unsigned_word(rax, lo );     break;
2715     case Bytecodes::_fast_igetfield: __ movl(rax, lo);                    break;
2716     case Bytecodes::_fast_lgetfield: __ stop("should not be rewritten");  break;
2717     case Bytecodes::_fast_fgetfield: __ fld_s(lo);                        break;
2718     case Bytecodes::_fast_dgetfield: __ fld_d(lo);                        break;
2719     case Bytecodes::_fast_agetfield: __ movl(rax, lo); __ verify_oop(rax); break;
2720     default:
2721       ShouldNotReachHere();
2722   }
2723 
2724   // Doug Lea believes this is not needed with current Sparcs(TSO) and Intel(PSO)
2725   // volatile_barrier( );
2726 }
2727 
2728 void TemplateTable::fast_xaccess(TosState state) {
2729   transition(vtos, state);
2730   // get receiver
2731   __ movl(rax, aaddress(0));
2732   debug_only(__ verify_local_tag(frame::TagReference, 0));
2733   // access constant pool cache
2734   __ get_cache_and_index_at_bcp(rcx, rdx, 2);
2735   __ movl(rbx, Address(rcx, rdx, Address::times_4, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f2_offset())));
2736   // make sure exception is reported in correct bcp range (getfield is next instruction)
2737   __ increment(rsi);
2738   __ null_check(rax);
2739   const Address lo = Address(rax, rbx, Address::times_1, 0*wordSize);
2740   if (state == itos) {
2741     __ movl(rax, lo);
2742   } else if (state == atos) {
2743     __ movl(rax, lo);
2744     __ verify_oop(rax);
2745   } else if (state == ftos) {
2746     __ fld_s(lo);
2747   } else {
2748     ShouldNotReachHere();
2749   }
2750   __ decrement(rsi);
2751 }
2752 
2753 
2754 
2755 //----------------------------------------------------------------------------------------------------
2756 // Calls
2757 
2758 void TemplateTable::count_calls(Register method, Register temp) {
2759   // implemented elsewhere
2760   ShouldNotReachHere();
2761 }
2762 
2763 
2764 void TemplateTable::prepare_invoke(Register method, Register index, int byte_no, Bytecodes::Code code) {
2765   // determine flags
2766   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
2767   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
2768   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
2769   const bool load_receiver       = code != Bytecodes::_invokestatic;
2770   const bool receiver_null_check = is_invokespecial;
2771   const bool save_flags = is_invokeinterface || is_invokevirtual;
2772   // setup registers & access constant pool cache
2773   const Register recv   = rcx;
2774   const Register flags  = rdx;
2775   assert_different_registers(method, index, recv, flags);
2776 
2777   // save 'interpreter return address'
2778   __ save_bcp();
2779 
2780   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual);
2781 
2782   // load receiver if needed (note: no return address pushed yet)
2783   if (load_receiver) {
2784     __ movl(recv, flags);
2785     __ andl(recv, 0xFF);
2786     // recv count is 0 based?
2787     __ movl(recv, Address(rsp, recv, Interpreter::stackElementScale(), -Interpreter::expr_offset_in_bytes(1)));
2788     __ verify_oop(recv);
2789   }
2790 
2791   // do null check if needed
2792   if (receiver_null_check) {
2793     __ null_check(recv);
2794   }
2795 
2796   if (save_flags) {
2797     __ movl(rsi, flags);
2798   }
2799 
2800   // compute return type
2801   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
2802   // Make sure we don't need to mask flags for tosBits after the above shift
2803   ConstantPoolCacheEntry::verify_tosBits();
2804   // load return address
2805   { const int table =
2806       is_invokeinterface
2807       ? (int)Interpreter::return_5_addrs_by_index_table()
2808       : (int)Interpreter::return_3_addrs_by_index_table();
2809     __ movl(flags, Address(noreg, flags, Address::times_4, table));
2810   }
2811 
2812   // push return address
2813   __ pushl(flags);
2814 
2815   // Restore flag value from the constant pool cache, and restore rsi
2816   // for later null checks.  rsi is the bytecode pointer
2817   if (save_flags) {
2818     __ movl(flags, rsi);
2819     __ restore_bcp();
2820   }
2821 }
2822 
2823 
2824 void TemplateTable::invokevirtual_helper(Register index, Register recv,
2825                         Register flags) {
2826 
2827   // Uses temporary registers rax, rdx
2828   assert_different_registers(index, recv, rax, rdx);
2829 
2830   // Test for an invoke of a final method
2831   Label notFinal;
2832   __ movl(rax, flags);
2833   __ andl(rax, (1 << ConstantPoolCacheEntry::vfinalMethod));
2834   __ jcc(Assembler::zero, notFinal);
2835 
2836   Register method = index;  // method must be rbx,
2837   assert(method == rbx, "methodOop must be rbx, for interpreter calling convention");
2838 
2839   // do the call - the index is actually the method to call
2840   __ verify_oop(method);
2841 
2842   // It's final, need a null check here!
2843   __ null_check(recv);
2844 
2845   // profile this call
2846   __ profile_final_call(rax);
2847 
2848   __ jump_from_interpreted(method, rax);
2849 
2850   __ bind(notFinal);
2851 
2852   // get receiver klass
2853   __ null_check(recv, oopDesc::klass_offset_in_bytes());
2854   // Keep recv in rcx for callee expects it there
2855   __ movl(rax, Address(recv, oopDesc::klass_offset_in_bytes()));
2856   __ verify_oop(rax);
2857 
2858   // profile this call
2859   __ profile_virtual_call(rax, rdi, rdx);
2860 
2861   // get target methodOop & entry point
2862   const int base = instanceKlass::vtable_start_offset() * wordSize;
2863   assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
2864   __ movl(method, Address(rax, index, Address::times_4, base + vtableEntry::method_offset_in_bytes()));
2865   __ jump_from_interpreted(method, rdx);
2866 }
2867 
2868 
2869 void TemplateTable::invokevirtual(int byte_no) {
2870   transition(vtos, vtos);
2871   prepare_invoke(rbx, noreg, byte_no, bytecode());
2872 
2873   // rbx,: index
2874   // rcx: receiver
2875   // rdx: flags
2876 
2877   invokevirtual_helper(rbx, rcx, rdx);
2878 }
2879 
2880 
2881 void TemplateTable::invokespecial(int byte_no) {
2882   transition(vtos, vtos);
2883   prepare_invoke(rbx, noreg, byte_no, bytecode());
2884   // do the call
2885   __ verify_oop(rbx);
2886   __ profile_call(rax);
2887   __ jump_from_interpreted(rbx, rax);
2888 }
2889 
2890 
2891 void TemplateTable::invokestatic(int byte_no) {
2892   transition(vtos, vtos);
2893   prepare_invoke(rbx, noreg, byte_no, bytecode());
2894   // do the call
2895   __ verify_oop(rbx);
2896   __ profile_call(rax);
2897   __ jump_from_interpreted(rbx, rax);
2898 }
2899 
2900 
2901 void TemplateTable::fast_invokevfinal(int byte_no) {
2902   transition(vtos, vtos);
2903   __ stop("fast_invokevfinal not used on x86");
2904 }
2905 
2906 
2907 void TemplateTable::invokeinterface(int byte_no) {
2908   transition(vtos, vtos);
2909   prepare_invoke(rax, rbx, byte_no, bytecode());
2910 
2911   // rax,: Interface
2912   // rbx,: index
2913   // rcx: receiver
2914   // rdx: flags
2915 
2916   // Special case of invokeinterface called for virtual method of
2917   // java.lang.Object.  See cpCacheOop.cpp for details.
2918   // This code isn't produced by javac, but could be produced by
2919   // another compliant java compiler.
2920   Label notMethod;
2921   __ movl(rdi, rdx);
2922   __ andl(rdi, (1 << ConstantPoolCacheEntry::methodInterface));
2923   __ jcc(Assembler::zero, notMethod);
2924 
2925   invokevirtual_helper(rbx, rcx, rdx);
2926   __ bind(notMethod);
2927 
2928   // Get receiver klass into rdx - also a null check
2929   __ restore_locals();  // restore rdi
2930   __ movl(rdx, Address(rcx, oopDesc::klass_offset_in_bytes()));
2931   __ verify_oop(rdx);
2932 
2933   // profile this call
2934   __ profile_virtual_call(rdx, rsi, rdi);
2935 
2936   __ movl(rdi, rdx); // Save klassOop in rdi
2937 
2938   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
2939   const int base = instanceKlass::vtable_start_offset() * wordSize;
2940   assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
2941   __ movl(rsi, Address(rdx, instanceKlass::vtable_length_offset() * wordSize)); // Get length of vtable
2942   __ leal(rdx, Address(rdx, rsi, Address::times_4, base));
2943   if (HeapWordsPerLong > 1) {
2944     // Round up to align_object_offset boundary
2945     __ round_to(rdx, BytesPerLong);
2946   }
2947 
2948   Label entry, search, interface_ok;
2949 
2950   __ jmpb(entry);
2951   __ bind(search);
2952   __ addl(rdx, itableOffsetEntry::size() * wordSize);
2953 
2954   __ bind(entry);
2955 
2956   // Check that the entry is non-null.  A null entry means that the receiver
2957   // class doesn't implement the interface, and wasn't the same as the
2958   // receiver class checked when the interface was resolved.
2959   __ pushl(rdx);
2960   __ movl(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
2961   __ testl(rdx, rdx);
2962   __ jcc(Assembler::notZero, interface_ok);
2963   // throw exception
2964   __ popl(rdx);          // pop saved register first.
2965   __ popl(rbx);          // pop return address (pushed by prepare_invoke)
2966   __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
2967   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
2968   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
2969                    InterpreterRuntime::throw_IncompatibleClassChangeError));
2970   // the call_VM checks for exception, so we should never return here.
2971   __ should_not_reach_here();
2972   __ bind(interface_ok);
2973 
2974     __ popl(rdx);
2975 
2976     __ cmpl(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
2977     __ jcc(Assembler::notEqual, search);
2978 
2979     __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes()));
2980     __ addl(rdx, rdi); // Add offset to klassOop
2981     assert(itableMethodEntry::size() * wordSize == 4, "adjust the scaling in the code below");
2982     __ movl(rbx, Address(rdx, rbx, Address::times_4));
2983     // rbx,: methodOop to call
2984     // rcx: receiver
2985     // Check for abstract method error
2986     // Note: This should be done more efficiently via a throw_abstract_method_error
2987     //       interpreter entry point and a conditional jump to it in case of a null
2988     //       method.
2989     { Label L;
2990       __ testl(rbx, rbx);
2991       __ jcc(Assembler::notZero, L);
2992       // throw exception
2993           // note: must restore interpreter registers to canonical
2994           //       state for exception handling to work correctly!
2995           __ popl(rbx);          // pop return address (pushed by prepare_invoke)
2996           __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
2997           __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
2998       __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
2999       // the call_VM checks for exception, so we should never return here.
3000       __ should_not_reach_here();
3001       __ bind(L);
3002     }
3003 
3004   // do the call
3005   // rcx: receiver
3006   // rbx,: methodOop
3007   __ jump_from_interpreted(rbx, rdx);
3008 }
3009 
3010 //----------------------------------------------------------------------------------------------------
3011 // Allocation
3012 
3013 void TemplateTable::_new() {
3014   transition(vtos, atos);
3015   __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
3016   Label slow_case;
3017   Label done;
3018   Label initialize_header;
3019   Label initialize_object;  // including clearing the fields
3020   Label allocate_shared;
3021 
3022   ExternalAddress heap_top((address)Universe::heap()->top_addr());
3023 
3024   __ get_cpool_and_tags(rcx, rax);
3025   // get instanceKlass
3026   __ movl(rcx, Address(rcx, rdx, Address::times_4, sizeof(constantPoolOopDesc)));
3027   __ pushl(rcx);  // save the contexts of klass for initializing the header
3028 
3029   // make sure the class we're about to instantiate has been resolved.
3030   // Note: slow_case does a pop of stack, which is why we loaded class/pushed above
3031   const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
3032   __ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class);
3033   __ jcc(Assembler::notEqual, slow_case);
3034 
3035   // make sure klass is initialized & doesn't have finalizer
3036   // make sure klass is fully initialized
3037   __ cmpl(Address(rcx, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized);
3038   __ jcc(Assembler::notEqual, slow_case);
3039 
3040   // get instance_size in instanceKlass (scaled to a count of bytes)
3041   __ movl(rdx, Address(rcx, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
3042   // test to see if it has a finalizer or is malformed in some way
3043   __ testl(rdx, Klass::_lh_instance_slow_path_bit);
3044   __ jcc(Assembler::notZero, slow_case);
3045 
3046   //
3047   // Allocate the instance
3048   // 1) Try to allocate in the TLAB
3049   // 2) if fail and the object is large allocate in the shared Eden
3050   // 3) if the above fails (or is not applicable), go to a slow case
3051   // (creates a new TLAB, etc.)
3052 
3053   const bool allow_shared_alloc =
3054     Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
3055 
3056   if (UseTLAB) {
3057     const Register thread = rcx;
3058 
3059     __ get_thread(thread);
3060     __ movl(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
3061     __ leal(rbx, Address(rax, rdx, Address::times_1));
3062     __ cmpl(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
3063     __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
3064     __ movl(Address(thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
3065     if (ZeroTLAB) {
3066       // the fields have been already cleared
3067       __ jmp(initialize_header);
3068     } else {
3069       // initialize both the header and fields
3070       __ jmp(initialize_object);
3071     }
3072   }
3073 
3074   // Allocation in the shared Eden, if allowed.
3075   //
3076   // rdx: instance size in bytes
3077   if (allow_shared_alloc) {
3078     __ bind(allocate_shared);
3079 
3080     Label retry;
3081     __ bind(retry);
3082     __ mov32(rax, heap_top);
3083     __ leal(rbx, Address(rax, rdx, Address::times_1));
3084     __ cmp32(rbx, ExternalAddress((address)Universe::heap()->end_addr()));
3085     __ jcc(Assembler::above, slow_case);
3086 
3087     // Compare rax, with the top addr, and if still equal, store the new
3088     // top addr in rbx, at the address of the top addr pointer. Sets ZF if was
3089     // equal, and clears it otherwise. Use lock prefix for atomicity on MPs.
3090     //
3091     // rax,: object begin
3092     // rbx,: object end
3093     // rdx: instance size in bytes
3094     if (os::is_MP()) __ lock();
3095     __ cmpxchgptr(rbx, heap_top);
3096 
3097     // if someone beat us on the allocation, try again, otherwise continue
3098     __ jcc(Assembler::notEqual, retry);
3099   }
3100 
3101   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
3102     // The object is initialized before the header.  If the object size is
3103     // zero, go directly to the header initialization.
3104     __ bind(initialize_object);
3105     __ decrement(rdx, sizeof(oopDesc));
3106     __ jcc(Assembler::zero, initialize_header);
3107 
3108   // Initialize topmost object field, divide rdx by 8, check if odd and
3109   // test if zero.
3110     __ xorl(rcx, rcx);    // use zero reg to clear memory (shorter code)
3111     __ shrl(rdx, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
3112 
3113   // rdx must have been multiple of 8
3114 #ifdef ASSERT
3115     // make sure rdx was multiple of 8
3116     Label L;
3117     // Ignore partial flag stall after shrl() since it is debug VM
3118     __ jccb(Assembler::carryClear, L);
3119     __ stop("object size is not multiple of 2 - adjust this code");
3120     __ bind(L);
3121     // rdx must be > 0, no extra check needed here
3122 #endif
3123 
3124     // initialize remaining object fields: rdx was a multiple of 8
3125     { Label loop;
3126     __ bind(loop);
3127     __ movl(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
3128     __ movl(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx);
3129     __ decrement(rdx);
3130     __ jcc(Assembler::notZero, loop);
3131     }
3132 
3133     // initialize object header only.
3134     __ bind(initialize_header);
3135     if (UseBiasedLocking) {
3136       __ popl(rcx);   // get saved klass back in the register.
3137       __ movl(rbx, Address(rcx, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
3138       __ movl(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
3139     } else {
3140       __ movl(Address(rax, oopDesc::mark_offset_in_bytes ()),
3141               (int)markOopDesc::prototype()); // header
3142       __ popl(rcx);   // get saved klass back in the register.
3143     }
3144     __ movl(Address(rax, oopDesc::klass_offset_in_bytes()), rcx);  // klass
3145 
3146     {
3147       SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
3148       // Trigger dtrace event for fastpath
3149       __ push(atos);
3150       __ call_VM_leaf(
3151            CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
3152       __ pop(atos);
3153     }
3154 
3155     __ jmp(done);
3156   }
3157 
3158   // slow case
3159   __ bind(slow_case);
3160   __ popl(rcx);   // restore stack pointer to what it was when we came in.
3161   __ get_constant_pool(rax);
3162   __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
3163   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rax, rdx);
3164 
3165   // continue
3166   __ bind(done);
3167 }
3168 
3169 
3170 void TemplateTable::newarray() {
3171   transition(itos, atos);
3172   __ push_i(rax);                                 // make sure everything is on the stack
3173   __ load_unsigned_byte(rdx, at_bcp(1));
3174   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), rdx, rax);
3175   __ pop_i(rdx);                                  // discard size
3176 }
3177 
3178 
3179 void TemplateTable::anewarray() {
3180   transition(itos, atos);
3181   __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
3182   __ get_constant_pool(rcx);
3183   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), rcx, rdx, rax);
3184 }
3185 
3186 
3187 void TemplateTable::arraylength() {
3188   transition(atos, itos);
3189   __ null_check(rax, arrayOopDesc::length_offset_in_bytes());
3190   __ movl(rax, Address(rax, arrayOopDesc::length_offset_in_bytes()));
3191 }
3192 
3193 
3194 void TemplateTable::checkcast() {
3195   transition(atos, atos);
3196   Label done, is_null, ok_is_subtype, quicked, resolved;
3197   __ testl(rax, rax);   // Object is in EAX
3198   __ jcc(Assembler::zero, is_null);
3199 
3200   // Get cpool & tags index
3201   __ get_cpool_and_tags(rcx, rdx); // ECX=cpool, EDX=tags array
3202   __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // EBX=index
3203   // See if bytecode has already been quicked
3204   __ cmpb(Address(rdx, rbx, Address::times_1, typeArrayOopDesc::header_size(T_BYTE) * wordSize), JVM_CONSTANT_Class);
3205   __ jcc(Assembler::equal, quicked);
3206 
3207   __ push(atos);
3208   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
3209   __ pop_ptr(rdx);
3210   __ jmpb(resolved);
3211 
3212   // Get superklass in EAX and subklass in EBX
3213   __ bind(quicked);
3214   __ movl(rdx, rax);          // Save object in EDX; EAX needed for subtype check
3215   __ movl(rax, Address(rcx, rbx, Address::times_4, sizeof(constantPoolOopDesc)));
3216 
3217   __ bind(resolved);
3218   __ movl(rbx, Address(rdx, oopDesc::klass_offset_in_bytes()));
3219 
3220   // Generate subtype check.  Blows ECX.  Resets EDI.  Object in EDX.
3221   // Superklass in EAX.  Subklass in EBX.
3222   __ gen_subtype_check( rbx, ok_is_subtype );
3223 
3224   // Come here on failure
3225   __ pushl(rdx);
3226   // object is at TOS
3227   __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
3228 
3229   // Come here on success
3230   __ bind(ok_is_subtype);
3231   __ movl(rax,rdx);           // Restore object in EDX
3232 
3233   // Collect counts on whether this check-cast sees NULLs a lot or not.
3234   if (ProfileInterpreter) {
3235     __ jmp(done);
3236     __ bind(is_null);
3237     __ profile_null_seen(rcx);
3238   } else {
3239     __ bind(is_null);   // same as 'done'
3240   }
3241   __ bind(done);
3242 }
3243 
3244 
3245 void TemplateTable::instanceof() {
3246   transition(atos, itos);
3247   Label done, is_null, ok_is_subtype, quicked, resolved;
3248   __ testl(rax, rax);
3249   __ jcc(Assembler::zero, is_null);
3250 
3251   // Get cpool & tags index
3252   __ get_cpool_and_tags(rcx, rdx); // ECX=cpool, EDX=tags array
3253   __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // EBX=index
3254   // See if bytecode has already been quicked
3255   __ cmpb(Address(rdx, rbx, Address::times_1, typeArrayOopDesc::header_size(T_BYTE) * wordSize), JVM_CONSTANT_Class);
3256   __ jcc(Assembler::equal, quicked);
3257 
3258   __ push(atos);
3259   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
3260   __ pop_ptr(rdx);
3261   __ movl(rdx, Address(rdx, oopDesc::klass_offset_in_bytes()));
3262   __ jmp(resolved);
3263 
3264   // Get superklass in EAX and subklass in EDX
3265   __ bind(quicked);
3266   __ movl(rdx, Address(rax, oopDesc::klass_offset_in_bytes()));
3267   __ movl(rax, Address(rcx, rbx, Address::times_4, sizeof(constantPoolOopDesc)));
3268 
3269   __ bind(resolved);
3270 
3271   // Generate subtype check.  Blows ECX.  Resets EDI.
3272   // Superklass in EAX.  Subklass in EDX.
3273   __ gen_subtype_check( rdx, ok_is_subtype );
3274 
3275   // Come here on failure
3276   __ xorl(rax,rax);
3277   __ jmpb(done);
3278   // Come here on success
3279   __ bind(ok_is_subtype);
3280   __ movl(rax, 1);
3281 
3282   // Collect counts on whether this test sees NULLs a lot or not.
3283   if (ProfileInterpreter) {
3284     __ jmp(done);
3285     __ bind(is_null);
3286     __ profile_null_seen(rcx);
3287   } else {
3288     __ bind(is_null);   // same as 'done'
3289   }
3290   __ bind(done);
3291   // rax, = 0: obj == NULL or  obj is not an instanceof the specified klass
3292   // rax, = 1: obj != NULL and obj is     an instanceof the specified klass
3293 }
3294 
3295 
3296 //----------------------------------------------------------------------------------------------------
3297 // Breakpoints
3298 void TemplateTable::_breakpoint() {
3299 
3300   // Note: We get here even if we are single stepping..
3301   // jbug inists on setting breakpoints at every bytecode
3302   // even if we are in single step mode.
3303 
3304   transition(vtos, vtos);
3305 
3306   // get the unpatched byte code
3307   __ get_method(rcx);
3308   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), rcx, rsi);
3309   __ movl(rbx, rax);
3310 
3311   // post the breakpoint event
3312   __ get_method(rcx);
3313   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), rcx, rsi);
3314 
3315   // complete the execution of original bytecode
3316   __ dispatch_only_normal(vtos);
3317 }
3318 
3319 
3320 //----------------------------------------------------------------------------------------------------
3321 // Exceptions
3322 
3323 void TemplateTable::athrow() {
3324   transition(atos, vtos);
3325   __ null_check(rax);
3326   __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
3327 }
3328 
3329 
3330 //----------------------------------------------------------------------------------------------------
3331 // Synchronization
3332 //
3333 // Note: monitorenter & exit are symmetric routines; which is reflected
3334 //       in the assembly code structure as well
3335 //
3336 // Stack layout:
3337 //
3338 // [expressions  ] <--- rsp               = expression stack top
3339 // ..
3340 // [expressions  ]
3341 // [monitor entry] <--- monitor block top = expression stack bot
3342 // ..
3343 // [monitor entry]
3344 // [frame data   ] <--- monitor block bot
3345 // ...
3346 // [saved rbp,    ] <--- rbp,
3347 
3348 
3349 void TemplateTable::monitorenter() {
3350   transition(atos, vtos);
3351 
3352   // check for NULL object
3353   __ null_check(rax);
3354 
3355   const Address monitor_block_top(rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
3356   const Address monitor_block_bot(rbp, frame::interpreter_frame_initial_sp_offset        * wordSize);
3357   const int entry_size =         (     frame::interpreter_frame_monitor_size()           * wordSize);
3358   Label allocated;
3359 
3360   // initialize entry pointer
3361   __ xorl(rdx, rdx);                             // points to free slot or NULL
3362 
3363   // find a free slot in the monitor block (result in rdx)
3364   { Label entry, loop, exit;
3365     __ movl(rcx, monitor_block_top);             // points to current entry, starting with top-most entry
3366     __ leal(rbx, monitor_block_bot);             // points to word before bottom of monitor block
3367     __ jmpb(entry);
3368 
3369     __ bind(loop);
3370     __ cmpl(Address(rcx, BasicObjectLock::obj_offset_in_bytes()), NULL_WORD);  // check if current entry is used
3371 
3372 // TODO - need new func here - kbt
3373     if (VM_Version::supports_cmov()) {
3374       __ cmovl(Assembler::equal, rdx, rcx);      // if not used then remember entry in rdx
3375     } else {
3376       Label L;
3377       __ jccb(Assembler::notEqual, L);
3378       __ movl(rdx, rcx);                         // if not used then remember entry in rdx
3379       __ bind(L);
3380     }
3381     __ cmpl(rax, Address(rcx, BasicObjectLock::obj_offset_in_bytes()));   // check if current entry is for same object
3382     __ jccb(Assembler::equal, exit);              // if same object then stop searching
3383     __ addl(rcx, entry_size);                    // otherwise advance to next entry
3384     __ bind(entry);
3385     __ cmpl(rcx, rbx);                           // check if bottom reached
3386     __ jcc(Assembler::notEqual, loop);           // if not at bottom then check this entry
3387     __ bind(exit);
3388   }
3389 
3390   __ testl(rdx, rdx);                            // check if a slot has been found
3391   __ jccb(Assembler::notZero, allocated);         // if found, continue with that one
3392 
3393   // allocate one if there's no free slot
3394   { Label entry, loop;
3395     // 1. compute new pointers                   // rsp: old expression stack top
3396     __ movl(rdx, monitor_block_bot);             // rdx: old expression stack bottom
3397     __ subl(rsp, entry_size);                    // move expression stack top
3398     __ subl(rdx, entry_size);                    // move expression stack bottom
3399     __ movl(rcx, rsp);                           // set start value for copy loop
3400     __ movl(monitor_block_bot, rdx);             // set new monitor block top
3401     __ jmp(entry);
3402     // 2. move expression stack contents
3403     __ bind(loop);
3404     __ movl(rbx, Address(rcx, entry_size));      // load expression stack word from old location
3405     __ movl(Address(rcx, 0), rbx);               // and store it at new location
3406     __ addl(rcx, wordSize);                      // advance to next word
3407     __ bind(entry);
3408     __ cmpl(rcx, rdx);                           // check if bottom reached
3409     __ jcc(Assembler::notEqual, loop);           // if not at bottom then copy next word
3410   }
3411 
3412   // call run-time routine
3413   // rdx: points to monitor entry
3414   __ bind(allocated);
3415 
3416   // Increment bcp to point to the next bytecode, so exception handling for async. exceptions work correctly.
3417   // The object has already been poped from the stack, so the expression stack looks correct.
3418   __ increment(rsi);
3419 
3420   __ movl(Address(rdx, BasicObjectLock::obj_offset_in_bytes()), rax);     // store object
3421   __ lock_object(rdx);
3422 
3423   // check to make sure this monitor doesn't cause stack overflow after locking
3424   __ save_bcp();  // in case of exception
3425   __ generate_stack_overflow_check(0);
3426 
3427   // The bcp has already been incremented. Just need to dispatch to next instruction.
3428   __ dispatch_next(vtos);
3429 }
3430 
3431 
3432 void TemplateTable::monitorexit() {
3433   transition(atos, vtos);
3434 
3435   // check for NULL object
3436   __ null_check(rax);
3437 
3438   const Address monitor_block_top(rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
3439   const Address monitor_block_bot(rbp, frame::interpreter_frame_initial_sp_offset        * wordSize);
3440   const int entry_size =         (     frame::interpreter_frame_monitor_size()           * wordSize);
3441   Label found;
3442 
3443   // find matching slot
3444   { Label entry, loop;
3445     __ movl(rdx, monitor_block_top);             // points to current entry, starting with top-most entry
3446     __ leal(rbx, monitor_block_bot);             // points to word before bottom of monitor block
3447     __ jmpb(entry);
3448 
3449     __ bind(loop);
3450     __ cmpl(rax, Address(rdx, BasicObjectLock::obj_offset_in_bytes()));   // check if current entry is for same object
3451     __ jcc(Assembler::equal, found);             // if same object then stop searching
3452     __ addl(rdx, entry_size);                    // otherwise advance to next entry
3453     __ bind(entry);
3454     __ cmpl(rdx, rbx);                           // check if bottom reached
3455     __ jcc(Assembler::notEqual, loop);           // if not at bottom then check this entry
3456   }
3457 
3458   // error handling. Unlocking was not block-structured
3459   Label end;
3460   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
3461   __ should_not_reach_here();
3462 
3463   // call run-time routine
3464   // rcx: points to monitor entry
3465   __ bind(found);
3466   __ push_ptr(rax);                                 // make sure object is on stack (contract with oopMaps)
3467   __ unlock_object(rdx);
3468   __ pop_ptr(rax);                                  // discard object
3469   __ bind(end);
3470 }
3471 
3472 
3473 //----------------------------------------------------------------------------------------------------
3474 // Wide instructions
3475 
3476 void TemplateTable::wide() {
3477   transition(vtos, vtos);
3478   __ load_unsigned_byte(rbx, at_bcp(1));
3479   __ jmp(Address(noreg, rbx, Address::times_4, int(Interpreter::_wentry_point)));
3480   // Note: the rsi increment step is part of the individual wide bytecode implementations
3481 }
3482 
3483 
3484 //----------------------------------------------------------------------------------------------------
3485 // Multi arrays
3486 
3487 void TemplateTable::multianewarray() {
3488   transition(vtos, atos);
3489   __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
3490   // last dim is on top of stack; we want address of first one:
3491   // first_addr = last_addr + (ndims - 1) * stackElementSize - 1*wordsize
3492   // the latter wordSize to point to the beginning of the array.
3493   __ leal(  rax, Address(rsp, rax, Interpreter::stackElementScale(), -wordSize));
3494   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), rax);     // pass in rax,
3495   __ load_unsigned_byte(rbx, at_bcp(3));
3496   __ leal(rsp, Address(rsp, rbx, Interpreter::stackElementScale()));  // get rid of counts
3497 }
3498 
3499 #endif /* !CC_INTERP */