1 /*
2 * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25 #include "incls/_precompiled.incl"
26 #include "incls/_assembler_x86.cpp.incl"
27
28 // Implementation of AddressLiteral
29
30 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
31 _is_lval = false;
32 _target = target;
33 switch (rtype) {
34 case relocInfo::oop_type:
35 // Oops are a special case. Normally they would be their own section
36 // but in cases like icBuffer they are literals in the code stream that
37 // we don't have a section for. We use none so that we get a literal address
38 // which is always patchable.
39 break;
40 case relocInfo::external_word_type:
41 _rspec = external_word_Relocation::spec(target);
42 break;
43 case relocInfo::internal_word_type:
44 _rspec = internal_word_Relocation::spec(target);
45 break;
46 case relocInfo::opt_virtual_call_type:
47 _rspec = opt_virtual_call_Relocation::spec();
48 break;
49 case relocInfo::static_call_type:
50 _rspec = static_call_Relocation::spec();
51 break;
52 case relocInfo::runtime_call_type:
53 _rspec = runtime_call_Relocation::spec();
54 break;
55 case relocInfo::poll_type:
56 case relocInfo::poll_return_type:
57 _rspec = Relocation::spec_simple(rtype);
58 break;
59 case relocInfo::none:
60 break;
61 default:
62 ShouldNotReachHere();
63 break;
64 }
65 }
66
67 // Implementation of Address
68
69 #ifdef _LP64
70
71 Address Address::make_array(ArrayAddress adr) {
72 // Not implementable on 64bit machines
73 // Should have been handled higher up the call chain.
74 ShouldNotReachHere();
75 return Address();
76 }
77
78 // exceedingly dangerous constructor
79 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
80 _base = noreg;
81 _index = noreg;
82 _scale = no_scale;
83 _disp = disp;
84 switch (rtype) {
85 case relocInfo::external_word_type:
86 _rspec = external_word_Relocation::spec(loc);
87 break;
88 case relocInfo::internal_word_type:
89 _rspec = internal_word_Relocation::spec(loc);
90 break;
91 case relocInfo::runtime_call_type:
92 // HMM
93 _rspec = runtime_call_Relocation::spec();
94 break;
95 case relocInfo::poll_type:
96 case relocInfo::poll_return_type:
97 _rspec = Relocation::spec_simple(rtype);
98 break;
99 case relocInfo::none:
100 break;
101 default:
102 ShouldNotReachHere();
103 }
104 }
105 #else // LP64
106
107 Address Address::make_array(ArrayAddress adr) {
108 AddressLiteral base = adr.base();
109 Address index = adr.index();
110 assert(index._disp == 0, "must not have disp"); // maybe it can?
111 Address array(index._base, index._index, index._scale, (intptr_t) base.target());
112 array._rspec = base._rspec;
113 return array;
114 }
115
116 // exceedingly dangerous constructor
117 Address::Address(address loc, RelocationHolder spec) {
118 _base = noreg;
119 _index = noreg;
120 _scale = no_scale;
121 _disp = (intptr_t) loc;
122 _rspec = spec;
123 }
124
125 #endif // _LP64
126
127
128
129 // Convert the raw encoding form into the form expected by the constructor for
130 // Address. An index of 4 (rsp) corresponds to having no index, so convert
131 // that to noreg for the Address constructor.
132 Address Address::make_raw(int base, int index, int scale, int disp) {
133 bool valid_index = index != rsp->encoding();
134 if (valid_index) {
135 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
136 return madr;
137 } else {
138 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
139 return madr;
140 }
141 }
142
143 // Implementation of Assembler
144
145 int AbstractAssembler::code_fill_byte() {
146 return (u_char)'\xF4'; // hlt
147 }
148
149 // make this go away someday
150 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
151 if (rtype == relocInfo::none)
152 emit_long(data);
153 else emit_data(data, Relocation::spec_simple(rtype), format);
154 }
155
156 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
157 assert(imm_operand == 0, "default format must be immediate in this file");
158 assert(inst_mark() != NULL, "must be inside InstructionMark");
159 if (rspec.type() != relocInfo::none) {
160 #ifdef ASSERT
161 check_relocation(rspec, format);
162 #endif
163 // Do not use AbstractAssembler::relocate, which is not intended for
164 // embedded words. Instead, relocate to the enclosing instruction.
165
166 // hack. call32 is too wide for mask so use disp32
167 if (format == call32_operand)
168 code_section()->relocate(inst_mark(), rspec, disp32_operand);
169 else
170 code_section()->relocate(inst_mark(), rspec, format);
171 }
172 emit_long(data);
173 }
174
175 static int encode(Register r) {
176 int enc = r->encoding();
177 if (enc >= 8) {
178 enc -= 8;
179 }
180 return enc;
181 }
182
183 static int encode(XMMRegister r) {
184 int enc = r->encoding();
185 if (enc >= 8) {
186 enc -= 8;
187 }
188 return enc;
189 }
190
191 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
192 assert(dst->has_byte_register(), "must have byte register");
193 assert(isByte(op1) && isByte(op2), "wrong opcode");
194 assert(isByte(imm8), "not a byte");
195 assert((op1 & 0x01) == 0, "should be 8bit operation");
196 emit_byte(op1);
197 emit_byte(op2 | encode(dst));
198 emit_byte(imm8);
199 }
200
201
202 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
203 assert(isByte(op1) && isByte(op2), "wrong opcode");
204 assert((op1 & 0x01) == 1, "should be 32bit operation");
205 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
206 if (is8bit(imm32)) {
207 emit_byte(op1 | 0x02); // set sign bit
208 emit_byte(op2 | encode(dst));
209 emit_byte(imm32 & 0xFF);
210 } else {
211 emit_byte(op1);
212 emit_byte(op2 | encode(dst));
213 emit_long(imm32);
214 }
215 }
216
217 // immediate-to-memory forms
218 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
219 assert((op1 & 0x01) == 1, "should be 32bit operation");
220 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
221 if (is8bit(imm32)) {
222 emit_byte(op1 | 0x02); // set sign bit
223 emit_operand(rm, adr, 1);
224 emit_byte(imm32 & 0xFF);
225 } else {
226 emit_byte(op1);
227 emit_operand(rm, adr, 4);
228 emit_long(imm32);
229 }
230 }
231
232 void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
233 LP64_ONLY(ShouldNotReachHere());
234 assert(isByte(op1) && isByte(op2), "wrong opcode");
235 assert((op1 & 0x01) == 1, "should be 32bit operation");
236 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
237 InstructionMark im(this);
238 emit_byte(op1);
239 emit_byte(op2 | encode(dst));
240 emit_data((intptr_t)obj, relocInfo::oop_type, 0);
241 }
242
243
244 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
245 assert(isByte(op1) && isByte(op2), "wrong opcode");
246 emit_byte(op1);
247 emit_byte(op2 | encode(dst) << 3 | encode(src));
248 }
249
250
251 void Assembler::emit_operand(Register reg, Register base, Register index,
252 Address::ScaleFactor scale, int disp,
253 RelocationHolder const& rspec,
254 int rip_relative_correction) {
255 relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
256
257 // Encode the registers as needed in the fields they are used in
258
259 int regenc = encode(reg) << 3;
260 int indexenc = index->is_valid() ? encode(index) << 3 : 0;
261 int baseenc = base->is_valid() ? encode(base) : 0;
262
263 if (base->is_valid()) {
264 if (index->is_valid()) {
265 assert(scale != Address::no_scale, "inconsistent address");
266 // [base + index*scale + disp]
267 if (disp == 0 && rtype == relocInfo::none &&
268 base != rbp LP64_ONLY(&& base != r13)) {
269 // [base + index*scale]
270 // [00 reg 100][ss index base]
271 assert(index != rsp, "illegal addressing mode");
272 emit_byte(0x04 | regenc);
273 emit_byte(scale << 6 | indexenc | baseenc);
274 } else if (is8bit(disp) && rtype == relocInfo::none) {
275 // [base + index*scale + imm8]
276 // [01 reg 100][ss index base] imm8
277 assert(index != rsp, "illegal addressing mode");
278 emit_byte(0x44 | regenc);
279 emit_byte(scale << 6 | indexenc | baseenc);
280 emit_byte(disp & 0xFF);
281 } else {
282 // [base + index*scale + disp32]
283 // [10 reg 100][ss index base] disp32
284 assert(index != rsp, "illegal addressing mode");
285 emit_byte(0x84 | regenc);
286 emit_byte(scale << 6 | indexenc | baseenc);
287 emit_data(disp, rspec, disp32_operand);
288 }
289 } else if (base == rsp LP64_ONLY(|| base == r12)) {
290 // [rsp + disp]
291 if (disp == 0 && rtype == relocInfo::none) {
292 // [rsp]
293 // [00 reg 100][00 100 100]
294 emit_byte(0x04 | regenc);
295 emit_byte(0x24);
296 } else if (is8bit(disp) && rtype == relocInfo::none) {
297 // [rsp + imm8]
298 // [01 reg 100][00 100 100] disp8
299 emit_byte(0x44 | regenc);
300 emit_byte(0x24);
301 emit_byte(disp & 0xFF);
302 } else {
303 // [rsp + imm32]
304 // [10 reg 100][00 100 100] disp32
305 emit_byte(0x84 | regenc);
306 emit_byte(0x24);
307 emit_data(disp, rspec, disp32_operand);
308 }
309 } else {
310 // [base + disp]
311 assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
312 if (disp == 0 && rtype == relocInfo::none &&
313 base != rbp LP64_ONLY(&& base != r13)) {
314 // [base]
315 // [00 reg base]
316 emit_byte(0x00 | regenc | baseenc);
317 } else if (is8bit(disp) && rtype == relocInfo::none) {
318 // [base + disp8]
319 // [01 reg base] disp8
320 emit_byte(0x40 | regenc | baseenc);
321 emit_byte(disp & 0xFF);
322 } else {
323 // [base + disp32]
324 // [10 reg base] disp32
325 emit_byte(0x80 | regenc | baseenc);
326 emit_data(disp, rspec, disp32_operand);
327 }
328 }
329 } else {
330 if (index->is_valid()) {
331 assert(scale != Address::no_scale, "inconsistent address");
332 // [index*scale + disp]
333 // [00 reg 100][ss index 101] disp32
334 assert(index != rsp, "illegal addressing mode");
335 emit_byte(0x04 | regenc);
336 emit_byte(scale << 6 | indexenc | 0x05);
337 emit_data(disp, rspec, disp32_operand);
338 } else if (rtype != relocInfo::none ) {
339 // [disp] (64bit) RIP-RELATIVE (32bit) abs
340 // [00 000 101] disp32
341
342 emit_byte(0x05 | regenc);
343 // Note that the RIP-rel. correction applies to the generated
344 // disp field, but _not_ to the target address in the rspec.
345
346 // disp was created by converting the target address minus the pc
347 // at the start of the instruction. That needs more correction here.
348 // intptr_t disp = target - next_ip;
349 assert(inst_mark() != NULL, "must be inside InstructionMark");
350 address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
351 int64_t adjusted = disp;
352 // Do rip-rel adjustment for 64bit
353 LP64_ONLY(adjusted -= (next_ip - inst_mark()));
354 assert(is_simm32(adjusted),
355 "must be 32bit offset (RIP relative address)");
356 emit_data((int32_t) adjusted, rspec, disp32_operand);
357
358 } else {
359 // 32bit never did this, did everything as the rip-rel/disp code above
360 // [disp] ABSOLUTE
361 // [00 reg 100][00 100 101] disp32
362 emit_byte(0x04 | regenc);
363 emit_byte(0x25);
364 emit_data(disp, rspec, disp32_operand);
365 }
366 }
367 }
368
369 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
370 Address::ScaleFactor scale, int disp,
371 RelocationHolder const& rspec) {
372 emit_operand((Register)reg, base, index, scale, disp, rspec);
373 }
374
375 // Secret local extension to Assembler::WhichOperand:
376 #define end_pc_operand (_WhichOperand_limit)
377
378 address Assembler::locate_operand(address inst, WhichOperand which) {
379 // Decode the given instruction, and return the address of
380 // an embedded 32-bit operand word.
381
382 // If "which" is disp32_operand, selects the displacement portion
383 // of an effective address specifier.
384 // If "which" is imm64_operand, selects the trailing immediate constant.
385 // If "which" is call32_operand, selects the displacement of a call or jump.
386 // Caller is responsible for ensuring that there is such an operand,
387 // and that it is 32/64 bits wide.
388
389 // If "which" is end_pc_operand, find the end of the instruction.
390
391 address ip = inst;
392 bool is_64bit = false;
393
394 debug_only(bool has_disp32 = false);
395 int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
396
397 again_after_prefix:
398 switch (0xFF & *ip++) {
399
400 // These convenience macros generate groups of "case" labels for the switch.
401 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
402 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
403 case (x)+4: case (x)+5: case (x)+6: case (x)+7
404 #define REP16(x) REP8((x)+0): \
405 case REP8((x)+8)
406
407 case CS_segment:
408 case SS_segment:
409 case DS_segment:
410 case ES_segment:
411 case FS_segment:
412 case GS_segment:
413 // Seems dubious
414 LP64_ONLY(assert(false, "shouldn't have that prefix"));
415 assert(ip == inst+1, "only one prefix allowed");
416 goto again_after_prefix;
417
418 case 0x67:
419 case REX:
420 case REX_B:
421 case REX_X:
422 case REX_XB:
423 case REX_R:
424 case REX_RB:
425 case REX_RX:
426 case REX_RXB:
427 NOT_LP64(assert(false, "64bit prefixes"));
428 goto again_after_prefix;
429
430 case REX_W:
431 case REX_WB:
432 case REX_WX:
433 case REX_WXB:
434 case REX_WR:
435 case REX_WRB:
436 case REX_WRX:
437 case REX_WRXB:
438 NOT_LP64(assert(false, "64bit prefixes"));
439 is_64bit = true;
440 goto again_after_prefix;
441
442 case 0xFF: // pushq a; decl a; incl a; call a; jmp a
443 case 0x88: // movb a, r
444 case 0x89: // movl a, r
445 case 0x8A: // movb r, a
446 case 0x8B: // movl r, a
447 case 0x8F: // popl a
448 debug_only(has_disp32 = true);
449 break;
450
451 case 0x68: // pushq #32
452 if (which == end_pc_operand) {
453 return ip + 4;
454 }
455 assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
456 return ip; // not produced by emit_operand
457
458 case 0x66: // movw ... (size prefix)
459 again_after_size_prefix2:
460 switch (0xFF & *ip++) {
461 case REX:
462 case REX_B:
463 case REX_X:
464 case REX_XB:
465 case REX_R:
466 case REX_RB:
467 case REX_RX:
468 case REX_RXB:
469 case REX_W:
470 case REX_WB:
471 case REX_WX:
472 case REX_WXB:
473 case REX_WR:
474 case REX_WRB:
475 case REX_WRX:
476 case REX_WRXB:
477 NOT_LP64(assert(false, "64bit prefix found"));
478 goto again_after_size_prefix2;
479 case 0x8B: // movw r, a
480 case 0x89: // movw a, r
481 debug_only(has_disp32 = true);
482 break;
483 case 0xC7: // movw a, #16
484 debug_only(has_disp32 = true);
485 tail_size = 2; // the imm16
486 break;
487 case 0x0F: // several SSE/SSE2 variants
488 ip--; // reparse the 0x0F
489 goto again_after_prefix;
490 default:
491 ShouldNotReachHere();
492 }
493 break;
494
495 case REP8(0xB8): // movl/q r, #32/#64(oop?)
496 if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4);
497 // these asserts are somewhat nonsensical
498 #ifndef _LP64
499 assert(which == imm_operand || which == disp32_operand, "");
500 #else
501 assert((which == call32_operand || which == imm_operand) && is_64bit ||
502 which == narrow_oop_operand && !is_64bit, "");
503 #endif // _LP64
504 return ip;
505
506 case 0x69: // imul r, a, #32
507 case 0xC7: // movl a, #32(oop?)
508 tail_size = 4;
509 debug_only(has_disp32 = true); // has both kinds of operands!
510 break;
511
512 case 0x0F: // movx..., etc.
513 switch (0xFF & *ip++) {
514 case 0x12: // movlps
515 case 0x28: // movaps
516 case 0x2E: // ucomiss
517 case 0x2F: // comiss
518 case 0x54: // andps
519 case 0x55: // andnps
520 case 0x56: // orps
521 case 0x57: // xorps
522 case 0x6E: // movd
523 case 0x7E: // movd
524 case 0xAE: // ldmxcsr a
525 // 64bit side says it these have both operands but that doesn't
526 // appear to be true
527 debug_only(has_disp32 = true);
528 break;
529
530 case 0xAD: // shrd r, a, %cl
531 case 0xAF: // imul r, a
532 case 0xBE: // movsbl r, a (movsxb)
533 case 0xBF: // movswl r, a (movsxw)
534 case 0xB6: // movzbl r, a (movzxb)
535 case 0xB7: // movzwl r, a (movzxw)
536 case REP16(0x40): // cmovl cc, r, a
537 case 0xB0: // cmpxchgb
538 case 0xB1: // cmpxchg
539 case 0xC1: // xaddl
540 case 0xC7: // cmpxchg8
541 case REP16(0x90): // setcc a
542 debug_only(has_disp32 = true);
543 // fall out of the switch to decode the address
544 break;
545
546 case 0xAC: // shrd r, a, #8
547 debug_only(has_disp32 = true);
548 tail_size = 1; // the imm8
549 break;
550
551 case REP16(0x80): // jcc rdisp32
552 if (which == end_pc_operand) return ip + 4;
553 assert(which == call32_operand, "jcc has no disp32 or imm");
554 return ip;
555 default:
556 ShouldNotReachHere();
557 }
558 break;
559
560 case 0x81: // addl a, #32; addl r, #32
561 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
562 // on 32bit in the case of cmpl, the imm might be an oop
563 tail_size = 4;
564 debug_only(has_disp32 = true); // has both kinds of operands!
565 break;
566
567 case 0x83: // addl a, #8; addl r, #8
568 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
569 debug_only(has_disp32 = true); // has both kinds of operands!
570 tail_size = 1;
571 break;
572
573 case 0x9B:
574 switch (0xFF & *ip++) {
575 case 0xD9: // fnstcw a
576 debug_only(has_disp32 = true);
577 break;
578 default:
579 ShouldNotReachHere();
580 }
581 break;
582
583 case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
584 case REP4(0x10): // adc...
585 case REP4(0x20): // and...
586 case REP4(0x30): // xor...
587 case REP4(0x08): // or...
588 case REP4(0x18): // sbb...
589 case REP4(0x28): // sub...
590 case 0xF7: // mull a
591 case 0x8D: // lea r, a
592 case 0x87: // xchg r, a
593 case REP4(0x38): // cmp...
594 case 0x85: // test r, a
595 debug_only(has_disp32 = true); // has both kinds of operands!
596 break;
597
598 case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
599 case 0xC6: // movb a, #8
600 case 0x80: // cmpb a, #8
601 case 0x6B: // imul r, a, #8
602 debug_only(has_disp32 = true); // has both kinds of operands!
603 tail_size = 1; // the imm8
604 break;
605
606 case 0xE8: // call rdisp32
607 case 0xE9: // jmp rdisp32
608 if (which == end_pc_operand) return ip + 4;
609 assert(which == call32_operand, "call has no disp32 or imm");
610 return ip;
611
612 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
613 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
614 case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
615 case 0xDD: // fld_d a; fst_d a; fstp_d a
616 case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
617 case 0xDF: // fild_d a; fistp_d a
618 case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
619 case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
620 case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
621 debug_only(has_disp32 = true);
622 break;
623
624 case 0xF0: // Lock
625 assert(os::is_MP(), "only on MP");
626 goto again_after_prefix;
627
628 case 0xF3: // For SSE
629 case 0xF2: // For SSE2
630 switch (0xFF & *ip++) {
631 case REX:
632 case REX_B:
633 case REX_X:
634 case REX_XB:
635 case REX_R:
636 case REX_RB:
637 case REX_RX:
638 case REX_RXB:
639 case REX_W:
640 case REX_WB:
641 case REX_WX:
642 case REX_WXB:
643 case REX_WR:
644 case REX_WRB:
645 case REX_WRX:
646 case REX_WRXB:
647 NOT_LP64(assert(false, "found 64bit prefix"));
648 ip++;
649 default:
650 ip++;
651 }
652 debug_only(has_disp32 = true); // has both kinds of operands!
653 break;
654
655 default:
656 ShouldNotReachHere();
657
658 #undef REP8
659 #undef REP16
660 }
661
662 assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
663 #ifdef _LP64
664 assert(which != imm_operand, "instruction is not a movq reg, imm64");
665 #else
666 // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
667 assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
668 #endif // LP64
669 assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
670
671 // parse the output of emit_operand
672 int op2 = 0xFF & *ip++;
673 int base = op2 & 0x07;
674 int op3 = -1;
675 const int b100 = 4;
676 const int b101 = 5;
677 if (base == b100 && (op2 >> 6) != 3) {
678 op3 = 0xFF & *ip++;
679 base = op3 & 0x07; // refetch the base
680 }
681 // now ip points at the disp (if any)
682
683 switch (op2 >> 6) {
684 case 0:
685 // [00 reg 100][ss index base]
686 // [00 reg 100][00 100 esp]
687 // [00 reg base]
688 // [00 reg 100][ss index 101][disp32]
689 // [00 reg 101] [disp32]
690
691 if (base == b101) {
692 if (which == disp32_operand)
693 return ip; // caller wants the disp32
694 ip += 4; // skip the disp32
695 }
696 break;
697
698 case 1:
699 // [01 reg 100][ss index base][disp8]
700 // [01 reg 100][00 100 esp][disp8]
701 // [01 reg base] [disp8]
702 ip += 1; // skip the disp8
703 break;
704
705 case 2:
706 // [10 reg 100][ss index base][disp32]
707 // [10 reg 100][00 100 esp][disp32]
708 // [10 reg base] [disp32]
709 if (which == disp32_operand)
710 return ip; // caller wants the disp32
711 ip += 4; // skip the disp32
712 break;
713
714 case 3:
715 // [11 reg base] (not a memory addressing mode)
716 break;
717 }
718
719 if (which == end_pc_operand) {
720 return ip + tail_size;
721 }
722
723 #ifdef _LP64
724 assert(false, "fix locate_operand");
725 #else
726 assert(which == imm_operand, "instruction has only an imm field");
727 #endif // LP64
728 return ip;
729 }
730
731 address Assembler::locate_next_instruction(address inst) {
732 // Secretly share code with locate_operand:
733 return locate_operand(inst, end_pc_operand);
734 }
735
736
737 #ifdef ASSERT
738 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
739 address inst = inst_mark();
740 assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
741 address opnd;
742
743 Relocation* r = rspec.reloc();
744 if (r->type() == relocInfo::none) {
745 return;
746 } else if (r->is_call() || format == call32_operand) {
747 // assert(format == imm32_operand, "cannot specify a nonzero format");
748 opnd = locate_operand(inst, call32_operand);
749 } else if (r->is_data()) {
750 assert(format == imm_operand || format == disp32_operand
751 LP64_ONLY(|| format == narrow_oop_operand), "format ok");
752 opnd = locate_operand(inst, (WhichOperand)format);
753 } else {
754 assert(format == imm_operand, "cannot specify a format");
755 return;
756 }
757 assert(opnd == pc(), "must put operand where relocs can find it");
758 }
759 #endif // ASSERT
760
761 void Assembler::emit_operand32(Register reg, Address adr) {
762 assert(reg->encoding() < 8, "no extended registers");
763 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
764 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
765 adr._rspec);
766 }
767
768 void Assembler::emit_operand(Register reg, Address adr,
769 int rip_relative_correction) {
770 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
771 adr._rspec,
772 rip_relative_correction);
773 }
774
775 void Assembler::emit_operand(XMMRegister reg, Address adr) {
776 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
777 adr._rspec);
778 }
779
780 // MMX operations
781 void Assembler::emit_operand(MMXRegister reg, Address adr) {
782 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
783 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
784 }
785
786 // work around gcc (3.2.1-7a) bug
787 void Assembler::emit_operand(Address adr, MMXRegister reg) {
788 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
789 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
790 }
791
792
793 void Assembler::emit_farith(int b1, int b2, int i) {
794 assert(isByte(b1) && isByte(b2), "wrong opcode");
795 assert(0 <= i && i < 8, "illegal stack offset");
796 emit_byte(b1);
797 emit_byte(b2 + i);
798 }
799
800
// Now the Assembler instructions (identical for 32/64 bits)
802
803 void Assembler::adcl(Register dst, int32_t imm32) {
804 prefix(dst);
805 emit_arith(0x81, 0xD0, dst, imm32);
806 }
807
808 void Assembler::adcl(Register dst, Address src) {
809 InstructionMark im(this);
810 prefix(src, dst);
811 emit_byte(0x13);
812 emit_operand(dst, src);
813 }
814
815 void Assembler::adcl(Register dst, Register src) {
816 (void) prefix_and_encode(dst->encoding(), src->encoding());
817 emit_arith(0x13, 0xC0, dst, src);
818 }
819
820 void Assembler::addl(Address dst, int32_t imm32) {
821 InstructionMark im(this);
822 prefix(dst);
823 emit_arith_operand(0x81, rax, dst, imm32);
824 }
825
826 void Assembler::addl(Address dst, Register src) {
827 InstructionMark im(this);
828 prefix(dst, src);
829 emit_byte(0x01);
830 emit_operand(src, dst);
831 }
832
833 void Assembler::addl(Register dst, int32_t imm32) {
834 prefix(dst);
835 emit_arith(0x81, 0xC0, dst, imm32);
836 }
837
838 void Assembler::addl(Register dst, Address src) {
839 InstructionMark im(this);
840 prefix(src, dst);
841 emit_byte(0x03);
842 emit_operand(dst, src);
843 }
844
845 void Assembler::addl(Register dst, Register src) {
846 (void) prefix_and_encode(dst->encoding(), src->encoding());
847 emit_arith(0x03, 0xC0, dst, src);
848 }
849
850 void Assembler::addr_nop_4() {
851 // 4 bytes: NOP DWORD PTR [EAX+0]
852 emit_byte(0x0F);
853 emit_byte(0x1F);
854 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
855 emit_byte(0); // 8-bits offset (1 byte)
856 }
857
858 void Assembler::addr_nop_5() {
859 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
860 emit_byte(0x0F);
861 emit_byte(0x1F);
862 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
863 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
864 emit_byte(0); // 8-bits offset (1 byte)
865 }
866
867 void Assembler::addr_nop_7() {
868 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
869 emit_byte(0x0F);
870 emit_byte(0x1F);
871 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
872 emit_long(0); // 32-bits offset (4 bytes)
873 }
874
875 void Assembler::addr_nop_8() {
876 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
877 emit_byte(0x0F);
878 emit_byte(0x1F);
879 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
880 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
881 emit_long(0); // 32-bits offset (4 bytes)
882 }
883
884 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
885 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
886 emit_byte(0xF2);
887 int encode = prefix_and_encode(dst->encoding(), src->encoding());
888 emit_byte(0x0F);
889 emit_byte(0x58);
890 emit_byte(0xC0 | encode);
891 }
892
893 void Assembler::addsd(XMMRegister dst, Address src) {
894 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
895 InstructionMark im(this);
896 emit_byte(0xF2);
897 prefix(src, dst);
898 emit_byte(0x0F);
899 emit_byte(0x58);
900 emit_operand(dst, src);
901 }
902
903 void Assembler::addss(XMMRegister dst, XMMRegister src) {
904 NOT_LP64(assert(VM_Version::supports_sse(), ""));
905 emit_byte(0xF3);
906 int encode = prefix_and_encode(dst->encoding(), src->encoding());
907 emit_byte(0x0F);
908 emit_byte(0x58);
909 emit_byte(0xC0 | encode);
910 }
911
912 void Assembler::addss(XMMRegister dst, Address src) {
913 NOT_LP64(assert(VM_Version::supports_sse(), ""));
914 InstructionMark im(this);
915 emit_byte(0xF3);
916 prefix(src, dst);
917 emit_byte(0x0F);
918 emit_byte(0x58);
919 emit_operand(dst, src);
920 }
921
922 void Assembler::andl(Register dst, int32_t imm32) {
923 prefix(dst);
924 emit_arith(0x81, 0xE0, dst, imm32);
925 }
926
927 void Assembler::andl(Register dst, Address src) {
928 InstructionMark im(this);
929 prefix(src, dst);
930 emit_byte(0x23);
931 emit_operand(dst, src);
932 }
933
934 void Assembler::andl(Register dst, Register src) {
935 (void) prefix_and_encode(dst->encoding(), src->encoding());
936 emit_arith(0x23, 0xC0, dst, src);
937 }
938
939 void Assembler::andpd(XMMRegister dst, Address src) {
940 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
941 InstructionMark im(this);
942 emit_byte(0x66);
943 prefix(src, dst);
944 emit_byte(0x0F);
945 emit_byte(0x54);
946 emit_operand(dst, src);
947 }
948
949 void Assembler::bswapl(Register reg) { // bswap
950 int encode = prefix_and_encode(reg->encoding());
951 emit_byte(0x0F);
952 emit_byte(0xC8 | encode);
953 }
954
955 void Assembler::call(Label& L, relocInfo::relocType rtype) {
956 // suspect disp32 is always good
957 int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
958
959 if (L.is_bound()) {
960 const int long_size = 5;
961 int offs = (int)( target(L) - pc() );
962 assert(offs <= 0, "assembler error");
963 InstructionMark im(this);
964 // 1110 1000 #32-bit disp
965 emit_byte(0xE8);
966 emit_data(offs - long_size, rtype, operand);
967 } else {
968 InstructionMark im(this);
969 // 1110 1000 #32-bit disp
970 L.add_patch_at(code(), locator());
971
972 emit_byte(0xE8);
973 emit_data(int(0), rtype, operand);
974 }
975 }
976
977 void Assembler::call(Register dst) {
978 // This was originally using a 32bit register encoding
979 // and surely we want 64bit!
980 // this is a 32bit encoding but in 64bit mode the default
981 // operand size is 64bit so there is no need for the
982 // wide prefix. So prefix only happens if we use the
983 // new registers. Much like push/pop.
984 int x = offset();
985 // this may be true but dbx disassembles it as if it
986 // were 32bits...
987 // int encode = prefix_and_encode(dst->encoding());
988 // if (offset() != x) assert(dst->encoding() >= 8, "what?");
989 int encode = prefixq_and_encode(dst->encoding());
990
991 emit_byte(0xFF);
992 emit_byte(0xD0 | encode);
993 }
994
995
996 void Assembler::call(Address adr) {
997 InstructionMark im(this);
998 prefix(adr);
999 emit_byte(0xFF);
1000 emit_operand(rdx, adr);
1001 }
1002
1003 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1004 assert(entry != NULL, "call most probably wrong");
1005 InstructionMark im(this);
1006 emit_byte(0xE8);
1007 intptr_t disp = entry - (_code_pos + sizeof(int32_t));
1008 assert(is_simm32(disp), "must be 32bit offset (call2)");
1009 // Technically, should use call32_operand, but this format is
1010 // implied by the fact that we're emitting a call instruction.
1011
1012 int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1013 emit_data((int) disp, rspec, operand);
1014 }
1015
1016 void Assembler::cdql() {
1017 emit_byte(0x99);
1018 }
1019
1020 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1021 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1022 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1023 emit_byte(0x0F);
1024 emit_byte(0x40 | cc);
1025 emit_byte(0xC0 | encode);
1026 }
1027
1028
1029 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1030 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1031 prefix(src, dst);
1032 emit_byte(0x0F);
1033 emit_byte(0x40 | cc);
1034 emit_operand(dst, src);
1035 }
1036
1037 void Assembler::cmpb(Address dst, int imm8) {
1038 InstructionMark im(this);
1039 prefix(dst);
1040 emit_byte(0x80);
1041 emit_operand(rdi, dst, 1);
1042 emit_byte(imm8);
1043 }
1044
1045 void Assembler::cmpl(Address dst, int32_t imm32) {
1046 InstructionMark im(this);
1047 prefix(dst);
1048 emit_byte(0x81);
1049 emit_operand(rdi, dst, 4);
1050 emit_long(imm32);
1051 }
1052
1053 void Assembler::cmpl(Register dst, int32_t imm32) {
1054 prefix(dst);
1055 emit_arith(0x81, 0xF8, dst, imm32);
1056 }
1057
1058 void Assembler::cmpl(Register dst, Register src) {
1059 (void) prefix_and_encode(dst->encoding(), src->encoding());
1060 emit_arith(0x3B, 0xC0, dst, src);
1061 }
1062
1063
1064 void Assembler::cmpl(Register dst, Address src) {
1065 InstructionMark im(this);
1066 prefix(src, dst);
1067 emit_byte(0x3B);
1068 emit_operand(dst, src);
1069 }
1070
1071 void Assembler::cmpw(Address dst, int imm16) {
1072 InstructionMark im(this);
1073 assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1074 emit_byte(0x66);
1075 emit_byte(0x81);
1076 emit_operand(rdi, dst, 2);
1077 emit_word(imm16);
1078 }
1079
1080 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
1081 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1082 // The ZF is set if the compared values were equal, and cleared otherwise.
1083 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1084 if (Atomics & 2) {
1085 // caveat: no instructionmark, so this isn't relocatable.
1086 // Emit a synthetic, non-atomic, CAS equivalent.
1087 // Beware. The synthetic form sets all ICCs, not just ZF.
1088 // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
1089 cmpl(rax, adr);
1090 movl(rax, adr);
1091 if (reg != rax) {
1092 Label L ;
1093 jcc(Assembler::notEqual, L);
1094 movl(adr, reg);
1095 bind(L);
1096 }
1097 } else {
1098 InstructionMark im(this);
1099 prefix(adr, reg);
1100 emit_byte(0x0F);
1101 emit_byte(0xB1);
1102 emit_operand(reg, adr);
1103 }
1104 }
1105
1106 void Assembler::comisd(XMMRegister dst, Address src) {
1107 // NOTE: dbx seems to decode this as comiss even though the
1108 // 0x66 is there. Strangly ucomisd comes out correct
1109 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1110 emit_byte(0x66);
1111 comiss(dst, src);
1112 }
1113
1114 void Assembler::comiss(XMMRegister dst, Address src) {
1115 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1116
1117 InstructionMark im(this);
1118 prefix(src, dst);
1119 emit_byte(0x0F);
1120 emit_byte(0x2F);
1121 emit_operand(dst, src);
1122 }
1123
1124 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1125 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1126 emit_byte(0xF3);
1127 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1128 emit_byte(0x0F);
1129 emit_byte(0xE6);
1130 emit_byte(0xC0 | encode);
1131 }
1132
1133 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1134 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1135 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1136 emit_byte(0x0F);
1137 emit_byte(0x5B);
1138 emit_byte(0xC0 | encode);
1139 }
1140
1141 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1142 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1143 emit_byte(0xF2);
1144 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1145 emit_byte(0x0F);
1146 emit_byte(0x5A);
1147 emit_byte(0xC0 | encode);
1148 }
1149
1150 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1151 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1152 emit_byte(0xF2);
1153 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1154 emit_byte(0x0F);
1155 emit_byte(0x2A);
1156 emit_byte(0xC0 | encode);
1157 }
1158
1159 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1160 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1161 emit_byte(0xF3);
1162 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1163 emit_byte(0x0F);
1164 emit_byte(0x2A);
1165 emit_byte(0xC0 | encode);
1166 }
1167
1168 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1169 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1170 emit_byte(0xF3);
1171 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1172 emit_byte(0x0F);
1173 emit_byte(0x5A);
1174 emit_byte(0xC0 | encode);
1175 }
1176
1177 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1178 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1179 emit_byte(0xF2);
1180 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1181 emit_byte(0x0F);
1182 emit_byte(0x2C);
1183 emit_byte(0xC0 | encode);
1184 }
1185
1186 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1187 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1188 emit_byte(0xF3);
1189 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1190 emit_byte(0x0F);
1191 emit_byte(0x2C);
1192 emit_byte(0xC0 | encode);
1193 }
1194
1195 void Assembler::decl(Address dst) {
1196 // Don't use it directly. Use MacroAssembler::decrement() instead.
1197 InstructionMark im(this);
1198 prefix(dst);
1199 emit_byte(0xFF);
1200 emit_operand(rcx, dst);
1201 }
1202
1203 void Assembler::divsd(XMMRegister dst, Address src) {
1204 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1205 InstructionMark im(this);
1206 emit_byte(0xF2);
1207 prefix(src, dst);
1208 emit_byte(0x0F);
1209 emit_byte(0x5E);
1210 emit_operand(dst, src);
1211 }
1212
1213 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1214 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1215 emit_byte(0xF2);
1216 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1217 emit_byte(0x0F);
1218 emit_byte(0x5E);
1219 emit_byte(0xC0 | encode);
1220 }
1221
1222 void Assembler::divss(XMMRegister dst, Address src) {
1223 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1224 InstructionMark im(this);
1225 emit_byte(0xF3);
1226 prefix(src, dst);
1227 emit_byte(0x0F);
1228 emit_byte(0x5E);
1229 emit_operand(dst, src);
1230 }
1231
1232 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1233 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1234 emit_byte(0xF3);
1235 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1236 emit_byte(0x0F);
1237 emit_byte(0x5E);
1238 emit_byte(0xC0 | encode);
1239 }
1240
1241 void Assembler::emms() {
1242 NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1243 emit_byte(0x0F);
1244 emit_byte(0x77);
1245 }
1246
1247 void Assembler::hlt() {
1248 emit_byte(0xF4);
1249 }
1250
1251 void Assembler::idivl(Register src) {
1252 int encode = prefix_and_encode(src->encoding());
1253 emit_byte(0xF7);
1254 emit_byte(0xF8 | encode);
1255 }
1256
1257 void Assembler::imull(Register dst, Register src) {
1258 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1259 emit_byte(0x0F);
1260 emit_byte(0xAF);
1261 emit_byte(0xC0 | encode);
1262 }
1263
1264
1265 void Assembler::imull(Register dst, Register src, int value) {
1266 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1267 if (is8bit(value)) {
1268 emit_byte(0x6B);
1269 emit_byte(0xC0 | encode);
1270 emit_byte(value);
1271 } else {
1272 emit_byte(0x69);
1273 emit_byte(0xC0 | encode);
1274 emit_long(value);
1275 }
1276 }
1277
1278 void Assembler::incl(Address dst) {
1279 // Don't use it directly. Use MacroAssembler::increment() instead.
1280 InstructionMark im(this);
1281 prefix(dst);
1282 emit_byte(0xFF);
1283 emit_operand(rax, dst);
1284 }
1285
1286 void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
1287 InstructionMark im(this);
1288 relocate(rtype);
1289 assert((0 <= cc) && (cc < 16), "illegal cc");
1290 if (L.is_bound()) {
1291 address dst = target(L);
1292 assert(dst != NULL, "jcc most probably wrong");
1293
1294 const int short_size = 2;
1295 const int long_size = 6;
1296 intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
1297 if (rtype == relocInfo::none && is8bit(offs - short_size)) {
1298 // 0111 tttn #8-bit disp
1299 emit_byte(0x70 | cc);
1300 emit_byte((offs - short_size) & 0xFF);
1301 } else {
1302 // 0000 1111 1000 tttn #32-bit disp
1303 assert(is_simm32(offs - long_size),
1304 "must be 32bit offset (call4)");
1305 emit_byte(0x0F);
1306 emit_byte(0x80 | cc);
1307 emit_long(offs - long_size);
1308 }
1309 } else {
1310 // Note: could eliminate cond. jumps to this jump if condition
1311 // is the same however, seems to be rather unlikely case.
1312 // Note: use jccb() if label to be bound is very close to get
1313 // an 8-bit displacement
1314 L.add_patch_at(code(), locator());
1315 emit_byte(0x0F);
1316 emit_byte(0x80 | cc);
1317 emit_long(0);
1318 }
1319 }
1320
1321 void Assembler::jccb(Condition cc, Label& L) {
1322 if (L.is_bound()) {
1323 const int short_size = 2;
1324 address entry = target(L);
1325 assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
1326 "Dispacement too large for a short jmp");
1327 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
1328 // 0111 tttn #8-bit disp
1329 emit_byte(0x70 | cc);
1330 emit_byte((offs - short_size) & 0xFF);
1331 } else {
1332 InstructionMark im(this);
1333 L.add_patch_at(code(), locator());
1334 emit_byte(0x70 | cc);
1335 emit_byte(0);
1336 }
1337 }
1338
1339 void Assembler::jmp(Address adr) {
1340 InstructionMark im(this);
1341 prefix(adr);
1342 emit_byte(0xFF);
1343 emit_operand(rsp, adr);
1344 }
1345
1346 void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
1347 if (L.is_bound()) {
1348 address entry = target(L);
1349 assert(entry != NULL, "jmp most probably wrong");
1350 InstructionMark im(this);
1351 const int short_size = 2;
1352 const int long_size = 5;
1353 intptr_t offs = entry - _code_pos;
1354 if (rtype == relocInfo::none && is8bit(offs - short_size)) {
1355 emit_byte(0xEB);
1356 emit_byte((offs - short_size) & 0xFF);
1357 } else {
1358 emit_byte(0xE9);
1359 emit_long(offs - long_size);
1360 }
1361 } else {
1362 // By default, forward jumps are always 32-bit displacements, since
1363 // we can't yet know where the label will be bound. If you're sure that
1364 // the forward jump will not run beyond 256 bytes, use jmpb to
1365 // force an 8-bit displacement.
1366 InstructionMark im(this);
1367 relocate(rtype);
1368 L.add_patch_at(code(), locator());
1369 emit_byte(0xE9);
1370 emit_long(0);
1371 }
1372 }
1373
1374 void Assembler::jmp(Register entry) {
1375 int encode = prefix_and_encode(entry->encoding());
1376 emit_byte(0xFF);
1377 emit_byte(0xE0 | encode);
1378 }
1379
1380 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
1381 InstructionMark im(this);
1382 emit_byte(0xE9);
1383 assert(dest != NULL, "must have a target");
1384 intptr_t disp = dest - (_code_pos + sizeof(int32_t));
1385 assert(is_simm32(disp), "must be 32bit offset (jmp)");
1386 emit_data(disp, rspec.reloc(), call32_operand);
1387 }
1388
1389 void Assembler::jmpb(Label& L) {
1390 if (L.is_bound()) {
1391 const int short_size = 2;
1392 address entry = target(L);
1393 assert(is8bit((entry - _code_pos) + short_size),
1394 "Dispacement too large for a short jmp");
1395 assert(entry != NULL, "jmp most probably wrong");
1396 intptr_t offs = entry - _code_pos;
1397 emit_byte(0xEB);
1398 emit_byte((offs - short_size) & 0xFF);
1399 } else {
1400 InstructionMark im(this);
1401 L.add_patch_at(code(), locator());
1402 emit_byte(0xEB);
1403 emit_byte(0);
1404 }
1405 }
1406
1407 void Assembler::ldmxcsr( Address src) {
1408 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1409 InstructionMark im(this);
1410 prefix(src);
1411 emit_byte(0x0F);
1412 emit_byte(0xAE);
1413 emit_operand(as_Register(2), src);
1414 }
1415
1416 void Assembler::leal(Register dst, Address src) {
1417 InstructionMark im(this);
1418 #ifdef _LP64
1419 emit_byte(0x67); // addr32
1420 prefix(src, dst);
1421 #endif // LP64
1422 emit_byte(0x8D);
1423 emit_operand(dst, src);
1424 }
1425
1426 void Assembler::lock() {
1427 if (Atomics & 1) {
1428 // Emit either nothing, a NOP, or a NOP: prefix
1429 emit_byte(0x90) ;
1430 } else {
1431 emit_byte(0xF0);
1432 }
1433 }
1434
1435 // Serializes memory.
1436 void Assembler::mfence() {
1437 // Memory barriers are only needed on multiprocessors
1438 if (os::is_MP()) {
1439 if( LP64_ONLY(true ||) VM_Version::supports_sse2() ) {
1440 emit_byte( 0x0F ); // MFENCE; faster blows no regs
1441 emit_byte( 0xAE );
1442 emit_byte( 0xF0 );
1443 } else {
1444 // All usable chips support "locked" instructions which suffice
1445 // as barriers, and are much faster than the alternative of
1446 // using cpuid instruction. We use here a locked add [esp],0.
1447 // This is conveniently otherwise a no-op except for blowing
1448 // flags (which we save and restore.)
1449 pushf(); // Save eflags register
1450 lock();
1451 addl(Address(rsp, 0), 0);// Assert the lock# signal here
1452 popf(); // Restore eflags register
1453 }
1454 }
1455 }
1456
1457 void Assembler::mov(Register dst, Register src) {
1458 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1459 }
1460
1461 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1462 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1463 int dstenc = dst->encoding();
1464 int srcenc = src->encoding();
1465 emit_byte(0x66);
1466 if (dstenc < 8) {
1467 if (srcenc >= 8) {
1468 prefix(REX_B);
1469 srcenc -= 8;
1470 }
1471 } else {
1472 if (srcenc < 8) {
1473 prefix(REX_R);
1474 } else {
1475 prefix(REX_RB);
1476 srcenc -= 8;
1477 }
1478 dstenc -= 8;
1479 }
1480 emit_byte(0x0F);
1481 emit_byte(0x28);
1482 emit_byte(0xC0 | dstenc << 3 | srcenc);
1483 }
1484
1485 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1486 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1487 int dstenc = dst->encoding();
1488 int srcenc = src->encoding();
1489 if (dstenc < 8) {
1490 if (srcenc >= 8) {
1491 prefix(REX_B);
1492 srcenc -= 8;
1493 }
1494 } else {
1495 if (srcenc < 8) {
1496 prefix(REX_R);
1497 } else {
1498 prefix(REX_RB);
1499 srcenc -= 8;
1500 }
1501 dstenc -= 8;
1502 }
1503 emit_byte(0x0F);
1504 emit_byte(0x28);
1505 emit_byte(0xC0 | dstenc << 3 | srcenc);
1506 }
1507
1508 void Assembler::movb(Register dst, Address src) {
1509 NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1510 InstructionMark im(this);
1511 prefix(src, dst, true);
1512 emit_byte(0x8A);
1513 emit_operand(dst, src);
1514 }
1515
1516
1517 void Assembler::movb(Address dst, int imm8) {
1518 InstructionMark im(this);
1519 prefix(dst);
1520 emit_byte(0xC6);
1521 emit_operand(rax, dst, 1);
1522 emit_byte(imm8);
1523 }
1524
1525
1526 void Assembler::movb(Address dst, Register src) {
1527 assert(src->has_byte_register(), "must have byte register");
1528 InstructionMark im(this);
1529 prefix(dst, src, true);
1530 emit_byte(0x88);
1531 emit_operand(src, dst);
1532 }
1533
1534 void Assembler::movdl(XMMRegister dst, Register src) {
1535 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1536 emit_byte(0x66);
1537 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1538 emit_byte(0x0F);
1539 emit_byte(0x6E);
1540 emit_byte(0xC0 | encode);
1541 }
1542
1543 void Assembler::movdl(Register dst, XMMRegister src) {
1544 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1545 emit_byte(0x66);
1546 // swap src/dst to get correct prefix
1547 int encode = prefix_and_encode(src->encoding(), dst->encoding());
1548 emit_byte(0x0F);
1549 emit_byte(0x7E);
1550 emit_byte(0xC0 | encode);
1551 }
1552
1553 void Assembler::movdqa(XMMRegister dst, Address src) {
1554 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1555 InstructionMark im(this);
1556 emit_byte(0x66);
1557 prefix(src, dst);
1558 emit_byte(0x0F);
1559 emit_byte(0x6F);
1560 emit_operand(dst, src);
1561 }
1562
1563 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1564 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1565 emit_byte(0x66);
1566 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1567 emit_byte(0x0F);
1568 emit_byte(0x6F);
1569 emit_byte(0xC0 | encode);
1570 }
1571
1572 void Assembler::movdqa(Address dst, XMMRegister src) {
1573 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1574 InstructionMark im(this);
1575 emit_byte(0x66);
1576 prefix(dst, src);
1577 emit_byte(0x0F);
1578 emit_byte(0x7F);
1579 emit_operand(src, dst);
1580 }
1581
1582 void Assembler::movdqu(XMMRegister dst, Address src) {
1583 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1584 InstructionMark im(this);
1585 emit_byte(0xF3);
1586 prefix(src, dst);
1587 emit_byte(0x0F);
1588 emit_byte(0x6F);
1589 emit_operand(dst, src);
1590 }
1591
1592 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1593 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1594 emit_byte(0xF3);
1595 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
1596 emit_byte(0x0F);
1597 emit_byte(0x6F);
1598 emit_byte(0xC0 | encode);
1599 }
1600
1601 void Assembler::movdqu(Address dst, XMMRegister src) {
1602 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1603 InstructionMark im(this);
1604 emit_byte(0xF3);
1605 prefix(dst, src);
1606 emit_byte(0x0F);
1607 emit_byte(0x7F);
1608 emit_operand(src, dst);
1609 }
1610
1611 // Uses zero extension on 64bit
1612
1613 void Assembler::movl(Register dst, int32_t imm32) {
1614 int encode = prefix_and_encode(dst->encoding());
1615 emit_byte(0xB8 | encode);
1616 emit_long(imm32);
1617 }
1618
1619 void Assembler::movl(Register dst, Register src) {
1620 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1621 emit_byte(0x8B);
1622 emit_byte(0xC0 | encode);
1623 }
1624
1625 void Assembler::movl(Register dst, Address src) {
1626 InstructionMark im(this);
1627 prefix(src, dst);
1628 emit_byte(0x8B);
1629 emit_operand(dst, src);
1630 }
1631
1632 void Assembler::movl(Address dst, int32_t imm32) {
1633 InstructionMark im(this);
1634 prefix(dst);
1635 emit_byte(0xC7);
1636 emit_operand(rax, dst, 4);
1637 emit_long(imm32);
1638 }
1639
1640 void Assembler::movl(Address dst, Register src) {
1641 InstructionMark im(this);
1642 prefix(dst, src);
1643 emit_byte(0x89);
1644 emit_operand(src, dst);
1645 }
1646
1647 // New cpus require to use movsd and movss to avoid partial register stall
1648 // when loading from memory. But for old Opteron use movlpd instead of movsd.
1649 // The selection is done in MacroAssembler::movdbl() and movflt().
1650 void Assembler::movlpd(XMMRegister dst, Address src) {
1651 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1652 InstructionMark im(this);
1653 emit_byte(0x66);
1654 prefix(src, dst);
1655 emit_byte(0x0F);
1656 emit_byte(0x12);
1657 emit_operand(dst, src);
1658 }
1659
1660 void Assembler::movq( MMXRegister dst, Address src ) {
1661 assert( VM_Version::supports_mmx(), "" );
1662 emit_byte(0x0F);
1663 emit_byte(0x6F);
1664 emit_operand(dst, src);
1665 }
1666
1667 void Assembler::movq( Address dst, MMXRegister src ) {
1668 assert( VM_Version::supports_mmx(), "" );
1669 emit_byte(0x0F);
1670 emit_byte(0x7F);
1671 // workaround gcc (3.2.1-7a) bug
1672 // In that version of gcc with only an emit_operand(MMX, Address)
1673 // gcc will tail jump and try and reverse the parameters completely
1674 // obliterating dst in the process. By having a version available
1675 // that doesn't need to swap the args at the tail jump the bug is
1676 // avoided.
1677 emit_operand(dst, src);
1678 }
1679
1680 void Assembler::movq(XMMRegister dst, Address src) {
1681 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1682 InstructionMark im(this);
1683 emit_byte(0xF3);
1684 prefix(src, dst);
1685 emit_byte(0x0F);
1686 emit_byte(0x7E);
1687 emit_operand(dst, src);
1688 }
1689
1690 void Assembler::movq(Address dst, XMMRegister src) {
1691 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1692 InstructionMark im(this);
1693 emit_byte(0x66);
1694 prefix(dst, src);
1695 emit_byte(0x0F);
1696 emit_byte(0xD6);
1697 emit_operand(src, dst);
1698 }
1699
1700 void Assembler::movsbl(Register dst, Address src) { // movsxb
1701 InstructionMark im(this);
1702 prefix(src, dst);
1703 emit_byte(0x0F);
1704 emit_byte(0xBE);
1705 emit_operand(dst, src);
1706 }
1707
1708 void Assembler::movsbl(Register dst, Register src) { // movsxb
1709 NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1710 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1711 emit_byte(0x0F);
1712 emit_byte(0xBE);
1713 emit_byte(0xC0 | encode);
1714 }
1715
1716 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1717 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1718 emit_byte(0xF2);
1719 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1720 emit_byte(0x0F);
1721 emit_byte(0x10);
1722 emit_byte(0xC0 | encode);
1723 }
1724
1725 void Assembler::movsd(XMMRegister dst, Address src) {
1726 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1727 InstructionMark im(this);
1728 emit_byte(0xF2);
1729 prefix(src, dst);
1730 emit_byte(0x0F);
1731 emit_byte(0x10);
1732 emit_operand(dst, src);
1733 }
1734
1735 void Assembler::movsd(Address dst, XMMRegister src) {
1736 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1737 InstructionMark im(this);
1738 emit_byte(0xF2);
1739 prefix(dst, src);
1740 emit_byte(0x0F);
1741 emit_byte(0x11);
1742 emit_operand(src, dst);
1743 }
1744
1745 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1746 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1747 emit_byte(0xF3);
1748 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1749 emit_byte(0x0F);
1750 emit_byte(0x10);
1751 emit_byte(0xC0 | encode);
1752 }
1753
1754 void Assembler::movss(XMMRegister dst, Address src) {
1755 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1756 InstructionMark im(this);
1757 emit_byte(0xF3);
1758 prefix(src, dst);
1759 emit_byte(0x0F);
1760 emit_byte(0x10);
1761 emit_operand(dst, src);
1762 }
1763
1764 void Assembler::movss(Address dst, XMMRegister src) {
1765 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1766 InstructionMark im(this);
1767 emit_byte(0xF3);
1768 prefix(dst, src);
1769 emit_byte(0x0F);
1770 emit_byte(0x11);
1771 emit_operand(src, dst);
1772 }
1773
1774 void Assembler::movswl(Register dst, Address src) { // movsxw
1775 InstructionMark im(this);
1776 prefix(src, dst);
1777 emit_byte(0x0F);
1778 emit_byte(0xBF);
1779 emit_operand(dst, src);
1780 }
1781
1782 void Assembler::movswl(Register dst, Register src) { // movsxw
1783 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1784 emit_byte(0x0F);
1785 emit_byte(0xBF);
1786 emit_byte(0xC0 | encode);
1787 }
1788
1789 void Assembler::movw(Address dst, int imm16) {
1790 InstructionMark im(this);
1791
1792 emit_byte(0x66); // switch to 16-bit mode
1793 prefix(dst);
1794 emit_byte(0xC7);
1795 emit_operand(rax, dst, 2);
1796 emit_word(imm16);
1797 }
1798
1799 void Assembler::movw(Register dst, Address src) {
1800 InstructionMark im(this);
1801 emit_byte(0x66);
1802 prefix(src, dst);
1803 emit_byte(0x8B);
1804 emit_operand(dst, src);
1805 }
1806
1807 void Assembler::movw(Address dst, Register src) {
1808 InstructionMark im(this);
1809 emit_byte(0x66);
1810 prefix(dst, src);
1811 emit_byte(0x89);
1812 emit_operand(src, dst);
1813 }
1814
1815 void Assembler::movzbl(Register dst, Address src) { // movzxb
1816 InstructionMark im(this);
1817 prefix(src, dst);
1818 emit_byte(0x0F);
1819 emit_byte(0xB6);
1820 emit_operand(dst, src);
1821 }
1822
1823 void Assembler::movzbl(Register dst, Register src) { // movzxb
1824 NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1825 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1826 emit_byte(0x0F);
1827 emit_byte(0xB6);
1828 emit_byte(0xC0 | encode);
1829 }
1830
1831 void Assembler::movzwl(Register dst, Address src) { // movzxw
1832 InstructionMark im(this);
1833 prefix(src, dst);
1834 emit_byte(0x0F);
1835 emit_byte(0xB7);
1836 emit_operand(dst, src);
1837 }
1838
1839 void Assembler::movzwl(Register dst, Register src) { // movzxw
1840 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1841 emit_byte(0x0F);
1842 emit_byte(0xB7);
1843 emit_byte(0xC0 | encode);
1844 }
1845
1846 void Assembler::mull(Address src) {
1847 InstructionMark im(this);
1848 prefix(src);
1849 emit_byte(0xF7);
1850 emit_operand(rsp, src);
1851 }
1852
1853 void Assembler::mull(Register src) {
1854 int encode = prefix_and_encode(src->encoding());
1855 emit_byte(0xF7);
1856 emit_byte(0xE0 | encode);
1857 }
1858
1859 void Assembler::mulsd(XMMRegister dst, Address src) {
1860 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1861 InstructionMark im(this);
1862 emit_byte(0xF2);
1863 prefix(src, dst);
1864 emit_byte(0x0F);
1865 emit_byte(0x59);
1866 emit_operand(dst, src);
1867 }
1868
1869 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
1870 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1871 emit_byte(0xF2);
1872 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1873 emit_byte(0x0F);
1874 emit_byte(0x59);
1875 emit_byte(0xC0 | encode);
1876 }
1877
1878 void Assembler::mulss(XMMRegister dst, Address src) {
1879 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1880 InstructionMark im(this);
1881 emit_byte(0xF3);
1882 prefix(src, dst);
1883 emit_byte(0x0F);
1884 emit_byte(0x59);
1885 emit_operand(dst, src);
1886 }
1887
1888 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
1889 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1890 emit_byte(0xF3);
1891 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1892 emit_byte(0x0F);
1893 emit_byte(0x59);
1894 emit_byte(0xC0 | encode);
1895 }
1896
1897 void Assembler::negl(Register dst) {
1898 int encode = prefix_and_encode(dst->encoding());
1899 emit_byte(0xF7);
1900 emit_byte(0xD8 | encode);
1901 }
1902
1903 void Assembler::nop(int i) {
1904 #ifdef ASSERT
1905 assert(i > 0, " ");
1906 // The fancy nops aren't currently recognized by debuggers making it a
1907 // pain to disassemble code while debugging. If asserts are on clearly
1908 // speed is not an issue so simply use the single byte traditional nop
1909 // to do alignment.
1910
1911 for (; i > 0 ; i--) emit_byte(0x90);
1912 return;
1913
1914 #endif // ASSERT
1915
1916 if (UseAddressNop && VM_Version::is_intel()) {
1917 //
1918 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
1919 // 1: 0x90
1920 // 2: 0x66 0x90
1921 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1922 // 4: 0x0F 0x1F 0x40 0x00
1923 // 5: 0x0F 0x1F 0x44 0x00 0x00
1924 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1925 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1926 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1927 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1928 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1929 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1930
1931 // The rest coding is Intel specific - don't use consecutive address nops
1932
1933 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1934 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1935 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1936 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1937
1938 while(i >= 15) {
1939 // For Intel don't generate consecutive addess nops (mix with regular nops)
1940 i -= 15;
1941 emit_byte(0x66); // size prefix
1942 emit_byte(0x66); // size prefix
1943 emit_byte(0x66); // size prefix
1944 addr_nop_8();
1945 emit_byte(0x66); // size prefix
1946 emit_byte(0x66); // size prefix
1947 emit_byte(0x66); // size prefix
1948 emit_byte(0x90); // nop
1949 }
1950 switch (i) {
1951 case 14:
1952 emit_byte(0x66); // size prefix
1953 case 13:
1954 emit_byte(0x66); // size prefix
1955 case 12:
1956 addr_nop_8();
1957 emit_byte(0x66); // size prefix
1958 emit_byte(0x66); // size prefix
1959 emit_byte(0x66); // size prefix
1960 emit_byte(0x90); // nop
1961 break;
1962 case 11:
1963 emit_byte(0x66); // size prefix
1964 case 10:
1965 emit_byte(0x66); // size prefix
1966 case 9:
1967 emit_byte(0x66); // size prefix
1968 case 8:
1969 addr_nop_8();
1970 break;
1971 case 7:
1972 addr_nop_7();
1973 break;
1974 case 6:
1975 emit_byte(0x66); // size prefix
1976 case 5:
1977 addr_nop_5();
1978 break;
1979 case 4:
1980 addr_nop_4();
1981 break;
1982 case 3:
1983 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
1984 emit_byte(0x66); // size prefix
1985 case 2:
1986 emit_byte(0x66); // size prefix
1987 case 1:
1988 emit_byte(0x90); // nop
1989 break;
1990 default:
1991 assert(i == 0, " ");
1992 }
1993 return;
1994 }
1995 if (UseAddressNop && VM_Version::is_amd()) {
1996 //
1997 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
1998 // 1: 0x90
1999 // 2: 0x66 0x90
2000 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2001 // 4: 0x0F 0x1F 0x40 0x00
2002 // 5: 0x0F 0x1F 0x44 0x00 0x00
2003 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2004 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2005 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2006 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2007 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2008 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2009
2010 // The rest coding is AMD specific - use consecutive address nops
2011
2012 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2013 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2014 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2015 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2016 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2017 // Size prefixes (0x66) are added for larger sizes
2018
2019 while(i >= 22) {
2020 i -= 11;
2021 emit_byte(0x66); // size prefix
2022 emit_byte(0x66); // size prefix
2023 emit_byte(0x66); // size prefix
2024 addr_nop_8();
2025 }
2026 // Generate first nop for size between 21-12
2027 switch (i) {
2028 case 21:
2029 i -= 1;
2030 emit_byte(0x66); // size prefix
2031 case 20:
2032 case 19:
2033 i -= 1;
2034 emit_byte(0x66); // size prefix
2035 case 18:
2036 case 17:
2037 i -= 1;
2038 emit_byte(0x66); // size prefix
2039 case 16:
2040 case 15:
2041 i -= 8;
2042 addr_nop_8();
2043 break;
2044 case 14:
2045 case 13:
2046 i -= 7;
2047 addr_nop_7();
2048 break;
2049 case 12:
2050 i -= 6;
2051 emit_byte(0x66); // size prefix
2052 addr_nop_5();
2053 break;
2054 default:
2055 assert(i < 12, " ");
2056 }
2057
2058 // Generate second nop for size between 11-1
2059 switch (i) {
2060 case 11:
2061 emit_byte(0x66); // size prefix
2062 case 10:
2063 emit_byte(0x66); // size prefix
2064 case 9:
2065 emit_byte(0x66); // size prefix
2066 case 8:
2067 addr_nop_8();
2068 break;
2069 case 7:
2070 addr_nop_7();
2071 break;
2072 case 6:
2073 emit_byte(0x66); // size prefix
2074 case 5:
2075 addr_nop_5();
2076 break;
2077 case 4:
2078 addr_nop_4();
2079 break;
2080 case 3:
2081 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2082 emit_byte(0x66); // size prefix
2083 case 2:
2084 emit_byte(0x66); // size prefix
2085 case 1:
2086 emit_byte(0x90); // nop
2087 break;
2088 default:
2089 assert(i == 0, " ");
2090 }
2091 return;
2092 }
2093
2094 // Using nops with size prefixes "0x66 0x90".
2095 // From AMD Optimization Guide:
2096 // 1: 0x90
2097 // 2: 0x66 0x90
2098 // 3: 0x66 0x66 0x90
2099 // 4: 0x66 0x66 0x66 0x90
2100 // 5: 0x66 0x66 0x90 0x66 0x90
2101 // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2102 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2103 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2104 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2105 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2106 //
2107 while(i > 12) {
2108 i -= 4;
2109 emit_byte(0x66); // size prefix
2110 emit_byte(0x66);
2111 emit_byte(0x66);
2112 emit_byte(0x90); // nop
2113 }
2114 // 1 - 12 nops
2115 if(i > 8) {
2116 if(i > 9) {
2117 i -= 1;
2118 emit_byte(0x66);
2119 }
2120 i -= 3;
2121 emit_byte(0x66);
2122 emit_byte(0x66);
2123 emit_byte(0x90);
2124 }
2125 // 1 - 8 nops
2126 if(i > 4) {
2127 if(i > 6) {
2128 i -= 1;
2129 emit_byte(0x66);
2130 }
2131 i -= 3;
2132 emit_byte(0x66);
2133 emit_byte(0x66);
2134 emit_byte(0x90);
2135 }
2136 switch (i) {
2137 case 4:
2138 emit_byte(0x66);
2139 case 3:
2140 emit_byte(0x66);
2141 case 2:
2142 emit_byte(0x66);
2143 case 1:
2144 emit_byte(0x90);
2145 break;
2146 default:
2147 assert(i == 0, " ");
2148 }
2149 }
2150
2151 void Assembler::notl(Register dst) {
2152 int encode = prefix_and_encode(dst->encoding());
2153 emit_byte(0xF7);
2154 emit_byte(0xD0 | encode );
2155 }
2156
2157 void Assembler::orl(Address dst, int32_t imm32) {
2158 InstructionMark im(this);
2159 prefix(dst);
2160 emit_byte(0x81);
2161 emit_operand(rcx, dst, 4);
2162 emit_long(imm32);
2163 }
2164
2165 void Assembler::orl(Register dst, int32_t imm32) {
2166 prefix(dst);
2167 emit_arith(0x81, 0xC8, dst, imm32);
2168 }
2169
2170
2171 void Assembler::orl(Register dst, Address src) {
2172 InstructionMark im(this);
2173 prefix(src, dst);
2174 emit_byte(0x0B);
2175 emit_operand(dst, src);
2176 }
2177
2178
2179 void Assembler::orl(Register dst, Register src) {
2180 (void) prefix_and_encode(dst->encoding(), src->encoding());
2181 emit_arith(0x0B, 0xC0, dst, src);
2182 }
2183
2184 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2185 assert(VM_Version::supports_sse4_2(), "");
2186
2187 InstructionMark im(this);
2188 emit_byte(0x66);
2189 prefix(src, dst);
2190 emit_byte(0x0F);
2191 emit_byte(0x3A);
2192 emit_byte(0x61);
2193 emit_operand(dst, src);
2194 emit_byte(imm8);
2195 }
2196
2197 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2198 assert(VM_Version::supports_sse4_2(), "");
2199
2200 emit_byte(0x66);
2201 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2202 emit_byte(0x0F);
2203 emit_byte(0x3A);
2204 emit_byte(0x61);
2205 emit_byte(0xC0 | encode);
2206 emit_byte(imm8);
2207 }
2208
2209 // generic
2210 void Assembler::pop(Register dst) {
2211 int encode = prefix_and_encode(dst->encoding());
2212 emit_byte(0x58 | encode);
2213 }
2214
2215 void Assembler::popf() {
2216 emit_byte(0x9D);
2217 }
2218
2219 void Assembler::popl(Address dst) {
2220 // NOTE: this will adjust stack by 8byte on 64bits
2221 InstructionMark im(this);
2222 prefix(dst);
2223 emit_byte(0x8F);
2224 emit_operand(rax, dst);
2225 }
2226
2227 void Assembler::prefetch_prefix(Address src) {
2228 prefix(src);
2229 emit_byte(0x0F);
2230 }
2231
2232 void Assembler::prefetchnta(Address src) {
2233 NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
2234 InstructionMark im(this);
2235 prefetch_prefix(src);
2236 emit_byte(0x18);
2237 emit_operand(rax, src); // 0, src
2238 }
2239
2240 void Assembler::prefetchr(Address src) {
2241 NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2242 InstructionMark im(this);
2243 prefetch_prefix(src);
2244 emit_byte(0x0D);
2245 emit_operand(rax, src); // 0, src
2246 }
2247
2248 void Assembler::prefetcht0(Address src) {
2249 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2250 InstructionMark im(this);
2251 prefetch_prefix(src);
2252 emit_byte(0x18);
2253 emit_operand(rcx, src); // 1, src
2254 }
2255
2256 void Assembler::prefetcht1(Address src) {
2257 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2258 InstructionMark im(this);
2259 prefetch_prefix(src);
2260 emit_byte(0x18);
2261 emit_operand(rdx, src); // 2, src
2262 }
2263
2264 void Assembler::prefetcht2(Address src) {
2265 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2266 InstructionMark im(this);
2267 prefetch_prefix(src);
2268 emit_byte(0x18);
2269 emit_operand(rbx, src); // 3, src
2270 }
2271
2272 void Assembler::prefetchw(Address src) {
2273 NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2274 InstructionMark im(this);
2275 prefetch_prefix(src);
2276 emit_byte(0x0D);
2277 emit_operand(rcx, src); // 1, src
2278 }
2279
2280 void Assembler::prefix(Prefix p) {
2281 a_byte(p);
2282 }
2283
2284 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2285 assert(isByte(mode), "invalid value");
2286 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2287
2288 emit_byte(0x66);
2289 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2290 emit_byte(0x0F);
2291 emit_byte(0x70);
2292 emit_byte(0xC0 | encode);
2293 emit_byte(mode & 0xFF);
2294
2295 }
2296
2297 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2298 assert(isByte(mode), "invalid value");
2299 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2300
2301 InstructionMark im(this);
2302 emit_byte(0x66);
2303 prefix(src, dst);
2304 emit_byte(0x0F);
2305 emit_byte(0x70);
2306 emit_operand(dst, src);
2307 emit_byte(mode & 0xFF);
2308 }
2309
2310 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2311 assert(isByte(mode), "invalid value");
2312 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2313
2314 emit_byte(0xF2);
2315 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2316 emit_byte(0x0F);
2317 emit_byte(0x70);
2318 emit_byte(0xC0 | encode);
2319 emit_byte(mode & 0xFF);
2320 }
2321
2322 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2323 assert(isByte(mode), "invalid value");
2324 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2325
2326 InstructionMark im(this);
2327 emit_byte(0xF2);
2328 prefix(src, dst); // QQ new
2329 emit_byte(0x0F);
2330 emit_byte(0x70);
2331 emit_operand(dst, src);
2332 emit_byte(mode & 0xFF);
2333 }
2334
2335 void Assembler::psrlq(XMMRegister dst, int shift) {
2336 // HMM Table D-1 says sse2 or mmx
2337 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2338
2339 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2340 emit_byte(0x66);
2341 emit_byte(0x0F);
2342 emit_byte(0x73);
2343 emit_byte(0xC0 | encode);
2344 emit_byte(shift);
2345 }
2346
2347 void Assembler::ptest(XMMRegister dst, Address src) {
2348 assert(VM_Version::supports_sse4_1(), "");
2349
2350 InstructionMark im(this);
2351 emit_byte(0x66);
2352 prefix(src, dst);
2353 emit_byte(0x0F);
2354 emit_byte(0x38);
2355 emit_byte(0x17);
2356 emit_operand(dst, src);
2357 }
2358
2359 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2360 assert(VM_Version::supports_sse4_1(), "");
2361
2362 emit_byte(0x66);
2363 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2364 emit_byte(0x0F);
2365 emit_byte(0x38);
2366 emit_byte(0x17);
2367 emit_byte(0xC0 | encode);
2368 }
2369
2370 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2371 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2372 emit_byte(0x66);
2373 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2374 emit_byte(0x0F);
2375 emit_byte(0x60);
2376 emit_byte(0xC0 | encode);
2377 }
2378
2379 void Assembler::push(int32_t imm32) {
2380 // in 64bits we push 64bits onto the stack but only
2381 // take a 32bit immediate
2382 emit_byte(0x68);
2383 emit_long(imm32);
2384 }
2385
2386 void Assembler::push(Register src) {
2387 int encode = prefix_and_encode(src->encoding());
2388
2389 emit_byte(0x50 | encode);
2390 }
2391
2392 void Assembler::pushf() {
2393 emit_byte(0x9C);
2394 }
2395
2396 void Assembler::pushl(Address src) {
2397 // Note this will push 64bit on 64bit
2398 InstructionMark im(this);
2399 prefix(src);
2400 emit_byte(0xFF);
2401 emit_operand(rsi, src);
2402 }
2403
2404 void Assembler::pxor(XMMRegister dst, Address src) {
2405 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2406 InstructionMark im(this);
2407 emit_byte(0x66);
2408 prefix(src, dst);
2409 emit_byte(0x0F);
2410 emit_byte(0xEF);
2411 emit_operand(dst, src);
2412 }
2413
2414 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2415 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2416 InstructionMark im(this);
2417 emit_byte(0x66);
2418 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2419 emit_byte(0x0F);
2420 emit_byte(0xEF);
2421 emit_byte(0xC0 | encode);
2422 }
2423
2424 void Assembler::rcll(Register dst, int imm8) {
2425 assert(isShiftCount(imm8), "illegal shift count");
2426 int encode = prefix_and_encode(dst->encoding());
2427 if (imm8 == 1) {
2428 emit_byte(0xD1);
2429 emit_byte(0xD0 | encode);
2430 } else {
2431 emit_byte(0xC1);
2432 emit_byte(0xD0 | encode);
2433 emit_byte(imm8);
2434 }
2435 }
2436
2437 // copies data from [esi] to [edi] using rcx pointer sized words
2438 // generic
2439 void Assembler::rep_mov() {
2440 emit_byte(0xF3);
2441 // MOVSQ
2442 LP64_ONLY(prefix(REX_W));
2443 emit_byte(0xA5);
2444 }
2445
2446 // sets rcx pointer sized words with rax, value at [edi]
2447 // generic
2448 void Assembler::rep_set() { // rep_set
2449 emit_byte(0xF3);
2450 // STOSQ
2451 LP64_ONLY(prefix(REX_W));
2452 emit_byte(0xAB);
2453 }
2454
2455 // scans rcx pointer sized words at [edi] for occurance of rax,
2456 // generic
2457 void Assembler::repne_scan() { // repne_scan
2458 emit_byte(0xF2);
2459 // SCASQ
2460 LP64_ONLY(prefix(REX_W));
2461 emit_byte(0xAF);
2462 }
2463
2464 #ifdef _LP64
2465 // scans rcx 4 byte words at [edi] for occurance of rax,
2466 // generic
2467 void Assembler::repne_scanl() { // repne_scan
2468 emit_byte(0xF2);
2469 // SCASL
2470 emit_byte(0xAF);
2471 }
2472 #endif
2473
2474 void Assembler::ret(int imm16) {
2475 if (imm16 == 0) {
2476 emit_byte(0xC3);
2477 } else {
2478 emit_byte(0xC2);
2479 emit_word(imm16);
2480 }
2481 }
2482
2483 void Assembler::sahf() {
2484 #ifdef _LP64
2485 // Not supported in 64bit mode
2486 ShouldNotReachHere();
2487 #endif
2488 emit_byte(0x9E);
2489 }
2490
2491 void Assembler::sarl(Register dst, int imm8) {
2492 int encode = prefix_and_encode(dst->encoding());
2493 assert(isShiftCount(imm8), "illegal shift count");
2494 if (imm8 == 1) {
2495 emit_byte(0xD1);
2496 emit_byte(0xF8 | encode);
2497 } else {
2498 emit_byte(0xC1);
2499 emit_byte(0xF8 | encode);
2500 emit_byte(imm8);
2501 }
2502 }
2503
2504 void Assembler::sarl(Register dst) {
2505 int encode = prefix_and_encode(dst->encoding());
2506 emit_byte(0xD3);
2507 emit_byte(0xF8 | encode);
2508 }
2509
2510 void Assembler::sbbl(Address dst, int32_t imm32) {
2511 InstructionMark im(this);
2512 prefix(dst);
2513 emit_arith_operand(0x81, rbx, dst, imm32);
2514 }
2515
2516 void Assembler::sbbl(Register dst, int32_t imm32) {
2517 prefix(dst);
2518 emit_arith(0x81, 0xD8, dst, imm32);
2519 }
2520
2521
2522 void Assembler::sbbl(Register dst, Address src) {
2523 InstructionMark im(this);
2524 prefix(src, dst);
2525 emit_byte(0x1B);
2526 emit_operand(dst, src);
2527 }
2528
2529 void Assembler::sbbl(Register dst, Register src) {
2530 (void) prefix_and_encode(dst->encoding(), src->encoding());
2531 emit_arith(0x1B, 0xC0, dst, src);
2532 }
2533
2534 void Assembler::setb(Condition cc, Register dst) {
2535 assert(0 <= cc && cc < 16, "illegal cc");
2536 int encode = prefix_and_encode(dst->encoding(), true);
2537 emit_byte(0x0F);
2538 emit_byte(0x90 | cc);
2539 emit_byte(0xC0 | encode);
2540 }
2541
2542 void Assembler::shll(Register dst, int imm8) {
2543 assert(isShiftCount(imm8), "illegal shift count");
2544 int encode = prefix_and_encode(dst->encoding());
2545 if (imm8 == 1 ) {
2546 emit_byte(0xD1);
2547 emit_byte(0xE0 | encode);
2548 } else {
2549 emit_byte(0xC1);
2550 emit_byte(0xE0 | encode);
2551 emit_byte(imm8);
2552 }
2553 }
2554
2555 void Assembler::shll(Register dst) {
2556 int encode = prefix_and_encode(dst->encoding());
2557 emit_byte(0xD3);
2558 emit_byte(0xE0 | encode);
2559 }
2560
2561 void Assembler::shrl(Register dst, int imm8) {
2562 assert(isShiftCount(imm8), "illegal shift count");
2563 int encode = prefix_and_encode(dst->encoding());
2564 emit_byte(0xC1);
2565 emit_byte(0xE8 | encode);
2566 emit_byte(imm8);
2567 }
2568
2569 void Assembler::shrl(Register dst) {
2570 int encode = prefix_and_encode(dst->encoding());
2571 emit_byte(0xD3);
2572 emit_byte(0xE8 | encode);
2573 }
2574
2575 // copies a single word from [esi] to [edi]
2576 void Assembler::smovl() {
2577 emit_byte(0xA5);
2578 }
2579
2580 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2581 // HMM Table D-1 says sse2
2582 // NOT_LP64(assert(VM_Version::supports_sse(), ""));
2583 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2584 emit_byte(0xF2);
2585 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2586 emit_byte(0x0F);
2587 emit_byte(0x51);
2588 emit_byte(0xC0 | encode);
2589 }
2590
2591 void Assembler::stmxcsr( Address dst) {
2592 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2593 InstructionMark im(this);
2594 prefix(dst);
2595 emit_byte(0x0F);
2596 emit_byte(0xAE);
2597 emit_operand(as_Register(3), dst);
2598 }
2599
2600 void Assembler::subl(Address dst, int32_t imm32) {
2601 InstructionMark im(this);
2602 prefix(dst);
2603 if (is8bit(imm32)) {
2604 emit_byte(0x83);
2605 emit_operand(rbp, dst, 1);
2606 emit_byte(imm32 & 0xFF);
2607 } else {
2608 emit_byte(0x81);
2609 emit_operand(rbp, dst, 4);
2610 emit_long(imm32);
2611 }
2612 }
2613
2614 void Assembler::subl(Register dst, int32_t imm32) {
2615 prefix(dst);
2616 emit_arith(0x81, 0xE8, dst, imm32);
2617 }
2618
2619 void Assembler::subl(Address dst, Register src) {
2620 InstructionMark im(this);
2621 prefix(dst, src);
2622 emit_byte(0x29);
2623 emit_operand(src, dst);
2624 }
2625
2626 void Assembler::subl(Register dst, Address src) {
2627 InstructionMark im(this);
2628 prefix(src, dst);
2629 emit_byte(0x2B);
2630 emit_operand(dst, src);
2631 }
2632
2633 void Assembler::subl(Register dst, Register src) {
2634 (void) prefix_and_encode(dst->encoding(), src->encoding());
2635 emit_arith(0x2B, 0xC0, dst, src);
2636 }
2637
2638 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2639 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2640 emit_byte(0xF2);
2641 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2642 emit_byte(0x0F);
2643 emit_byte(0x5C);
2644 emit_byte(0xC0 | encode);
2645 }
2646
2647 void Assembler::subsd(XMMRegister dst, Address src) {
2648 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2649 InstructionMark im(this);
2650 emit_byte(0xF2);
2651 prefix(src, dst);
2652 emit_byte(0x0F);
2653 emit_byte(0x5C);
2654 emit_operand(dst, src);
2655 }
2656
2657 void Assembler::subss(XMMRegister dst, XMMRegister src) {
2658 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2659 emit_byte(0xF3);
2660 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2661 emit_byte(0x0F);
2662 emit_byte(0x5C);
2663 emit_byte(0xC0 | encode);
2664 }
2665
2666 void Assembler::subss(XMMRegister dst, Address src) {
2667 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2668 InstructionMark im(this);
2669 emit_byte(0xF3);
2670 prefix(src, dst);
2671 emit_byte(0x0F);
2672 emit_byte(0x5C);
2673 emit_operand(dst, src);
2674 }
2675
2676 void Assembler::testb(Register dst, int imm8) {
2677 NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2678 (void) prefix_and_encode(dst->encoding(), true);
2679 emit_arith_b(0xF6, 0xC0, dst, imm8);
2680 }
2681
2682 void Assembler::testl(Register dst, int32_t imm32) {
2683 // not using emit_arith because test
2684 // doesn't support sign-extension of
2685 // 8bit operands
2686 int encode = dst->encoding();
2687 if (encode == 0) {
2688 emit_byte(0xA9);
2689 } else {
2690 encode = prefix_and_encode(encode);
2691 emit_byte(0xF7);
2692 emit_byte(0xC0 | encode);
2693 }
2694 emit_long(imm32);
2695 }
2696
2697 void Assembler::testl(Register dst, Register src) {
2698 (void) prefix_and_encode(dst->encoding(), src->encoding());
2699 emit_arith(0x85, 0xC0, dst, src);
2700 }
2701
2702 void Assembler::testl(Register dst, Address src) {
2703 InstructionMark im(this);
2704 prefix(src, dst);
2705 emit_byte(0x85);
2706 emit_operand(dst, src);
2707 }
2708
2709 void Assembler::ucomisd(XMMRegister dst, Address src) {
2710 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2711 emit_byte(0x66);
2712 ucomiss(dst, src);
2713 }
2714
2715 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2716 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2717 emit_byte(0x66);
2718 ucomiss(dst, src);
2719 }
2720
2721 void Assembler::ucomiss(XMMRegister dst, Address src) {
2722 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2723
2724 InstructionMark im(this);
2725 prefix(src, dst);
2726 emit_byte(0x0F);
2727 emit_byte(0x2E);
2728 emit_operand(dst, src);
2729 }
2730
2731 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2732 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2733 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2734 emit_byte(0x0F);
2735 emit_byte(0x2E);
2736 emit_byte(0xC0 | encode);
2737 }
2738
2739
2740 void Assembler::xaddl(Address dst, Register src) {
2741 InstructionMark im(this);
2742 prefix(dst, src);
2743 emit_byte(0x0F);
2744 emit_byte(0xC1);
2745 emit_operand(src, dst);
2746 }
2747
2748 void Assembler::xchgl(Register dst, Address src) { // xchg
2749 InstructionMark im(this);
2750 prefix(src, dst);
2751 emit_byte(0x87);
2752 emit_operand(dst, src);
2753 }
2754
2755 void Assembler::xchgl(Register dst, Register src) {
2756 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2757 emit_byte(0x87);
2758 emit_byte(0xc0 | encode);
2759 }
2760
2761 void Assembler::xorl(Register dst, int32_t imm32) {
2762 prefix(dst);
2763 emit_arith(0x81, 0xF0, dst, imm32);
2764 }
2765
2766 void Assembler::xorl(Register dst, Address src) {
2767 InstructionMark im(this);
2768 prefix(src, dst);
2769 emit_byte(0x33);
2770 emit_operand(dst, src);
2771 }
2772
2773 void Assembler::xorl(Register dst, Register src) {
2774 (void) prefix_and_encode(dst->encoding(), src->encoding());
2775 emit_arith(0x33, 0xC0, dst, src);
2776 }
2777
2778 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
2779 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2780 emit_byte(0x66);
2781 xorps(dst, src);
2782 }
2783
2784 void Assembler::xorpd(XMMRegister dst, Address src) {
2785 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2786 InstructionMark im(this);
2787 emit_byte(0x66);
2788 prefix(src, dst);
2789 emit_byte(0x0F);
2790 emit_byte(0x57);
2791 emit_operand(dst, src);
2792 }
2793
2794
2795 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
2796 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2797 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2798 emit_byte(0x0F);
2799 emit_byte(0x57);
2800 emit_byte(0xC0 | encode);
2801 }
2802
2803 void Assembler::xorps(XMMRegister dst, Address src) {
2804 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2805 InstructionMark im(this);
2806 prefix(src, dst);
2807 emit_byte(0x0F);
2808 emit_byte(0x57);
2809 emit_operand(dst, src);
2810 }
2811
2812 #ifndef _LP64
2813 // 32bit only pieces of the assembler
2814
2815 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
2816 // NO PREFIX AS NEVER 64BIT
2817 InstructionMark im(this);
2818 emit_byte(0x81);
2819 emit_byte(0xF8 | src1->encoding());
2820 emit_data(imm32, rspec, 0);
2821 }
2822
2823 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
2824 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
2825 InstructionMark im(this);
2826 emit_byte(0x81);
2827 emit_operand(rdi, src1);
2828 emit_data(imm32, rspec, 0);
2829 }
2830
2831 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
2832 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
2833 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise.
2834 void Assembler::cmpxchg8(Address adr) {
2835 InstructionMark im(this);
2836 emit_byte(0x0F);
2837 emit_byte(0xc7);
2838 emit_operand(rcx, adr);
2839 }
2840
2841 void Assembler::decl(Register dst) {
2842 // Don't use it directly. Use MacroAssembler::decrementl() instead.
2843 emit_byte(0x48 | dst->encoding());
2844 }
2845
2846 #endif // _LP64
2847
2848 // 64bit typically doesn't use the x87 but needs to for the trig funcs
2849
2850 void Assembler::fabs() {
2851 emit_byte(0xD9);
2852 emit_byte(0xE1);
2853 }
2854
2855 void Assembler::fadd(int i) {
2856 emit_farith(0xD8, 0xC0, i);
2857 }
2858
2859 void Assembler::fadd_d(Address src) {
2860 InstructionMark im(this);
2861 emit_byte(0xDC);
2862 emit_operand32(rax, src);
2863 }
2864
2865 void Assembler::fadd_s(Address src) {
2866 InstructionMark im(this);
2867 emit_byte(0xD8);
2868 emit_operand32(rax, src);
2869 }
2870
2871 void Assembler::fadda(int i) {
2872 emit_farith(0xDC, 0xC0, i);
2873 }
2874
2875 void Assembler::faddp(int i) {
2876 emit_farith(0xDE, 0xC0, i);
2877 }
2878
2879 void Assembler::fchs() {
2880 emit_byte(0xD9);
2881 emit_byte(0xE0);
2882 }
2883
2884 void Assembler::fcom(int i) {
2885 emit_farith(0xD8, 0xD0, i);
2886 }
2887
2888 void Assembler::fcomp(int i) {
2889 emit_farith(0xD8, 0xD8, i);
2890 }
2891
2892 void Assembler::fcomp_d(Address src) {
2893 InstructionMark im(this);
2894 emit_byte(0xDC);
2895 emit_operand32(rbx, src);
2896 }
2897
2898 void Assembler::fcomp_s(Address src) {
2899 InstructionMark im(this);
2900 emit_byte(0xD8);
2901 emit_operand32(rbx, src);
2902 }
2903
2904 void Assembler::fcompp() {
2905 emit_byte(0xDE);
2906 emit_byte(0xD9);
2907 }
2908
2909 void Assembler::fcos() {
2910 emit_byte(0xD9);
2911 emit_byte(0xFF);
2912 }
2913
2914 void Assembler::fdecstp() {
2915 emit_byte(0xD9);
2916 emit_byte(0xF6);
2917 }
2918
2919 void Assembler::fdiv(int i) {
2920 emit_farith(0xD8, 0xF0, i);
2921 }
2922
2923 void Assembler::fdiv_d(Address src) {
2924 InstructionMark im(this);
2925 emit_byte(0xDC);
2926 emit_operand32(rsi, src);
2927 }
2928
2929 void Assembler::fdiv_s(Address src) {
2930 InstructionMark im(this);
2931 emit_byte(0xD8);
2932 emit_operand32(rsi, src);
2933 }
2934
2935 void Assembler::fdiva(int i) {
2936 emit_farith(0xDC, 0xF8, i);
2937 }
2938
2939 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
2940 // is erroneous for some of the floating-point instructions below.
2941
2942 void Assembler::fdivp(int i) {
2943 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
2944 }
2945
2946 void Assembler::fdivr(int i) {
2947 emit_farith(0xD8, 0xF8, i);
2948 }
2949
2950 void Assembler::fdivr_d(Address src) {
2951 InstructionMark im(this);
2952 emit_byte(0xDC);
2953 emit_operand32(rdi, src);
2954 }
2955
2956 void Assembler::fdivr_s(Address src) {
2957 InstructionMark im(this);
2958 emit_byte(0xD8);
2959 emit_operand32(rdi, src);
2960 }
2961
2962 void Assembler::fdivra(int i) {
2963 emit_farith(0xDC, 0xF0, i);
2964 }
2965
2966 void Assembler::fdivrp(int i) {
2967 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
2968 }
2969
2970 void Assembler::ffree(int i) {
2971 emit_farith(0xDD, 0xC0, i);
2972 }
2973
2974 void Assembler::fild_d(Address adr) {
2975 InstructionMark im(this);
2976 emit_byte(0xDF);
2977 emit_operand32(rbp, adr);
2978 }
2979
2980 void Assembler::fild_s(Address adr) {
2981 InstructionMark im(this);
2982 emit_byte(0xDB);
2983 emit_operand32(rax, adr);
2984 }
2985
2986 void Assembler::fincstp() {
2987 emit_byte(0xD9);
2988 emit_byte(0xF7);
2989 }
2990
2991 void Assembler::finit() {
2992 emit_byte(0x9B);
2993 emit_byte(0xDB);
2994 emit_byte(0xE3);
2995 }
2996
2997 void Assembler::fist_s(Address adr) {
2998 InstructionMark im(this);
2999 emit_byte(0xDB);
3000 emit_operand32(rdx, adr);
3001 }
3002
3003 void Assembler::fistp_d(Address adr) {
3004 InstructionMark im(this);
3005 emit_byte(0xDF);
3006 emit_operand32(rdi, adr);
3007 }
3008
3009 void Assembler::fistp_s(Address adr) {
3010 InstructionMark im(this);
3011 emit_byte(0xDB);
3012 emit_operand32(rbx, adr);
3013 }
3014
3015 void Assembler::fld1() {
3016 emit_byte(0xD9);
3017 emit_byte(0xE8);
3018 }
3019
3020 void Assembler::fld_d(Address adr) {
3021 InstructionMark im(this);
3022 emit_byte(0xDD);
3023 emit_operand32(rax, adr);
3024 }
3025
3026 void Assembler::fld_s(Address adr) {
3027 InstructionMark im(this);
3028 emit_byte(0xD9);
3029 emit_operand32(rax, adr);
3030 }
3031
3032
3033 void Assembler::fld_s(int index) {
3034 emit_farith(0xD9, 0xC0, index);
3035 }
3036
3037 void Assembler::fld_x(Address adr) {
3038 InstructionMark im(this);
3039 emit_byte(0xDB);
3040 emit_operand32(rbp, adr);
3041 }
3042
3043 void Assembler::fldcw(Address src) {
3044 InstructionMark im(this);
3045 emit_byte(0xd9);
3046 emit_operand32(rbp, src);
3047 }
3048
3049 void Assembler::fldenv(Address src) {
3050 InstructionMark im(this);
3051 emit_byte(0xD9);
3052 emit_operand32(rsp, src);
3053 }
3054
3055 void Assembler::fldlg2() {
3056 emit_byte(0xD9);
3057 emit_byte(0xEC);
3058 }
3059
3060 void Assembler::fldln2() {
3061 emit_byte(0xD9);
3062 emit_byte(0xED);
3063 }
3064
3065 void Assembler::fldz() {
3066 emit_byte(0xD9);
3067 emit_byte(0xEE);
3068 }
3069
3070 void Assembler::flog() {
3071 fldln2();
3072 fxch();
3073 fyl2x();
3074 }
3075
3076 void Assembler::flog10() {
3077 fldlg2();
3078 fxch();
3079 fyl2x();
3080 }
3081
3082 void Assembler::fmul(int i) {
3083 emit_farith(0xD8, 0xC8, i);
3084 }
3085
3086 void Assembler::fmul_d(Address src) {
3087 InstructionMark im(this);
3088 emit_byte(0xDC);
3089 emit_operand32(rcx, src);
3090 }
3091
3092 void Assembler::fmul_s(Address src) {
3093 InstructionMark im(this);
3094 emit_byte(0xD8);
3095 emit_operand32(rcx, src);
3096 }
3097
3098 void Assembler::fmula(int i) {
3099 emit_farith(0xDC, 0xC8, i);
3100 }
3101
3102 void Assembler::fmulp(int i) {
3103 emit_farith(0xDE, 0xC8, i);
3104 }
3105
3106 void Assembler::fnsave(Address dst) {
3107 InstructionMark im(this);
3108 emit_byte(0xDD);
3109 emit_operand32(rsi, dst);
3110 }
3111
3112 void Assembler::fnstcw(Address src) {
3113 InstructionMark im(this);
3114 emit_byte(0x9B);
3115 emit_byte(0xD9);
3116 emit_operand32(rdi, src);
3117 }
3118
3119 void Assembler::fnstsw_ax() {
3120 emit_byte(0xdF);
3121 emit_byte(0xE0);
3122 }
3123
3124 void Assembler::fprem() {
3125 emit_byte(0xD9);
3126 emit_byte(0xF8);
3127 }
3128
3129 void Assembler::fprem1() {
3130 emit_byte(0xD9);
3131 emit_byte(0xF5);
3132 }
3133
3134 void Assembler::frstor(Address src) {
3135 InstructionMark im(this);
3136 emit_byte(0xDD);
3137 emit_operand32(rsp, src);
3138 }
3139
3140 void Assembler::fsin() {
3141 emit_byte(0xD9);
3142 emit_byte(0xFE);
3143 }
3144
3145 void Assembler::fsqrt() {
3146 emit_byte(0xD9);
3147 emit_byte(0xFA);
3148 }
3149
3150 void Assembler::fst_d(Address adr) {
3151 InstructionMark im(this);
3152 emit_byte(0xDD);
3153 emit_operand32(rdx, adr);
3154 }
3155
3156 void Assembler::fst_s(Address adr) {
3157 InstructionMark im(this);
3158 emit_byte(0xD9);
3159 emit_operand32(rdx, adr);
3160 }
3161
3162 void Assembler::fstp_d(Address adr) {
3163 InstructionMark im(this);
3164 emit_byte(0xDD);
3165 emit_operand32(rbx, adr);
3166 }
3167
3168 void Assembler::fstp_d(int index) {
3169 emit_farith(0xDD, 0xD8, index);
3170 }
3171
3172 void Assembler::fstp_s(Address adr) {
3173 InstructionMark im(this);
3174 emit_byte(0xD9);
3175 emit_operand32(rbx, adr);
3176 }
3177
3178 void Assembler::fstp_x(Address adr) {
3179 InstructionMark im(this);
3180 emit_byte(0xDB);
3181 emit_operand32(rdi, adr);
3182 }
3183
3184 void Assembler::fsub(int i) {
3185 emit_farith(0xD8, 0xE0, i);
3186 }
3187
3188 void Assembler::fsub_d(Address src) {
3189 InstructionMark im(this);
3190 emit_byte(0xDC);
3191 emit_operand32(rsp, src);
3192 }
3193
3194 void Assembler::fsub_s(Address src) {
3195 InstructionMark im(this);
3196 emit_byte(0xD8);
3197 emit_operand32(rsp, src);
3198 }
3199
3200 void Assembler::fsuba(int i) {
3201 emit_farith(0xDC, 0xE8, i);
3202 }
3203
3204 void Assembler::fsubp(int i) {
3205 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
3206 }
3207
3208 void Assembler::fsubr(int i) {
3209 emit_farith(0xD8, 0xE8, i);
3210 }
3211
3212 void Assembler::fsubr_d(Address src) {
3213 InstructionMark im(this);
3214 emit_byte(0xDC);
3215 emit_operand32(rbp, src);
3216 }
3217
3218 void Assembler::fsubr_s(Address src) {
3219 InstructionMark im(this);
3220 emit_byte(0xD8);
3221 emit_operand32(rbp, src);
3222 }
3223
3224 void Assembler::fsubra(int i) {
3225 emit_farith(0xDC, 0xE0, i);
3226 }
3227
3228 void Assembler::fsubrp(int i) {
3229 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
3230 }
3231
3232 void Assembler::ftan() {
3233 emit_byte(0xD9);
3234 emit_byte(0xF2);
3235 emit_byte(0xDD);
3236 emit_byte(0xD8);
3237 }
3238
3239 void Assembler::ftst() {
3240 emit_byte(0xD9);
3241 emit_byte(0xE4);
3242 }
3243
3244 void Assembler::fucomi(int i) {
3245 // make sure the instruction is supported (introduced for P6, together with cmov)
3246 guarantee(VM_Version::supports_cmov(), "illegal instruction");
3247 emit_farith(0xDB, 0xE8, i);
3248 }
3249
3250 void Assembler::fucomip(int i) {
3251 // make sure the instruction is supported (introduced for P6, together with cmov)
3252 guarantee(VM_Version::supports_cmov(), "illegal instruction");
3253 emit_farith(0xDF, 0xE8, i);
3254 }
3255
3256 void Assembler::fwait() {
3257 emit_byte(0x9B);
3258 }
3259
3260 void Assembler::fxch(int i) {
3261 emit_farith(0xD9, 0xC8, i);
3262 }
3263
3264 void Assembler::fyl2x() {
3265 emit_byte(0xD9);
3266 emit_byte(0xF1);
3267 }
3268
3269 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format) {
3270 InstructionMark im(this);
3271 int encode = prefix_and_encode(dst->encoding());
3272 emit_byte(0xB8 | encode);
3273 emit_data((int)imm32, rspec, format);
3274 }
3275
3276 #ifndef _LP64
3277
3278 void Assembler::incl(Register dst) {
3279 // Don't use it directly. Use MacroAssembler::incrementl() instead.
3280 emit_byte(0x40 | dst->encoding());
3281 }
3282
3283 void Assembler::lea(Register dst, Address src) {
3284 leal(dst, src);
3285 }
3286
3287 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
3288 InstructionMark im(this);
3289 emit_byte(0xC7);
3290 emit_operand(rax, dst);
3291 emit_data((int)imm32, rspec, 0);
3292 }
3293
3294
3295 void Assembler::popa() { // 32bit
3296 emit_byte(0x61);
3297 }
3298
3299 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
3300 InstructionMark im(this);
3301 emit_byte(0x68);
3302 emit_data(imm32, rspec, 0);
3303 }
3304
3305 void Assembler::pusha() { // 32bit
3306 emit_byte(0x60);
3307 }
3308
3309 void Assembler::set_byte_if_not_zero(Register dst) {
3310 emit_byte(0x0F);
3311 emit_byte(0x95);
3312 emit_byte(0xE0 | dst->encoding());
3313 }
3314
3315 void Assembler::shldl(Register dst, Register src) {
3316 emit_byte(0x0F);
3317 emit_byte(0xA5);
3318 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3319 }
3320
3321 void Assembler::shrdl(Register dst, Register src) {
3322 emit_byte(0x0F);
3323 emit_byte(0xAD);
3324 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
3325 }
3326
3327 #else // LP64
3328
3329 // 64bit only pieces of the assembler
3330 // This should only be used by 64bit instructions that can use rip-relative
3331 // it cannot be used by instructions that want an immediate value.
3332
3333 bool Assembler::reachable(AddressLiteral adr) {
3334 int64_t disp;
3335 // None will force a 64bit literal to the code stream. Likely a placeholder
3336 // for something that will be patched later and we need to certain it will
3337 // always be reachable.
3338 if (adr.reloc() == relocInfo::none) {
3339 return false;
3340 }
3341 if (adr.reloc() == relocInfo::internal_word_type) {
3342 // This should be rip relative and easily reachable.
3343 return true;
3344 }
3345 if (adr.reloc() == relocInfo::virtual_call_type ||
3346 adr.reloc() == relocInfo::opt_virtual_call_type ||
3347 adr.reloc() == relocInfo::static_call_type ||
3348 adr.reloc() == relocInfo::static_stub_type ) {
3349 // This should be rip relative within the code cache and easily
3350 // reachable until we get huge code caches. (At which point
3351 // ic code is going to have issues).
3352 return true;
3353 }
3354 if (adr.reloc() != relocInfo::external_word_type &&
3355 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special
3356 adr.reloc() != relocInfo::poll_type && // relocs to identify them
3357 adr.reloc() != relocInfo::runtime_call_type ) {
3358 return false;
3359 }
3360
3361 // Stress the correction code
3362 if (ForceUnreachable) {
3363 // Must be runtimecall reloc, see if it is in the codecache
3364 // Flipping stuff in the codecache to be unreachable causes issues
3365 // with things like inline caches where the additional instructions
3366 // are not handled.
3367 if (CodeCache::find_blob(adr._target) == NULL) {
3368 return false;
3369 }
3370 }
3371 // For external_word_type/runtime_call_type if it is reachable from where we
3372 // are now (possibly a temp buffer) and where we might end up
3373 // anywhere in the codeCache then we are always reachable.
3374 // This would have to change if we ever save/restore shared code
3375 // to be more pessimistic.
3376
3377 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
3378 if (!is_simm32(disp)) return false;
3379 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
3380 if (!is_simm32(disp)) return false;
3381
3382 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));
3383
3384 // Because rip relative is a disp + address_of_next_instruction and we
3385 // don't know the value of address_of_next_instruction we apply a fudge factor
3386 // to make sure we will be ok no matter the size of the instruction we get placed into.
3387 // We don't have to fudge the checks above here because they are already worst case.
3388
3389 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
3390 // + 4 because better safe than sorry.
3391 const int fudge = 12 + 4;
3392 if (disp < 0) {
3393 disp -= fudge;
3394 } else {
3395 disp += fudge;
3396 }
3397 return is_simm32(disp);
3398 }
3399
3400 void Assembler::emit_data64(jlong data,
3401 relocInfo::relocType rtype,
3402 int format) {
3403 if (rtype == relocInfo::none) {
3404 emit_long64(data);
3405 } else {
3406 emit_data64(data, Relocation::spec_simple(rtype), format);
3407 }
3408 }
3409
3410 void Assembler::emit_data64(jlong data,
3411 RelocationHolder const& rspec,
3412 int format) {
3413 assert(imm_operand == 0, "default format must be immediate in this file");
3414 assert(imm_operand == format, "must be immediate");
3415 assert(inst_mark() != NULL, "must be inside InstructionMark");
3416 // Do not use AbstractAssembler::relocate, which is not intended for
3417 // embedded words. Instead, relocate to the enclosing instruction.
3418 code_section()->relocate(inst_mark(), rspec, format);
3419 #ifdef ASSERT
3420 check_relocation(rspec, format);
3421 #endif
3422 emit_long64(data);
3423 }
3424
3425 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
3426 if (reg_enc >= 8) {
3427 prefix(REX_B);
3428 reg_enc -= 8;
3429 } else if (byteinst && reg_enc >= 4) {
3430 prefix(REX);
3431 }
3432 return reg_enc;
3433 }
3434
3435 int Assembler::prefixq_and_encode(int reg_enc) {
3436 if (reg_enc < 8) {
3437 prefix(REX_W);
3438 } else {
3439 prefix(REX_WB);
3440 reg_enc -= 8;
3441 }
3442 return reg_enc;
3443 }
3444
3445 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
3446 if (dst_enc < 8) {
3447 if (src_enc >= 8) {
3448 prefix(REX_B);
3449 src_enc -= 8;
3450 } else if (byteinst && src_enc >= 4) {
3451 prefix(REX);
3452 }
3453 } else {
3454 if (src_enc < 8) {
3455 prefix(REX_R);
3456 } else {
3457 prefix(REX_RB);
3458 src_enc -= 8;
3459 }
3460 dst_enc -= 8;
3461 }
3462 return dst_enc << 3 | src_enc;
3463 }
3464
3465 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
3466 if (dst_enc < 8) {
3467 if (src_enc < 8) {
3468 prefix(REX_W);
3469 } else {
3470 prefix(REX_WB);
3471 src_enc -= 8;
3472 }
3473 } else {
3474 if (src_enc < 8) {
3475 prefix(REX_WR);
3476 } else {
3477 prefix(REX_WRB);
3478 src_enc -= 8;
3479 }
3480 dst_enc -= 8;
3481 }
3482 return dst_enc << 3 | src_enc;
3483 }
3484
3485 void Assembler::prefix(Register reg) {
3486 if (reg->encoding() >= 8) {
3487 prefix(REX_B);
3488 }
3489 }
3490
3491 void Assembler::prefix(Address adr) {
3492 if (adr.base_needs_rex()) {
3493 if (adr.index_needs_rex()) {
3494 prefix(REX_XB);
3495 } else {
3496 prefix(REX_B);
3497 }
3498 } else {
3499 if (adr.index_needs_rex()) {
3500 prefix(REX_X);
3501 }
3502 }
3503 }
3504
3505 void Assembler::prefixq(Address adr) {
3506 if (adr.base_needs_rex()) {
3507 if (adr.index_needs_rex()) {
3508 prefix(REX_WXB);
3509 } else {
3510 prefix(REX_WB);
3511 }
3512 } else {
3513 if (adr.index_needs_rex()) {
3514 prefix(REX_WX);
3515 } else {
3516 prefix(REX_W);
3517 }
3518 }
3519 }
3520
3521
3522 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
3523 if (reg->encoding() < 8) {
3524 if (adr.base_needs_rex()) {
3525 if (adr.index_needs_rex()) {
3526 prefix(REX_XB);
3527 } else {
3528 prefix(REX_B);
3529 }
3530 } else {
3531 if (adr.index_needs_rex()) {
3532 prefix(REX_X);
3533 } else if (reg->encoding() >= 4 ) {
3534 prefix(REX);
3535 }
3536 }
3537 } else {
3538 if (adr.base_needs_rex()) {
3539 if (adr.index_needs_rex()) {
3540 prefix(REX_RXB);
3541 } else {
3542 prefix(REX_RB);
3543 }
3544 } else {
3545 if (adr.index_needs_rex()) {
3546 prefix(REX_RX);
3547 } else {
3548 prefix(REX_R);
3549 }
3550 }
3551 }
3552 }
3553
3554 void Assembler::prefixq(Address adr, Register src) {
3555 if (src->encoding() < 8) {
3556 if (adr.base_needs_rex()) {
3557 if (adr.index_needs_rex()) {
3558 prefix(REX_WXB);
3559 } else {
3560 prefix(REX_WB);
3561 }
3562 } else {
3563 if (adr.index_needs_rex()) {
3564 prefix(REX_WX);
3565 } else {
3566 prefix(REX_W);
3567 }
3568 }
3569 } else {
3570 if (adr.base_needs_rex()) {
3571 if (adr.index_needs_rex()) {
3572 prefix(REX_WRXB);
3573 } else {
3574 prefix(REX_WRB);
3575 }
3576 } else {
3577 if (adr.index_needs_rex()) {
3578 prefix(REX_WRX);
3579 } else {
3580 prefix(REX_WR);
3581 }
3582 }
3583 }
3584 }
3585
3586 void Assembler::prefix(Address adr, XMMRegister reg) {
3587 if (reg->encoding() < 8) {
3588 if (adr.base_needs_rex()) {
3589 if (adr.index_needs_rex()) {
3590 prefix(REX_XB);
3591 } else {
3592 prefix(REX_B);
3593 }
3594 } else {
3595 if (adr.index_needs_rex()) {
3596 prefix(REX_X);
3597 }
3598 }
3599 } else {
3600 if (adr.base_needs_rex()) {
3601 if (adr.index_needs_rex()) {
3602 prefix(REX_RXB);
3603 } else {
3604 prefix(REX_RB);
3605 }
3606 } else {
3607 if (adr.index_needs_rex()) {
3608 prefix(REX_RX);
3609 } else {
3610 prefix(REX_R);
3611 }
3612 }
3613 }
3614 }
3615
3616 void Assembler::adcq(Register dst, int32_t imm32) {
3617 (void) prefixq_and_encode(dst->encoding());
3618 emit_arith(0x81, 0xD0, dst, imm32);
3619 }
3620
3621 void Assembler::adcq(Register dst, Address src) {
3622 InstructionMark im(this);
3623 prefixq(src, dst);
3624 emit_byte(0x13);
3625 emit_operand(dst, src);
3626 }
3627
3628 void Assembler::adcq(Register dst, Register src) {
3629 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3630 emit_arith(0x13, 0xC0, dst, src);
3631 }
3632
3633 void Assembler::addq(Address dst, int32_t imm32) {
3634 InstructionMark im(this);
3635 prefixq(dst);
3636 emit_arith_operand(0x81, rax, dst,imm32);
3637 }
3638
3639 void Assembler::addq(Address dst, Register src) {
3640 InstructionMark im(this);
3641 prefixq(dst, src);
3642 emit_byte(0x01);
3643 emit_operand(src, dst);
3644 }
3645
3646 void Assembler::addq(Register dst, int32_t imm32) {
3647 (void) prefixq_and_encode(dst->encoding());
3648 emit_arith(0x81, 0xC0, dst, imm32);
3649 }
3650
3651 void Assembler::addq(Register dst, Address src) {
3652 InstructionMark im(this);
3653 prefixq(src, dst);
3654 emit_byte(0x03);
3655 emit_operand(dst, src);
3656 }
3657
3658 void Assembler::addq(Register dst, Register src) {
3659 (void) prefixq_and_encode(dst->encoding(), src->encoding());
3660 emit_arith(0x03, 0xC0, dst, src);
3661 }
3662
3663 void Assembler::andq(Register dst, int32_t imm32) {
3664 (void) prefixq_and_encode(dst->encoding());
3665 emit_arith(0x81, 0xE0, dst, imm32);
3666 }
3667
3668 void Assembler::andq(Register dst, Address src) {
3669 InstructionMark im(this);
3670 prefixq(src, dst);
3671 emit_byte(0x23);
3672 emit_operand(dst, src);
3673 }
3674
3675 void Assembler::andq(Register dst, Register src) {
3676 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3677 emit_arith(0x23, 0xC0, dst, src);
3678 }
3679
3680 void Assembler::bswapq(Register reg) {
3681 int encode = prefixq_and_encode(reg->encoding());
3682 emit_byte(0x0F);
3683 emit_byte(0xC8 | encode);
3684 }
3685
3686 void Assembler::cdqq() {
3687 prefix(REX_W);
3688 emit_byte(0x99);
3689 }
3690
3691 void Assembler::clflush(Address adr) {
3692 prefix(adr);
3693 emit_byte(0x0F);
3694 emit_byte(0xAE);
3695 emit_operand(rdi, adr);
3696 }
3697
3698 void Assembler::cmovq(Condition cc, Register dst, Register src) {
3699 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3700 emit_byte(0x0F);
3701 emit_byte(0x40 | cc);
3702 emit_byte(0xC0 | encode);
3703 }
3704
3705 void Assembler::cmovq(Condition cc, Register dst, Address src) {
3706 InstructionMark im(this);
3707 prefixq(src, dst);
3708 emit_byte(0x0F);
3709 emit_byte(0x40 | cc);
3710 emit_operand(dst, src);
3711 }
3712
3713 void Assembler::cmpq(Address dst, int32_t imm32) {
3714 InstructionMark im(this);
3715 prefixq(dst);
3716 emit_byte(0x81);
3717 emit_operand(rdi, dst, 4);
3718 emit_long(imm32);
3719 }
3720
3721 void Assembler::cmpq(Register dst, int32_t imm32) {
3722 (void) prefixq_and_encode(dst->encoding());
3723 emit_arith(0x81, 0xF8, dst, imm32);
3724 }
3725
3726 void Assembler::cmpq(Address dst, Register src) {
3727 InstructionMark im(this);
3728 prefixq(dst, src);
3729 emit_byte(0x3B);
3730 emit_operand(src, dst);
3731 }
3732
3733 void Assembler::cmpq(Register dst, Register src) {
3734 (void) prefixq_and_encode(dst->encoding(), src->encoding());
3735 emit_arith(0x3B, 0xC0, dst, src);
3736 }
3737
3738 void Assembler::cmpq(Register dst, Address src) {
3739 InstructionMark im(this);
3740 prefixq(src, dst);
3741 emit_byte(0x3B);
3742 emit_operand(dst, src);
3743 }
3744
3745 void Assembler::cmpxchgq(Register reg, Address adr) {
3746 InstructionMark im(this);
3747 prefixq(adr, reg);
3748 emit_byte(0x0F);
3749 emit_byte(0xB1);
3750 emit_operand(reg, adr);
3751 }
3752
3753 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
3754 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3755 emit_byte(0xF2);
3756 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3757 emit_byte(0x0F);
3758 emit_byte(0x2A);
3759 emit_byte(0xC0 | encode);
3760 }
3761
3762 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
3763 NOT_LP64(assert(VM_Version::supports_sse(), ""));
3764 emit_byte(0xF3);
3765 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3766 emit_byte(0x0F);
3767 emit_byte(0x2A);
3768 emit_byte(0xC0 | encode);
3769 }
3770
3771 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
3772 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3773 emit_byte(0xF2);
3774 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3775 emit_byte(0x0F);
3776 emit_byte(0x2C);
3777 emit_byte(0xC0 | encode);
3778 }
3779
3780 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
3781 NOT_LP64(assert(VM_Version::supports_sse(), ""));
3782 emit_byte(0xF3);
3783 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3784 emit_byte(0x0F);
3785 emit_byte(0x2C);
3786 emit_byte(0xC0 | encode);
3787 }
3788
3789 void Assembler::decl(Register dst) {
3790 // Don't use it directly. Use MacroAssembler::decrementl() instead.
3791 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3792 int encode = prefix_and_encode(dst->encoding());
3793 emit_byte(0xFF);
3794 emit_byte(0xC8 | encode);
3795 }
3796
3797 void Assembler::decq(Register dst) {
3798 // Don't use it directly. Use MacroAssembler::decrementq() instead.
3799 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3800 int encode = prefixq_and_encode(dst->encoding());
3801 emit_byte(0xFF);
3802 emit_byte(0xC8 | encode);
3803 }
3804
3805 void Assembler::decq(Address dst) {
3806 // Don't use it directly. Use MacroAssembler::decrementq() instead.
3807 InstructionMark im(this);
3808 prefixq(dst);
3809 emit_byte(0xFF);
3810 emit_operand(rcx, dst);
3811 }
3812
3813 void Assembler::fxrstor(Address src) {
3814 prefixq(src);
3815 emit_byte(0x0F);
3816 emit_byte(0xAE);
3817 emit_operand(as_Register(1), src);
3818 }
3819
3820 void Assembler::fxsave(Address dst) {
3821 prefixq(dst);
3822 emit_byte(0x0F);
3823 emit_byte(0xAE);
3824 emit_operand(as_Register(0), dst);
3825 }
3826
3827 void Assembler::idivq(Register src) {
3828 int encode = prefixq_and_encode(src->encoding());
3829 emit_byte(0xF7);
3830 emit_byte(0xF8 | encode);
3831 }
3832
3833 void Assembler::imulq(Register dst, Register src) {
3834 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3835 emit_byte(0x0F);
3836 emit_byte(0xAF);
3837 emit_byte(0xC0 | encode);
3838 }
3839
3840 void Assembler::imulq(Register dst, Register src, int value) {
3841 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3842 if (is8bit(value)) {
3843 emit_byte(0x6B);
3844 emit_byte(0xC0 | encode);
3845 emit_byte(value);
3846 } else {
3847 emit_byte(0x69);
3848 emit_byte(0xC0 | encode);
3849 emit_long(value);
3850 }
3851 }
3852
3853 void Assembler::incl(Register dst) {
3854 // Don't use it directly. Use MacroAssembler::incrementl() instead.
3855 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3856 int encode = prefix_and_encode(dst->encoding());
3857 emit_byte(0xFF);
3858 emit_byte(0xC0 | encode);
3859 }
3860
3861 void Assembler::incq(Register dst) {
3862 // Don't use it directly. Use MacroAssembler::incrementq() instead.
3863 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3864 int encode = prefixq_and_encode(dst->encoding());
3865 emit_byte(0xFF);
3866 emit_byte(0xC0 | encode);
3867 }
3868
3869 void Assembler::incq(Address dst) {
3870 // Don't use it directly. Use MacroAssembler::incrementq() instead.
3871 InstructionMark im(this);
3872 prefixq(dst);
3873 emit_byte(0xFF);
3874 emit_operand(rax, dst);
3875 }
3876
3877 void Assembler::lea(Register dst, Address src) {
3878 leaq(dst, src);
3879 }
3880
3881 void Assembler::leaq(Register dst, Address src) {
3882 InstructionMark im(this);
3883 prefixq(src, dst);
3884 emit_byte(0x8D);
3885 emit_operand(dst, src);
3886 }
3887
3888 void Assembler::mov64(Register dst, int64_t imm64) {
3889 InstructionMark im(this);
3890 int encode = prefixq_and_encode(dst->encoding());
3891 emit_byte(0xB8 | encode);
3892 emit_long64(imm64);
3893 }
3894
3895 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
3896 InstructionMark im(this);
3897 int encode = prefixq_and_encode(dst->encoding());
3898 emit_byte(0xB8 | encode);
3899 emit_data64(imm64, rspec);
3900 }
3901
3902 void Assembler::movdq(XMMRegister dst, Register src) {
3903 // table D-1 says MMX/SSE2
3904 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
3905 emit_byte(0x66);
3906 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3907 emit_byte(0x0F);
3908 emit_byte(0x6E);
3909 emit_byte(0xC0 | encode);
3910 }
3911
3912 void Assembler::movdq(Register dst, XMMRegister src) {
3913 // table D-1 says MMX/SSE2
3914 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
3915 emit_byte(0x66);
3916 // swap src/dst to get correct prefix
3917 int encode = prefixq_and_encode(src->encoding(), dst->encoding());
3918 emit_byte(0x0F);
3919 emit_byte(0x7E);
3920 emit_byte(0xC0 | encode);
3921 }
3922
3923 void Assembler::movq(Register dst, Register src) {
3924 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3925 emit_byte(0x8B);
3926 emit_byte(0xC0 | encode);
3927 }
3928
3929 void Assembler::movq(Register dst, Address src) {
3930 InstructionMark im(this);
3931 prefixq(src, dst);
3932 emit_byte(0x8B);
3933 emit_operand(dst, src);
3934 }
3935
3936 void Assembler::movq(Address dst, Register src) {
3937 InstructionMark im(this);
3938 prefixq(dst, src);
3939 emit_byte(0x89);
3940 emit_operand(src, dst);
3941 }
3942
3943 void Assembler::movslq(Register dst, int32_t imm32) {
3944 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx)
3945 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx)
3946 // as a result we shouldn't use until tested at runtime...
3947 ShouldNotReachHere();
3948 InstructionMark im(this);
3949 int encode = prefixq_and_encode(dst->encoding());
3950 emit_byte(0xC7 | encode);
3951 emit_long(imm32);
3952 }
3953
3954 void Assembler::movslq(Address dst, int32_t imm32) {
3955 assert(is_simm32(imm32), "lost bits");
3956 InstructionMark im(this);
3957 prefixq(dst);
3958 emit_byte(0xC7);
3959 emit_operand(rax, dst, 4);
3960 emit_long(imm32);
3961 }
3962
3963 void Assembler::movslq(Register dst, Address src) {
3964 InstructionMark im(this);
3965 prefixq(src, dst);
3966 emit_byte(0x63);
3967 emit_operand(dst, src);
3968 }
3969
3970 void Assembler::movslq(Register dst, Register src) {
3971 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3972 emit_byte(0x63);
3973 emit_byte(0xC0 | encode);
3974 }
3975
3976 void Assembler::negq(Register dst) {
3977 int encode = prefixq_and_encode(dst->encoding());
3978 emit_byte(0xF7);
3979 emit_byte(0xD8 | encode);
3980 }
3981
3982 void Assembler::notq(Register dst) {
3983 int encode = prefixq_and_encode(dst->encoding());
3984 emit_byte(0xF7);
3985 emit_byte(0xD0 | encode);
3986 }
3987
3988 void Assembler::orq(Address dst, int32_t imm32) {
3989 InstructionMark im(this);
3990 prefixq(dst);
3991 emit_byte(0x81);
3992 emit_operand(rcx, dst, 4);
3993 emit_long(imm32);
3994 }
3995
3996 void Assembler::orq(Register dst, int32_t imm32) {
3997 (void) prefixq_and_encode(dst->encoding());
3998 emit_arith(0x81, 0xC8, dst, imm32);
3999 }
4000
4001 void Assembler::orq(Register dst, Address src) {
4002 InstructionMark im(this);
4003 prefixq(src, dst);
4004 emit_byte(0x0B);
4005 emit_operand(dst, src);
4006 }
4007
4008 void Assembler::orq(Register dst, Register src) {
4009 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4010 emit_arith(0x0B, 0xC0, dst, src);
4011 }
4012
4013 void Assembler::popa() { // 64bit
4014 movq(r15, Address(rsp, 0));
4015 movq(r14, Address(rsp, wordSize));
4016 movq(r13, Address(rsp, 2 * wordSize));
4017 movq(r12, Address(rsp, 3 * wordSize));
4018 movq(r11, Address(rsp, 4 * wordSize));
4019 movq(r10, Address(rsp, 5 * wordSize));
4020 movq(r9, Address(rsp, 6 * wordSize));
4021 movq(r8, Address(rsp, 7 * wordSize));
4022 movq(rdi, Address(rsp, 8 * wordSize));
4023 movq(rsi, Address(rsp, 9 * wordSize));
4024 movq(rbp, Address(rsp, 10 * wordSize));
4025 // skip rsp
4026 movq(rbx, Address(rsp, 12 * wordSize));
4027 movq(rdx, Address(rsp, 13 * wordSize));
4028 movq(rcx, Address(rsp, 14 * wordSize));
4029 movq(rax, Address(rsp, 15 * wordSize));
4030
4031 addq(rsp, 16 * wordSize);
4032 }
4033
4034 void Assembler::popq(Address dst) {
4035 InstructionMark im(this);
4036 prefixq(dst);
4037 emit_byte(0x8F);
4038 emit_operand(rax, dst);
4039 }
4040
4041 void Assembler::pusha() { // 64bit
4042 // we have to store original rsp. ABI says that 128 bytes
4043 // below rsp are local scratch.
4044 movq(Address(rsp, -5 * wordSize), rsp);
4045
4046 subq(rsp, 16 * wordSize);
4047
4048 movq(Address(rsp, 15 * wordSize), rax);
4049 movq(Address(rsp, 14 * wordSize), rcx);
4050 movq(Address(rsp, 13 * wordSize), rdx);
4051 movq(Address(rsp, 12 * wordSize), rbx);
4052 // skip rsp
4053 movq(Address(rsp, 10 * wordSize), rbp);
4054 movq(Address(rsp, 9 * wordSize), rsi);
4055 movq(Address(rsp, 8 * wordSize), rdi);
4056 movq(Address(rsp, 7 * wordSize), r8);
4057 movq(Address(rsp, 6 * wordSize), r9);
4058 movq(Address(rsp, 5 * wordSize), r10);
4059 movq(Address(rsp, 4 * wordSize), r11);
4060 movq(Address(rsp, 3 * wordSize), r12);
4061 movq(Address(rsp, 2 * wordSize), r13);
4062 movq(Address(rsp, wordSize), r14);
4063 movq(Address(rsp, 0), r15);
4064 }
4065
4066 void Assembler::pushq(Address src) {
4067 InstructionMark im(this);
4068 prefixq(src);
4069 emit_byte(0xFF);
4070 emit_operand(rsi, src);
4071 }
4072
4073 void Assembler::rclq(Register dst, int imm8) {
4074 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4075 int encode = prefixq_and_encode(dst->encoding());
4076 if (imm8 == 1) {
4077 emit_byte(0xD1);
4078 emit_byte(0xD0 | encode);
4079 } else {
4080 emit_byte(0xC1);
4081 emit_byte(0xD0 | encode);
4082 emit_byte(imm8);
4083 }
4084 }
4085 void Assembler::sarq(Register dst, int imm8) {
4086 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4087 int encode = prefixq_and_encode(dst->encoding());
4088 if (imm8 == 1) {
4089 emit_byte(0xD1);
4090 emit_byte(0xF8 | encode);
4091 } else {
4092 emit_byte(0xC1);
4093 emit_byte(0xF8 | encode);
4094 emit_byte(imm8);
4095 }
4096 }
4097
4098 void Assembler::sarq(Register dst) {
4099 int encode = prefixq_and_encode(dst->encoding());
4100 emit_byte(0xD3);
4101 emit_byte(0xF8 | encode);
4102 }
4103 void Assembler::sbbq(Address dst, int32_t imm32) {
4104 InstructionMark im(this);
4105 prefixq(dst);
4106 emit_arith_operand(0x81, rbx, dst, imm32);
4107 }
4108
4109 void Assembler::sbbq(Register dst, int32_t imm32) {
4110 (void) prefixq_and_encode(dst->encoding());
4111 emit_arith(0x81, 0xD8, dst, imm32);
4112 }
4113
4114 void Assembler::sbbq(Register dst, Address src) {
4115 InstructionMark im(this);
4116 prefixq(src, dst);
4117 emit_byte(0x1B);
4118 emit_operand(dst, src);
4119 }
4120
4121 void Assembler::sbbq(Register dst, Register src) {
4122 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4123 emit_arith(0x1B, 0xC0, dst, src);
4124 }
4125
4126 void Assembler::shlq(Register dst, int imm8) {
4127 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4128 int encode = prefixq_and_encode(dst->encoding());
4129 if (imm8 == 1) {
4130 emit_byte(0xD1);
4131 emit_byte(0xE0 | encode);
4132 } else {
4133 emit_byte(0xC1);
4134 emit_byte(0xE0 | encode);
4135 emit_byte(imm8);
4136 }
4137 }
4138
4139 void Assembler::shlq(Register dst) {
4140 int encode = prefixq_and_encode(dst->encoding());
4141 emit_byte(0xD3);
4142 emit_byte(0xE0 | encode);
4143 }
4144
4145 void Assembler::shrq(Register dst, int imm8) {
4146 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4147 int encode = prefixq_and_encode(dst->encoding());
4148 emit_byte(0xC1);
4149 emit_byte(0xE8 | encode);
4150 emit_byte(imm8);
4151 }
4152
4153 void Assembler::shrq(Register dst) {
4154 int encode = prefixq_and_encode(dst->encoding());
4155 emit_byte(0xD3);
4156 emit_byte(0xE8 | encode);
4157 }
4158
4159 void Assembler::sqrtsd(XMMRegister dst, Address src) {
4160 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4161 InstructionMark im(this);
4162 emit_byte(0xF2);
4163 prefix(src, dst);
4164 emit_byte(0x0F);
4165 emit_byte(0x51);
4166 emit_operand(dst, src);
4167 }
4168
4169 void Assembler::subq(Address dst, int32_t imm32) {
4170 InstructionMark im(this);
4171 prefixq(dst);
4172 if (is8bit(imm32)) {
4173 emit_byte(0x83);
4174 emit_operand(rbp, dst, 1);
4175 emit_byte(imm32 & 0xFF);
4176 } else {
4177 emit_byte(0x81);
4178 emit_operand(rbp, dst, 4);
4179 emit_long(imm32);
4180 }
4181 }
4182
4183 void Assembler::subq(Register dst, int32_t imm32) {
4184 (void) prefixq_and_encode(dst->encoding());
4185 emit_arith(0x81, 0xE8, dst, imm32);
4186 }
4187
4188 void Assembler::subq(Address dst, Register src) {
4189 InstructionMark im(this);
4190 prefixq(dst, src);
4191 emit_byte(0x29);
4192 emit_operand(src, dst);
4193 }
4194
4195 void Assembler::subq(Register dst, Address src) {
4196 InstructionMark im(this);
4197 prefixq(src, dst);
4198 emit_byte(0x2B);
4199 emit_operand(dst, src);
4200 }
4201
4202 void Assembler::subq(Register dst, Register src) {
4203 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4204 emit_arith(0x2B, 0xC0, dst, src);
4205 }
4206
4207 void Assembler::testq(Register dst, int32_t imm32) {
4208 // not using emit_arith because test
4209 // doesn't support sign-extension of
4210 // 8bit operands
4211 int encode = dst->encoding();
4212 if (encode == 0) {
4213 prefix(REX_W);
4214 emit_byte(0xA9);
4215 } else {
4216 encode = prefixq_and_encode(encode);
4217 emit_byte(0xF7);
4218 emit_byte(0xC0 | encode);
4219 }
4220 emit_long(imm32);
4221 }
4222
4223 void Assembler::testq(Register dst, Register src) {
4224 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4225 emit_arith(0x85, 0xC0, dst, src);
4226 }
4227
4228 void Assembler::xaddq(Address dst, Register src) {
4229 InstructionMark im(this);
4230 prefixq(dst, src);
4231 emit_byte(0x0F);
4232 emit_byte(0xC1);
4233 emit_operand(src, dst);
4234 }
4235
4236 void Assembler::xchgq(Register dst, Address src) {
4237 InstructionMark im(this);
4238 prefixq(src, dst);
4239 emit_byte(0x87);
4240 emit_operand(dst, src);
4241 }
4242
4243 void Assembler::xchgq(Register dst, Register src) {
4244 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4245 emit_byte(0x87);
4246 emit_byte(0xc0 | encode);
4247 }
4248
4249 void Assembler::xorq(Register dst, Register src) {
4250 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4251 emit_arith(0x33, 0xC0, dst, src);
4252 }
4253
4254 void Assembler::xorq(Register dst, Address src) {
4255 InstructionMark im(this);
4256 prefixq(src, dst);
4257 emit_byte(0x33);
4258 emit_operand(dst, src);
4259 }
4260
4261 #endif // !LP64
4262
4263 static Assembler::Condition reverse[] = {
4264 Assembler::noOverflow /* overflow = 0x0 */ ,
4265 Assembler::overflow /* noOverflow = 0x1 */ ,
4266 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
4267 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
4268 Assembler::notZero /* zero = 0x4, equal = 0x4 */ ,
4269 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ ,
4270 Assembler::above /* belowEqual = 0x6 */ ,
4271 Assembler::belowEqual /* above = 0x7 */ ,
4272 Assembler::positive /* negative = 0x8 */ ,
4273 Assembler::negative /* positive = 0x9 */ ,
4274 Assembler::noParity /* parity = 0xa */ ,
4275 Assembler::parity /* noParity = 0xb */ ,
4276 Assembler::greaterEqual /* less = 0xc */ ,
4277 Assembler::less /* greaterEqual = 0xd */ ,
4278 Assembler::greater /* lessEqual = 0xe */ ,
4279 Assembler::lessEqual /* greater = 0xf, */
4280
4281 };
4282
4283
4284 // Implementation of MacroAssembler
4285
4286 // First all the versions that have distinct versions depending on 32/64 bit
4287 // Unless the difference is trivial (1 line or so).
4288
4289 #ifndef _LP64
4290
4291 // 32bit versions
4292
4293 Address MacroAssembler::as_Address(AddressLiteral adr) {
4294 return Address(adr.target(), adr.rspec());
4295 }
4296
4297 Address MacroAssembler::as_Address(ArrayAddress adr) {
4298 return Address::make_array(adr);
4299 }
4300
4301 int MacroAssembler::biased_locking_enter(Register lock_reg,
4302 Register obj_reg,
4303 Register swap_reg,
4304 Register tmp_reg,
4305 bool swap_reg_contains_mark,
4306 Label& done,
4307 Label* slow_case,
4308 BiasedLockingCounters* counters) {
4309 assert(UseBiasedLocking, "why call this otherwise?");
4310 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
4311 assert_different_registers(lock_reg, obj_reg, swap_reg);
4312
4313 if (PrintBiasedLockingStatistics && counters == NULL)
4314 counters = BiasedLocking::counters();
4315
4316 bool need_tmp_reg = false;
4317 if (tmp_reg == noreg) {
4318 need_tmp_reg = true;
4319 tmp_reg = lock_reg;
4320 } else {
4321 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4322 }
4323 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4324 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
4325 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
4326 Address saved_mark_addr(lock_reg, 0);
4327
4328 // Biased locking
4329 // See whether the lock is currently biased toward our thread and
4330 // whether the epoch is still valid
4331 // Note that the runtime guarantees sufficient alignment of JavaThread
4332 // pointers to allow age to be placed into low bits
4333 // First check to see whether biasing is even enabled for this object
4334 Label cas_label;
4335 int null_check_offset = -1;
4336 if (!swap_reg_contains_mark) {
4337 null_check_offset = offset();
4338 movl(swap_reg, mark_addr);
4339 }
4340 if (need_tmp_reg) {
4341 push(tmp_reg);
4342 }
4343 movl(tmp_reg, swap_reg);
4344 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4345 cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
4346 if (need_tmp_reg) {
4347 pop(tmp_reg);
4348 }
4349 jcc(Assembler::notEqual, cas_label);
4350 // The bias pattern is present in the object's header. Need to check
4351 // whether the bias owner and the epoch are both still current.
4352 // Note that because there is no current thread register on x86 we
4353 // need to store off the mark word we read out of the object to
4354 // avoid reloading it and needing to recheck invariants below. This
4355 // store is unfortunate but it makes the overall code shorter and
4356 // simpler.
4357 movl(saved_mark_addr, swap_reg);
4358 if (need_tmp_reg) {
4359 push(tmp_reg);
4360 }
4361 get_thread(tmp_reg);
4362 xorl(swap_reg, tmp_reg);
4363 if (swap_reg_contains_mark) {
4364 null_check_offset = offset();
4365 }
4366 movl(tmp_reg, klass_addr);
4367 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4368 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
4369 if (need_tmp_reg) {
4370 pop(tmp_reg);
4371 }
4372 if (counters != NULL) {
4373 cond_inc32(Assembler::zero,
4374 ExternalAddress((address)counters->biased_lock_entry_count_addr()));
4375 }
4376 jcc(Assembler::equal, done);
4377
4378 Label try_revoke_bias;
4379 Label try_rebias;
4380
4381 // At this point we know that the header has the bias pattern and
4382 // that we are not the bias owner in the current epoch. We need to
4383 // figure out more details about the state of the header in order to
4384 // know what operations can be legally performed on the object's
4385 // header.
4386
4387 // If the low three bits in the xor result aren't clear, that means
4388 // the prototype header is no longer biased and we have to revoke
4389 // the bias on this object.
4390 testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
4391 jcc(Assembler::notZero, try_revoke_bias);
4392
4393 // Biasing is still enabled for this data type. See whether the
4394 // epoch of the current bias is still valid, meaning that the epoch
4395 // bits of the mark word are equal to the epoch bits of the
4396 // prototype header. (Note that the prototype header's epoch bits
4397 // only change at a safepoint.) If not, attempt to rebias the object
4398 // toward the current thread. Note that we must be absolutely sure
4399 // that the current epoch is invalid in order to do this because
4400 // otherwise the manipulations it performs on the mark word are
4401 // illegal.
4402 testl(swap_reg, markOopDesc::epoch_mask_in_place);
4403 jcc(Assembler::notZero, try_rebias);
4404
4405 // The epoch of the current bias is still valid but we know nothing
4406 // about the owner; it might be set or it might be clear. Try to
4407 // acquire the bias of the object using an atomic operation. If this
4408 // fails we will go in to the runtime to revoke the object's bias.
4409 // Note that we first construct the presumed unbiased header so we
4410 // don't accidentally blow away another thread's valid bias.
4411 movl(swap_reg, saved_mark_addr);
4412 andl(swap_reg,
4413 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4414 if (need_tmp_reg) {
4415 push(tmp_reg);
4416 }
4417 get_thread(tmp_reg);
4418 orl(tmp_reg, swap_reg);
4419 if (os::is_MP()) {
4420 lock();
4421 }
4422 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4423 if (need_tmp_reg) {
4424 pop(tmp_reg);
4425 }
4426 // If the biasing toward our thread failed, this means that
4427 // another thread succeeded in biasing it toward itself and we
4428 // need to revoke that bias. The revocation will occur in the
4429 // interpreter runtime in the slow case.
4430 if (counters != NULL) {
4431 cond_inc32(Assembler::zero,
4432 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
4433 }
4434 if (slow_case != NULL) {
4435 jcc(Assembler::notZero, *slow_case);
4436 }
4437 jmp(done);
4438
4439 bind(try_rebias);
4440 // At this point we know the epoch has expired, meaning that the
4441 // current "bias owner", if any, is actually invalid. Under these
4442 // circumstances _only_, we are allowed to use the current header's
4443 // value as the comparison value when doing the cas to acquire the
4444 // bias in the current epoch. In other words, we allow transfer of
4445 // the bias from one thread to another directly in this situation.
4446 //
4447 // FIXME: due to a lack of registers we currently blow away the age
4448 // bits in this situation. Should attempt to preserve them.
4449 if (need_tmp_reg) {
4450 push(tmp_reg);
4451 }
4452 get_thread(tmp_reg);
4453 movl(swap_reg, klass_addr);
4454 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4455 movl(swap_reg, saved_mark_addr);
4456 if (os::is_MP()) {
4457 lock();
4458 }
4459 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4460 if (need_tmp_reg) {
4461 pop(tmp_reg);
4462 }
4463 // If the biasing toward our thread failed, then another thread
4464 // succeeded in biasing it toward itself and we need to revoke that
4465 // bias. The revocation will occur in the runtime in the slow case.
4466 if (counters != NULL) {
4467 cond_inc32(Assembler::zero,
4468 ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
4469 }
4470 if (slow_case != NULL) {
4471 jcc(Assembler::notZero, *slow_case);
4472 }
4473 jmp(done);
4474
4475 bind(try_revoke_bias);
4476 // The prototype mark in the klass doesn't have the bias bit set any
4477 // more, indicating that objects of this data type are not supposed
4478 // to be biased any more. We are going to try to reset the mark of
4479 // this object to the prototype value and fall through to the
4480 // CAS-based locking scheme. Note that if our CAS fails, it means
4481 // that another thread raced us for the privilege of revoking the
4482 // bias of this particular object, so it's okay to continue in the
4483 // normal locking code.
4484 //
4485 // FIXME: due to a lack of registers we currently blow away the age
4486 // bits in this situation. Should attempt to preserve them.
4487 movl(swap_reg, saved_mark_addr);
4488 if (need_tmp_reg) {
4489 push(tmp_reg);
4490 }
4491 movl(tmp_reg, klass_addr);
4492 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4493 if (os::is_MP()) {
4494 lock();
4495 }
4496 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4497 if (need_tmp_reg) {
4498 pop(tmp_reg);
4499 }
4500 // Fall through to the normal CAS-based lock, because no matter what
4501 // the result of the above CAS, some thread must have succeeded in
4502 // removing the bias bit from the object's header.
4503 if (counters != NULL) {
4504 cond_inc32(Assembler::zero,
4505 ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
4506 }
4507
4508 bind(cas_label);
4509
4510 return null_check_offset;
4511 }
4512 void MacroAssembler::call_VM_leaf_base(address entry_point,
4513 int number_of_arguments) {
4514 call(RuntimeAddress(entry_point));
4515 increment(rsp, number_of_arguments * wordSize);
4516 }
4517
4518 void MacroAssembler::cmpoop(Address src1, jobject obj) {
4519 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4520 }
4521
4522 void MacroAssembler::cmpoop(Register src1, jobject obj) {
4523 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4524 }
4525
4526 void MacroAssembler::extend_sign(Register hi, Register lo) {
4527 // According to Intel Doc. AP-526, "Integer Divide", p.18.
4528 if (VM_Version::is_P6() && hi == rdx && lo == rax) {
4529 cdql();
4530 } else {
4531 movl(hi, lo);
4532 sarl(hi, 31);
4533 }
4534 }
4535
4536 void MacroAssembler::fat_nop() {
4537 // A 5 byte nop that is safe for patching (see patch_verified_entry)
4538 emit_byte(0x26); // es:
4539 emit_byte(0x2e); // cs:
4540 emit_byte(0x64); // fs:
4541 emit_byte(0x65); // gs:
4542 emit_byte(0x90);
4543 }
4544
4545 void MacroAssembler::jC2(Register tmp, Label& L) {
4546 // set parity bit if FPU flag C2 is set (via rax)
4547 save_rax(tmp);
4548 fwait(); fnstsw_ax();
4549 sahf();
4550 restore_rax(tmp);
4551 // branch
4552 jcc(Assembler::parity, L);
4553 }
4554
4555 void MacroAssembler::jnC2(Register tmp, Label& L) {
4556 // set parity bit if FPU flag C2 is set (via rax)
4557 save_rax(tmp);
4558 fwait(); fnstsw_ax();
4559 sahf();
4560 restore_rax(tmp);
4561 // branch
4562 jcc(Assembler::noParity, L);
4563 }
4564
4565 // 32bit can do a case table jump in one instruction but we no longer allow the base
4566 // to be installed in the Address class
4567 void MacroAssembler::jump(ArrayAddress entry) {
4568 jmp(as_Address(entry));
4569 }
4570
4571 // Note: y_lo will be destroyed
4572 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
4573 // Long compare for Java (semantics as described in JVM spec.)
4574 Label high, low, done;
4575
4576 cmpl(x_hi, y_hi);
4577 jcc(Assembler::less, low);
4578 jcc(Assembler::greater, high);
4579 // x_hi is the return register
4580 xorl(x_hi, x_hi);
4581 cmpl(x_lo, y_lo);
4582 jcc(Assembler::below, low);
4583 jcc(Assembler::equal, done);
4584
4585 bind(high);
4586 xorl(x_hi, x_hi);
4587 increment(x_hi);
4588 jmp(done);
4589
4590 bind(low);
4591 xorl(x_hi, x_hi);
4592 decrementl(x_hi);
4593
4594 bind(done);
4595 }
4596
4597 void MacroAssembler::lea(Register dst, AddressLiteral src) {
4598 mov_literal32(dst, (int32_t)src.target(), src.rspec());
4599 }
4600
4601 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
4602 // leal(dst, as_Address(adr));
4603 // see note in movl as to why we must use a move
4604 mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
4605 }
4606
4607 void MacroAssembler::leave() {
4608 mov(rsp, rbp);
4609 pop(rbp);
4610 }
4611
4612 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
4613 // Multiplication of two Java long values stored on the stack
4614 // as illustrated below. Result is in rdx:rax.
4615 //
4616 // rsp ---> [ ?? ] \ \
4617 // .... | y_rsp_offset |
4618 // [ y_lo ] / (in bytes) | x_rsp_offset
4619 // [ y_hi ] | (in bytes)
4620 // .... |
4621 // [ x_lo ] /
4622 // [ x_hi ]
4623 // ....
4624 //
4625 // Basic idea: lo(result) = lo(x_lo * y_lo)
4626 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
4627 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
4628 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
4629 Label quick;
4630 // load x_hi, y_hi and check if quick
4631 // multiplication is possible
4632 movl(rbx, x_hi);
4633 movl(rcx, y_hi);
4634 movl(rax, rbx);
4635 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0
4636 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply
4637 // do full multiplication
4638 // 1st step
4639 mull(y_lo); // x_hi * y_lo
4640 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx,
4641 // 2nd step
4642 movl(rax, x_lo);
4643 mull(rcx); // x_lo * y_hi
4644 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx,
4645 // 3rd step
4646 bind(quick); // note: rbx, = 0 if quick multiply!
4647 movl(rax, x_lo);
4648 mull(y_lo); // x_lo * y_lo
4649 addl(rdx, rbx); // correct hi(x_lo * y_lo)
4650 }
4651
4652 void MacroAssembler::lneg(Register hi, Register lo) {
4653 negl(lo);
4654 adcl(hi, 0);
4655 negl(hi);
4656 }
4657
4658 void MacroAssembler::lshl(Register hi, Register lo) {
4659 // Java shift left long support (semantics as described in JVM spec., p.305)
4660 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
4661 // shift value is in rcx !
4662 assert(hi != rcx, "must not use rcx");
4663 assert(lo != rcx, "must not use rcx");
4664 const Register s = rcx; // shift count
4665 const int n = BitsPerWord;
4666 Label L;
4667 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
4668 cmpl(s, n); // if (s < n)
4669 jcc(Assembler::less, L); // else (s >= n)
4670 movl(hi, lo); // x := x << n
4671 xorl(lo, lo);
4672 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4673 bind(L); // s (mod n) < n
4674 shldl(hi, lo); // x := x << s
4675 shll(lo);
4676 }
4677
4678
4679 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
4680 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
4681 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
4682 assert(hi != rcx, "must not use rcx");
4683 assert(lo != rcx, "must not use rcx");
4684 const Register s = rcx; // shift count
4685 const int n = BitsPerWord;
4686 Label L;
4687 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
4688 cmpl(s, n); // if (s < n)
4689 jcc(Assembler::less, L); // else (s >= n)
4690 movl(lo, hi); // x := x >> n
4691 if (sign_extension) sarl(hi, 31);
4692 else xorl(hi, hi);
4693 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4694 bind(L); // s (mod n) < n
4695 shrdl(lo, hi); // x := x >> s
4696 if (sign_extension) sarl(hi);
4697 else shrl(hi);
4698 }
4699
4700 void MacroAssembler::movoop(Register dst, jobject obj) {
4701 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4702 }
4703
4704 void MacroAssembler::movoop(Address dst, jobject obj) {
4705 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4706 }
4707
4708 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
4709 if (src.is_lval()) {
4710 mov_literal32(dst, (intptr_t)src.target(), src.rspec());
4711 } else {
4712 movl(dst, as_Address(src));
4713 }
4714 }
4715
4716 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
4717 movl(as_Address(dst), src);
4718 }
4719
4720 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
4721 movl(dst, as_Address(src));
4722 }
4723
4724 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
4725 void MacroAssembler::movptr(Address dst, intptr_t src) {
4726 movl(dst, src);
4727 }
4728
4729
4730 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
4731 movsd(dst, as_Address(src));
4732 }
4733
4734 void MacroAssembler::pop_callee_saved_registers() {
4735 pop(rcx);
4736 pop(rdx);
4737 pop(rdi);
4738 pop(rsi);
4739 }
4740
4741 void MacroAssembler::pop_fTOS() {
4742 fld_d(Address(rsp, 0));
4743 addl(rsp, 2 * wordSize);
4744 }
4745
4746 void MacroAssembler::push_callee_saved_registers() {
4747 push(rsi);
4748 push(rdi);
4749 push(rdx);
4750 push(rcx);
4751 }
4752
4753 void MacroAssembler::push_fTOS() {
4754 subl(rsp, 2 * wordSize);
4755 fstp_d(Address(rsp, 0));
4756 }
4757
4758
4759 void MacroAssembler::pushoop(jobject obj) {
4760 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
4761 }
4762
4763
4764 void MacroAssembler::pushptr(AddressLiteral src) {
4765 if (src.is_lval()) {
4766 push_literal32((int32_t)src.target(), src.rspec());
4767 } else {
4768 pushl(as_Address(src));
4769 }
4770 }
4771
4772 void MacroAssembler::set_word_if_not_zero(Register dst) {
4773 xorl(dst, dst);
4774 set_byte_if_not_zero(dst);
4775 }
4776
4777 static void pass_arg0(MacroAssembler* masm, Register arg) {
4778 masm->push(arg);
4779 }
4780
4781 static void pass_arg1(MacroAssembler* masm, Register arg) {
4782 masm->push(arg);
4783 }
4784
4785 static void pass_arg2(MacroAssembler* masm, Register arg) {
4786 masm->push(arg);
4787 }
4788
4789 static void pass_arg3(MacroAssembler* masm, Register arg) {
4790 masm->push(arg);
4791 }
4792
4793 #ifndef PRODUCT
4794 extern "C" void findpc(intptr_t x);
4795 #endif
4796
4797 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
4798 // In order to get locks to work, we need to fake a in_VM state
4799 JavaThread* thread = JavaThread::current();
4800 JavaThreadState saved_state = thread->thread_state();
4801 thread->set_thread_state(_thread_in_vm);
4802 if (ShowMessageBoxOnError) {
4803 JavaThread* thread = JavaThread::current();
4804 JavaThreadState saved_state = thread->thread_state();
4805 thread->set_thread_state(_thread_in_vm);
4806 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
4807 ttyLocker ttyl;
4808 BytecodeCounter::print();
4809 }
4810 // To see where a verify_oop failed, get $ebx+40/X for this frame.
4811 // This is the value of eip which points to where verify_oop will return.
4812 if (os::message_box(msg, "Execution stopped, print registers?")) {
4813 ttyLocker ttyl;
4814 tty->print_cr("eip = 0x%08x", eip);
4815 #ifndef PRODUCT
4816 tty->cr();
4817 findpc(eip);
4818 tty->cr();
4819 #endif
4820 tty->print_cr("rax, = 0x%08x", rax);
4821 tty->print_cr("rbx, = 0x%08x", rbx);
4822 tty->print_cr("rcx = 0x%08x", rcx);
4823 tty->print_cr("rdx = 0x%08x", rdx);
4824 tty->print_cr("rdi = 0x%08x", rdi);
4825 tty->print_cr("rsi = 0x%08x", rsi);
4826 tty->print_cr("rbp, = 0x%08x", rbp);
4827 tty->print_cr("rsp = 0x%08x", rsp);
4828 BREAKPOINT;
4829 }
4830 } else {
4831 ttyLocker ttyl;
4832 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
4833 assert(false, "DEBUG MESSAGE");
4834 }
4835 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
4836 }
4837
4838 void MacroAssembler::stop(const char* msg) {
4839 ExternalAddress message((address)msg);
4840 // push address of message
4841 pushptr(message.addr());
4842 { Label L; call(L, relocInfo::none); bind(L); } // push eip
4843 pusha(); // push registers
4844 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
4845 hlt();
4846 }
4847
4848 void MacroAssembler::warn(const char* msg) {
4849 push_CPU_state();
4850
4851 ExternalAddress message((address) msg);
4852 // push address of message
4853 pushptr(message.addr());
4854
4855 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
4856 addl(rsp, wordSize); // discard argument
4857 pop_CPU_state();
4858 }
4859
4860 #else // _LP64
4861
4862 // 64 bit versions
4863
4864 Address MacroAssembler::as_Address(AddressLiteral adr) {
4865 // amd64 always does this as a pc-rel
4866 // we can be absolute or disp based on the instruction type
4867 // jmp/call are displacements others are absolute
4868 assert(!adr.is_lval(), "must be rval");
4869 assert(reachable(adr), "must be");
4870 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
4871
4872 }
4873
4874 Address MacroAssembler::as_Address(ArrayAddress adr) {
4875 AddressLiteral base = adr.base();
4876 lea(rscratch1, base);
4877 Address index = adr.index();
4878 assert(index._disp == 0, "must not have disp"); // maybe it can?
4879 Address array(rscratch1, index._index, index._scale, index._disp);
4880 return array;
4881 }
4882
4883 int MacroAssembler::biased_locking_enter(Register lock_reg,
4884 Register obj_reg,
4885 Register swap_reg,
4886 Register tmp_reg,
4887 bool swap_reg_contains_mark,
4888 Label& done,
4889 Label* slow_case,
4890 BiasedLockingCounters* counters) {
4891 assert(UseBiasedLocking, "why call this otherwise?");
4892 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
4893 assert(tmp_reg != noreg, "tmp_reg must be supplied");
4894 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4895 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4896 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
4897 Address saved_mark_addr(lock_reg, 0);
4898
4899 if (PrintBiasedLockingStatistics && counters == NULL)
4900 counters = BiasedLocking::counters();
4901
4902 // Biased locking
4903 // See whether the lock is currently biased toward our thread and
4904 // whether the epoch is still valid
4905 // Note that the runtime guarantees sufficient alignment of JavaThread
4906 // pointers to allow age to be placed into low bits
4907 // First check to see whether biasing is even enabled for this object
4908 Label cas_label;
4909 int null_check_offset = -1;
4910 if (!swap_reg_contains_mark) {
4911 null_check_offset = offset();
4912 movq(swap_reg, mark_addr);
4913 }
4914 movq(tmp_reg, swap_reg);
4915 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4916 cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
4917 jcc(Assembler::notEqual, cas_label);
4918 // The bias pattern is present in the object's header. Need to check
4919 // whether the bias owner and the epoch are both still current.
4920 load_prototype_header(tmp_reg, obj_reg);
4921 orq(tmp_reg, r15_thread);
4922 xorq(tmp_reg, swap_reg);
4923 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
4924 if (counters != NULL) {
4925 cond_inc32(Assembler::zero,
4926 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
4927 }
4928 jcc(Assembler::equal, done);
4929
4930 Label try_revoke_bias;
4931 Label try_rebias;
4932
4933 // At this point we know that the header has the bias pattern and
4934 // that we are not the bias owner in the current epoch. We need to
4935 // figure out more details about the state of the header in order to
4936 // know what operations can be legally performed on the object's
4937 // header.
4938
4939 // If the low three bits in the xor result aren't clear, that means
4940 // the prototype header is no longer biased and we have to revoke
4941 // the bias on this object.
4942 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4943 jcc(Assembler::notZero, try_revoke_bias);
4944
4945 // Biasing is still enabled for this data type. See whether the
4946 // epoch of the current bias is still valid, meaning that the epoch
4947 // bits of the mark word are equal to the epoch bits of the
4948 // prototype header. (Note that the prototype header's epoch bits
4949 // only change at a safepoint.) If not, attempt to rebias the object
4950 // toward the current thread. Note that we must be absolutely sure
4951 // that the current epoch is invalid in order to do this because
4952 // otherwise the manipulations it performs on the mark word are
4953 // illegal.
4954 testq(tmp_reg, markOopDesc::epoch_mask_in_place);
4955 jcc(Assembler::notZero, try_rebias);
4956
4957 // The epoch of the current bias is still valid but we know nothing
4958 // about the owner; it might be set or it might be clear. Try to
4959 // acquire the bias of the object using an atomic operation. If this
4960 // fails we will go in to the runtime to revoke the object's bias.
4961 // Note that we first construct the presumed unbiased header so we
4962 // don't accidentally blow away another thread's valid bias.
4963 andq(swap_reg,
4964 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4965 movq(tmp_reg, swap_reg);
4966 orq(tmp_reg, r15_thread);
4967 if (os::is_MP()) {
4968 lock();
4969 }
4970 cmpxchgq(tmp_reg, Address(obj_reg, 0));
4971 // If the biasing toward our thread failed, this means that
4972 // another thread succeeded in biasing it toward itself and we
4973 // need to revoke that bias. The revocation will occur in the
4974 // interpreter runtime in the slow case.
4975 if (counters != NULL) {
4976 cond_inc32(Assembler::zero,
4977 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
4978 }
4979 if (slow_case != NULL) {
4980 jcc(Assembler::notZero, *slow_case);
4981 }
4982 jmp(done);
4983
4984 bind(try_rebias);
4985 // At this point we know the epoch has expired, meaning that the
4986 // current "bias owner", if any, is actually invalid. Under these
4987 // circumstances _only_, we are allowed to use the current header's
4988 // value as the comparison value when doing the cas to acquire the
4989 // bias in the current epoch. In other words, we allow transfer of
4990 // the bias from one thread to another directly in this situation.
4991 //
4992 // FIXME: due to a lack of registers we currently blow away the age
4993 // bits in this situation. Should attempt to preserve them.
4994 load_prototype_header(tmp_reg, obj_reg);
4995 orq(tmp_reg, r15_thread);
4996 if (os::is_MP()) {
4997 lock();
4998 }
4999 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5000 // If the biasing toward our thread failed, then another thread
5001 // succeeded in biasing it toward itself and we need to revoke that
5002 // bias. The revocation will occur in the runtime in the slow case.
5003 if (counters != NULL) {
5004 cond_inc32(Assembler::zero,
5005 ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5006 }
5007 if (slow_case != NULL) {
5008 jcc(Assembler::notZero, *slow_case);
5009 }
5010 jmp(done);
5011
5012 bind(try_revoke_bias);
5013 // The prototype mark in the klass doesn't have the bias bit set any
5014 // more, indicating that objects of this data type are not supposed
5015 // to be biased any more. We are going to try to reset the mark of
5016 // this object to the prototype value and fall through to the
5017 // CAS-based locking scheme. Note that if our CAS fails, it means
5018 // that another thread raced us for the privilege of revoking the
5019 // bias of this particular object, so it's okay to continue in the
5020 // normal locking code.
5021 //
5022 // FIXME: due to a lack of registers we currently blow away the age
5023 // bits in this situation. Should attempt to preserve them.
5024 load_prototype_header(tmp_reg, obj_reg);
5025 if (os::is_MP()) {
5026 lock();
5027 }
5028 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5029 // Fall through to the normal CAS-based lock, because no matter what
5030 // the result of the above CAS, some thread must have succeeded in
5031 // removing the bias bit from the object's header.
5032 if (counters != NULL) {
5033 cond_inc32(Assembler::zero,
5034 ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5035 }
5036
5037 bind(cas_label);
5038
5039 return null_check_offset;
5040 }
5041
5042 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
5043 Label L, E;
5044
5045 #ifdef _WIN64
5046 // Windows always allocates space for it's register args
5047 assert(num_args <= 4, "only register arguments supported");
5048 subq(rsp, frame::arg_reg_save_area_bytes);
5049 #endif
5050
5051 // Align stack if necessary
5052 testl(rsp, 15);
5053 jcc(Assembler::zero, L);
5054
5055 subq(rsp, 8);
5056 {
5057 call(RuntimeAddress(entry_point));
5058 }
5059 addq(rsp, 8);
5060 jmp(E);
5061
5062 bind(L);
5063 {
5064 call(RuntimeAddress(entry_point));
5065 }
5066
5067 bind(E);
5068
5069 #ifdef _WIN64
5070 // restore stack pointer
5071 addq(rsp, frame::arg_reg_save_area_bytes);
5072 #endif
5073
5074 }
5075
5076 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
5077 assert(!src2.is_lval(), "should use cmpptr");
5078
5079 if (reachable(src2)) {
5080 cmpq(src1, as_Address(src2));
5081 } else {
5082 lea(rscratch1, src2);
5083 Assembler::cmpq(src1, Address(rscratch1, 0));
5084 }
5085 }
5086
5087 int MacroAssembler::corrected_idivq(Register reg) {
5088 // Full implementation of Java ldiv and lrem; checks for special
5089 // case as described in JVM spec., p.243 & p.271. The function
5090 // returns the (pc) offset of the idivl instruction - may be needed
5091 // for implicit exceptions.
5092 //
5093 // normal case special case
5094 //
5095 // input : rax: dividend min_long
5096 // reg: divisor (may not be eax/edx) -1
5097 //
5098 // output: rax: quotient (= rax idiv reg) min_long
5099 // rdx: remainder (= rax irem reg) 0
5100 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
5101 static const int64_t min_long = 0x8000000000000000;
5102 Label normal_case, special_case;
5103
5104 // check for special case
5105 cmp64(rax, ExternalAddress((address) &min_long));
5106 jcc(Assembler::notEqual, normal_case);
5107 xorl(rdx, rdx); // prepare rdx for possible special case (where
5108 // remainder = 0)
5109 cmpq(reg, -1);
5110 jcc(Assembler::equal, special_case);
5111
5112 // handle normal case
5113 bind(normal_case);
5114 cdqq();
5115 int idivq_offset = offset();
5116 idivq(reg);
5117
5118 // normal and special case exit
5119 bind(special_case);
5120
5121 return idivq_offset;
5122 }
5123
5124 void MacroAssembler::decrementq(Register reg, int value) {
5125 if (value == min_jint) { subq(reg, value); return; }
5126 if (value < 0) { incrementq(reg, -value); return; }
5127 if (value == 0) { ; return; }
5128 if (value == 1 && UseIncDec) { decq(reg) ; return; }
5129 /* else */ { subq(reg, value) ; return; }
5130 }
5131
5132 void MacroAssembler::decrementq(Address dst, int value) {
5133 if (value == min_jint) { subq(dst, value); return; }
5134 if (value < 0) { incrementq(dst, -value); return; }
5135 if (value == 0) { ; return; }
5136 if (value == 1 && UseIncDec) { decq(dst) ; return; }
5137 /* else */ { subq(dst, value) ; return; }
5138 }
5139
5140 void MacroAssembler::fat_nop() {
5141 // A 5 byte nop that is safe for patching (see patch_verified_entry)
5142 // Recommened sequence from 'Software Optimization Guide for the AMD
5143 // Hammer Processor'
5144 emit_byte(0x66);
5145 emit_byte(0x66);
5146 emit_byte(0x90);
5147 emit_byte(0x66);
5148 emit_byte(0x90);
5149 }
5150
5151 void MacroAssembler::incrementq(Register reg, int value) {
5152 if (value == min_jint) { addq(reg, value); return; }
5153 if (value < 0) { decrementq(reg, -value); return; }
5154 if (value == 0) { ; return; }
5155 if (value == 1 && UseIncDec) { incq(reg) ; return; }
5156 /* else */ { addq(reg, value) ; return; }
5157 }
5158
5159 void MacroAssembler::incrementq(Address dst, int value) {
5160 if (value == min_jint) { addq(dst, value); return; }
5161 if (value < 0) { decrementq(dst, -value); return; }
5162 if (value == 0) { ; return; }
5163 if (value == 1 && UseIncDec) { incq(dst) ; return; }
5164 /* else */ { addq(dst, value) ; return; }
5165 }
5166
5167 // 32bit can do a case table jump in one instruction but we no longer allow the base
5168 // to be installed in the Address class
5169 void MacroAssembler::jump(ArrayAddress entry) {
5170 lea(rscratch1, entry.base());
5171 Address dispatch = entry.index();
5172 assert(dispatch._base == noreg, "must be");
5173 dispatch._base = rscratch1;
5174 jmp(dispatch);
5175 }
5176
5177 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
5178 ShouldNotReachHere(); // 64bit doesn't use two regs
5179 cmpq(x_lo, y_lo);
5180 }
5181
5182 void MacroAssembler::lea(Register dst, AddressLiteral src) {
5183 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5184 }
5185
5186 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
5187 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
5188 movptr(dst, rscratch1);
5189 }
5190
5191 void MacroAssembler::leave() {
5192 // %%% is this really better? Why not on 32bit too?
5193 emit_byte(0xC9); // LEAVE
5194 }
5195
5196 void MacroAssembler::lneg(Register hi, Register lo) {
5197 ShouldNotReachHere(); // 64bit doesn't use two regs
5198 negq(lo);
5199 }
5200
5201 void MacroAssembler::movoop(Register dst, jobject obj) {
5202 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5203 }
5204
5205 void MacroAssembler::movoop(Address dst, jobject obj) {
5206 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5207 movq(dst, rscratch1);
5208 }
5209
5210 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
5211 if (src.is_lval()) {
5212 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5213 } else {
5214 if (reachable(src)) {
5215 movq(dst, as_Address(src));
5216 } else {
5217 lea(rscratch1, src);
5218 movq(dst, Address(rscratch1,0));
5219 }
5220 }
5221 }
5222
5223 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
5224 movq(as_Address(dst), src);
5225 }
5226
5227 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
5228 movq(dst, as_Address(src));
5229 }
5230
5231 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
5232 void MacroAssembler::movptr(Address dst, intptr_t src) {
5233 mov64(rscratch1, src);
5234 movq(dst, rscratch1);
5235 }
5236
5237 // These are mostly for initializing NULL
5238 void MacroAssembler::movptr(Address dst, int32_t src) {
5239 movslq(dst, src);
5240 }
5241
5242 void MacroAssembler::movptr(Register dst, int32_t src) {
5243 mov64(dst, (intptr_t)src);
5244 }
5245
5246 void MacroAssembler::pushoop(jobject obj) {
5247 movoop(rscratch1, obj);
5248 push(rscratch1);
5249 }
5250
5251 void MacroAssembler::pushptr(AddressLiteral src) {
5252 lea(rscratch1, src);
5253 if (src.is_lval()) {
5254 push(rscratch1);
5255 } else {
5256 pushq(Address(rscratch1, 0));
5257 }
5258 }
5259
5260 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
5261 bool clear_pc) {
5262 // we must set sp to zero to clear frame
5263 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), (int32_t)NULL_WORD);
5264 // must clear fp, so that compiled frames are not confused; it is
5265 // possible that we need it only for debugging
5266 if (clear_fp) {
5267 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), (int32_t)NULL_WORD);
5268 }
5269
5270 if (clear_pc) {
5271 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), (int32_t)NULL_WORD);
5272 }
5273 }
5274
5275 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
5276 Register last_java_fp,
5277 address last_java_pc) {
5278 // determine last_java_sp register
5279 if (!last_java_sp->is_valid()) {
5280 last_java_sp = rsp;
5281 }
5282
5283 // last_java_fp is optional
5284 if (last_java_fp->is_valid()) {
5285 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
5286 last_java_fp);
5287 }
5288
5289 // last_java_pc is optional
5290 if (last_java_pc != NULL) {
5291 Address java_pc(r15_thread,
5292 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
5293 lea(rscratch1, InternalAddress(last_java_pc));
5294 movptr(java_pc, rscratch1);
5295 }
5296
5297 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
5298 }
5299
5300 static void pass_arg0(MacroAssembler* masm, Register arg) {
5301 if (c_rarg0 != arg ) {
5302 masm->mov(c_rarg0, arg);
5303 }
5304 }
5305
5306 static void pass_arg1(MacroAssembler* masm, Register arg) {
5307 if (c_rarg1 != arg ) {
5308 masm->mov(c_rarg1, arg);
5309 }
5310 }
5311
5312 static void pass_arg2(MacroAssembler* masm, Register arg) {
5313 if (c_rarg2 != arg ) {
5314 masm->mov(c_rarg2, arg);
5315 }
5316 }
5317
5318 static void pass_arg3(MacroAssembler* masm, Register arg) {
5319 if (c_rarg3 != arg ) {
5320 masm->mov(c_rarg3, arg);
5321 }
5322 }
5323
5324 void MacroAssembler::stop(const char* msg) {
5325 address rip = pc();
5326 pusha(); // get regs on stack
5327 lea(c_rarg0, ExternalAddress((address) msg));
5328 lea(c_rarg1, InternalAddress(rip));
5329 movq(c_rarg2, rsp); // pass pointer to regs array
5330 andq(rsp, -16); // align stack as required by ABI
5331 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
5332 hlt();
5333 }
5334
// Emits a call to warning(msg) as a leaf call. r12 is pushed and then used to
// remember the incoming rsp, so the 16-byte alignment applied here can be
// undone afterwards; the CPU state is saved and restored around the call.
void MacroAssembler::warn(const char* msg) {
  push(r12);
  movq(r12, rsp);  // remember the unaligned stack pointer
  andq(rsp, -16); // align stack as required by push_CPU_state and call

  push_CPU_state(); // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();

  movq(rsp, r12);  // undo the alignment
  pop(r12);
}
5348
#ifndef PRODUCT
// Debug helper defined elsewhere; called below with the reported pc.
extern "C" void findpc(intptr_t x);
#endif

// Runtime target of the code emitted by MacroAssembler::stop().
//   msg  - message supplied to stop()
//   pc   - code address recorded by stop()
//   regs - pointer to the registers that stop() saved with pusha
// With ShowMessageBoxOnError set, shows a message box and, if confirmed,
// prints pc and the saved registers and then hits BREAKPOINT. Otherwise only
// the message is printed.
// NOTE(review): the "%016lx" formats are given int64_t values; this assumes
// long is 64 bits wide on every platform this is built for - confirm.
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      // The indices below select each register's slot in the array passed by
      // stop() (rsp after pusha).
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8 = 0x%016lx", regs[7]);
      tty->print_cr("r9 = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    // Leave the faked in_VM state again.
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
  }
}
5401
5402 #endif // _LP64
5403
5404 // Now versions that are common to 32/64 bit
5405
5406 void MacroAssembler::addptr(Register dst, int32_t imm32) {
5407 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
5408 }
5409
5410 void MacroAssembler::addptr(Register dst, Register src) {
5411 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
5412 }
5413
5414 void MacroAssembler::addptr(Address dst, Register src) {
5415 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
5416 }
5417
5418 void MacroAssembler::align(int modulus) {
5419 if (offset() % modulus != 0) {
5420 nop(modulus - (offset() % modulus));
5421 }
5422 }
5423
5424 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
5425 andpd(dst, as_Address(src));
5426 }
5427
5428 void MacroAssembler::andptr(Register dst, int32_t imm32) {
5429 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
5430 }
5431
5432 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
5433 pushf();
5434 if (os::is_MP())
5435 lock();
5436 incrementl(counter_addr);
5437 popf();
5438 }
5439
5440 // Writes to stack successive pages until offset reached to check for
5441 // stack overflow + shadow pages. This clobbers tmp.
5442 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
5443 movptr(tmp, rsp);
5444 // Bang stack for total size given plus shadow page size.
5445 // Bang one page at a time because large size can bang beyond yellow and
5446 // red zones.
5447 Label loop;
5448 bind(loop);
5449 movl(Address(tmp, (-os::vm_page_size())), size );
5450 subptr(tmp, os::vm_page_size());
5451 subl(size, os::vm_page_size());
5452 jcc(Assembler::greater, loop);
5453
5454 // Bang down shadow pages too.
5455 // The -1 because we already subtracted 1 page.
5456 for (int i = 0; i< StackShadowPages-1; i++) {
5457 // this could be any sized move but this is can be a debugging crumb
5458 // so the bigger the better.
5459 movptr(Address(tmp, (-i*os::vm_page_size())), size );
5460 }
5461 }
5462
5463 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
5464 assert(UseBiasedLocking, "why call this otherwise?");
5465
5466 // Check for biased locking unlock case, which is a no-op
5467 // Note: we do not have to check the thread ID for two reasons.
5468 // First, the interpreter checks for IllegalMonitorStateException at
5469 // a higher level. Second, if the bias was revoked while we held the
5470 // lock, the object could not be rebiased toward another thread, so
5471 // the bias bit would be clear.
5472 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
5473 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
5474 cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
5475 jcc(Assembler::equal, done);
5476 }
5477
// Normalizes a C-style boolean held in x to 0 or 1.
void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  // since C-style booleans are stored in one byte
  // only! (was bug)
  andl(x, 0xFF);                // keep only the low byte; sets ZF accordingly
  setb(Assembler::notZero, x);  // x = (low byte != 0)
}
5486
// Wouldn't need if AddressLiteral version had new name
// Call to a label; forwards unchanged to Assembler::call.
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}
5491
// Indirect call through a register; forwards unchanged to Assembler::call.
void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}
5495
5496 void MacroAssembler::call(AddressLiteral entry) {
5497 if (reachable(entry)) {
5498 Assembler::call_literal(entry.target(), entry.rspec());
5499 } else {
5500 lea(rscratch1, entry);
5501 Assembler::call(rscratch1);
5502 }
5503 }
5504
5505 // Implementation of call_VM versions
5506
5507 void MacroAssembler::call_VM(Register oop_result,
5508 address entry_point,
5509 bool check_exceptions) {
5510 Label C, E;
5511 call(C, relocInfo::none);
5512 jmp(E);
5513
5514 bind(C);
5515 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
5516 ret(0);
5517
5518 bind(E);
5519 }
5520
5521 void MacroAssembler::call_VM(Register oop_result,
5522 address entry_point,
5523 Register arg_1,
5524 bool check_exceptions) {
5525 Label C, E;
5526 call(C, relocInfo::none);
5527 jmp(E);
5528
5529 bind(C);
5530 pass_arg1(this, arg_1);
5531 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
5532 ret(0);
5533
5534 bind(E);
5535 }
5536
5537 void MacroAssembler::call_VM(Register oop_result,
5538 address entry_point,
5539 Register arg_1,
5540 Register arg_2,
5541 bool check_exceptions) {
5542 Label C, E;
5543 call(C, relocInfo::none);
5544 jmp(E);
5545
5546 bind(C);
5547
5548 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5549
5550 pass_arg2(this, arg_2);
5551 pass_arg1(this, arg_1);
5552 call_VM_helper(oop_result, entry_point, 2, check_exceptions);
5553 ret(0);
5554
5555 bind(E);
5556 }
5557
5558 void MacroAssembler::call_VM(Register oop_result,
5559 address entry_point,
5560 Register arg_1,
5561 Register arg_2,
5562 Register arg_3,
5563 bool check_exceptions) {
5564 Label C, E;
5565 call(C, relocInfo::none);
5566 jmp(E);
5567
5568 bind(C);
5569
5570 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
5571 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
5572 pass_arg3(this, arg_3);
5573
5574 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
5575 pass_arg2(this, arg_2);
5576
5577 pass_arg1(this, arg_1);
5578 call_VM_helper(oop_result, entry_point, 3, check_exceptions);
5579 ret(0);
5580
5581 bind(E);
5582 }
5583
5584 void MacroAssembler::call_VM(Register oop_result,
5585 Register last_java_sp,
5586 address entry_point,
5587 int number_of_arguments,
5588 bool check_exceptions) {
5589 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
5590 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
5591 }
5592
// call_VM with an explicit last_java_sp and one register argument: places
// arg_1, then delegates to the argument-count form.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}
5601
// call_VM with an explicit last_java_sp and two register arguments.
// Arguments are placed last-to-first; the assert rejects an arg_1 that lives
// in the register arg_2 is about to be moved into.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}
5614
// call_VM with an explicit last_java_sp and three register arguments.
// Arguments are placed last-to-first; each assert rejects a not-yet-placed
// argument that lives in the register about to be overwritten.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
5630
// Common implementation behind all call_VM variants.
//   oop_result          - if valid, receives the thread's vm_result field,
//                         which is then cleared
//   java_thread         - register holding the current thread; if invalid,
//                         r15_thread (64-bit) or rdi loaded via get_thread
//                         (32-bit) is used
//   last_java_sp        - value recorded as the last Java sp; rsp if invalid
//   entry_point         - address of the VM function to call
//   number_of_arguments - number of arguments already set up by the caller
//   check_exceptions    - if true, transfer to the forward-exception stub
//                         when the thread has a pending exception on return
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
  assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)
  // 32-bit: pushed on the stack and counted as an extra argument;
  // 64-bit: passed in c_rarg0.

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    // Debug builds verify that java_thread still equals the current thread.
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}
5725
// Computes last_Java_sp in rax (clobbering it) and delegates to call_VM_base
// with no explicit thread register.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp.
  // Somewhat subtle: call_VM does an intermediate call, which places a
  // return address on the stack just under the stack pointer as the user
  // finished with it. This allows us to retrieve last_Java_pc from
  // last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}
5748
// Leaf call given only an argument count; forwards to call_VM_leaf_base
// without moving any arguments.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}
5752
// Leaf call with one register argument: places arg_0, then calls with an
// argument count of 1.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}
5757
// Leaf call with two register arguments. Arguments are placed last-to-first;
// the assert rejects an arg_0 that lives in the register arg_1 is about to
// be moved into.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}
5765
// Leaf call with three register arguments. Arguments are placed
// last-to-first; each assert rejects a not-yet-placed argument that lives in
// the register about to be overwritten.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
5775
// Emits nothing here. call_VM_base invokes this after the VM call when
// CC_INTERP is not defined.
// NOTE(review): presumably a hook that a derived assembler overrides -
// confirm against the class declaration.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
5778
// Emits nothing here. call_VM_base invokes this after the VM call when
// CC_INTERP is not defined.
// NOTE(review): presumably a hook that a derived assembler overrides -
// confirm against the class declaration.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
5781
5782 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
5783 if (reachable(src1)) {
5784 cmpl(as_Address(src1), imm);
5785 } else {
5786 lea(rscratch1, src1);
5787 cmpl(Address(rscratch1, 0), imm);
5788 }
5789 }
5790
5791 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
5792 assert(!src2.is_lval(), "use cmpptr");
5793 if (reachable(src2)) {
5794 cmpl(src1, as_Address(src2));
5795 } else {
5796 lea(rscratch1, src2);
5797 cmpl(src1, Address(rscratch1, 0));
5798 }
5799 }
5800
// 32-bit compare of a register with an immediate; forwards to Assembler::cmpl.
void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}
5804
// 32-bit compare of a register with a memory operand; forwards to
// Assembler::cmpl.
void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}
5808
5809 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
5810 ucomisd(opr1, opr2);
5811
5812 Label L;
5813 if (unordered_is_less) {
5814 movl(dst, -1);
5815 jcc(Assembler::parity, L);
5816 jcc(Assembler::below , L);
5817 movl(dst, 0);
5818 jcc(Assembler::equal , L);
5819 increment(dst);
5820 } else { // unordered is greater
5821 movl(dst, 1);
5822 jcc(Assembler::parity, L);
5823 jcc(Assembler::above , L);
5824 movl(dst, 0);
5825 jcc(Assembler::equal , L);
5826 decrementl(dst);
5827 }
5828 bind(L);
5829 }
5830
5831 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
5832 ucomiss(opr1, opr2);
5833
5834 Label L;
5835 if (unordered_is_less) {
5836 movl(dst, -1);
5837 jcc(Assembler::parity, L);
5838 jcc(Assembler::below , L);
5839 movl(dst, 0);
5840 jcc(Assembler::equal , L);
5841 increment(dst);
5842 } else { // unordered is greater
5843 movl(dst, 1);
5844 jcc(Assembler::parity, L);
5845 jcc(Assembler::above , L);
5846 movl(dst, 0);
5847 jcc(Assembler::equal , L);
5848 decrementl(dst);
5849 }
5850 bind(L);
5851 }
5852
5853
5854 void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
5855 if (reachable(src1)) {
5856 cmpb(as_Address(src1), imm);
5857 } else {
5858 lea(rscratch1, src1);
5859 cmpb(Address(rscratch1, 0), imm);
5860 }
5861 }
5862
// Pointer-width compare of a register against an AddressLiteral.
// If src2 is an lval the comparison is against the literal address itself;
// otherwise it is against the memory at that address.
// On 64-bit builds rscratch1 is clobbered in the lval and unreachable cases.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    // compare with the address value, loaded into rscratch1 first
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    // memory operand out of reach: address it through rscratch1
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}
5882
// Pointer-width compare of a memory operand against a literal address.
// src2 must be an lval: a memory-to-memory compare is not supported.
// On 64-bit builds the literal is loaded into rscratch1 (clobbered).
void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}
5893
5894 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
5895 if (reachable(adr)) {
5896 if (os::is_MP())
5897 lock();
5898 cmpxchgptr(reg, as_Address(adr));
5899 } else {
5900 lea(rscratch1, adr);
5901 if (os::is_MP())
5902 lock();
5903 cmpxchgptr(reg, Address(rscratch1, 0));
5904 }
5905 }
5906
5907 void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
5908 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
5909 }
5910
5911 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
5912 comisd(dst, as_Address(src));
5913 }
5914
5915 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
5916 comiss(dst, as_Address(src));
5917 }
5918
5919
5920 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
5921 Condition negated_cond = negate_condition(cond);
5922 Label L;
5923 jcc(negated_cond, L);
5924 atomic_incl(counter_addr);
5925 bind(L);
5926 }
5927
5928 int MacroAssembler::corrected_idivl(Register reg) {
5929 // Full implementation of Java idiv and irem; checks for
5930 // special case as described in JVM spec., p.243 & p.271.
5931 // The function returns the (pc) offset of the idivl
5932 // instruction - may be needed for implicit exceptions.
5933 //
5934 // normal case special case
5935 //
5936 // input : rax,: dividend min_int
5937 // reg: divisor (may not be rax,/rdx) -1
5938 //
5939 // output: rax,: quotient (= rax, idiv reg) min_int
5940 // rdx: remainder (= rax, irem reg) 0
5941 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
5942 const int min_int = 0x80000000;
5943 Label normal_case, special_case;
5944
5945 // check for special case
5946 cmpl(rax, min_int);
5947 jcc(Assembler::notEqual, normal_case);
5948 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
5949 cmpl(reg, -1);
5950 jcc(Assembler::equal, special_case);
5951
5952 // handle normal case
5953 bind(normal_case);
5954 cdql();
5955 int idivl_offset = offset();
5956 idivl(reg);
5957
5958 // normal and special case exit
5959 bind(special_case);
5960
5961 return idivl_offset;
5962 }
5963
5964
5965
5966 void MacroAssembler::decrementl(Register reg, int value) {
5967 if (value == min_jint) {subl(reg, value) ; return; }
5968 if (value < 0) { incrementl(reg, -value); return; }
5969 if (value == 0) { ; return; }
5970 if (value == 1 && UseIncDec) { decl(reg) ; return; }
5971 /* else */ { subl(reg, value) ; return; }
5972 }
5973
5974 void MacroAssembler::decrementl(Address dst, int value) {
5975 if (value == min_jint) {subl(dst, value) ; return; }
5976 if (value < 0) { incrementl(dst, -value); return; }
5977 if (value == 0) { ; return; }
5978 if (value == 1 && UseIncDec) { decl(dst) ; return; }
5979 /* else */ { subl(dst, value) ; return; }
5980 }
5981
5982 void MacroAssembler::division_with_shift (Register reg, int shift_value) {
5983 assert (shift_value > 0, "illegal shift value");
5984 Label _is_positive;
5985 testl (reg, reg);
5986 jcc (Assembler::positive, _is_positive);
5987 int offset = (1 << shift_value) - 1 ;
5988
5989 if (offset == 1) {
5990 incrementl(reg);
5991 } else {
5992 addl(reg, offset);
5993 }
5994
5995 bind (_is_positive);
5996 sarl(reg, shift_value);
5997 }
5998
5999 // !defined(COMPILER2) is because of stupid core builds
6000 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
6001 void MacroAssembler::empty_FPU_stack() {
6002 if (VM_Version::supports_mmx()) {
6003 emms();
6004 } else {
6005 for (int i = 8; i-- > 0; ) ffree(i);
6006 }
6007 }
6008 #endif // !LP64 || C1 || !C2
6009
6010
// Defines obj, preserves var_size_in_bytes
// Emits an inline allocation that advances the heap top with a compare and
// exchange, retrying when the exchange fails.
//   obj               - receives the start of the new object; must be rax
//   var_size_in_bytes - register holding the size, or noreg to use
//                       con_size_in_bytes instead
//   con_size_in_bytes - constant size, used when var_size_in_bytes is noreg
//   t1                - temp register; holds the proposed new top
//   slow_case         - taken when inline allocation is not possible
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // inline allocation not available: always take the slow path
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    // new top beyond the heap end => slow case
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}
6044
// Emits a frame prologue: saves rbp and makes it point at the current rsp.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}
6049
// Convenience form of fcmp: compares against index 1 and pops both operands.
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}
6053
// x87 floating-point compare that leaves its result in eflags.
//   tmp       - must be noreg when cmov is supported; otherwise a register
//               used by save_rax/restore_rax around the status-word transfer
//   index     - x87 stack index of the operand to compare against
//   pop_left  - pop the left operand after the compare
//   pop_right - also pop the right operand (requires pop_left)
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    // fucomi/fucomip set eflags directly, no temp needed
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}
6089
// Convenience form of fcmp2int: compares against index 1 and pops both
// operands.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}
6093
6094 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
6095 fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
6096 Label L;
6097 if (unordered_is_less) {
6098 movl(dst, -1);
6099 jcc(Assembler::parity, L);
6100 jcc(Assembler::below , L);
6101 movl(dst, 0);
6102 jcc(Assembler::equal , L);
6103 increment(dst);
6104 } else { // unordered is greater
6105 movl(dst, 1);
6106 jcc(Assembler::parity, L);
6107 jcc(Assembler::above , L);
6108 movl(dst, 0);
6109 jcc(Assembler::equal , L);
6110 decrementl(dst);
6111 }
6112 bind(L);
6113 }
6114
// fld_d from a literal address, converted with as_Address.
// NOTE(review): no reachable() check here, unlike e.g. cmp32 - confirm that
// every literal passed in is within reach.
void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}
6118
// fld_s from a literal address, converted with as_Address.
// NOTE(review): no reachable() check here, unlike e.g. cmp32 - confirm that
// every literal passed in is within reach.
void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}
6122
// fld_x from a literal address, converted with as_Address and forwarded to
// Assembler::fld_x.
// NOTE(review): no reachable() check here, unlike e.g. cmp32 - confirm that
// every literal passed in is within reach.
void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}
6126
// fldcw from a literal address, converted with as_Address and forwarded to
// Assembler::fldcw.
// NOTE(review): no reachable() check here, unlike e.g. cmp32 - confirm that
// every literal passed in is within reach.
void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}
6130
// Pops the x87 stack without storing: ffree() followed by fincstp().
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}
6135
// x87 remainder: repeats fprem until the FPU status word no longer asks for
// another iteration, then removes ST1 so that only the result stays in ST0.
// rax is preserved via save_rax(tmp)/restore_rax(tmp).
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    // test bit 0x400 of the status word directly
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    // move the status bits into eflags and loop on the parity flag
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}
6157
6158
6159 void MacroAssembler::incrementl(AddressLiteral dst) {
6160 if (reachable(dst)) {
6161 incrementl(as_Address(dst));
6162 } else {
6163 lea(rscratch1, dst);
6164 incrementl(Address(rscratch1, 0));
6165 }
6166 }
6167
// Increments the 32-bit value at an array address, converted with as_Address.
void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}
6171
6172 void MacroAssembler::incrementl(Register reg, int value) {
6173 if (value == min_jint) {addl(reg, value) ; return; }
6174 if (value < 0) { decrementl(reg, -value); return; }
6175 if (value == 0) { ; return; }
6176 if (value == 1 && UseIncDec) { incl(reg) ; return; }
6177 /* else */ { addl(reg, value) ; return; }
6178 }
6179
6180 void MacroAssembler::incrementl(Address dst, int value) {
6181 if (value == min_jint) {addl(dst, value) ; return; }
6182 if (value < 0) { decrementl(dst, -value); return; }
6183 if (value == 0) { ; return; }
6184 if (value == 1 && UseIncDec) { incl(dst) ; return; }
6185 /* else */ { addl(dst, value) ; return; }
6186 }
6187
6188 void MacroAssembler::jump(AddressLiteral dst) {
6189 if (reachable(dst)) {
6190 jmp_literal(dst.target(), dst.rspec());
6191 } else {
6192 lea(rscratch1, dst);
6193 jmp(rscratch1);
6194 }
6195 }
6196
// Conditional jump to a literal address.
// A reachable target is encoded directly: the 2-byte short form when no
// relocation is attached and the displacement fits in 8 bits, the 6-byte
// long form otherwise. An unreachable target is handled by branching around
// an indirect jump through rscratch1 (clobbered) on the reversed condition.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;  // opcode byte + 8-bit displacement
    const int long_size = 6;   // two opcode bytes + 32-bit displacement
    // distance from the start of this instruction to the target
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}
6225
6226 void MacroAssembler::ldmxcsr(AddressLiteral src) {
6227 if (reachable(src)) {
6228 Assembler::ldmxcsr(as_Address(src));
6229 } else {
6230 lea(rscratch1, src);
6231 Assembler::ldmxcsr(Address(rscratch1, 0));
6232 }
6233 }
6234
6235 int MacroAssembler::load_signed_byte(Register dst, Address src) {
6236 int off;
6237 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6238 off = offset();
6239 movsbl(dst, src); // movsxb
6240 } else {
6241 off = load_unsigned_byte(dst, src);
6242 shll(dst, 24);
6243 sarl(dst, 24);
6244 }
6245 return off;
6246 }
6247
6248 // word => int32 which seems bad for 64bit
6249 int MacroAssembler::load_signed_word(Register dst, Address src) {
6250 int off;
6251 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6252 // This is dubious to me since it seems safe to do a signed 16 => 64 bit
6253 // version but this is what 64bit has always done. This seems to imply
6254 // that users are only using 32bits worth.
6255 off = offset();
6256 movswl(dst, src); // movsxw
6257 } else {
6258 off = load_unsigned_word(dst, src);
6259 shll(dst, 16);
6260 sarl(dst, 16);
6261 }
6262 return off;
6263 }
6264
6265 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
6266 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6267 // and "3.9 Partial Register Penalties", p. 22).
6268 int off;
6269 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
6270 off = offset();
6271 movzbl(dst, src); // movzxb
6272 } else {
6273 xorl(dst, dst);
6274 off = offset();
6275 movb(dst, src);
6276 }
6277 return off;
6278 }
6279
6280 int MacroAssembler::load_unsigned_word(Register dst, Address src) {
6281 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6282 // and "3.9 Partial Register Penalties", p. 22).
6283 int off;
6284 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
6285 off = offset();
6286 movzwl(dst, src); // movzxw
6287 } else {
6288 xorl(dst, dst);
6289 off = offset();
6290 movw(dst, src);
6291 }
6292 return off;
6293 }
6294
6295 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
6296 if (reachable(dst)) {
6297 movl(as_Address(dst), src);
6298 } else {
6299 lea(rscratch1, dst);
6300 movl(Address(rscratch1, 0), src);
6301 }
6302 }
6303
6304 void MacroAssembler::mov32(Register dst, AddressLiteral src) {
6305 if (reachable(src)) {
6306 movl(dst, as_Address(src));
6307 } else {
6308 lea(rscratch1, src);
6309 movl(dst, Address(rscratch1, 0));
6310 }
6311 }
6312
6313 // C++ bool manipulation
6314
6315 void MacroAssembler::movbool(Register dst, Address src) {
6316 if(sizeof(bool) == 1)
6317 movb(dst, src);
6318 else if(sizeof(bool) == 2)
6319 movw(dst, src);
6320 else if(sizeof(bool) == 4)
6321 movl(dst, src);
6322 else
6323 // unsupported
6324 ShouldNotReachHere();
6325 }
6326
6327 void MacroAssembler::movbool(Address dst, bool boolconst) {
6328 if(sizeof(bool) == 1)
6329 movb(dst, (int) boolconst);
6330 else if(sizeof(bool) == 2)
6331 movw(dst, (int) boolconst);
6332 else if(sizeof(bool) == 4)
6333 movl(dst, (int) boolconst);
6334 else
6335 // unsupported
6336 ShouldNotReachHere();
6337 }
6338
6339 void MacroAssembler::movbool(Address dst, Register src) {
6340 if(sizeof(bool) == 1)
6341 movb(dst, src);
6342 else if(sizeof(bool) == 2)
6343 movw(dst, src);
6344 else if(sizeof(bool) == 4)
6345 movl(dst, src);
6346 else
6347 // unsupported
6348 ShouldNotReachHere();
6349 }
6350
// Stores an immediate byte to an array address, converted with as_Address.
void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}
6354
6355 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
6356 if (reachable(src)) {
6357 if (UseXmmLoadAndClearUpper) {
6358 movsd (dst, as_Address(src));
6359 } else {
6360 movlpd(dst, as_Address(src));
6361 }
6362 } else {
6363 lea(rscratch1, src);
6364 if (UseXmmLoadAndClearUpper) {
6365 movsd (dst, Address(rscratch1, 0));
6366 } else {
6367 movlpd(dst, Address(rscratch1, 0));
6368 }
6369 }
6370 }
6371
6372 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
6373 if (reachable(src)) {
6374 movss(dst, as_Address(src));
6375 } else {
6376 lea(rscratch1, src);
6377 movss(dst, Address(rscratch1, 0));
6378 }
6379 }
6380
6381 void MacroAssembler::movptr(Register dst, Register src) {
6382 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6383 }
6384
6385 void MacroAssembler::movptr(Register dst, Address src) {
6386 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6387 }
6388
6389 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
6390 void MacroAssembler::movptr(Register dst, intptr_t src) {
6391 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
6392 }
6393
6394 void MacroAssembler::movptr(Address dst, Register src) {
6395 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6396 }
6397
6398 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
6399 if (reachable(src)) {
6400 movss(dst, as_Address(src));
6401 } else {
6402 lea(rscratch1, src);
6403 movss(dst, Address(rscratch1, 0));
6404 }
6405 }
6406
6407 void MacroAssembler::null_check(Register reg, int offset) {
6408 if (needs_explicit_null_check(offset)) {
6409 // provoke OS NULL exception if reg = NULL by
6410 // accessing M[reg] w/o changing any (non-CC) registers
6411 // NOTE: cmpl is plenty here to provoke a segv
6412 cmpptr(rax, Address(reg, 0));
6413 // Note: should probably use testl(rax, Address(reg, 0));
6414 // may be shorter code (however, this version of
6415 // testl needs to be implemented first)
6416 } else {
6417 // nothing to do, (later) access of M[reg + offset]
6418 // will provoke OS NULL exception if reg = NULL
6419 }
6420 }
6421
6422 void MacroAssembler::os_breakpoint() {
6423 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
6424 // (e.g., MSVC can't call ps() otherwise)
6425 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
6426 }
6427
6428 void MacroAssembler::pop_CPU_state() {
6429 pop_FPU_state();
6430 pop_IU_state();
6431 }
6432
6433 void MacroAssembler::pop_FPU_state() {
6434 NOT_LP64(frstor(Address(rsp, 0));)
6435 LP64_ONLY(fxrstor(Address(rsp, 0));)
6436 addptr(rsp, FPUStateSizeInWords * wordSize);
6437 }
6438
6439 void MacroAssembler::pop_IU_state() {
6440 popa();
6441 LP64_ONLY(addq(rsp, 8));
6442 popf();
6443 }
6444
6445 // Save Integer and Float state
6446 // Warning: Stack must be 16 byte aligned (64bit)
6447 void MacroAssembler::push_CPU_state() {
6448 push_IU_state();
6449 push_FPU_state();
6450 }
6451
6452 void MacroAssembler::push_FPU_state() {
6453 subptr(rsp, FPUStateSizeInWords * wordSize);
6454 #ifndef _LP64
6455 fnsave(Address(rsp, 0));
6456 fwait();
6457 #else
6458 fxsave(Address(rsp, 0));
6459 #endif // LP64
6460 }
6461
6462 void MacroAssembler::push_IU_state() {
6463 // Push flags first because pusha kills them
6464 pushf();
6465 // Make sure rsp stays 16-byte aligned
6466 LP64_ONLY(subq(rsp, 8));
6467 pusha();
6468 }
6469
6470 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
6471 // determine java_thread register
6472 if (!java_thread->is_valid()) {
6473 java_thread = rdi;
6474 get_thread(java_thread);
6475 }
6476 // we must set sp to zero to clear frame
6477 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), (int32_t)NULL_WORD);
6478 if (clear_fp) {
6479 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), (int32_t)NULL_WORD);
6480 }
6481
6482 if (clear_pc)
6483 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), (int32_t)NULL_WORD);
6484
6485 }
6486
6487 void MacroAssembler::restore_rax(Register tmp) {
6488 if (tmp == noreg) pop(rax);
6489 else if (tmp != rax) mov(rax, tmp);
6490 }
6491
6492 void MacroAssembler::round_to(Register reg, int modulus) {
6493 addptr(reg, modulus - 1);
6494 andptr(reg, -modulus);
6495 }
6496
6497 void MacroAssembler::save_rax(Register tmp) {
6498 if (tmp == noreg) push(rax);
6499 else if (tmp != rax) mov(tmp, rax);
6500 }
6501
6502 // Write serialization page so VM thread can do a pseudo remote membar.
6503 // We use the current thread pointer to calculate a thread specific
6504 // offset to write to within the page. This minimizes bus traffic
6505 // due to cache line collision.
6506 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
6507 movl(tmp, thread);
6508 shrl(tmp, os::get_serialize_page_shift_count());
6509 andl(tmp, (os::vm_page_size() - sizeof(int)));
6510
6511 Address index(noreg, tmp, Address::times_1);
6512 ExternalAddress page(os::get_memory_serialize_page());
6513
6514 movptr(ArrayAddress(page, index), tmp);
6515 }
6516
6517 // Calls to C land
6518 //
6519 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
6520 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
6521 // has to be reset to 0. This is required to allow proper stack traversal.
6522 void MacroAssembler::set_last_Java_frame(Register java_thread,
6523 Register last_java_sp,
6524 Register last_java_fp,
6525 address last_java_pc) {
6526 // determine java_thread register
6527 if (!java_thread->is_valid()) {
6528 java_thread = rdi;
6529 get_thread(java_thread);
6530 }
6531 // determine last_java_sp register
6532 if (!last_java_sp->is_valid()) {
6533 last_java_sp = rsp;
6534 }
6535
6536 // last_java_fp is optional
6537
6538 if (last_java_fp->is_valid()) {
6539 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
6540 }
6541
6542 // last_java_pc is optional
6543
6544 if (last_java_pc != NULL) {
6545 lea(Address(java_thread,
6546 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
6547 InternalAddress(last_java_pc));
6548
6549 }
6550 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
6551 }
6552
6553 void MacroAssembler::shlptr(Register dst, int imm8) {
6554 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
6555 }
6556
6557 void MacroAssembler::shrptr(Register dst, int imm8) {
6558 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
6559 }
6560
6561 void MacroAssembler::sign_extend_byte(Register reg) {
6562 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
6563 movsbl(reg, reg); // movsxb
6564 } else {
6565 shll(reg, 24);
6566 sarl(reg, 24);
6567 }
6568 }
6569
6570 void MacroAssembler::sign_extend_short(Register reg) {
6571 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6572 movswl(reg, reg); // movsxw
6573 } else {
6574 shll(reg, 16);
6575 sarl(reg, 16);
6576 }
6577 }
6578
6579 //////////////////////////////////////////////////////////////////////////////////
6580 #ifndef SERIALGC
6581
// Emits the G1 pre-write barrier for the word at Address(obj, 0).
//
// If the thread's SATB mark queue is active and the word currently stored at
// Address(obj, 0) is non-NULL, that word is recorded in the thread-local
// queue buffer.  When the queue index is zero the value is handed to
// SharedRuntime::g1_wb_pre instead.
//
//   obj        - register whose Address(obj, 0) is read as the old value
//   thread     - (32-bit only) register holding the thread; on 64-bit
//                r15_thread is used
//   tmp, tmp2  - scratch registers, overwritten
//   tosca_live - when true, rax is pushed/popped around the runtime call
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
  LP64_ONLY(Register thread = r15_thread;)
  // Fields of the thread's SATB mark queue: active flag, index, buffer.
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));

  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                 PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));


  Label done;
  Label runtime;

  // if (!marking_in_progress) goto done;
  // The compare width follows the declared width of the active flag.
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
  cmpptr(Address(obj, 0), NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?

  LP64_ONLY(movslq(tmp, index);)
  // tmp2 = old value
  movptr(tmp2, Address(obj, 0));
#ifdef _LP64
  cmpq(tmp, 0);
#else
  cmpl(index, 0);
#endif
  // index == 0: take the runtime path
  jcc(Assembler::equal, runtime);
  // Decrement the index by one word and form tmp = buffer + index.
#ifdef _LP64
  subq(tmp, wordSize);
  movl(index, tmp);
  addq(tmp, buffer);
#else
  subl(index, wordSize);
  movl(tmp, buffer);
  addl(tmp, index);
#endif
  // Record the old value in the buffer slot.
  movptr(Address(tmp, 0), tmp2);
  jmp(done);
  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);
  push(obj);
#ifdef _LP64
  movq(c_rarg0, Address(obj, 0));
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  pop(thread);
#endif
  pop(obj);
  if(tosca_live) pop(rax);
  bind(done);

}
6653
// Emits the G1 post-write barrier for a store of new_val at store_addr.
//
// Nothing further happens when store_addr and new_val agree in all bits
// above HeapRegion::LogOfHRGrainBytes, when new_val is NULL, or when the
// card byte for store_addr is already 0.  Otherwise the card byte is set to 0
// and the card address is recorded in the thread's dirty card queue buffer;
// when the queue index is zero, SharedRuntime::g1_wb_post is called instead.
//
//   store_addr - register holding the address that was stored to
//   new_val    - register holding the value that was stored
//   thread     - (32-bit only) register holding the thread; on 64-bit
//                r15_thread is used
//   tmp, tmp2  - scratch registers, overwritten (64-bit also uses rscratch1)
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

  LP64_ONLY(Register thread = r15_thread;)
  // Fields of the thread's dirty card queue: index and buffer.
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  // (store_addr ^ new_val) >> LogOfHRGrainBytes is zero iff both agree in
  // every bit above the region grain.
  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  // card_index and card_addr both alias tmp.
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  // A card byte of 0 is treated as already dirty here.
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  // queue index == 0: take the runtime path
  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  // Decrement the index by one word and store the card address at
  // buffer + index.
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);

}
6747
6748 #endif // SERIALGC
6749 //////////////////////////////////////////////////////////////////////////////////
6750
6751
6752 void MacroAssembler::store_check(Register obj) {
6753 // Does a store check for the oop in register obj. The content of
6754 // register obj is destroyed afterwards.
6755 store_check_part_1(obj);
6756 store_check_part_2(obj);
6757 }
6758
6759 void MacroAssembler::store_check(Register obj, Address dst) {
6760 store_check(obj);
6761 }
6762
6763
6764 // split the store check operation so that other instructions can be scheduled inbetween
6765 void MacroAssembler::store_check_part_1(Register obj) {
6766 BarrierSet* bs = Universe::heap()->barrier_set();
6767 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
6768 shrptr(obj, CardTableModRefBS::card_shift);
6769 }
6770
6771 void MacroAssembler::store_check_part_2(Register obj) {
6772 BarrierSet* bs = Universe::heap()->barrier_set();
6773 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
6774 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
6775 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
6776
6777 // The calculation for byte_map_base is as follows:
6778 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
6779 // So this essentially converts an address to a displacement and
6780 // it will never need to be relocated. On 64bit however the value may be too
6781 // large for a 32bit displacement
6782
6783 intptr_t disp = (intptr_t) ct->byte_map_base;
6784 if (is_simm32(disp)) {
6785 Address cardtable(noreg, obj, Address::times_1, disp);
6786 movb(cardtable, 0);
6787 } else {
6788 // By doing it as an ExternalAddress disp could be converted to a rip-relative
6789 // displacement and done in a single instruction given favorable mapping and
6790 // a smarter version of as_Address. Worst case it is two instructions which
6791 // is no worse off then loading disp into a register and doing as a simple
6792 // Address() as above.
6793 // We can't do as ExternalAddress as the only style since if disp == 0 we'll
6794 // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
6795 // in some cases we'll get a single instruction version.
6796
6797 ExternalAddress cardtable((address)disp);
6798 Address index(noreg, obj, Address::times_1);
6799 movb(as_Address(ArrayAddress(cardtable, index)), 0);
6800 }
6801 }
6802
6803 void MacroAssembler::subptr(Register dst, int32_t imm32) {
6804 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
6805 }
6806
6807 void MacroAssembler::subptr(Register dst, Register src) {
6808 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
6809 }
6810
6811 void MacroAssembler::test32(Register src1, AddressLiteral src2) {
6812 // src2 must be rval
6813
6814 if (reachable(src2)) {
6815 testl(src1, as_Address(src2));
6816 } else {
6817 lea(rscratch1, src2);
6818 testl(src1, Address(rscratch1, 0));
6819 }
6820 }
6821
6822 // C++ bool manipulation
6823 void MacroAssembler::testbool(Register dst) {
6824 if(sizeof(bool) == 1)
6825 testb(dst, 0xff);
6826 else if(sizeof(bool) == 2) {
6827 // testw implementation needed for two byte bools
6828 ShouldNotReachHere();
6829 } else if(sizeof(bool) == 4)
6830 testl(dst, dst);
6831 else
6832 // unsupported
6833 ShouldNotReachHere();
6834 }
6835
6836 void MacroAssembler::testptr(Register dst, Register src) {
6837 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
6838 }
6839
6840 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
6841 void MacroAssembler::tlab_allocate(Register obj,
6842 Register var_size_in_bytes,
6843 int con_size_in_bytes,
6844 Register t1,
6845 Register t2,
6846 Label& slow_case) {
6847 assert_different_registers(obj, t1, t2);
6848 assert_different_registers(obj, var_size_in_bytes, t1);
6849 Register end = t2;
6850 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
6851
6852 verify_tlab();
6853
6854 NOT_LP64(get_thread(thread));
6855
6856 movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
6857 if (var_size_in_bytes == noreg) {
6858 lea(end, Address(obj, con_size_in_bytes));
6859 } else {
6860 lea(end, Address(obj, var_size_in_bytes, Address::times_1));
6861 }
6862 cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
6863 jcc(Assembler::above, slow_case);
6864
6865 // update the tlab top pointer
6866 movptr(Address(thread, JavaThread::tlab_top_offset()), end);
6867
6868 // recover var_size_in_bytes if necessary
6869 if (var_size_in_bytes == end) {
6870 subptr(var_size_in_bytes, obj);
6871 }
6872 verify_tlab();
6873 }
6874
// Emits the TLAB refill sequence.  Preserves rbx and rdx.
//
// If the free space left in the current TLAB is larger than the thread's
// refill waste limit, the TLAB is retained: the limit is bumped and control
// goes to try_eden.  Otherwise the TLAB is discarded: its unused part (if a
// TLAB is currently allocated) is given an int-array header, a new chunk is
// obtained through eden_allocate, the thread's TLAB start/top/end are updated
// and control goes to retry.
//
//   retry     - jumped to after a successful refill
//   try_eden  - jumped to when the current TLAB is retained
//   slow_case - jumped to when inline eden allocation is not available, and
//               passed to eden_allocate
// Uses rax (top), rcx (t1), rsi (t2); on 32-bit also rdi for the thread.
void MacroAssembler::tlab_refill(Label& retry,
                                 Label& try_eden,
                                 Label& slow_case) {
  Register top = rax;
  Register t1 = rcx;
  Register t2 = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  // t1 = (end - top) in heap words
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr (top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  // (free words - int-array header + alignment reserve, converted from
  // heap words to jint elements)
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movptr(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  // t1 = TLAB size in bytes
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // add object_size ??
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    // Recompute the TLAB size in bytes and compare it with t1.
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // Install the new TLAB: start = top = new chunk,
  // end = chunk + size - alignment reserve.
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);
}
6975
// pi/4: the bound that MacroAssembler::trigfunc compares |x| against to
// choose between the inline FPU instruction and the runtime call.
static const double pi_4 = 0.7853981633974483;
6977
// Emits code computing sin, cos or tan of the value on top of the FPU stack.
//
//   trig                - 's', 'c' or 't' selects fsin/fcos/ftan on the fast
//                         path and SharedRuntime::dsin/dcos/dtan on the slow
//                         path; anything else asserts
//   num_fpu_regs_in_use - number of FPU stack registers currently in use;
//                         when > 1 they are all spilled to the stack around
//                         the runtime call and reloaded afterwards
//
// The result is left on top of the FPU stack.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  // x ?<= pi/4
  fld_d(ExternalAddress((address)&pi_4));
  fld_s(1);                // Stack:  X  PI/4  X
  fabs();                  // Stack: |X| PI/4  X
  fcmp(tmp);
  jcc(Assembler::above, slow_case);

  // fastest case: -pi/4 <= x <= pi/4
  switch(trig) {
  case 's':
    fsin();
    break;
  case 'c':
    fcos();
    break;
  case 't':
    ftan();
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
  jmp(done);

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  // Stack offset of the slot that holds the argument and later the result;
  // only set (and used) when the whole FPU stack is spilled below.
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    // The argument was stored first, so it sits in the highest spill slot.
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  // Store the argument in a fresh stack slot for the call.
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  // On 64-bit the argument is also loaded into xmm0.
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
  // Move the result from xmm0 onto the FPU stack via the argument slot.
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  // Restore rbx if it was pushed as the fcmp temporary.
  if (tmp != noreg) {
    pop(tmp);
  }
}
7095
7096
7097 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7098 ucomisd(dst, as_Address(src));
7099 }
7100
7101 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7102 ucomiss(dst, as_Address(src));
7103 }
7104
7105 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7106 if (reachable(src)) {
7107 xorpd(dst, as_Address(src));
7108 } else {
7109 lea(rscratch1, src);
7110 xorpd(dst, Address(rscratch1, 0));
7111 }
7112 }
7113
7114 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7115 if (reachable(src)) {
7116 xorps(dst, as_Address(src));
7117 } else {
7118 lea(rscratch1, src);
7119 xorps(dst, Address(rscratch1, 0));
7120 }
7121 }
7122
7123 void MacroAssembler::verify_oop(Register reg, const char* s) {
7124 if (!VerifyOops) return;
7125
7126 // Pass register number to verify_oop_subroutine
7127 char* b = new char[strlen(s) + 50];
7128 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
7129 push(rax); // save rax,
7130 push(reg); // pass register argument
7131 ExternalAddress buffer((address) b);
7132 // avoid using pushptr, as it modifies scratch registers
7133 // and our contract is not to modify anything
7134 movptr(rax, buffer.addr());
7135 push(rax);
7136 // call indirectly to solve generation ordering problem
7137 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7138 call(rax);
7139 }
7140
7141
7142 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
7143 if (!VerifyOops) return;
7144
7145 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
7146 // Pass register number to verify_oop_subroutine
7147 char* b = new char[strlen(s) + 50];
7148 sprintf(b, "verify_oop_addr: %s", s);
7149
7150 push(rax); // save rax,
7151 // addr may contain rsp so we will have to adjust it based on the push
7152 // we just did
7153 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
7154 // stores rax into addr which is backwards of what was intended.
7155 if (addr.uses(rsp)) {
7156 lea(rax, addr);
7157 pushptr(Address(rax, BytesPerWord));
7158 } else {
7159 pushptr(addr);
7160 }
7161
7162 ExternalAddress buffer((address) b);
7163 // pass msg argument
7164 // avoid using pushptr, as it modifies scratch registers
7165 // and our contract is not to modify anything
7166 movptr(rax, buffer.addr());
7167 push(rax);
7168
7169 // call indirectly to solve generation ordering problem
7170 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7171 call(rax);
7172 // Caller pops the arguments and restores rax, from the stack
7173 }
7174
7175 void MacroAssembler::verify_tlab() {
7176 #ifdef ASSERT
7177 if (UseTLAB && VerifyOops) {
7178 Label next, ok;
7179 Register t1 = rsi;
7180 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
7181
7182 push(t1);
7183 NOT_LP64(push(thread_reg));
7184 NOT_LP64(get_thread(thread_reg));
7185
7186 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7187 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
7188 jcc(Assembler::aboveEqual, next);
7189 stop("assert(top >= start)");
7190 should_not_reach_here();
7191
7192 bind(next);
7193 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
7194 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7195 jcc(Assembler::aboveEqual, ok);
7196 stop("assert(top <= end)");
7197 should_not_reach_here();
7198
7199 bind(ok);
7200 NOT_LP64(pop(thread_reg));
7201 pop(t1);
7202 }
7203 #endif
7204 }
7205
// Field accessors and a printer for a control word kept in _value
// (used as FPU_State::_control_word).
class ControlWord {
 public:
  int32_t _value;

  // Two-bit fields.
  int  rounding_control() const  { return (_value & 0x0C00) >> 10; }
  int  precision_control() const { return (_value & 0x0300) >> 8; }

  // Single-bit fields, bits 5..0.
  bool precision() const         { return (_value & 0x20) != 0; }
  bool underflow() const         { return (_value & 0x10) != 0; }
  bool overflow() const          { return (_value & 0x08) != 0; }
  bool zero_divide() const       { return (_value & 0x04) != 0; }
  bool denormalized() const      { return (_value & 0x02) != 0; }
  bool invalid() const           { return (_value & 0x01) != 0; }

  // Prints the low 16 bits in hex, one letter per single-bit field (upper
  // case when the bit is set) and the names of the two control fields.
  void print() const {
    static const char* const rounding_names[4] = {
      "round near", "round down", "round up ", "chop "
    };
    static const char* const precision_names[4] = {
      "24 bits ", "reserved", "53 bits ", "64 bits "
    };
    const char flags[9] = {
      ' ',
      ' ',
      precision()    ? 'P' : 'p',
      underflow()    ? 'U' : 'u',
      overflow()     ? 'O' : 'o',
      zero_divide()  ? 'Z' : 'z',
      denormalized() ? 'D' : 'd',
      invalid()      ? 'I' : 'i',
      '\0'
    };
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, flags,
           rounding_names[rounding_control()],
           precision_names[precision_control()]);
  }

};
7252
// Field accessors and a printer for a status word kept in _value
// (used as FPU_State::_status_word).
class StatusWord {
 public:
  int32_t _value;

  bool busy() const          { return (_value & 0x8000) != 0; }
  // Condition code bits.
  bool C3() const            { return (_value & 0x4000) != 0; }
  bool C2() const            { return (_value & 0x0400) != 0; }
  bool C1() const            { return (_value & 0x0200) != 0; }
  bool C0() const            { return (_value & 0x0100) != 0; }
  // Three-bit top-of-stack field.
  int  top() const           { return (_value & 0x3800) >> 11; }
  // Single-bit fields, bits 7..0.
  bool error_status() const  { return (_value & 0x80) != 0; }
  bool stack_fault() const   { return (_value & 0x40) != 0; }
  bool precision() const     { return (_value & 0x20) != 0; }
  bool underflow() const     { return (_value & 0x10) != 0; }
  bool overflow() const      { return (_value & 0x08) != 0; }
  bool zero_divide() const   { return (_value & 0x04) != 0; }
  bool denormalized() const  { return (_value & 0x02) != 0; }
  bool invalid() const       { return (_value & 0x01) != 0; }

  // Prints the low 16 bits in hex, the low eight flag bits, the condition
  // codes and top; a cleared bit is shown as '-'.
  void print() const {
    const char cc[5] = {
      C3() ? '3' : '-',
      C2() ? '2' : '-',
      C1() ? '1' : '-',
      C0() ? '0' : '-',
      '\0'
    };
    const char flags[9] = {
      error_status() ? 'E' : '-',
      stack_fault()  ? 'S' : '-',
      precision()    ? 'P' : '-',
      underflow()    ? 'U' : '-',
      overflow()     ? 'O' : '-',
      zero_divide()  ? 'Z' : '-',
      denormalized() ? 'D' : '-',
      invalid()      ? 'I' : '-',
      '\0'
    };
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, flags, cc, top());
  }

};
7296
// A tag word kept in _value (used as FPU_State::_tag_word): a packed
// sequence of two-bit tags.
class TagWord {
 public:
  int32_t _value;

  // Returns the two-bit tag number i (tag 0 occupies the lowest two bits).
  int tag_at(int i) const {
    const int shift = i * 2;
    return (_value >> shift) & 3;
  }

  // Prints the low 16 bits in hex.
  void print() const {
    const int low16 = _value & 0xFFFF;
    printf("%04x", low16);
  }

};
7308
// One FPU register image: two 32-bit words (_m0, _m1) followed by a
// 16-bit word (_ex).
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for exactly one bit pattern: _ex == -1, _m1 == 0xC0000000, _m0 == 0.
  bool is_indefinite() const {
    if (_ex != -1)                  return false;
    if (_m1 != (int32_t)0xC0000000) return false;
    return _m0 == 0;
  }

  // Prints a sign character, then _ex, _m1 and _m0 in hex, then "NaN" when
  // _ex is 0x7FFF or -1.
  void print() const {
    const bool  nan_pattern = (_ex == 0x7FFF || _ex == (int16_t)-1);
    const char* kind        = nan_pattern ? "NaN" : " ";
    const char  sign        = (_ex < 0) ? '-' : '+';
    printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
  }

};
7326
7327 class FPU_State {
7328 public:
7329 enum {
7330 register_size = 10,
7331 number_of_registers = 8,
7332 register_mask = 7
7333 };
7334
7335 ControlWord _control_word;
7336 StatusWord _status_word;
7337 TagWord _tag_word;
7338 int32_t _error_offset;
7339 int32_t _error_selector;
7340 int32_t _data_offset;
7341 int32_t _data_selector;
7342 int8_t _register[register_size * number_of_registers];
7343
7344 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
7345 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
7346
7347 const char* tag_as_string(int tag) const {
7348 switch (tag) {
7349 case 0: return "valid";
7350 case 1: return "zero";
7351 case 2: return "special";
7352 case 3: return "empty";
7353 }
7354 ShouldNotReachHere()
7355 return NULL;
7356 }
7357
7358 void print() const {
7359 // print computation registers
7360 { int t = _status_word.top();
7361 for (int i = 0; i < number_of_registers; i++) {
7362 int j = (i - t) & register_mask;
7363 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
7364 st(j)->print();
7365 printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
7366 }
7367 }
7368 printf("\n");
7369 // print control registers
7370 printf("ctrl = "); _control_word.print(); printf("\n");
7371 printf("stat = "); _status_word .print(); printf("\n");
7372 printf("tags = "); _tag_word .print(); printf("\n");
7373 }
7374
7375 };
7376
// Accessors and a printer for a flags register value kept in _value
// (used as IU_State::_eflags).
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const         { return (_value & 0x800) != 0; }
  bool direction() const        { return (_value & 0x400) != 0; }
  bool sign() const             { return (_value & 0x080) != 0; }
  bool zero() const             { return (_value & 0x040) != 0; }
  bool auxiliary_carry() const  { return (_value & 0x010) != 0; }
  bool parity() const           { return (_value & 0x004) != 0; }
  bool carry() const            { return (_value & 0x001) != 0; }

  // Prints the raw value in hex followed by one letter per flag; a cleared
  // flag is shown as '-'.
  void print() const {
    const char flags[8] = {
      overflow()        ? 'O' : '-',
      direction()       ? 'D' : '-',
      sign()            ? 'S' : '-',
      zero()            ? 'Z' : '-',
      auxiliary_carry() ? 'A' : '-',
      parity()          ? 'P' : '-',
      carry()           ? 'C' : '-',
      '\0'
    };
    printf("%08x flags = %s", _value, flags);
  }

};
7405
// A single 32-bit integer register value.
class IU_Register {
 public:
  int32_t _value;

  // Prints the value twice: as 8 zero-padded hex digits, then as a decimal
  // number right-aligned in an 11-character field.
  void print() const {
    const int32_t v = _value;
    printf("%08x %11d", v, v);
  }

};
7415
7416 class IU_State {
7417 public:
7418 Flag_Register _eflags;
7419 IU_Register _rdi;
7420 IU_Register _rsi;
7421 IU_Register _rbp;
7422 IU_Register _rsp;
7423 IU_Register _rbx;
7424 IU_Register _rdx;
7425 IU_Register _rcx;
7426 IU_Register _rax;
7427
7428 void print() const {
7429 // computation registers
7430 printf("rax, = "); _rax.print(); printf("\n");
7431 printf("rbx, = "); _rbx.print(); printf("\n");
7432 printf("rcx = "); _rcx.print(); printf("\n");
7433 printf("rdx = "); _rdx.print(); printf("\n");
7434 printf("rdi = "); _rdi.print(); printf("\n");
7435 printf("rsi = "); _rsi.print(); printf("\n");
7436 printf("rbp, = "); _rbp.print(); printf("\n");
7437 printf("rsp = "); _rsp.print(); printf("\n");
7438 printf("\n");
7439 // control registers
7440 printf("flgs = "); _eflags.print(); printf("\n");
7441 }
7442 };
7443
7444
7445 class CPU_State {
7446 public:
7447 FPU_State _fpu_state;
7448 IU_State _iu_state;
7449
7450 void print() const {
7451 printf("--------------------------------------------------\n");
7452 _iu_state .print();
7453 printf("\n");
7454 _fpu_state.print();
7455 printf("--------------------------------------------------\n");
7456 }
7457
7458 };
7459
7460
7461 static void _print_CPU_state(CPU_State* state) {
7462 state->print();
7463 };
7464
7465
// Emits code that prints the CPU state at the point of execution: the state
// is saved with push_CPU_state(), rsp is pushed as the single stack argument,
// _print_CPU_state is called, and finally the argument and the saved state
// are removed from the stack again.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}
7473
7474
7475 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
7476 static int counter = 0;
7477 FPU_State* fs = &state->_fpu_state;
7478 counter++;
7479 // For leaf calls, only verify that the top few elements remain empty.
7480 // We only need 1 empty at the top for C2 code.
7481 if( stack_depth < 0 ) {
7482 if( fs->tag_for_st(7) != 3 ) {
7483 printf("FPR7 not empty\n");
7484 state->print();
7485 assert(false, "error");
7486 return false;
7487 }
7488 return true; // All other stack states do not matter
7489 }
7490
7491 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
7492 "bad FPU control word");
7493
7494 // compute stack depth
7495 int i = 0;
7496 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
7497 int d = i;
7498 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
7499 // verify findings
7500 if (i != FPU_State::number_of_registers) {
7501 // stack not contiguous
7502 printf("%s: stack not contiguous at ST%d\n", s, i);
7503 state->print();
7504 assert(false, "error");
7505 return false;
7506 }
7507 // check if computed stack depth corresponds to expected stack depth
7508 if (stack_depth < 0) {
7509 // expected stack depth is -stack_depth or less
7510 if (d > -stack_depth) {
7511 // too many elements on the stack
7512 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
7513 state->print();
7514 assert(false, "error");
7515 return false;
7516 }
7517 } else {
7518 // expected stack depth is stack_depth
7519 if (d != stack_depth) {
7520 // wrong stack depth
7521 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
7522 state->print();
7523 assert(false, "error");
7524 return false;
7525 }
7526 }
7527 // everything is cool
7528 return true;
7529 }
7530
7531
// Emits a runtime check of the FPU stack; nothing is emitted unless VerifyFPU
// is set. The generated code saves the CPU state, pushes the three arguments
// of _verify_FPU (state pointer first, then the message, then the depth),
// calls it, drops the arguments, and executes int3 when the value tested in
// rax is zero. The saved CPU state is restored afterwards.
//
//   stack_depth  expected depth, passed through to _verify_FPU unchanged
//   s            message string; its address is embedded in the emitted code
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);   // non-zero result: skip the breakpoint
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}
7551
// Emits code that loads the klass field of the object addressed by src
// into dst. On _LP64 with UseCompressedOops the field is read with movl and
// then expanded by decode_heap_oop_not_null(); in every other configuration
// it is read with a single movptr.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    // On _LP64 this is the else branch; without _LP64 it is the whole body.
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
7561
// Emits code that loads into dst the prototype header stored in the klass of
// the object addressed by src. dst first receives the klass field and is then
// overwritten with the word found at
// Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()
// inside that klass.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    // The 32-bit klass value is not decoded separately: the addressing mode
    // r12_heapbase + dst * 8 + offset is used for the second load instead.
    movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  }
}
7574
// Emits code that stores the klass held in src into the klass field of the
// object addressed by dst.
// Note: on _LP64 with UseCompressedOops, src is passed to
// encode_heap_oop_not_null(src) before the 32-bit store, so the emitted code
// modifies src; otherwise src is stored unchanged with movptr.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    // On _LP64 this is the else branch; without _LP64 it is the whole body.
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
7584
7585 #ifdef _LP64
7586 void MacroAssembler::store_klass_gap(Register dst, Register src) {
7587 if (UseCompressedOops) {
7588 // Store to klass gap in destination
7589 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
7590 }
7591 }
7592
7593 void MacroAssembler::load_heap_oop(Register dst, Address src) {
7594 if (UseCompressedOops) {
7595 movl(dst, src);
7596 decode_heap_oop(dst);
7597 } else {
7598 movq(dst, src);
7599 }
7600 }
7601
7602 void MacroAssembler::store_heap_oop(Address dst, Register src) {
7603 if (UseCompressedOops) {
7604 assert(!dst.uses(src), "not enough registers");
7605 encode_heap_oop(src);
7606 movl(dst, src);
7607 } else {
7608 movq(dst, src);
7609 }
7610 }
7611
// Algorithm must match oop.inline.hpp encode_heap_oop.
//
// Emits code that compresses the (possibly null) oop in r in place:
// r = (r - r12_heapbase) >> LogMinObjAlignmentInBytes, with a zero r first
// replaced by r12_heapbase so that it encodes to zero.
// Requires UseCompressedOops.
void MacroAssembler::encode_heap_oop(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    // Debug-only check that r12_heapbase still equals the value stored at
    // Universe::heap_base_addr(); stops with a message otherwise.
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
    jcc(Assembler::equal, ok);
    stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
    bind(ok);
    pop(rscratch1);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  testq(r, r);
  // r == 0: substitute the heap base so the subtraction below yields 0
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}
7632
// Emits code that compresses the oop in r in place:
// r = (r - r12_heapbase) >> LogMinObjAlignmentInBytes.
// Unlike encode_heap_oop() no null handling is emitted; debug builds with
// CheckCompressedOops stop if r is zero at runtime.
// Requires UseCompressedOops.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);   // non-zero: fine, skip the stop
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}
7648
// Two-register form: emits code that leaves the compressed form of the oop
// in src in dst, dst = (src - r12_heapbase) >> LogMinObjAlignmentInBytes.
// src is copied to dst first when the registers differ; the subtract and
// shift are then applied to dst only. Debug builds with CheckCompressedOops
// stop if src is zero at runtime.
// Requires UseCompressedOops.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);   // non-zero: fine, skip the stop
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  subq(dst, r12_heapbase);
  shrq(dst, LogMinObjAlignmentInBytes);
}
7667
// Emits code that expands the (possibly null) compressed oop in r in place:
// r is shifted left by LogMinObjAlignmentInBytes and, unless the shifted
// value is zero, r12_heapbase is added. A zero input therefore stays zero.
// Requires UseCompressedOops.
void MacroAssembler::decode_heap_oop(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    // Debug-only check that r12_heapbase still equals the value stored at
    // Universe::heap_base_addr(); rscratch1 is preserved around the compare.
    Label ok;
    push(rscratch1);
    cmpptr(r12_heapbase,
           ExternalAddress((address)Universe::heap_base_addr()));
    jcc(Assembler::equal, ok);
    stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
    bind(ok);
    pop(rscratch1);
  }
#endif

  Label done;
  shlq(r, LogMinObjAlignmentInBytes);
  // No separate test is emitted: the branch consumes the flags produced by
  // the shift, so nothing may be inserted between these two instructions.
  jccb(Assembler::equal, done);
  addq(r, r12_heapbase);
#if 0
  // alternate decoding probably a wash.
  testq(r, r);
  jccb(Assembler::equal, done);
  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
#endif
  bind(done);
  verify_oop(r, "broken oop in decode_heap_oop");
}
7696
// Emits a single leaq that expands the compressed oop in r in place:
// r = r12_heapbase + r * 8. No null check and no oop verification are
// emitted, for the reasons given in the comments below.
// Requires UseCompressedOops.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  // The times_8 scale factor doubles as the alignment shift, hence this check.
  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
}
7705
// Two-register form: emits a single leaq computing
// dst = r12_heapbase + src * 8 from the compressed oop in src. No null check
// and no oop verification are emitted, for the reasons given below.
// Requires UseCompressedOops.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  // The times_8 scale factor doubles as the alignment shift, hence this check.
  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
  leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
}
7714
7715 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
7716 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
7717 int oop_index = oop_recorder()->find_index(obj);
7718 RelocationHolder rspec = oop_Relocation::spec(oop_index);
7719 mov_literal32(dst, oop_index, rspec, narrow_oop_operand);
7720 }
7721
7722 void MacroAssembler::reinit_heapbase() {
7723 if (UseCompressedOops) {
7724 movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
7725 }
7726 }
7727 #endif // _LP64
7728
7729 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
7730 switch (cond) {
7731 // Note some conditions are synonyms for others
7732 case Assembler::zero: return Assembler::notZero;
7733 case Assembler::notZero: return Assembler::zero;
7734 case Assembler::less: return Assembler::greaterEqual;
7735 case Assembler::lessEqual: return Assembler::greater;
7736 case Assembler::greater: return Assembler::lessEqual;
7737 case Assembler::greaterEqual: return Assembler::less;
7738 case Assembler::below: return Assembler::aboveEqual;
7739 case Assembler::belowEqual: return Assembler::above;
7740 case Assembler::above: return Assembler::belowEqual;
7741 case Assembler::aboveEqual: return Assembler::below;
7742 case Assembler::overflow: return Assembler::noOverflow;
7743 case Assembler::noOverflow: return Assembler::overflow;
7744 case Assembler::negative: return Assembler::positive;
7745 case Assembler::positive: return Assembler::negative;
7746 case Assembler::parity: return Assembler::noParity;
7747 case Assembler::noParity: return Assembler::parity;
7748 }
7749 ShouldNotReachHere(); return Assembler::overflow;
7750 }
7751
7752 SkipIfEqual::SkipIfEqual(
7753 MacroAssembler* masm, const bool* flag_addr, bool value) {
7754 _masm = masm;
7755 _masm->cmp8(ExternalAddress((address)flag_addr), value);
7756 _masm->jcc(Assembler::equal, _label);
7757 }
7758
// Binds _label at the current code position, which makes this the target of
// the conditional jump emitted by the constructor.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}