3723 masm.movl(rcx, Address(rdi, offset_offset));
3724 masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3725
3726 // Compute the minimum of the string lengths(rsi) and the
3727 // difference of the string lengths (stack)
3728
3729
3730 if (VM_Version::supports_cmov()) {
3731 masm.movl(rdi, Address(rdi, count_offset));
3732 masm.movl(rsi, Address(rsi, count_offset));
3733 masm.movl(rcx, rdi);
3734 masm.subl(rdi, rsi);
3735 masm.push(rdi);
3736 masm.cmovl(Assembler::lessEqual, rsi, rcx);
3737 } else {
3738 masm.movl(rdi, Address(rdi, count_offset));
3739 masm.movl(rcx, Address(rsi, count_offset));
3740 masm.movl(rsi, rdi);
3741 masm.subl(rdi, rcx);
3742 masm.push(rdi);
3743 masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL);
3744 masm.movl(rsi, rcx);
3745 // rsi holds min, rcx is unused
3746 }
3747
3748 // Is the minimum length zero?
3749 masm.bind(ECX_GOOD_LABEL);
3750 masm.testl(rsi, rsi);
3751 masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3752
3753 // Load first characters
3754 masm.load_unsigned_word(rcx, Address(rbx, 0));
3755 masm.load_unsigned_word(rdi, Address(rax, 0));
3756
3757 // Compare first characters
3758 masm.subl(rcx, rdi);
3759 masm.jcc(Assembler::notZero, POP_LABEL);
3760 masm.decrementl(rsi);
3761 masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3762
3763 {
3764 // Check after comparing first character to see if strings are equivalent
3765 Label LSkip2;
3766 // Check if the strings start at same location
3767 masm.cmpptr(rbx,rax);
3768 masm.jcc(Assembler::notEqual, LSkip2);
3769
3770 // Check if the length difference is zero (from stack)
3771 masm.cmpl(Address(rsp, 0), 0x0);
3772 masm.jcc(Assembler::equal, LENGTH_DIFF_LABEL);
3773
3774 // Strings might not be equivalent
3775 masm.bind(LSkip2);
3776 }
3777
3778 // Shift rax, and rbx, to the end of the arrays, negate min
3779 masm.lea(rax, Address(rax, rsi, Address::times_2, 2));
3780 masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2));
3781 masm.negl(rsi);
3782
3783 // Compare the rest of the characters
3784 masm.bind(WHILE_HEAD_LABEL);
3785 masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
3786 masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
3787 masm.subl(rcx, rdi);
3788 masm.jcc(Assembler::notZero, POP_LABEL);
3789 masm.incrementl(rsi);
3790 masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3791
3792 // Strings are equal up to min length. Return the length difference.
3793 masm.bind(LENGTH_DIFF_LABEL);
3794 masm.pop(rcx);
3795 masm.jmp(DONE_LABEL);
3796
3797 // Discard the stored length difference
3798 masm.bind(POP_LABEL);
3799 masm.addptr(rsp, 4);
3800
3801 // That's it
3802 masm.bind(DONE_LABEL);
3803 %}
3804
// Emits an inline equality test for two char[] references (element base is
// taken for T_CHAR). $result receives 1 when the references are identical, or
// when both are non-null with equal length and equal contents; otherwise 0.
// ary1/ary2 are overwritten (reused as scratch inside the compare loop), and
// tmp1/tmp2 are clobbered as well.
3805 enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{
3806 Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP;
3807 MacroAssembler masm(&cbuf);
3808
3809 Register ary1Reg = as_Register($ary1$$reg);
3810 Register ary2Reg = as_Register($ary2$$reg);
3811 Register tmp1Reg = as_Register($tmp1$$reg);
3812 Register tmp2Reg = as_Register($tmp2$$reg);
3813 Register resultReg = as_Register($result$$reg);
3814
3815 int length_offset = arrayOopDesc::length_offset_in_bytes();
3816 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3817
3818 // Check the input args: same reference => equal; either one null => not equal
3819 masm.cmpl(ary1Reg, ary2Reg);
3820 masm.jcc(Assembler::equal, TRUE_LABEL);
3821 masm.testl(ary1Reg, ary1Reg);
3822 masm.jcc(Assembler::zero, FALSE_LABEL);
3823 masm.testl(ary2Reg, ary2Reg);
3824 masm.jcc(Assembler::zero, FALSE_LABEL);
3825
3826 // Check the lengths: they must match, and two empty arrays are equal
3827 masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
3828 masm.movl(resultReg, Address(ary2Reg, length_offset));
3829 masm.cmpl(tmp2Reg, resultReg);
3830 masm.jcc(Assembler::notEqual, FALSE_LABEL);
3831 masm.testl(resultReg, resultReg);
3832 masm.jcc(Assembler::zero, TRUE_LABEL);
3833
3834 // Get the number of 4 byte vectors (pairs of chars) to compare
3835 masm.shrl(resultReg, 1);
3836
3837 // Check for odd-length arrays (tmp2Reg still holds the full length)
3838 masm.andl(tmp2Reg, 1);
// NOTE(review): the AND above should already leave ZF set for the jcc below,
// which would make this TEST redundant (but harmless) - confirm before removing.
3839 masm.testl(tmp2Reg, tmp2Reg);
3840 masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
3841
3842 // Compare 2-byte "tail" at end of arrays; it sits right after the
// resultReg 4-byte vectors, i.e. at base_offset + resultReg*4
3843 masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
3844 masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
3845 masm.cmpl(tmp1Reg, tmp2Reg);
3846 masm.jcc(Assembler::notEqual, FALSE_LABEL);
// A one-element array has no 4-byte vectors left to compare
3847 masm.testl(resultReg, resultReg);
3848 masm.jcc(Assembler::zero, TRUE_LABEL);
3849
3850 // Setup compare loop
3851 masm.bind(COMPARE_LOOP_HDR);
3852 // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays
3853 masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
3854 masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
// Negate the vector count so the loop index runs from -count up to zero
3855 masm.negl(resultReg);
3856
3857 // 4-byte-wide compare loop; ary1Reg/ary2Reg are reused as data scratch here
3858 masm.bind(COMPARE_LOOP);
3859 masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
3860 masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
3861 masm.cmpl(ary1Reg, ary2Reg);
3862 masm.jcc(Assembler::notEqual, FALSE_LABEL);
3863 masm.increment(resultReg);
3864 masm.jcc(Assembler::notZero, COMPARE_LOOP);
3865
3866 masm.bind(TRUE_LABEL);
3867 masm.movl(resultReg, 1); // return true
3868 masm.jmp(DONE_LABEL);
3869
3870 masm.bind(FALSE_LABEL);
3871 masm.xorl(resultReg, resultReg); // return false
3872
3873 // That's it
3874 masm.bind(DONE_LABEL);
3875 %}
3876
// Emits the single opcode byte 0x5A (the IA-32 encoding of POP EDX).
3877 enc_class enc_pop_rdx() %{
3878 emit_opcode(cbuf,0x5A);
3879 %}
3880
// Emits a relocatable JMP rel32 (opcode 0xE9) to OptoRuntime::rethrow_stub().
3881 enc_class enc_rethrow() %{
// Mark the instruction start before emitting (presumably so the relocation
// below is attached to this instruction - confirm against CodeBuffer).
3882 cbuf.set_inst_mark();
3883 emit_opcode(cbuf, 0xE9); // jmp entry
// rel32 is relative to the end of the 4-byte displacement, hence the extra -4
// applied to the current code end. The (int) pointer casts assume 32-bit
// addresses.
3884 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.code_end())-4,
3885 runtime_call_Relocation::spec(), RELOC_IMM32 );
3886 %}
3887
3888
3889 // Convert a double to an int. Java semantics require we do complex
3890 // manglelations in the corner cases. So we set the rounding mode to
3891 // 'zero', store the darned double down as an int, and reset the
3892 // rounding mode to 'nearest'. The hardware throws an exception which
3893 // patches up the correct value directly to the stack.
3894 enc_class D2I_encoding( regD src ) %{
11885 %}
11886
11887 // Replicate scalar to packed single precision floating point values in xmm
// Matches Replicate2F of a register source when UseSSE >= 2. The PSHUFD
// immediate 0xe0 selects source dwords 0,0,2,3, so the scalar in dword 0 is
// copied into both of the two low dwords of $dst.
11888 instruct Repl2F_regX(regXD dst, regX src) %{
11889 predicate(UseSSE>=2);
11890 match(Set dst (Replicate2F src));
11891 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11892 ins_encode( pshufd(dst, src, 0xe0));
11893 ins_pipe( fpu_reg_reg );
11894 %}
11895
11896 // Replicate the float constant zero into packed single precision values in xmm
// Matches Replicate2F of the immXF0 operand when UseSSE >= 2. No shuffle is
// needed: XOR-ing $dst with itself clears the whole register.
11897 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
11898 predicate(UseSSE>=2);
11899 match(Set dst (Replicate2F zero));
11900 format %{ "PXOR $dst,$dst\t! replicate2F" %}
11901 ins_encode( pxor(dst, dst));
11902 ins_pipe( fpu_reg_reg );
11903 %}
11904
11905
11906
11907 // =======================================================================
11908 // fast clearing of an array
11909
// Matches ClearArray and zero-fills memory with REP STOS. The count (ECX) and
// base (EDI) are consumed and destroyed (USE_KILL); EAX and the flags are
// clobbered. Per the format text, ECX arrives as a doubleword count and is
// doubled to the number of 4-byte stores.
11910 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11911 match(Set dummy (ClearArray cnt base));
11912 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11913 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
11914 "XOR EAX,EAX\n\t"
11915 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
// The secondary opcode 0x4 is presumably the /4 (SHL) extension that RegOpc
// places in the ModRM byte after 0xD1 - confirm against the RegOpc encoding.
11916 opcode(0,0x4);
// Byte sequence: D1 /4 (SHL ECX,1); 33 /r (XOR EAX,EAX); F3 AB (REP STOS)
11917 ins_encode( Opcode(0xD1), RegOpc(ECX),
11918 OpcRegReg(0x33,EAX,EAX),
11919 Opcode(0xF3), Opcode(0xAB) );
11920 ins_pipe( pipe_slow );
11921 %}
11922
// Matches StrComp. Both string operands are consumed and destroyed (USE_KILL),
// tmp1/tmp2 and the flags are clobbered, and the comparison value is produced
// in $result. enc_String_Compare() takes no operands, so it depends on the
// fixed register classes declared here.
11923 instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
11924 match(Set result (StrComp str1 str2));
11925 effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr);
11926 //ins_cost(300);
11927
11928 format %{ "String Compare $str1,$str2 -> $result // KILL EAX, EBX" %}
11929 ins_encode( enc_String_Compare() );
11930 ins_pipe( pipe_slow );
11931 %}
11932
11933 // fast array equals
// Matches AryEq. Both array operands are consumed and destroyed (USE_KILL),
// tmp1/tmp2 and the flags are clobbered, and $result receives the outcome
// produced by enc_Array_Equals (1 for equal, 0 otherwise).
11934 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
11935 match(Set result (AryEq ary1 ary2));
11936 effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr);
11937 //ins_cost(300);
11938
11939 format %{ "Array Equals $ary1,$ary2 -> $result // KILL EAX, EBX" %}
11940 ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) );
11941 ins_pipe( pipe_slow );
11942 %}
11943
11944 //----------Control Flow Instructions------------------------------------------
11945 // Signed compare Instructions
// Signed int compare of two registers: matches CmpI and defines only the
// flags register; both operands are read, neither is written.
11946 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
11947 match(Set cr (CmpI op1 op2));
11948 effect( DEF cr, USE op1, USE op2 );
11949 format %{ "CMP $op1,$op2" %}
11950 opcode(0x3B); /* Opcode 3B /r */
11951 ins_encode( OpcP, RegReg( op1, op2) );
11952 ins_pipe( ialu_cr_reg_reg );
11953 %}
11954
11955 instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
11956 match(Set cr (CmpI op1 op2));
11957 effect( DEF cr, USE op1 );
11958 format %{ "CMP $op1,$op2" %}
11959 opcode(0x81,0x07); /* Opcode 81 /7 */
11960 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
|
3723 masm.movl(rcx, Address(rdi, offset_offset));
3724 masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3725
3726 // Compute the minimum of the string lengths(rsi) and the
3727 // difference of the string lengths (stack)
3728
3729
3730 if (VM_Version::supports_cmov()) {
3731 masm.movl(rdi, Address(rdi, count_offset));
3732 masm.movl(rsi, Address(rsi, count_offset));
3733 masm.movl(rcx, rdi);
3734 masm.subl(rdi, rsi);
3735 masm.push(rdi);
3736 masm.cmovl(Assembler::lessEqual, rsi, rcx);
3737 } else {
3738 masm.movl(rdi, Address(rdi, count_offset));
3739 masm.movl(rcx, Address(rsi, count_offset));
3740 masm.movl(rsi, rdi);
3741 masm.subl(rdi, rcx);
3742 masm.push(rdi);
3743 masm.jccb(Assembler::lessEqual, ECX_GOOD_LABEL);
3744 masm.movl(rsi, rcx);
3745 // rsi holds min, rcx is unused
3746 }
3747
3748 // Is the minimum length zero?
3749 masm.bind(ECX_GOOD_LABEL);
3750 masm.testl(rsi, rsi);
3751 masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
3752
3753 // Load first characters
3754 masm.load_unsigned_word(rcx, Address(rbx, 0));
3755 masm.load_unsigned_word(rdi, Address(rax, 0));
3756
3757 // Compare first characters
3758 masm.subl(rcx, rdi);
3759 masm.jccb(Assembler::notZero, POP_LABEL);
3760 masm.decrementl(rsi);
3761 masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
3762
3763 {
3764 // Check after comparing first character to see if strings are equivalent
3765 Label LSkip2;
3766 // Check if the strings start at same location
3767 masm.cmpptr(rbx,rax);
3768 masm.jccb(Assembler::notEqual, LSkip2);
3769
3770 // Check if the length difference is zero (from stack)
3771 masm.cmpl(Address(rsp, 0), 0x0);
3772 masm.jccb(Assembler::equal, LENGTH_DIFF_LABEL);
3773
3774 // Strings might not be equivalent
3775 masm.bind(LSkip2);
3776 }
3777
3778 // Advance to next character
3779 masm.addptr(rax, 2);
3780 masm.addptr(rbx, 2);
3781
3782 if (UseSSE >=4 && UseSSE42Intrinsics) {
3783 // With SSE4.2, use double quad vector compare
3784 Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
3785 // Setup to compare 16-byte vectors
3786 masm.movl(rdi, rsi);
3787 masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
3788 masm.andl(rdi, 0x00000007); // rdi holds the tail count
3789 masm.testl(rsi, rsi);
3790 masm.jccb(Assembler::zero, COMPARE_TAIL);
3791
3792 masm.lea(rax, Address(rax, rsi, Address::times_2));
3793 masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3794 masm.negl(rsi);
3795
3796 masm.bind(COMPARE_VECTORS);
3797 masm.movdqu(xmm6, Address(rax, rsi, Address::times_2));
3798 masm.movdqu(xmm7, Address(rbx, rsi, Address::times_2));
3799 masm.pxor(xmm6, xmm7);
3800 masm.ptest(xmm6, xmm6);
3801 masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
3802 masm.addl(rsi, 8);
3803 masm.jcc(Assembler::notZero, COMPARE_VECTORS);
3804 masm.jmpb(COMPARE_TAIL);
3805
3806 // Mismatched characters in the vectors
3807 masm.bind(VECTOR_NOT_EQUAL);
3808 masm.lea(rax, Address(rax, rsi, Address::times_2));
3809 masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3810 masm.movl(rdi, 8);
3811
3812 // Compare tail (< 8 chars), or rescan last vectors to
3813 // find 1st mismatched characters
3814 masm.bind(COMPARE_TAIL);
3815 masm.testl(rdi, rdi);
3816 masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
3817 masm.movl(rsi, rdi);
3818 // Fallthru to tail compare
3819 }
3820
3821 //Shift rax, and rbx, to the end of the arrays, negate min
3822 masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
3823 masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
3824 masm.negl(rsi);
3825
3826 // Compare the rest of the characters
3827 masm.bind(WHILE_HEAD_LABEL);
3828 masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
3829 masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
3830 masm.subl(rcx, rdi);
3831 masm.jccb(Assembler::notZero, POP_LABEL);
3832 masm.incrementl(rsi);
3833 masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3834
3835 // Strings are equal up to min length. Return the length difference.
3836 masm.bind(LENGTH_DIFF_LABEL);
3837 masm.pop(rcx);
3838 masm.jmpb(DONE_LABEL);
3839
3840 // Discard the stored length difference
3841 masm.bind(POP_LABEL);
3842 masm.addptr(rsp, 4);
3843
3844 // That's it
3845 masm.bind(DONE_LABEL);
3846 %}
3847
// Emits an inline String equality test. Registers are hard-coded: the two
// String references are expected in rdi and rsi, the result (1 = equal,
// 0 = not equal) is left in rax, and rbx, rcx, rdi, rsi are clobbered
// (plus xmm6/xmm7 on the SSE4.2 path).
// NOTE(review): neither reference is null-checked before its count field is
// loaded - presumably the caller guarantees non-null; confirm.
3848 enc_class enc_String_Equals() %{
3849 Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
3850 MacroAssembler masm(&cbuf);
3851
3852 int value_offset = java_lang_String::value_offset_in_bytes();
3853 int offset_offset = java_lang_String::offset_offset_in_bytes();
3854 int count_offset = java_lang_String::count_offset_in_bytes();
3855 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3856
3857 // does source == target string?
3858 masm.cmpptr(rdi, rsi);
3859 masm.jccb(Assembler::equal, RET_TRUE);
3860
3861 // get and compare counts; different counts => not equal, both empty => equal
3862 masm.movl(rcx, Address(rdi, count_offset));
3863 masm.movl(rax, Address(rsi, count_offset));
3864 masm.cmpl(rcx, rax);
3865 masm.jccb(Assembler::notEqual, RET_FALSE);
3866 masm.testl(rax, rax);
3867 masm.jccb(Assembler::zero, RET_TRUE);
3868
3869 // get source string offset and value; rsi becomes the address of its first char
3870 masm.movptr(rbx, Address(rsi, value_offset));
3871 masm.movl(rax, Address(rsi, offset_offset));
3872 masm.leal(rsi, Address(rbx, rax, Address::times_2, base_offset));
3873
3874 // get compare string offset and value; rdi becomes the address of its first char
3875 masm.movptr(rbx, Address(rdi, value_offset));
3876 masm.movl(rax, Address(rdi, offset_offset));
3877 masm.leal(rdi, Address(rbx, rax, Address::times_2, base_offset));
3878
3879 // Set byte count (2 bytes per char) in both rcx and rax
3880 masm.shll(rcx, 1);
3881 masm.movl(rax, rcx);
3882
3883 if (UseSSE >=4 && UseSSE42Intrinsics) {
3884 // With SSE4.2, use double quad vector compare
3885 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3886 // Compare 16-byte vectors
3887 masm.andl(rcx, 0xfffffff0); // vector count (in bytes)
3888 masm.andl(rax, 0x0000000e); // tail count (in bytes)
3889 masm.testl(rcx, rcx);
3890 masm.jccb(Assembler::zero, COMPARE_TAIL);
// Point both pointers past the vector region and index it with a negative
// offset that counts up to zero
3891 masm.lea(rdi, Address(rdi, rcx, Address::times_1));
3892 masm.lea(rsi, Address(rsi, rcx, Address::times_1));
3893 masm.negl(rcx);
3894
3895 masm.bind(COMPARE_WIDE_VECTORS);
3896 masm.movdqu(xmm6, Address(rdi, rcx, Address::times_1));
3897 masm.movdqu(xmm7, Address(rsi, rcx, Address::times_1));
// XOR leaves all zero bits only when the two vectors match; PTEST of the
// result against itself then sets ZF exactly in that case
3898 masm.pxor(xmm6, xmm7);
3899 masm.ptest(xmm6, xmm6);
3900 masm.jccb(Assembler::notZero, RET_FALSE);
3901 masm.addl(rcx, 16);
3902 masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3903 masm.bind(COMPARE_TAIL);
// Hand the remaining (< 16) bytes to the 4-byte compare below
3904 masm.movl(rcx, rax);
3905 // Fallthru to tail compare
3906 }
3907
3908 // Compare 4-byte vectors
3909 masm.andl(rcx, 0xfffffffc); // vector count (in bytes)
3910 masm.andl(rax, 0x00000002); // tail char (in bytes)
3911 masm.testl(rcx, rcx);
3912 masm.jccb(Assembler::zero, COMPARE_CHAR);
3913 masm.lea(rdi, Address(rdi, rcx, Address::times_1));
3914 masm.lea(rsi, Address(rsi, rcx, Address::times_1));
3915 masm.negl(rcx);
3916
3917 masm.bind(COMPARE_VECTORS);
3918 masm.movl(rbx, Address(rdi, rcx, Address::times_1));
3919 masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
3920 masm.jccb(Assembler::notEqual, RET_FALSE);
3921 masm.addl(rcx, 4);
3922 masm.jcc(Assembler::notZero, COMPARE_VECTORS);
3923
3924 // Compare trailing char (final 2 bytes), if any; rdi/rsi point at it here
3925 masm.bind(COMPARE_CHAR);
3926 masm.testl(rax, rax);
3927 masm.jccb(Assembler::zero, RET_TRUE);
3928 masm.load_unsigned_word(rbx, Address(rdi, 0));
3929 masm.load_unsigned_word(rcx, Address(rsi, 0));
3930 masm.cmpl(rbx, rcx);
3931 masm.jccb(Assembler::notEqual, RET_FALSE);
3932
3933 masm.bind(RET_TRUE);
3934 masm.movl(rax, 1); // return true
3935 masm.jmpb(DONE);
3936
3937 masm.bind(RET_FALSE);
3938 masm.xorl(rax, rax); // return false
3939
3940 masm.bind(DONE);
3941 %}
3942
// Emits an inline substring search built on PCMPESTRI. Registers are
// hard-coded: rsi holds the String being searched, rdi the substring; the
// result is left in rbx (char index of the match, or -1). rax, rcx, rdx, rsi,
// rdi and xmm6 are clobbered, and three 4-byte stack slots are used
// temporarily.
// NOTE(review): every MOVDQU/PCMPESTRI here addresses a full 16 bytes even
// when fewer chars remain - verify that reading past the end of the char data
// is safe.
3943 enc_class enc_String_IndexOf() %{
3944 // SSE4.2 version
3945 Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
3946 SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
3947 MacroAssembler masm(&cbuf);
3948
3949 // Get the first character position in both strings
3950 // [8] char array, [12] offset, [16] count
3951 int value_offset = java_lang_String::value_offset_in_bytes();
3952 int offset_offset = java_lang_String::offset_offset_in_bytes();
3953 int count_offset = java_lang_String::count_offset_in_bytes();
3954 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3955
3956 // Get counts for string (rdx) and substr (rax)
3957 masm.movl(rdx, Address(rsi, count_offset));
3958 masm.movl(rax, Address(rdi, count_offset));
3959 // Check for substr count > string count (nothing is on the stack yet, so
// RET_NEG_ONE skips the stack cleanup)
3960 masm.cmpl(rax, rdx);
3961 masm.jccb(Assembler::greater, RET_NEG_ONE);
3962
3963 // Start the indexOf operation
3964 // Get start addr of string
3965 masm.movptr(rbx, Address(rsi, value_offset));
3966 masm.movl(rcx, Address(rsi, offset_offset));
3967 masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
3968 masm.push(rsi);
3969
3970 // Get start addr of substr
3971 masm.movptr(rbx, Address(rdi, value_offset));
3972 masm.movl(rcx, Address(rdi, offset_offset));
3973 masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
3974 masm.push(rdi);
3975 masm.push(rax);
3976 masm.jmpb(PREP_FOR_SCAN);
3977
3978 // Substr count saved at sp
3979 // Substr saved at sp+4
3980 // String saved at sp+8
3981
3982 // Prep to load substr for scan: restore the substr address and count that
// the partial-match loop below advanced
3983 masm.bind(LOAD_SUBSTR);
3984 masm.movptr(rdi, Address(rsp, 4));
3985 masm.movl(rax, Address(rsp, 0));
3986
3987 // Load substr
3988 masm.bind(PREP_FOR_SCAN);
3989 masm.movdqu(xmm6, Address(rdi, 0));
// Pre-bias rdx and rsi so the unconditional adjustments at the loop head
// cancel out on the first iteration
3990 masm.addl(rdx, 8); // prime the loop
3991 masm.subptr(rsi, 16);
3992
3993 // Scan string for substr in 16-byte vectors
// PCMPESTRI takes its two explicit lengths from EAX (here the substr count)
// and EDX (here the remaining string count) and returns an index in ECX.
// Presumably imm8 0x0d selects unsigned 16-bit elements with "equal ordered"
// (substring) matching - confirm against the Intel SDM.
3994 masm.bind(SCAN_TO_SUBSTR);
3995 masm.subl(rdx, 8);
3996 masm.addptr(rsi, 16);
3997 masm.pcmpestri(xmm6, Address(rsi, 0), 0x0d);
3998 masm.jcc(Assembler::above, SCAN_TO_SUBSTR); // CF == 0 && ZF == 0
3999 masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
4000
4001 // Fallthru: found a potential substr
4002
4003 // Make sure string is still long enough: rdx becomes the number of chars
// from the candidate position (index rcx in this vector) to the end
4004 masm.subl(rdx, rcx);
4005 masm.cmpl(rdx, rax);
4006 masm.jccb(Assembler::negative, RET_NOT_FOUND);
4007 // Compute start addr of substr; rbx remembers the candidate start
4008 masm.lea(rsi, Address(rsi, rcx, Address::times_2));
4009 masm.movptr(rbx, rsi);
4010
4011 // Compare potential substr
4012 masm.addl(rdx, 8); // prime the loop
4013 masm.addl(rax, 8);
4014 masm.subptr(rsi, 16);
4015 masm.subptr(rdi, 16);
4016
4017 // Scan 16-byte vectors of string and substr
4018 masm.bind(SCAN_SUBSTR);
4019 masm.subl(rax, 8);
4020 masm.subl(rdx, 8);
4021 masm.addptr(rsi, 16);
4022 masm.addptr(rdi, 16);
4023 masm.movdqu(xmm6, Address(rdi, 0));
4024 masm.pcmpestri(xmm6, Address(rsi, 0), 0x0d);
// NOTE(review): on a failed candidate only rdi/rax are reloaded at
// LOAD_SUBSTR; scanning resumes from the current rsi rather than from the
// char after rbx - verify no later candidate can be skipped.
4025 masm.jcc(Assembler::noOverflow, LOAD_SUBSTR); // OF == 0
4026 masm.jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0
4027
4028 // Compute substr offset: byte distance from the string start (saved at
// sp+8) to the candidate, halved to give a char index
4029 masm.movptr(rsi, Address(rsp, 8));
4030 masm.subptr(rbx, rsi);
4031 masm.shrl(rbx, 1);
4032 masm.jmpb(CLEANUP);
4033
// Early exit taken before anything was pushed: skip the stack cleanup
4034 masm.bind(RET_NEG_ONE);
4035 masm.movl(rbx, -1);
4036 masm.jmpb(DONE);
4037
4038 masm.bind(RET_NOT_FOUND);
4039 masm.movl(rbx, -1);
4040
// Drop the three 4-byte slots pushed above
4041 masm.bind(CLEANUP);
4042 masm.addptr(rsp, 12);
4043
4044 masm.bind(DONE);
4045 %}
4046
4047
// Emits an inline equality test for two char[] references (element base is
// taken for T_CHAR). $result receives 1 when the references are identical, or
// when both are non-null with equal length and equal contents; otherwise 0.
// ary1/ary2 are advanced in place and tmp1/tmp2 are clobbered.
// NOTE(review): tmp3/tmp5 are never referenced by name; the SSE4.2 path uses
// xmm6/xmm7 directly, presumably matching the regXD6/regXD7 operand classes.
4048 enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eBXRegI tmp1, eDXRegI tmp2, regXD6 tmp3, regXD7 tmp5, eAXRegI result) %{
4049 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
4050 MacroAssembler masm(&cbuf);
4051
4052 Register ary1Reg = as_Register($ary1$$reg);
4053 Register ary2Reg = as_Register($ary2$$reg);
4054 Register tmp1Reg = as_Register($tmp1$$reg);
4055 Register tmp2Reg = as_Register($tmp2$$reg);
4056 Register resultReg = as_Register($result$$reg);
4057
4058 int length_offset = arrayOopDesc::length_offset_in_bytes();
4059 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
4060
4061 // Check the input args: same reference => equal; either one null => not equal
4062 masm.cmpptr(ary1Reg, ary2Reg);
4063 masm.jccb(Assembler::equal, TRUE_LABEL);
4064 masm.testptr(ary1Reg, ary1Reg);
4065 masm.jccb(Assembler::zero, FALSE_LABEL);
4066 masm.testptr(ary2Reg, ary2Reg);
4067 masm.jccb(Assembler::zero, FALSE_LABEL);
4068
4069 // Check the lengths: they must match, and two empty arrays are equal
4070 masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
4071 masm.movl(resultReg, Address(ary2Reg, length_offset));
4072 masm.cmpl(tmp2Reg, resultReg);
4073 masm.jccb(Assembler::notEqual, FALSE_LABEL);
4074 masm.testl(resultReg, resultReg);
4075 masm.jccb(Assembler::zero, TRUE_LABEL);
4076
4077 // Load array addrs (address of the first element of each array)
4078 masm.lea(ary1Reg, Address(ary1Reg, base_offset));
4079 masm.lea(ary2Reg, Address(ary2Reg, base_offset));
4080
4081 // Set byte count (2 bytes per char) in both tmp2Reg and resultReg
4082 masm.shll(tmp2Reg, 1);
4083 masm.movl(resultReg, tmp2Reg);
4084
4085
4086 if (UseSSE >=4 && UseSSE42Intrinsics) {
4087 // With SSE4.2, use double quad vector compare
4088 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
4089 // Compare 16-byte vectors
4090 masm.andl(tmp2Reg, 0xfffffff0); // vector count (in bytes)
4091 masm.andl(resultReg, 0x0000000e); // tail count (in bytes)
4092 masm.testl(tmp2Reg, tmp2Reg);
4093 masm.jccb(Assembler::zero, COMPARE_TAIL);
// Point both pointers past the vector region and index it with a negative
// offset that counts up to zero
4094 masm.lea(ary1Reg, Address(ary1Reg, tmp2Reg, Address::times_1));
4095 masm.lea(ary2Reg, Address(ary2Reg, tmp2Reg, Address::times_1));
4096 masm.negl(tmp2Reg);
4097
4098 masm.bind(COMPARE_WIDE_VECTORS);
4099 masm.movdqu(xmm6, Address(ary1Reg, tmp2Reg, Address::times_1));
4100 masm.movdqu(xmm7, Address(ary2Reg, tmp2Reg, Address::times_1));
// XOR leaves all zero bits only when the two vectors match; PTEST of the
// result against itself then sets ZF exactly in that case
4101 masm.pxor(xmm6, xmm7);
4102 masm.ptest(xmm6, xmm6);
4103
4104 masm.jccb(Assembler::notZero, FALSE_LABEL);
4105 masm.addl(tmp2Reg, 16);
4106 masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
4107 masm.bind(COMPARE_TAIL);
// Hand the remaining (< 16) bytes to the 4-byte compare below
4108 masm.movl(tmp2Reg, resultReg);
4109 // Fallthru to tail compare
4110 }
4111
4112 // Compare 4-byte vectors
4113 masm.andl(tmp2Reg, 0xfffffffc); // vector count (in bytes)
4114 masm.andl(resultReg, 0x00000002); // tail char (in bytes)
4115 masm.testl(tmp2Reg, tmp2Reg);
4116 masm.jccb(Assembler::zero, COMPARE_CHAR);
4117 masm.lea(ary1Reg, Address(ary1Reg, tmp2Reg, Address::times_1));
4118 masm.lea(ary2Reg, Address(ary2Reg, tmp2Reg, Address::times_1));
4119 masm.negl(tmp2Reg);
4120
4121 masm.bind(COMPARE_VECTORS);
4122 masm.movl(tmp1Reg, Address(ary1Reg, tmp2Reg, Address::times_1));
4123 masm.cmpl(tmp1Reg, Address(ary2Reg, tmp2Reg, Address::times_1));
4124 masm.jccb(Assembler::notEqual, FALSE_LABEL);
4125 masm.addl(tmp2Reg, 4);
4126 masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4127
4128 // Compare trailing char (final 2 bytes), if any; ary1Reg/ary2Reg point at it here
4129 masm.bind(COMPARE_CHAR);
4130 masm.testl(resultReg, resultReg);
4131 masm.jccb(Assembler::zero, TRUE_LABEL);
4132 masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, 0));
4133 masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, 0));
4134 masm.cmpl(tmp1Reg, tmp2Reg);
4135 masm.jccb(Assembler::notEqual, FALSE_LABEL);
4136
4137 masm.bind(TRUE_LABEL);
4138 masm.movl(resultReg, 1); // return true
4139 masm.jmpb(DONE);
4140
4141 masm.bind(FALSE_LABEL);
4142 masm.xorl(resultReg, resultReg); // return false
4143
4144 // That's it
4145 masm.bind(DONE);
4146 %}
4147
// Emits the single opcode byte 0x5A (the IA-32 encoding of POP EDX).
4148 enc_class enc_pop_rdx() %{
4149 emit_opcode(cbuf,0x5A);
4150 %}
4151
// Emits a relocatable JMP rel32 (opcode 0xE9) to OptoRuntime::rethrow_stub().
4152 enc_class enc_rethrow() %{
// Mark the instruction start before emitting (presumably so the relocation
// below is attached to this instruction - confirm against CodeBuffer).
4153 cbuf.set_inst_mark();
4154 emit_opcode(cbuf, 0xE9); // jmp entry
// rel32 is relative to the end of the 4-byte displacement, hence the extra -4
// applied to the current code end. The (int) pointer casts assume 32-bit
// addresses.
4155 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.code_end())-4,
4156 runtime_call_Relocation::spec(), RELOC_IMM32 );
4157 %}
4158
4159
4160 // Convert a double to an int. Java semantics require we do complex
4161 // manglelations in the corner cases. So we set the rounding mode to
4162 // 'zero', store the darned double down as an int, and reset the
4163 // rounding mode to 'nearest'. The hardware throws an exception which
4164 // patches up the correct value directly to the stack.
4165 enc_class D2I_encoding( regD src ) %{
12156 %}
12157
12158 // Replicate scalar to packed single precision floating point values in xmm
// Matches Replicate2F of a register source when UseSSE >= 2. The PSHUFD
// immediate 0xe0 selects source dwords 0,0,2,3, so the scalar in dword 0 is
// copied into both of the two low dwords of $dst.
12159 instruct Repl2F_regX(regXD dst, regX src) %{
12160 predicate(UseSSE>=2);
12161 match(Set dst (Replicate2F src));
12162 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12163 ins_encode( pshufd(dst, src, 0xe0));
12164 ins_pipe( fpu_reg_reg );
12165 %}
12166
12167 // Replicate the float constant zero into packed single precision values in xmm
// Matches Replicate2F of the immXF0 operand when UseSSE >= 2. No shuffle is
// needed: XOR-ing $dst with itself clears the whole register.
12168 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12169 predicate(UseSSE>=2);
12170 match(Set dst (Replicate2F zero));
12171 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12172 ins_encode( pxor(dst, dst));
12173 ins_pipe( fpu_reg_reg );
12174 %}
12175
12176 // =======================================================================
12177 // fast clearing of an array
12178
// Matches ClearArray and zero-fills memory with REP STOS. The count (ECX) and
// base (EDI) are consumed and destroyed (USE_KILL); EAX and the flags are
// clobbered. Per the format text, ECX arrives as a doubleword count and is
// doubled to the number of 4-byte stores.
12179 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12180 match(Set dummy (ClearArray cnt base));
12181 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12182 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12183 "XOR EAX,EAX\n\t"
12184 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
// The secondary opcode 0x4 is presumably the /4 (SHL) extension that RegOpc
// places in the ModRM byte after 0xD1 - confirm against the RegOpc encoding.
12185 opcode(0,0x4);
// Byte sequence: D1 /4 (SHL ECX,1); 33 /r (XOR EAX,EAX); F3 AB (REP STOS)
12186 ins_encode( Opcode(0xD1), RegOpc(ECX),
12187 OpcRegReg(0x33,EAX,EAX),
12188 Opcode(0xF3), Opcode(0xAB) );
12189 ins_pipe( pipe_slow );
12190 %}
12191
// Matches StrComp. Both string operands are consumed and destroyed (USE_KILL),
// tmp1..tmp4 (two integer and two XMM temporaries) and the flags are clobbered,
// and the comparison value is produced in $result. enc_String_Compare() takes
// no operands, so it depends on the fixed register classes declared here.
12192 instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, regXD6 tmp3, regXD7 tmp4, eCXRegI result, eFlagsReg cr) %{
12193 match(Set result (StrComp str1 str2));
12194 effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
12195 //ins_cost(300);
12196
12197 format %{ "String Compare $str1,$str2 -> $result // KILL EAX, EBX, XMM6, XMM7" %}
12198 ins_encode( enc_String_Compare() );
12199 ins_pipe( pipe_slow );
12200 %}
12201
12202 // fast string equals
// Matches StrEquals. Both string operands are consumed and destroyed
// (USE_KILL); tmp1 (EBX), tmp2 (ECX), the two XMM temporaries and the flags
// are clobbered; the boolean outcome is produced in $result.
// enc_String_Equals() takes no operands, so it depends on the fixed register
// classes declared here.
12203 instruct string_equals(eDIRegP str1, eSIRegP str2, eBXRegI tmp1, eCXRegI tmp2, regXD6 tmp3, regXD7 tmp4, eAXRegI result, eFlagsReg cr) %{
12204 match(Set result (StrEquals str1 str2));
12205 effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
12206
// The kill list in the format text mirrors effect() above: EDX is neither
// declared killed nor touched by enc_String_Equals, so it is not listed.
12207 format %{ "String Equals $str1,$str2 -> $result // KILL EBX, ECX, XMM6, XMM7" %}
12208 ins_encode( enc_String_Equals() );
12209 ins_pipe( pipe_slow );
12210 %}
12211
// Matches StrIndexOf, but only when UseSSE >= 4 && UseSSE42Intrinsics, since
// the encoding is the SSE4.2 (PCMPESTRI) version. str1 (the string searched)
// and str2 (the substring) are consumed and destroyed (USE_KILL); tmp1..tmp4
// and the flags are clobbered; the index is produced in $result.
// enc_String_IndexOf() takes no operands, so it depends on the fixed register
// classes declared here.
12212 instruct string_indexof(eSIRegP str1, eDIRegP str2, eAXRegI tmp1, eCXRegI tmp2, eDXRegI tmp3, regXD6 tmp4, eBXRegI result, eFlagsReg cr) %{
12213 predicate(UseSSE >=4 && UseSSE42Intrinsics);
12214 match(Set result (StrIndexOf str1 str2));
12215 effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
12216
12217 format %{ "String IndexOf $str1,$str2 -> $result // KILL EAX, ECX, EDX, XMM6" %}
12218 ins_encode( enc_String_IndexOf() );
12219 ins_pipe( pipe_slow );
12220 %}
12221
12222 // fast array equals
// Matches AryEq. Both array operands are consumed and destroyed (USE_KILL),
// tmp1..tmp4 and the flags are clobbered, and $result receives the outcome
// produced by enc_Array_Equals (1 for equal, 0 otherwise). Operands are passed
// to the encoding positionally.
12223 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eBXRegI tmp1, eDXRegI tmp2, regXD6 tmp3, regXD7 tmp4, eAXRegI result, eFlagsReg cr) %{
12224 match(Set result (AryEq ary1 ary2));
12225 effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
12226 //ins_cost(300);
12227
12228 format %{ "Array Equals $ary1,$ary2 -> $result // KILL EBX, EDX, XMM6, XMM7" %}
12229 ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
12230 ins_pipe( pipe_slow );
12231 %}
12232
12233 //----------Control Flow Instructions------------------------------------------
12234 // Signed compare Instructions
// Signed int compare of two registers: matches CmpI and defines only the
// flags register; both operands are read, neither is written.
12235 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
12236 match(Set cr (CmpI op1 op2));
12237 effect( DEF cr, USE op1, USE op2 );
12238 format %{ "CMP $op1,$op2" %}
12239 opcode(0x3B); /* Opcode 3B /r */
12240 ins_encode( OpcP, RegReg( op1, op2) );
12241 ins_pipe( ialu_cr_reg_reg );
12242 %}
12243
12244 instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
12245 match(Set cr (CmpI op1 op2));
12246 effect( DEF cr, USE op1 );
12247 format %{ "CMP $op1,$op2" %}
12248 opcode(0x81,0x07); /* Opcode 81 /7 */
12249 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
|