774 __ lea(end, Address(start, count, Address::times_ptr, -wordSize));
775 __ shrptr(start, CardTableModRefBS::card_shift);
776 __ shrptr(end, CardTableModRefBS::card_shift);
777 __ subptr(end, start); // end --> count
778 __ BIND(L_loop);
779 intptr_t disp = (intptr_t) ct->byte_map_base;
780 Address cardtable(start, count, Address::times_1, disp);
781 __ movb(cardtable, 0);
782 __ decrement(count);
783 __ jcc(Assembler::greaterEqual, L_loop);
784 }
785 break;
786 case BarrierSet::ModRef:
787 break;
788 default :
789 ShouldNotReachHere();
790
791 }
792 }
793
794 // Copy 64 bytes chunks
795 //
796 // Inputs:
797 // from - source array address
798 // to_from - difference (destination array address minus from)
799 // qword_count - 8-byte element count (callers pass a non-negative count)
800 //
801 void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
802 Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
803 // Copy 64-byte chunks
804 __ jmpb(L_copy_64_bytes);
805 __ align(16);
806 __ BIND(L_copy_64_bytes_loop);
807 __ movq(mmx0, Address(from, 0));
808 __ movq(mmx1, Address(from, 8));
809 __ movq(mmx2, Address(from, 16));
810 __ movq(Address(from, to_from, Address::times_1, 0), mmx0);
811 __ movq(mmx3, Address(from, 24));
812 __ movq(Address(from, to_from, Address::times_1, 8), mmx1);
813 __ movq(mmx4, Address(from, 32));
814 __ movq(Address(from, to_from, Address::times_1, 16), mmx2);
815 __ movq(mmx5, Address(from, 40));
816 __ movq(Address(from, to_from, Address::times_1, 24), mmx3);
817 __ movq(mmx6, Address(from, 48));
818 __ movq(Address(from, to_from, Address::times_1, 32), mmx4);
819 __ movq(mmx7, Address(from, 56));
820 __ movq(Address(from, to_from, Address::times_1, 40), mmx5);
821 __ movq(Address(from, to_from, Address::times_1, 48), mmx6);
859
860 __ enter(); // required for proper stackwalking of RuntimeStub frame
861 __ push(rsi);
862 __ push(rdi);
863 __ movptr(from , Address(rsp, 12+ 4));
864 __ movptr(to , Address(rsp, 12+ 8));
865 __ movl(count, Address(rsp, 12+ 12));
866 if (t == T_OBJECT) {
867 __ testl(count, count);
868 __ jcc(Assembler::zero, L_0_count);
869 gen_write_ref_array_pre_barrier(to, count);
870 __ mov(saved_to, to); // save 'to'
871 }
872
873 *entry = __ pc(); // Entry point from conjoint arraycopy stub.
874 BLOCK_COMMENT("Entry:");
875
876 __ subptr(to, from); // to --> to_from
877 __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
878 __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
879 if (!aligned && (t == T_BYTE || t == T_SHORT)) {
880 // align source address at 4 bytes address boundary
881 if (t == T_BYTE) {
882 // One byte misalignment happens only for byte arrays
883 __ testl(from, 1);
884 __ jccb(Assembler::zero, L_skip_align1);
885 __ movb(rax, Address(from, 0));
886 __ movb(Address(from, to_from, Address::times_1, 0), rax);
887 __ increment(from);
888 __ decrement(count);
889 __ BIND(L_skip_align1);
890 }
891 // Two bytes misalignment happens only for byte and short (char) arrays
892 __ testl(from, 2);
893 __ jccb(Assembler::zero, L_skip_align2);
894 __ movw(rax, Address(from, 0));
895 __ movw(Address(from, to_from, Address::times_1, 0), rax);
896 __ addptr(from, 2);
897 __ subl(count, 1<<(shift-1));
898 __ BIND(L_skip_align2);
899 }
900 if (!VM_Version::supports_mmx()) {
901 __ mov(rax, count); // save 'count'
902 __ shrl(count, shift); // bytes count
903 __ addptr(to_from, from);// restore 'to'
904 __ rep_mov();
905 __ subptr(to_from, from);// restore 'to_from'
906 __ mov(count, rax); // restore 'count'
907 __ jmpb(L_copy_2_bytes); // all dwords were copied
908 } else {
909 // align to 8 bytes, we know we are 4 byte aligned to start
910 __ testptr(from, 4);
911 __ jccb(Assembler::zero, L_copy_64_bytes);
912 __ movl(rax, Address(from, 0));
913 __ movl(Address(from, to_from, Address::times_1, 0), rax);
914 __ addptr(from, 4);
915 __ subl(count, 1<<shift);
916 __ BIND(L_copy_64_bytes);
917 __ mov(rax, count);
918 __ shrl(rax, shift+1); // 8 bytes chunk count
919 //
920 // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
921 //
922 mmx_copy_forward(from, to_from, rax);
923 }
924 // copy tailing dword
925 __ BIND(L_copy_4_bytes);
926 __ testl(count, 1<<shift);
927 __ jccb(Assembler::zero, L_copy_2_bytes);
928 __ movl(rax, Address(from, 0));
929 __ movl(Address(from, to_from, Address::times_1, 0), rax);
930 if (t == T_BYTE || t == T_SHORT) {
931 __ addptr(from, 4);
932 __ BIND(L_copy_2_bytes);
933 // copy tailing word
934 __ testl(count, 1<<(shift-1));
935 __ jccb(Assembler::zero, L_copy_byte);
936 __ movw(rax, Address(from, 0));
937 __ movw(Address(from, to_from, Address::times_1, 0), rax);
938 if (t == T_BYTE) {
939 __ addptr(from, 2);
940 __ BIND(L_copy_byte);
941 // copy tailing byte
942 __ testl(count, 1);
943 __ jccb(Assembler::zero, L_exit);
1052 __ shrptr(count, shift); // bytes count
1053 __ rep_mov();
1054 __ cld();
1055 __ mov(count, rax); // restore 'count'
1056 __ andl(count, (1<<shift)-1); // mask the number of rest elements
1057 __ movptr(from, Address(rsp, 12+4)); // reread 'from'
1058 __ mov(to, rdx); // restore 'to'
1059 __ jmpb(L_copy_2_bytes); // all dword were copied
1060 } else {
1061 // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
1062 __ testptr(end, 4);
1063 __ jccb(Assembler::zero, L_copy_8_bytes);
1064 __ subl(count, 1<<shift);
1065 __ movl(rdx, Address(from, count, sf, 0));
1066 __ movl(Address(to, count, sf, 0), rdx);
1067 __ jmpb(L_copy_8_bytes);
1068
1069 __ align(16);
1070 // Move 8 bytes
1071 __ BIND(L_copy_8_bytes_loop);
1072 __ movq(mmx0, Address(from, count, sf, 0));
1073 __ movq(Address(to, count, sf, 0), mmx0);
1074 __ BIND(L_copy_8_bytes);
1075 __ subl(count, 2<<shift);
1076 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1077 __ addl(count, 2<<shift);
1078 __ emms();
1079 }
1080 __ BIND(L_copy_4_bytes);
1081 // copy prefix dword
1082 __ testl(count, 1<<shift);
1083 __ jccb(Assembler::zero, L_copy_2_bytes);
1084 __ movl(rdx, Address(from, count, sf, -4));
1085 __ movl(Address(to, count, sf, -4), rdx);
1086
1087 if (t == T_BYTE || t == T_SHORT) {
1088 __ subl(count, (1<<shift));
1089 __ BIND(L_copy_2_bytes);
1090 // copy prefix word
1091 __ testl(count, 1<<(shift-1));
1092 __ jccb(Assembler::zero, L_copy_byte);
1093 __ movw(rdx, Address(from, count, sf, -2));
1094 __ movw(Address(to, count, sf, -2), rdx);
1095 if (t == T_BYTE) {
1096 __ subl(count, 1<<(shift-1));
1097 __ BIND(L_copy_byte);
1098 // copy prefix byte
1099 __ testl(count, 1);
1126 __ align(CodeEntryAlignment);
1127 StubCodeMark mark(this, "StubRoutines", name);
1128 address start = __ pc();
1129
1130 Label L_copy_8_bytes, L_copy_8_bytes_loop;
1131 const Register from = rax; // source array address
1132 const Register to = rdx; // destination array address
1133 const Register count = rcx; // elements count
1134 const Register to_from = rdx; // (to - from)
1135
1136 __ enter(); // required for proper stackwalking of RuntimeStub frame
1137 __ movptr(from , Address(rsp, 8+0)); // from
1138 __ movptr(to , Address(rsp, 8+4)); // to
1139 __ movl2ptr(count, Address(rsp, 8+8)); // count
1140
1141 *entry = __ pc(); // Entry point from conjoint arraycopy stub.
1142 BLOCK_COMMENT("Entry:");
1143
1144 __ subptr(to, from); // to --> to_from
1145 if (VM_Version::supports_mmx()) {
1146 mmx_copy_forward(from, to_from, count);
1147 } else {
1148 __ jmpb(L_copy_8_bytes);
1149 __ align(16);
1150 __ BIND(L_copy_8_bytes_loop);
1151 __ fild_d(Address(from, 0));
1152 __ fistp_d(Address(from, to_from, Address::times_1));
1153 __ addptr(from, 8);
1154 __ BIND(L_copy_8_bytes);
1155 __ decrement(count);
1156 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1157 }
1158 inc_copy_counter_np(T_LONG);
1159 __ leave(); // required for proper stackwalking of RuntimeStub frame
1160 __ xorptr(rax, rax); // return 0
1161 __ ret(0);
1162 return start;
1163 }
1164
1165 address generate_conjoint_long_copy(address nooverlap_target,
1166 address* entry, const char *name) {
1179 __ movptr(to , Address(rsp, 8+4)); // to
1180 __ movl2ptr(count, Address(rsp, 8+8)); // count
1181
1182 *entry = __ pc(); // Entry point from generic arraycopy stub.
1183 BLOCK_COMMENT("Entry:");
1184
1185 // arrays overlap test
1186 __ cmpptr(to, from);
1187 RuntimeAddress nooverlap(nooverlap_target);
1188 __ jump_cc(Assembler::belowEqual, nooverlap);
1189 __ lea(end_from, Address(from, count, Address::times_8, 0));
1190 __ cmpptr(to, end_from);
1191 __ movptr(from, Address(rsp, 8)); // from
1192 __ jump_cc(Assembler::aboveEqual, nooverlap);
1193
1194 __ jmpb(L_copy_8_bytes);
1195
1196 __ align(16);
1197 __ BIND(L_copy_8_bytes_loop);
1198 if (VM_Version::supports_mmx()) {
1199 __ movq(mmx0, Address(from, count, Address::times_8));
1200 __ movq(Address(to, count, Address::times_8), mmx0);
1201 } else {
1202 __ fild_d(Address(from, count, Address::times_8));
1203 __ fistp_d(Address(to, count, Address::times_8));
1204 }
1205 __ BIND(L_copy_8_bytes);
1206 __ decrement(count);
1207 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1208
1209 if (VM_Version::supports_mmx()) {
1210 __ emms();
1211 }
1212 inc_copy_counter_np(T_LONG);
1213 __ leave(); // required for proper stackwalking of RuntimeStub frame
1214 __ xorptr(rax, rax); // return 0
1215 __ ret(0);
1216 return start;
1217 }
1218
1219
1220 // Helper for generating a dynamic type check.
1221 // The sub_klass must be one of {rbx, rdx, rsi}.
1222 // The temp is killed.
1223 void generate_type_check(Register sub_klass,
1224 Address& super_check_offset_addr,
1225 Address& super_klass_addr,
1226 Register temp,
1227 Label* L_success_ptr, Label* L_failure_ptr) {
1228 BLOCK_COMMENT("type_check:");
1229
|
774 __ lea(end, Address(start, count, Address::times_ptr, -wordSize));
775 __ shrptr(start, CardTableModRefBS::card_shift);
776 __ shrptr(end, CardTableModRefBS::card_shift);
777 __ subptr(end, start); // end --> count
778 __ BIND(L_loop);
779 intptr_t disp = (intptr_t) ct->byte_map_base;
780 Address cardtable(start, count, Address::times_1, disp);
781 __ movb(cardtable, 0);
782 __ decrement(count);
783 __ jcc(Assembler::greaterEqual, L_loop);
784 }
785 break;
786 case BarrierSet::ModRef:
787 break;
788 default :
789 ShouldNotReachHere();
790
791 }
792 }
793
794
795 // Emit code that copies qword_count 8-byte elements forward through XMM registers, 64 bytes per main-loop iteration.
796 // Whatever is left after the last full 64-byte chunk is copied by a one-qword-at-a-time loop.
797 // Inputs:
798 // from - source array address; the emitted code advances it as data is copied
799 // to_from - difference (destination array address minus from); every store targets from + to_from, so it is not modified
800 // qword_count - number of 8-byte elements to copy; expected to be non-negative, the emitted loops count it down toward zero
801 //
802 void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
803 assert( UseSSE >= 2, "supported cpu only" ); // this generator is only valid when UseSSE >= 2
804 Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
805 // Copy 64-byte chunks
806 __ jmpb(L_copy_64_bytes); // enter at the count check so short counts never run the 64-byte body
807 __ align(16); // align the loop head
808 __ BIND(L_copy_64_bytes_loop);
809
810 if(UseUnalignedLoadStores) { // four 16-byte movdqu load/store pairs cover the 64-byte chunk
811 __ movdqu(xmm0, Address(from, 0));
812 __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
813 __ movdqu(xmm1, Address(from, 16));
814 __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
815 __ movdqu(xmm2, Address(from, 32));
816 __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
817 __ movdqu(xmm3, Address(from, 48));
818 __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
819
820 } else { // otherwise eight 8-byte movq load/store pairs cover the same 64 bytes
821 __ movq(xmm0, Address(from, 0));
822 __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
823 __ movq(xmm1, Address(from, 8));
824 __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
825 __ movq(xmm2, Address(from, 16));
826 __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
827 __ movq(xmm3, Address(from, 24));
828 __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
829 __ movq(xmm4, Address(from, 32));
830 __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
831 __ movq(xmm5, Address(from, 40));
832 __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
833 __ movq(xmm6, Address(from, 48));
834 __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
835 __ movq(xmm7, Address(from, 56));
836 __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
837 }
838
839 __ addl(from, 64); // advance the source; the destination follows implicitly through to_from
840 __ BIND(L_copy_64_bytes);
841 __ subl(qword_count, 8); // claim 8 qwords (64 bytes) for the next chunk
842 __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop); // at least 8 qwords were left: copy a full chunk
843 __ addl(qword_count, 8); // undo the last subtraction to recover the leftover count
844 __ jccb(Assembler::zero, L_exit); // nothing left over
845 //
846 // fewer than 8 qwords remain: copy them one at a time
847 //
848 __ BIND(L_copy_8_bytes);
849 __ movq(xmm0, Address(from, 0));
850 __ movq(Address(from, to_from, Address::times_1), xmm0);
851 __ addl(from, 8); // step the source by one qword
852 __ decrement(qword_count);
853 __ jcc(Assembler::greater, L_copy_8_bytes); // repeat while qwords remain
854 __ BIND(L_exit);
855 }
856
857 // Copy 64 bytes chunks
858 //
859 // Inputs:
860 // from - source array address
861 // to_from - difference (destination array address minus from)
862 // qword_count - 8-byte element count (callers pass a non-negative count)
863 //
864 void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
865 assert( VM_Version::supports_mmx(), "supported cpu only" );
866 Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
867 // Copy 64-byte chunks
868 __ jmpb(L_copy_64_bytes);
869 __ align(16);
870 __ BIND(L_copy_64_bytes_loop);
871 __ movq(mmx0, Address(from, 0));
872 __ movq(mmx1, Address(from, 8));
873 __ movq(mmx2, Address(from, 16));
874 __ movq(Address(from, to_from, Address::times_1, 0), mmx0);
875 __ movq(mmx3, Address(from, 24));
876 __ movq(Address(from, to_from, Address::times_1, 8), mmx1);
877 __ movq(mmx4, Address(from, 32));
878 __ movq(Address(from, to_from, Address::times_1, 16), mmx2);
879 __ movq(mmx5, Address(from, 40));
880 __ movq(Address(from, to_from, Address::times_1, 24), mmx3);
881 __ movq(mmx6, Address(from, 48));
882 __ movq(Address(from, to_from, Address::times_1, 32), mmx4);
883 __ movq(mmx7, Address(from, 56));
884 __ movq(Address(from, to_from, Address::times_1, 40), mmx5);
885 __ movq(Address(from, to_from, Address::times_1, 48), mmx6);
923
924 __ enter(); // required for proper stackwalking of RuntimeStub frame
925 __ push(rsi);
926 __ push(rdi);
927 __ movptr(from , Address(rsp, 12+ 4));
928 __ movptr(to , Address(rsp, 12+ 8));
929 __ movl(count, Address(rsp, 12+ 12));
930 if (t == T_OBJECT) {
931 __ testl(count, count);
932 __ jcc(Assembler::zero, L_0_count);
933 gen_write_ref_array_pre_barrier(to, count);
934 __ mov(saved_to, to); // save 'to'
935 }
936
937 *entry = __ pc(); // Entry point from conjoint arraycopy stub.
938 BLOCK_COMMENT("Entry:");
939
940 __ subptr(to, from); // to --> to_from
941 __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
942 __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
943 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
944 // align source address at 4 bytes address boundary
945 if (t == T_BYTE) {
946 // One byte misalignment happens only for byte arrays
947 __ testl(from, 1);
948 __ jccb(Assembler::zero, L_skip_align1);
949 __ movb(rax, Address(from, 0));
950 __ movb(Address(from, to_from, Address::times_1, 0), rax);
951 __ increment(from);
952 __ decrement(count);
953 __ BIND(L_skip_align1);
954 }
955 // Two bytes misalignment happens only for byte and short (char) arrays
956 __ testl(from, 2);
957 __ jccb(Assembler::zero, L_skip_align2);
958 __ movw(rax, Address(from, 0));
959 __ movw(Address(from, to_from, Address::times_1, 0), rax);
960 __ addptr(from, 2);
961 __ subl(count, 1<<(shift-1));
962 __ BIND(L_skip_align2);
963 }
964 if (!VM_Version::supports_mmx()) {
965 __ mov(rax, count); // save 'count'
966 __ shrl(count, shift); // bytes count
967 __ addptr(to_from, from);// restore 'to'
968 __ rep_mov();
969 __ subptr(to_from, from);// restore 'to_from'
970 __ mov(count, rax); // restore 'count'
971 __ jmpb(L_copy_2_bytes); // all dwords were copied
972 } else {
973 if (!UseUnalignedLoadStores) {
974 // align to 8 bytes, we know we are 4 byte aligned to start
975 __ testptr(from, 4);
976 __ jccb(Assembler::zero, L_copy_64_bytes);
977 __ movl(rax, Address(from, 0));
978 __ movl(Address(from, to_from, Address::times_1, 0), rax);
979 __ addptr(from, 4);
980 __ subl(count, 1<<shift);
981 }
982 __ BIND(L_copy_64_bytes);
983 __ mov(rax, count);
984 __ shrl(rax, shift+1); // 8 bytes chunk count
985 //
986 // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
987 //
988 if (UseXMMForArrayCopy) {
989 xmm_copy_forward(from, to_from, rax);
990 } else {
991 mmx_copy_forward(from, to_from, rax);
992 }
993 }
994 // copy tailing dword
995 __ BIND(L_copy_4_bytes);
996 __ testl(count, 1<<shift);
997 __ jccb(Assembler::zero, L_copy_2_bytes);
998 __ movl(rax, Address(from, 0));
999 __ movl(Address(from, to_from, Address::times_1, 0), rax);
1000 if (t == T_BYTE || t == T_SHORT) {
1001 __ addptr(from, 4);
1002 __ BIND(L_copy_2_bytes);
1003 // copy tailing word
1004 __ testl(count, 1<<(shift-1));
1005 __ jccb(Assembler::zero, L_copy_byte);
1006 __ movw(rax, Address(from, 0));
1007 __ movw(Address(from, to_from, Address::times_1, 0), rax);
1008 if (t == T_BYTE) {
1009 __ addptr(from, 2);
1010 __ BIND(L_copy_byte);
1011 // copy tailing byte
1012 __ testl(count, 1);
1013 __ jccb(Assembler::zero, L_exit);
1122 __ shrptr(count, shift); // bytes count
1123 __ rep_mov();
1124 __ cld();
1125 __ mov(count, rax); // restore 'count'
1126 __ andl(count, (1<<shift)-1); // mask the number of rest elements
1127 __ movptr(from, Address(rsp, 12+4)); // reread 'from'
1128 __ mov(to, rdx); // restore 'to'
1129 __ jmpb(L_copy_2_bytes); // all dword were copied
1130 } else {
1131 // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
1132 __ testptr(end, 4);
1133 __ jccb(Assembler::zero, L_copy_8_bytes);
1134 __ subl(count, 1<<shift);
1135 __ movl(rdx, Address(from, count, sf, 0));
1136 __ movl(Address(to, count, sf, 0), rdx);
1137 __ jmpb(L_copy_8_bytes);
1138
1139 __ align(16);
1140 // Move 8 bytes
1141 __ BIND(L_copy_8_bytes_loop);
1142 if (UseXMMForArrayCopy) {
1143 __ movq(xmm0, Address(from, count, sf, 0));
1144 __ movq(Address(to, count, sf, 0), xmm0);
1145 } else {
1146 __ movq(mmx0, Address(from, count, sf, 0));
1147 __ movq(Address(to, count, sf, 0), mmx0);
1148 }
1149 __ BIND(L_copy_8_bytes);
1150 __ subl(count, 2<<shift);
1151 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1152 __ addl(count, 2<<shift);
1153 if (!UseXMMForArrayCopy) {
1154 __ emms();
1155 }
1156 }
1157 __ BIND(L_copy_4_bytes);
1158 // copy prefix dword
1159 __ testl(count, 1<<shift);
1160 __ jccb(Assembler::zero, L_copy_2_bytes);
1161 __ movl(rdx, Address(from, count, sf, -4));
1162 __ movl(Address(to, count, sf, -4), rdx);
1163
1164 if (t == T_BYTE || t == T_SHORT) {
1165 __ subl(count, (1<<shift));
1166 __ BIND(L_copy_2_bytes);
1167 // copy prefix word
1168 __ testl(count, 1<<(shift-1));
1169 __ jccb(Assembler::zero, L_copy_byte);
1170 __ movw(rdx, Address(from, count, sf, -2));
1171 __ movw(Address(to, count, sf, -2), rdx);
1172 if (t == T_BYTE) {
1173 __ subl(count, 1<<(shift-1));
1174 __ BIND(L_copy_byte);
1175 // copy prefix byte
1176 __ testl(count, 1);
1203 __ align(CodeEntryAlignment);
1204 StubCodeMark mark(this, "StubRoutines", name);
1205 address start = __ pc();
1206
1207 Label L_copy_8_bytes, L_copy_8_bytes_loop;
1208 const Register from = rax; // source array address
1209 const Register to = rdx; // destination array address
1210 const Register count = rcx; // elements count
1211 const Register to_from = rdx; // (to - from)
1212
1213 __ enter(); // required for proper stackwalking of RuntimeStub frame
1214 __ movptr(from , Address(rsp, 8+0)); // from
1215 __ movptr(to , Address(rsp, 8+4)); // to
1216 __ movl2ptr(count, Address(rsp, 8+8)); // count
1217
1218 *entry = __ pc(); // Entry point from conjoint arraycopy stub.
1219 BLOCK_COMMENT("Entry:");
1220
1221 __ subptr(to, from); // to --> to_from
1222 if (VM_Version::supports_mmx()) {
1223 if (UseXMMForArrayCopy) {
1224 xmm_copy_forward(from, to_from, count);
1225 } else {
1226 mmx_copy_forward(from, to_from, count);
1227 }
1228 } else {
1229 __ jmpb(L_copy_8_bytes);
1230 __ align(16);
1231 __ BIND(L_copy_8_bytes_loop);
1232 __ fild_d(Address(from, 0));
1233 __ fistp_d(Address(from, to_from, Address::times_1));
1234 __ addptr(from, 8);
1235 __ BIND(L_copy_8_bytes);
1236 __ decrement(count);
1237 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1238 }
1239 inc_copy_counter_np(T_LONG);
1240 __ leave(); // required for proper stackwalking of RuntimeStub frame
1241 __ xorptr(rax, rax); // return 0
1242 __ ret(0);
1243 return start;
1244 }
1245
1246 address generate_conjoint_long_copy(address nooverlap_target,
1247 address* entry, const char *name) {
1260 __ movptr(to , Address(rsp, 8+4)); // to
1261 __ movl2ptr(count, Address(rsp, 8+8)); // count
1262
1263 *entry = __ pc(); // Entry point from generic arraycopy stub.
1264 BLOCK_COMMENT("Entry:");
1265
1266 // arrays overlap test
1267 __ cmpptr(to, from);
1268 RuntimeAddress nooverlap(nooverlap_target);
1269 __ jump_cc(Assembler::belowEqual, nooverlap);
1270 __ lea(end_from, Address(from, count, Address::times_8, 0));
1271 __ cmpptr(to, end_from);
1272 __ movptr(from, Address(rsp, 8)); // from
1273 __ jump_cc(Assembler::aboveEqual, nooverlap);
1274
1275 __ jmpb(L_copy_8_bytes);
1276
1277 __ align(16);
1278 __ BIND(L_copy_8_bytes_loop);
1279 if (VM_Version::supports_mmx()) {
1280 if (UseXMMForArrayCopy) {
1281 __ movq(xmm0, Address(from, count, Address::times_8));
1282 __ movq(Address(to, count, Address::times_8), xmm0);
1283 } else {
1284 __ movq(mmx0, Address(from, count, Address::times_8));
1285 __ movq(Address(to, count, Address::times_8), mmx0);
1286 }
1287 } else {
1288 __ fild_d(Address(from, count, Address::times_8));
1289 __ fistp_d(Address(to, count, Address::times_8));
1290 }
1291 __ BIND(L_copy_8_bytes);
1292 __ decrement(count);
1293 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1294
1295 if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
1296 __ emms();
1297 }
1298 inc_copy_counter_np(T_LONG);
1299 __ leave(); // required for proper stackwalking of RuntimeStub frame
1300 __ xorptr(rax, rax); // return 0
1301 __ ret(0);
1302 return start;
1303 }
1304
1305
1306 // Helper for generating a dynamic type check.
1307 // The sub_klass must be one of {rbx, rdx, rsi}.
1308 // The temp is killed.
1309 void generate_type_check(Register sub_klass,
1310 Address& super_check_offset_addr,
1311 Address& super_klass_addr,
1312 Register temp,
1313 Label* L_success_ptr, Label* L_failure_ptr) {
1314 BLOCK_COMMENT("type_check:");
1315
|