src/cpu/x86/vm/vm_version_x86_64.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 6532536 Sdiff src/cpu/x86/vm

src/cpu/x86/vm/vm_version_x86_64.cpp

Print this page




 170 
 171 void VM_Version::get_processor_features() {
 172 
 173   _logical_processors_per_package = 1;
 174   // Get raw processor info
 175   getPsrInfo_stub(&_cpuid_info);
 176   assert_is_initialized();
 177   _cpu = extended_cpu_family();
 178   _model = extended_cpu_model();
 179   _stepping = cpu_stepping();
 180   _cpuFeatures = feature_flags();
 181   // Logical processors are only available on P4s and above,
 182   // and only if hyperthreading is available.
 183   _logical_processors_per_package = logical_processor_count();
 184   _supports_cx8    = supports_cmpxchg8();
 185   // OS should support SSE for x64 and hardware should support at least SSE2.
 186   if (!VM_Version::supports_sse2()) {
 187     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 188   }
 189   if (UseSSE < 4)
 190     _cpuFeatures &= ~CPU_SSE4;

 191   if (UseSSE < 3) {
 192     _cpuFeatures &= ~CPU_SSE3;
 193     _cpuFeatures &= ~CPU_SSSE3;
 194     _cpuFeatures &= ~CPU_SSE4A;
 195   }
 196   if (UseSSE < 2)
 197     _cpuFeatures &= ~CPU_SSE2;
 198   if (UseSSE < 1)
 199     _cpuFeatures &= ~CPU_SSE;
 200 
 201   if (logical_processors_per_package() == 1) {
 202     // HT processor could be installed on a system which doesn't support HT.
 203     _cpuFeatures &= ~CPU_HT;
 204   }
 205 
 206   char buf[256];
 207   jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
 208                cores_per_cpu(), threads_per_core(),
 209                cpu_family(), _model, _stepping,
 210                (supports_cmov() ? ", cmov" : ""),
 211                (supports_cmpxchg8() ? ", cx8" : ""),
 212                (supports_fxsr() ? ", fxsr" : ""),
 213                (supports_mmx()  ? ", mmx"  : ""),
 214                (supports_sse()  ? ", sse"  : ""),
 215                (supports_sse2() ? ", sse2" : ""),
 216                (supports_sse3() ? ", sse3" : ""),
 217                (supports_ssse3()? ", ssse3": ""),
 218                (supports_sse4() ? ", sse4" : ""),

 219                (supports_mmx_ext() ? ", mmxext" : ""),
 220                (supports_3dnow()   ? ", 3dnow"  : ""),
 221                (supports_3dnow2()  ? ", 3dnowext" : ""),
 222                (supports_sse4a()   ? ", sse4a": ""),
 223                (supports_ht() ? ", ht": ""));
 224   _features_str = strdup(buf);
 225 
 226   // UseSSE is set to the smaller of what hardware supports and what
 227   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 228   // older Pentiums which do not support it.
 229   if( UseSSE > 4 ) UseSSE=4;
 230   if( UseSSE < 0 ) UseSSE=0;
 231   if( !supports_sse4() ) // Drop to 3 if no SSE4 support
 232     UseSSE = MIN2((intx)3,UseSSE);
 233   if( !supports_sse3() ) // Drop to 2 if no SSE3 support
 234     UseSSE = MIN2((intx)2,UseSSE);
 235   if( !supports_sse2() ) // Drop to 1 if no SSE2 support
 236     UseSSE = MIN2((intx)1,UseSSE);
 237   if( !supports_sse () ) // Drop to 0 if no SSE  support
 238     UseSSE = 0;
 239 
 240   // On new cpus instructions which update whole XMM register should be used
 241   // to prevent partial register stall due to dependencies on high half.
 242   //
 243   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
 244   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
 245   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
 246   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
 247 
 248   if( is_amd() ) { // AMD cpus specific settings
 249     if( FLAG_IS_DEFAULT(UseAddressNop) ) {
 250       // Use it on all AMD cpus starting from Opteron (don't need
 251       // a cpu check since only Opteron and new cpus support 64-bits mode).


 297       if( supports_sse3() ) {
 298         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
 299       } else {
 300         UseXmmRegToRegMoveAll = false;
 301       }
 302     }
 303     if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
 304 #ifdef COMPILER2
 305       if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
 306         // For new Intel cpus do the next optimization:
 307         // don't align the beginning of a loop if there are enough instructions
 308         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
 309         // in current fetch line (OptoLoopAlignment) or the padding
 310         // is big (> MaxLoopPad).
 311         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
 312         // generated NOP instructions. 11 is the largest size of one
 313         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
 314         MaxLoopPad = 11;
 315       }
 316 #endif // COMPILER2


 317     }



 318   }



 319 
 320   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
 321   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
 322 
 323   // set valid Prefetch instruction
 324   if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
 325   if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
 326   if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
 327 
 328   if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
 329   if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
 330   if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
 331 
 332   // Allocation prefetch settings
 333   intx cache_line_size = L1_data_cache_line_size();
 334   if( cache_line_size > AllocatePrefetchStepSize )
 335     AllocatePrefetchStepSize = cache_line_size;
 336   if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
 337     AllocatePrefetchLines = 3; // Optimistic value
 338   assert(AllocatePrefetchLines > 0, "invalid value");
 339   if( AllocatePrefetchLines < 1 ) // set valid value in product VM
 340     AllocatePrefetchLines = 1; // Conservative value
 341 
 342   AllocatePrefetchDistance = allocate_prefetch_distance();
 343   AllocatePrefetchStyle    = allocate_prefetch_style();
 344 
 345   if( AllocatePrefetchStyle == 2 && is_intel() &&
 346       cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
 347     AllocatePrefetchDistance = 384;
 348   }
 349   assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
 350 
 351   // Prefetch settings
 352   PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
 353   PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
 354   PrefetchFieldsAhead         = prefetch_fields_ahead();
 355 
 356 #ifndef PRODUCT
 357   if (PrintMiscellaneous && Verbose) {
 358     tty->print_cr("Logical CPUs per package: %u",
 359                   logical_processors_per_package());
 360     tty->print_cr("UseSSE=%d",UseSSE);
 361     tty->print("Allocation: ");
 362     if (AllocatePrefetchStyle <= 0) {
 363       tty->print_cr("no prefetching");
 364     } else {
 365       if (AllocatePrefetchInstr == 0) {
 366         tty->print("PREFETCHNTA");
 367       } else if (AllocatePrefetchInstr == 1) {
 368         tty->print("PREFETCHT0");
 369       } else if (AllocatePrefetchInstr == 2) {
 370         tty->print("PREFETCHT2");
 371       } else if (AllocatePrefetchInstr == 3) {
 372         tty->print("PREFETCHW");
 373       }
 374       if (AllocatePrefetchLines > 1) {
 375         tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
 376       } else {
 377         tty->print_cr(" %d, one line", AllocatePrefetchDistance);
 378       }




 170 
 171 void VM_Version::get_processor_features() {
 172 
 173   _logical_processors_per_package = 1;
 174   // Get raw processor info
 175   getPsrInfo_stub(&_cpuid_info);
 176   assert_is_initialized();
 177   _cpu = extended_cpu_family();
 178   _model = extended_cpu_model();
 179   _stepping = cpu_stepping();
 180   _cpuFeatures = feature_flags();
 181   // Logical processors are only available on P4s and above,
 182   // and only if hyperthreading is available.
 183   _logical_processors_per_package = logical_processor_count();
 184   _supports_cx8    = supports_cmpxchg8();
 185   // OS should support SSE for x64 and hardware should support at least SSE2.
 186   if (!VM_Version::supports_sse2()) {
 187     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 188   }
 189   if (UseSSE < 4)
 190     _cpuFeatures &= ~CPU_SSE4_1;
 191     _cpuFeatures &= ~CPU_SSE4_2;
 192   if (UseSSE < 3) {
 193     _cpuFeatures &= ~CPU_SSE3;
 194     _cpuFeatures &= ~CPU_SSSE3;
 195     _cpuFeatures &= ~CPU_SSE4A;
 196   }
 197   if (UseSSE < 2)
 198     _cpuFeatures &= ~CPU_SSE2;
 199   if (UseSSE < 1)
 200     _cpuFeatures &= ~CPU_SSE;
 201 
 202   if (logical_processors_per_package() == 1) {
 203     // HT processor could be installed on a system which doesn't support HT.
 204     _cpuFeatures &= ~CPU_HT;
 205   }
 206 
 207   char buf[256];
 208   jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
 209                cores_per_cpu(), threads_per_core(),
 210                cpu_family(), _model, _stepping,
 211                (supports_cmov() ? ", cmov" : ""),
 212                (supports_cmpxchg8() ? ", cx8" : ""),
 213                (supports_fxsr() ? ", fxsr" : ""),
 214                (supports_mmx()  ? ", mmx"  : ""),
 215                (supports_sse()  ? ", sse"  : ""),
 216                (supports_sse2() ? ", sse2" : ""),
 217                (supports_sse3() ? ", sse3" : ""),
 218                (supports_ssse3()? ", ssse3": ""),
 219                (supports_sse4_1() ? ", sse4.1" : ""),
 220                (supports_sse4_2() ? ", sse4.2" : ""),
 221                (supports_mmx_ext() ? ", mmxext" : ""),
 222                (supports_3dnow()   ? ", 3dnow"  : ""),
 223                (supports_3dnow2()  ? ", 3dnowext" : ""),
 224                (supports_sse4a()   ? ", sse4a": ""),
 225                (supports_ht() ? ", ht": ""));
 226   _features_str = strdup(buf);
 227 
 228   // UseSSE is set to the smaller of what hardware supports and what
 229   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 230   // older Pentiums which do not support it.
 231   if( UseSSE > 4 ) UseSSE=4;
 232   if( UseSSE < 0 ) UseSSE=0;
 233   if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
 234     UseSSE = MIN2((intx)3,UseSSE);
 235   if( !supports_sse3() ) // Drop to 2 if no SSE3 support
 236     UseSSE = MIN2((intx)2,UseSSE);
 237   if( !supports_sse2() ) // Drop to 1 if no SSE2 support
 238     UseSSE = MIN2((intx)1,UseSSE);
 239   if( !supports_sse () ) // Drop to 0 if no SSE  support
 240     UseSSE = 0;
 241 
 242   // On new cpus instructions which update whole XMM register should be used
 243   // to prevent partial register stall due to dependencies on high half.
 244   //
 245   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
 246   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
 247   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
 248   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
 249 
 250   if( is_amd() ) { // AMD cpus specific settings
 251     if( FLAG_IS_DEFAULT(UseAddressNop) ) {
 252       // Use it on all AMD cpus starting from Opteron (don't need
 253       // a cpu check since only Opteron and new cpus support 64-bits mode).


 299       if( supports_sse3() ) {
 300         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
 301       } else {
 302         UseXmmRegToRegMoveAll = false;
 303       }
 304     }
 305     if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
 306 #ifdef COMPILER2
 307       if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
 308         // For new Intel cpus do the next optimization:
 309         // don't align the beginning of a loop if there are enough instructions
 310         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
 311         // in current fetch line (OptoLoopAlignment) or the padding
 312         // is big (> MaxLoopPad).
 313         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
 314         // generated NOP instructions. 11 is the largest size of one
 315         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
 316         MaxLoopPad = 11;
 317       }
 318 #endif // COMPILER2
 319       if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
 320         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
 321       }
 322       if( supports_sse4_1() && supports_ht() ) { // Newest Intel cpus
 323         if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
 324           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
 325         }
 326       }
 327     }
 328   }
 329 
 330   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
 331   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
 332 
 333   // set valid Prefetch instruction
 334   if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
 335   if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
 336   if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
 337 
 338   if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
 339   if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
 340   if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
 341 
 342   // Allocation prefetch settings
 343   intx cache_line_size = L1_data_cache_line_size();
 344   if( cache_line_size > AllocatePrefetchStepSize )
 345     AllocatePrefetchStepSize = cache_line_size;
 346   if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
 347     AllocatePrefetchLines = 3; // Optimistic value
 348   assert(AllocatePrefetchLines > 0, "invalid value");
 349   if( AllocatePrefetchLines < 1 ) // set valid value in product VM
 350     AllocatePrefetchLines = 1; // Conservative value
 351 
 352   AllocatePrefetchDistance = allocate_prefetch_distance();
 353   AllocatePrefetchStyle    = allocate_prefetch_style();
 354 
 355   if( AllocatePrefetchStyle == 2 && is_intel() &&
 356       cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
 357     AllocatePrefetchDistance = 384;
 358   }
 359   assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
 360 
 361   // Prefetch settings
 362   PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
 363   PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
 364   PrefetchFieldsAhead         = prefetch_fields_ahead();
 365 
 366 #ifndef PRODUCT
 367   if (PrintMiscellaneous && Verbose) {
 368     tty->print_cr("Logical CPUs per core: %u",
 369                   logical_processors_per_package());
 370     tty->print_cr("UseSSE=%d",UseSSE);
 371     tty->print("Allocation: ");
 372     if (AllocatePrefetchStyle <= 0) {
 373       tty->print_cr("no prefetching");
 374     } else {
 375       if (AllocatePrefetchInstr == 0) {
 376         tty->print("PREFETCHNTA");
 377       } else if (AllocatePrefetchInstr == 1) {
 378         tty->print("PREFETCHT0");
 379       } else if (AllocatePrefetchInstr == 2) {
 380         tty->print("PREFETCHT2");
 381       } else if (AllocatePrefetchInstr == 3) {
 382         tty->print("PREFETCHW");
 383       }
 384       if (AllocatePrefetchLines > 1) {
 385         tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
 386       } else {
 387         tty->print_cr(" %d, one line", AllocatePrefetchDistance);
 388       }


src/cpu/x86/vm/vm_version_x86_64.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File