1 /*
   2  * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20  * CA 95054 USA or visit www.sun.com if you need additional information or
  21  * have any questions.
  22  *
  23  */
  24 
  25 #include "incls/_precompiled.incl"
  26 #include "incls/_concurrentMark.cpp.incl"
  27 
  28 //
  29 // CMS Bit Map Wrapper
  30 
  31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  32   _bm((uintptr_t*)NULL,0),
  33   _shifter(shifter) {
  34   _bmStartWord = (HeapWord*)(rs.base());
  35   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  36   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  37                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  38 
  39   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  40   // For now we'll just commit all of the bit map up fromt.
  41   // Later on we'll try to be more parsimonious with swap.
  42   guarantee(_virtual_space.initialize(brs, brs.size()),
  43             "couldn't reseve backing store for CMS bit map");
  44   assert(_virtual_space.committed_size() == brs.size(),
  45          "didn't reserve backing store for all of CMS bit map?");
  46   _bm.set_map((uintptr_t*)_virtual_space.low());
  47   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  48          _bmWordSize, "inconsistency in bit map sizing");
  49   _bm.set_size(_bmWordSize >> _shifter);
  50 }
  51 
  52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  53                                                HeapWord* limit) const {
  54   // First we must round addr *up* to a possible object boundary.
  55   addr = (HeapWord*)align_size_up((intptr_t)addr,
  56                                   HeapWordSize << _shifter);
  57   size_t addrOffset = heapWordToOffset(addr);
  58   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  59   size_t limitOffset = heapWordToOffset(limit);
  60   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  61   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  62   assert(nextAddr >= addr, "get_next_one postcondition");
  63   assert(nextAddr == limit || isMarked(nextAddr),
  64          "get_next_one postcondition");
  65   return nextAddr;
  66 }
  67 
  68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  69                                                  HeapWord* limit) const {
  70   size_t addrOffset = heapWordToOffset(addr);
  71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  72   size_t limitOffset = heapWordToOffset(limit);
  73   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  75   assert(nextAddr >= addr, "get_next_one postcondition");
  76   assert(nextAddr == limit || !isMarked(nextAddr),
  77          "get_next_one postcondition");
  78   return nextAddr;
  79 }
  80 
  81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  82   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  83   return (int) (diff >> _shifter);
  84 }
  85 
  86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  87   HeapWord* left  = MAX2(_bmStartWord, mr.start());
  88   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  89   if (right > left) {
  90     // Right-open interval [leftOffset, rightOffset).
  91     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  92   } else {
  93     return true;
  94   }
  95 }
  96 
  97 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
  98                                              size_t    from_start_index,
  99                                              HeapWord* to_start_word,
 100                                              size_t    word_num) {
 101   _bm.mostly_disjoint_range_union(from_bitmap,
 102                                   from_start_index,
 103                                   heapWordToOffset(to_start_word),
 104                                   word_num);
 105 }
 106 
 107 #ifndef PRODUCT
 108 bool CMBitMapRO::covers(ReservedSpace rs) const {
 109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 110   assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
 111          "size inconsistency");
 112   return _bmStartWord == (HeapWord*)(rs.base()) &&
 113          _bmWordSize  == rs.size()>>LogHeapWordSize;
 114 }
 115 #endif
 116 
 117 void CMBitMap::clearAll() {
 118   _bm.clear();
 119   return;
 120 }
 121 
 122 void CMBitMap::markRange(MemRegion mr) {
 123   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 124   assert(!mr.is_empty(), "unexpected empty region");
 125   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 126           ((HeapWord *) mr.end())),
 127          "markRange memory region end is not card aligned");
 128   // convert address range into offset range
 129   _bm.at_put_range(heapWordToOffset(mr.start()),
 130                    heapWordToOffset(mr.end()), true);
 131 }
 132 
 133 void CMBitMap::clearRange(MemRegion mr) {
 134   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 135   assert(!mr.is_empty(), "unexpected empty region");
 136   // convert address range into offset range
 137   _bm.at_put_range(heapWordToOffset(mr.start()),
 138                    heapWordToOffset(mr.end()), false);
 139 }
 140 
 141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 142                                             HeapWord* end_addr) {
 143   HeapWord* start = getNextMarkedWordAddress(addr);
 144   start = MIN2(start, end_addr);
 145   HeapWord* end   = getNextUnmarkedWordAddress(start);
 146   end = MIN2(end, end_addr);
 147   assert(start <= end, "Consistency check");
 148   MemRegion mr(start, end);
 149   if (!mr.is_empty()) {
 150     clearRange(mr);
 151   }
 152   return mr;
 153 }
 154 
 155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 156   _base(NULL), _cm(cm)
 157 #ifdef ASSERT
 158   , _drain_in_progress(false)
 159   , _drain_in_progress_yields(false)
 160 #endif
 161 {}
 162 
 163 void CMMarkStack::allocate(size_t size) {
 164   _base = NEW_C_HEAP_ARRAY(oop, size);
 165   if (_base == NULL)
 166     vm_exit_during_initialization("Failed to allocate "
 167                                   "CM region mark stack");
 168   _index = 0;
 169   // QQQQ cast ...
 170   _capacity = (jint) size;
 171   _oops_do_bound = -1;
 172   NOT_PRODUCT(_max_depth = 0);
 173 }
 174 
 175 CMMarkStack::~CMMarkStack() {
 176   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 177 }
 178 
 179 void CMMarkStack::par_push(oop ptr) {
 180   while (true) {
 181     if (isFull()) {
 182       _overflow = true;
 183       return;
 184     }
 185     // Otherwise...
 186     jint index = _index;
 187     jint next_index = index+1;
 188     jint res = Atomic::cmpxchg(next_index, &_index, index);
 189     if (res == index) {
 190       _base[index] = ptr;
 191       // Note that we don't maintain this atomically.  We could, but it
 192       // doesn't seem necessary.
 193       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 194       return;
 195     }
 196     // Otherwise, we need to try again.
 197   }
 198 }
 199 
 200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 201   while (true) {
 202     if (isFull()) {
 203       _overflow = true;
 204       return;
 205     }
 206     // Otherwise...
 207     jint index = _index;
 208     jint next_index = index + n;
 209     if (next_index > _capacity) {
 210       _overflow = true;
 211       return;
 212     }
 213     jint res = Atomic::cmpxchg(next_index, &_index, index);
 214     if (res == index) {
 215       for (int i = 0; i < n; i++) {
 216         int ind = index + i;
 217         assert(ind < _capacity, "By overflow test above.");
 218         _base[ind] = ptr_arr[i];
 219       }
 220       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 221       return;
 222     }
 223     // Otherwise, we need to try again.
 224   }
 225 }
 226 
 227 
 228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 229   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 230   jint start = _index;
 231   jint next_index = start + n;
 232   if (next_index > _capacity) {
 233     _overflow = true;
 234     return;
 235   }
 236   // Otherwise.
 237   _index = next_index;
 238   for (int i = 0; i < n; i++) {
 239     int ind = start + i;
 240     guarantee(ind < _capacity, "By overflow test above.");
 241     _base[ind] = ptr_arr[i];
 242   }
 243 }
 244 
 245 
 246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 247   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 248   jint index = _index;
 249   if (index == 0) {
 250     *n = 0;
 251     return false;
 252   } else {
 253     int k = MIN2(max, index);
 254     jint new_ind = index - k;
 255     for (int j = 0; j < k; j++) {
 256       ptr_arr[j] = _base[new_ind + j];
 257     }
 258     _index = new_ind;
 259     *n = k;
 260     return true;
 261   }
 262 }
 263 
 264 
 265 CMRegionStack::CMRegionStack() : _base(NULL) {}
 266 
 267 void CMRegionStack::allocate(size_t size) {
 268   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 269   if (_base == NULL)
 270     vm_exit_during_initialization("Failed to allocate "
 271                                   "CM region mark stack");
 272   _index = 0;
 273   // QQQQ cast ...
 274   _capacity = (jint) size;
 275 }
 276 
 277 CMRegionStack::~CMRegionStack() {
 278   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 279 }
 280 
 281 void CMRegionStack::push(MemRegion mr) {
 282   assert(mr.word_size() > 0, "Precondition");
 283   while (true) {
 284     if (isFull()) {
 285       _overflow = true;
 286       return;
 287     }
 288     // Otherwise...
 289     jint index = _index;
 290     jint next_index = index+1;
 291     jint res = Atomic::cmpxchg(next_index, &_index, index);
 292     if (res == index) {
 293       _base[index] = mr;
 294       return;
 295     }
 296     // Otherwise, we need to try again.
 297   }
 298 }
 299 
 300 MemRegion CMRegionStack::pop() {
 301   while (true) {
 302     // Otherwise...
 303     jint index = _index;
 304 
 305     if (index == 0) {
 306       return MemRegion();
 307     }
 308     jint next_index = index-1;
 309     jint res = Atomic::cmpxchg(next_index, &_index, index);
 310     if (res == index) {
 311       MemRegion mr = _base[next_index];
 312       if (mr.start() != NULL) {
 313         tmp_guarantee_CM( mr.end() != NULL, "invariant" );
 314         tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
 315         return mr;
 316       } else {
 317         // that entry was invalidated... let's skip it
 318         tmp_guarantee_CM( mr.end() == NULL, "invariant" );
 319       }
 320     }
 321     // Otherwise, we need to try again.
 322   }
 323 }
 324 
 325 bool CMRegionStack::invalidate_entries_into_cset() {
 326   bool result = false;
 327   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 328   for (int i = 0; i < _oops_do_bound; ++i) {
 329     MemRegion mr = _base[i];
 330     if (mr.start() != NULL) {
 331       tmp_guarantee_CM( mr.end() != NULL, "invariant");
 332       tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
 333       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 334       tmp_guarantee_CM( hr != NULL, "invariant" );
 335       if (hr->in_collection_set()) {
 336         // The region points into the collection set
 337         _base[i] = MemRegion();
 338         result = true;
 339       }
 340     } else {
 341       // that entry was invalidated... let's skip it
 342       tmp_guarantee_CM( mr.end() == NULL, "invariant" );
 343     }
 344   }
 345   return result;
 346 }
 347 
 348 template<class OopClosureClass>
 349 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 350   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 351          || SafepointSynchronize::is_at_safepoint(),
 352          "Drain recursion must be yield-safe.");
 353   bool res = true;
 354   debug_only(_drain_in_progress = true);
 355   debug_only(_drain_in_progress_yields = yield_after);
 356   while (!isEmpty()) {
 357     oop newOop = pop();
 358     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 359     assert(newOop->is_oop(), "Expected an oop");
 360     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 361            "only grey objects on this stack");
 362     // iterate over the oops in this oop, marking and pushing
 363     // the ones in CMS generation.
 364     newOop->oop_iterate(cl);
 365     if (yield_after && _cm->do_yield_check()) {
 366       res = false; break;
 367     }
 368   }
 369   debug_only(_drain_in_progress = false);
 370   return res;
 371 }
 372 
 373 void CMMarkStack::oops_do(OopClosure* f) {
 374   if (_index == 0) return;
 375   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 376          "Bound must be set.");
 377   for (int i = 0; i < _oops_do_bound; i++) {
 378     f->do_oop(&_base[i]);
 379   }
 380   _oops_do_bound = -1;
 381 }
 382 
 383 bool ConcurrentMark::not_yet_marked(oop obj) const {
 384   return (_g1h->is_obj_ill(obj)
 385           || (_g1h->is_in_permanent(obj)
 386               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 387 }
 388 
 389 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 390 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 391 #endif // _MSC_VER
 392 
 393 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 394                                int max_regions) :
 395   _markBitMap1(rs, MinObjAlignment - 1),
 396   _markBitMap2(rs, MinObjAlignment - 1),
 397 
 398   _parallel_marking_threads(0),
 399   _sleep_factor(0.0),
 400   _marking_task_overhead(1.0),
 401   _cleanup_sleep_factor(0.0),
 402   _cleanup_task_overhead(1.0),
 403   _region_bm(max_regions, false /* in_resource_area*/),
 404   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 405            CardTableModRefBS::card_shift,
 406            false /* in_resource_area*/),
 407   _prevMarkBitMap(&_markBitMap1),
 408   _nextMarkBitMap(&_markBitMap2),
 409   _at_least_one_mark_complete(false),
 410 
 411   _markStack(this),
 412   _regionStack(),
 413   // _finger set in set_non_marking_state
 414 
 415   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 416   // _active_tasks set in set_non_marking_state
 417   // _tasks set inside the constructor
 418   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 419   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 420 
 421   _has_overflown(false),
 422   _concurrent(false),
 423 
 424   // _verbose_level set below
 425 
 426   _init_times(),
 427   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 428   _cleanup_times(),
 429   _total_counting_time(0.0),
 430   _total_rs_scrub_time(0.0),
 431 
 432   _parallel_workers(NULL),
 433   _cleanup_co_tracker(G1CLGroup)
 434 {
 435   CMVerboseLevel verbose_level =
 436     (CMVerboseLevel) G1MarkingVerboseLevel;
 437   if (verbose_level < no_verbose)
 438     verbose_level = no_verbose;
 439   if (verbose_level > high_verbose)
 440     verbose_level = high_verbose;
 441   _verbose_level = verbose_level;
 442 
 443   if (verbose_low())
 444     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 445                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 446 
 447   _markStack.allocate(G1CMStackSize);
 448   _regionStack.allocate(G1CMRegionStackSize);
 449 
 450   // Create & start a ConcurrentMark thread.
 451   if (G1ConcMark) {
 452     _cmThread = new ConcurrentMarkThread(this);
 453     assert(cmThread() != NULL, "CM Thread should have been created");
 454     assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 455   } else {
 456     _cmThread = NULL;
 457   }
 458   _g1h = G1CollectedHeap::heap();
 459   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 460   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 461   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 462 
 463   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 464   satb_qs.set_buffer_size(G1SATBLogBufferSize);
 465 
 466   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
 467   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
 468   for (int i = 0 ; i < size; i++) {
 469     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
 470   }
 471 
 472   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 473   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 474 
 475   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 476   _active_tasks = _max_task_num;
 477   for (int i = 0; i < (int) _max_task_num; ++i) {
 478     CMTaskQueue* task_queue = new CMTaskQueue();
 479     task_queue->initialize();
 480     _task_queues->register_queue(i, task_queue);
 481 
 482     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 483     _accum_task_vtime[i] = 0.0;
 484   }
 485 
 486   if (ParallelMarkingThreads > ParallelGCThreads) {
 487     vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
 488                                   "than ParallelGCThreads.");
 489   }
 490   if (ParallelGCThreads == 0) {
 491     // if we are not running with any parallel GC threads we will not
 492     // spawn any marking threads either
 493     _parallel_marking_threads =   0;
 494     _sleep_factor             = 0.0;
 495     _marking_task_overhead    = 1.0;
 496   } else {
 497     if (ParallelMarkingThreads > 0) {
 498       // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc
 499       // if both are set
 500 
 501       _parallel_marking_threads = ParallelMarkingThreads;
 502       _sleep_factor             = 0.0;
 503       _marking_task_overhead    = 1.0;
 504     } else if (G1MarkingOverheadPerc > 0) {
 505       // we will calculate the number of parallel marking threads
 506       // based on a target overhead with respect to the soft real-time
 507       // goal
 508 
 509       double marking_overhead = (double) G1MarkingOverheadPerc / 100.0;
 510       double overall_cm_overhead =
 511         (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS;
 512       double cpu_ratio = 1.0 / (double) os::processor_count();
 513       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 514       double marking_task_overhead =
 515         overall_cm_overhead / marking_thread_num *
 516                                                 (double) os::processor_count();
 517       double sleep_factor =
 518                          (1.0 - marking_task_overhead) / marking_task_overhead;
 519 
 520       _parallel_marking_threads = (size_t) marking_thread_num;
 521       _sleep_factor             = sleep_factor;
 522       _marking_task_overhead    = marking_task_overhead;
 523     } else {
 524       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 525       _sleep_factor             = 0.0;
 526       _marking_task_overhead    = 1.0;
 527     }
 528 
 529     if (parallel_marking_threads() > 1)
 530       _cleanup_task_overhead = 1.0;
 531     else
 532       _cleanup_task_overhead = marking_task_overhead();
 533     _cleanup_sleep_factor =
 534                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 535 
 536 #if 0
 537     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 538     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 539     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 540     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 541     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 542 #endif
 543 
 544     guarantee( parallel_marking_threads() > 0, "peace of mind" );
 545     _parallel_workers = new WorkGang("Parallel Marking Threads",
 546                                      (int) parallel_marking_threads(), false, true);
 547     if (_parallel_workers == NULL)
 548       vm_exit_during_initialization("Failed necessary allocation.");
 549   }
 550 
 551   // so that the call below can read a sensible value
 552   _heap_start = (HeapWord*) rs.base();
 553   set_non_marking_state();
 554 }
 555 
 556 void ConcurrentMark::update_g1_committed(bool force) {
 557   // If concurrent marking is not in progress, then we do not need to
 558   // update _heap_end. This has a subtle and important
 559   // side-effect. Imagine that two evacuation pauses happen between
 560   // marking completion and remark. The first one can grow the
 561   // heap (hence now the finger is below the heap end). Then, the
 562   // second one could unnecessarily push regions on the region
 563   // stack. This causes the invariant that the region stack is empty
 564   // at the beginning of remark to be false. By ensuring that we do
 565   // not observe heap expansions after marking is complete, then we do
 566   // not have this problem.
 567   if (!concurrent_marking_in_progress() && !force)
 568     return;
 569 
 570   MemRegion committed = _g1h->g1_committed();
 571   tmp_guarantee_CM( committed.start() == _heap_start,
 572                     "start shouldn't change" );
 573   HeapWord* new_end = committed.end();
 574   if (new_end > _heap_end) {
 575     // The heap has been expanded.
 576 
 577     _heap_end = new_end;
 578   }
 579   // Notice that the heap can also shrink. However, this only happens
 580   // during a Full GC (at least currently) and the entire marking
 581   // phase will bail out and the task will not be restarted. So, let's
 582   // do nothing.
 583 }
 584 
 585 void ConcurrentMark::reset() {
 586   // Starting values for these two. This should be called in a STW
 587   // phase. CM will be notified of any future g1_committed expansions
 588   // will be at the end of evacuation pauses, when tasks are
 589   // inactive.
 590   MemRegion committed = _g1h->g1_committed();
 591   _heap_start = committed.start();
 592   _heap_end   = committed.end();
 593 
 594   guarantee( _heap_start != NULL &&
 595              _heap_end != NULL   &&
 596              _heap_start < _heap_end, "heap bounds should look ok" );
 597 
 598   // reset all the marking data structures and any necessary flags
 599   clear_marking_state();
 600 
 601   if (verbose_low())
 602     gclog_or_tty->print_cr("[global] resetting");
 603 
 604   // We do reset all of them, since different phases will use
 605   // different number of active threads. So, it's easiest to have all
 606   // of them ready.
 607   for (int i = 0; i < (int) _max_task_num; ++i)
 608     _tasks[i]->reset(_nextMarkBitMap);
 609 
 610   // we need this to make sure that the flag is on during the evac
 611   // pause with initial mark piggy-backed
 612   set_concurrent_marking_in_progress();
 613 }
 614 
 615 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 616   guarantee( active_tasks <= _max_task_num, "we should not have more" );
 617 
 618   _active_tasks = active_tasks;
 619   // Need to update the three data structures below according to the
 620   // number of active threads for this phase.
 621   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 622   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 623   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 624 
 625   _concurrent = concurrent;
 626   // We propagate this to all tasks, not just the active ones.
 627   for (int i = 0; i < (int) _max_task_num; ++i)
 628     _tasks[i]->set_concurrent(concurrent);
 629 
 630   if (concurrent) {
 631     set_concurrent_marking_in_progress();
 632   } else {
 633     // We currently assume that the concurrent flag has been set to
 634     // false before we start remark. At this point we should also be
 635     // in a STW phase.
 636     guarantee( !concurrent_marking_in_progress(), "invariant" );
 637     guarantee( _finger == _heap_end, "only way to get here" );
 638     update_g1_committed(true);
 639   }
 640 }
 641 
 642 void ConcurrentMark::set_non_marking_state() {
 643   // We set the global marking state to some default values when we're
 644   // not doing marking.
 645   clear_marking_state();
 646   _active_tasks = 0;
 647   clear_concurrent_marking_in_progress();
 648 }
 649 
 650 ConcurrentMark::~ConcurrentMark() {
 651   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
 652   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
 653   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
 654                     _par_cleanup_thread_state);
 655 
 656   for (int i = 0; i < (int) _max_task_num; ++i) {
 657     delete _task_queues->queue(i);
 658     delete _tasks[i];
 659   }
 660   delete _task_queues;
 661   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
 662 }
 663 
 664 // This closure is used to mark refs into the g1 generation
 665 // from external roots in the CMS bit map.
 666 // Called at the first checkpoint.
 667 //
 668 
 669 #define PRINT_REACHABLE_AT_INITIAL_MARK 0
 670 #if PRINT_REACHABLE_AT_INITIAL_MARK
 671 static FILE* reachable_file = NULL;
 672 
 673 class PrintReachableClosure: public OopsInGenClosure {
 674   CMBitMap* _bm;
 675   int _level;
 676 public:
 677   PrintReachableClosure(CMBitMap* bm) :
 678     _bm(bm), _level(0) {
 679     guarantee(reachable_file != NULL, "pre-condition");
 680   }
 681   void do_oop(oop* p) {
 682     oop obj = *p;
 683     HeapWord* obj_addr = (HeapWord*)obj;
 684     if (obj == NULL) return;
 685     fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
 686             _level, p, (void*) obj, _bm->isMarked(obj_addr));
 687     if (!_bm->isMarked(obj_addr)) {
 688       _bm->mark(obj_addr);
 689       _level++;
 690       obj->oop_iterate(this);
 691       _level--;
 692     }
 693   }
 694 };
 695 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
 696 
 697 #define SEND_HEAP_DUMP_TO_FILE 0
 698 #if SEND_HEAP_DUMP_TO_FILE
 699 static FILE* heap_dump_file = NULL;
 700 #endif // SEND_HEAP_DUMP_TO_FILE
 701 
 702 void ConcurrentMark::clearNextBitmap() {
 703    guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
 704 
 705    // clear the mark bitmap (no grey objects to start with).
 706    // We need to do this in chunks and offer to yield in between
 707    // each chunk.
 708    HeapWord* start  = _nextMarkBitMap->startWord();
 709    HeapWord* end    = _nextMarkBitMap->endWord();
 710    HeapWord* cur    = start;
 711    size_t chunkSize = M;
 712    while (cur < end) {
 713      HeapWord* next = cur + chunkSize;
 714      if (next > end)
 715        next = end;
 716      MemRegion mr(cur,next);
 717      _nextMarkBitMap->clearRange(mr);
 718      cur = next;
 719      do_yield_check();
 720    }
 721 }
 722 
 723 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 724 public:
 725   bool doHeapRegion(HeapRegion* r) {
 726     if (!r->continuesHumongous()) {
 727       r->note_start_of_marking(true);
 728     }
 729     return false;
 730   }
 731 };
 732 
 733 void ConcurrentMark::checkpointRootsInitialPre() {
 734   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 735   G1CollectorPolicy* g1p = g1h->g1_policy();
 736 
 737   _has_aborted = false;
 738 
 739   // Find all the reachable objects...
 740 #if PRINT_REACHABLE_AT_INITIAL_MARK
 741   guarantee(reachable_file == NULL, "Protocol");
 742   char fn_buf[100];
 743   sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
 744   reachable_file = fopen(fn_buf, "w");
 745   // clear the mark bitmap (no grey objects to start with)
 746   _nextMarkBitMap->clearAll();
 747   PrintReachableClosure prcl(_nextMarkBitMap);
 748   g1h->process_strong_roots(
 749                             false,   // fake perm gen collection
 750                             SharedHeap::SO_AllClasses,
 751                             &prcl, // Regular roots
 752                             &prcl    // Perm Gen Roots
 753                             );
 754   // The root iteration above "consumed" dirty cards in the perm gen.
 755   // Therefore, as a shortcut, we dirty all such cards.
 756   g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
 757   fclose(reachable_file);
 758   reachable_file = NULL;
 759   // clear the mark bitmap again.
 760   _nextMarkBitMap->clearAll();
 761   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 762   COMPILER2_PRESENT(DerivedPointerTable::clear());
 763 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
 764 
 765   // Initialise marking structures. This has to be done in a STW phase.
 766   reset();
 767 }
 768 
 769 class CMMarkRootsClosure: public OopsInGenClosure {
 770 private:
 771   ConcurrentMark*  _cm;
 772   G1CollectedHeap* _g1h;
 773   bool             _do_barrier;
 774 
 775 public:
 776   CMMarkRootsClosure(ConcurrentMark* cm,
 777                      G1CollectedHeap* g1h,
 778                      bool do_barrier) : _cm(cm), _g1h(g1h),
 779                                         _do_barrier(do_barrier) { }
 780 
 781   virtual void do_oop(narrowOop* p) {
 782     guarantee(false, "NYI");
 783   }
 784 
 785   virtual void do_oop(oop* p) {
 786     oop thisOop = *p;
 787     if (thisOop != NULL) {
 788       assert(thisOop->is_oop() || thisOop->mark() == NULL,
 789              "expected an oop, possibly with mark word displaced");
 790       HeapWord* addr = (HeapWord*)thisOop;
 791       if (_g1h->is_in_g1_reserved(addr)) {
 792         _cm->grayRoot(thisOop);
 793       }
 794     }
 795     if (_do_barrier) {
 796       assert(!_g1h->is_in_g1_reserved(p),
 797              "Should be called on external roots");
 798       do_barrier(p);
 799     }
 800   }
 801 };
 802 
 803 void ConcurrentMark::checkpointRootsInitialPost() {
 804   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 805 
 806   // For each region note start of marking.
 807   NoteStartOfMarkHRClosure startcl;
 808   g1h->heap_region_iterate(&startcl);
 809 
 810   // Start weak-reference discovery.
 811   ReferenceProcessor* rp = g1h->ref_processor();
 812   rp->verify_no_references_recorded();
 813   rp->enable_discovery(); // enable ("weak") refs discovery
 814   rp->snap_policy(false); // snapshot the soft ref policy to be used in this cycle
 815 
 816   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 817   satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
 818   satb_mq_set.set_active_all_threads(true);
 819 
 820   // update_g1_committed() will be called at the end of an evac pause
 821   // when marking is on. So, it's also called at the end of the
 822   // initial-mark pause to update the heap end, if the heap expands
 823   // during it. No need to call it here.
 824 
 825   guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
 826 
 827   size_t max_marking_threads =
 828     MAX2((size_t) 1, parallel_marking_threads());
 829   for (int i = 0; i < (int)_max_task_num; ++i) {
 830     _tasks[i]->enable_co_tracker();
 831     if (i < (int) max_marking_threads)
 832       _tasks[i]->reset_co_tracker(marking_task_overhead());
 833     else
 834       _tasks[i]->reset_co_tracker(0.0);
 835   }
 836 }
 837 
 838 // Checkpoint the roots into this generation from outside
 839 // this generation. [Note this initial checkpoint need only
 840 // be approximate -- we'll do a catch up phase subsequently.]
 841 void ConcurrentMark::checkpointRootsInitial() {
 842   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 843   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 844 
 845   double start = os::elapsedTime();
 846   GCOverheadReporter::recordSTWStart(start);
 847 
 848   // If there has not been a GC[n-1] since last GC[n] cycle completed,
 849   // precede our marking with a collection of all
 850   // younger generations to keep floating garbage to a minimum.
 851   // YSR: we won't do this for now -- it's an optimization to be
 852   // done post-beta.
 853 
 854   // YSR:    ignoring weak refs for now; will do at bug fixing stage
 855   // EVM:    assert(discoveredRefsAreClear());
 856 
 857 
 858   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
 859   g1p->record_concurrent_mark_init_start();
 860   checkpointRootsInitialPre();
 861 
 862   // YSR: when concurrent precleaning is in place, we'll
 863   // need to clear the cached card table here
 864 
 865   ResourceMark rm;
 866   HandleMark  hm;
 867 
 868   g1h->ensure_parsability(false);
 869   g1h->perm_gen()->save_marks();
 870 
 871   CMMarkRootsClosure notOlder(this, g1h, false);
 872   CMMarkRootsClosure older(this, g1h, true);
 873 
 874   g1h->set_marking_started();
 875   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
 876 
 877   g1h->process_strong_roots(false,   // fake perm gen collection
 878                             SharedHeap::SO_AllClasses,
 879                             &notOlder, // Regular roots
 880                             &older    // Perm Gen Roots
 881                             );
 882   checkpointRootsInitialPost();
 883 
 884   // Statistics.
 885   double end = os::elapsedTime();
 886   _init_times.add((end - start) * 1000.0);
 887   GCOverheadReporter::recordSTWEnd(end);
 888 
 889   g1p->record_concurrent_mark_init_end();
 890 }
 891 
 892 /*
 893    Notice that in the next two methods, we actually leave the STS
 894    during the barrier sync and join it immediately afterwards. If we
 895    do not do this, this then the following deadlock can occur: one
 896    thread could be in the barrier sync code, waiting for the other
 897    thread to also sync up, whereas another one could be trying to
 898    yield, while also waiting for the other threads to sync up too.
 899 
 900    Because the thread that does the sync barrier has left the STS, it
 901    is possible to be suspended for a Full GC or an evacuation pause
 902    could occur. This is actually safe, since the entering the sync
 903    barrier is one of the last things do_marking_step() does, and it
 904    doesn't manipulate any data structures afterwards.
 905 */
 906 
 907 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 908   if (verbose_low())
 909     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 910 
 911   ConcurrentGCThread::stsLeave();
 912   _first_overflow_barrier_sync.enter();
 913   ConcurrentGCThread::stsJoin();
 914   // at this point everyone should have synced up and not be doing any
 915   // more work
 916 
 917   if (verbose_low())
 918     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 919 
 920   // let task 0 do this
 921   if (task_num == 0) {
 922     // task 0 is responsible for clearing the global data structures
 923     clear_marking_state();
 924 
 925     if (PrintGC) {
 926       gclog_or_tty->date_stamp(PrintGCDateStamps);
 927       gclog_or_tty->stamp(PrintGCTimeStamps);
 928       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 929     }
 930   }
 931 
 932   // after this, each task should reset its own data structures then
 933   // then go into the second barrier
 934 }
 935 
 936 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 937   if (verbose_low())
 938     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 939 
 940   ConcurrentGCThread::stsLeave();
 941   _second_overflow_barrier_sync.enter();
 942   ConcurrentGCThread::stsJoin();
 943   // at this point everything should be re-initialised and ready to go
 944 
 945   if (verbose_low())
 946     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 947 }
 948 
 949 void ConcurrentMark::grayRoot(oop p) {
 950   HeapWord* addr = (HeapWord*) p;
 951   // We can't really check against _heap_start and _heap_end, since it
 952   // is possible during an evacuation pause with piggy-backed
 953   // initial-mark that the committed space is expanded during the
 954   // pause without CM observing this change. So the assertions below
 955   // is a bit conservative; but better than nothing.
 956   tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
 957                     "address should be within the heap bounds" );
 958 
 959   if (!_nextMarkBitMap->isMarked(addr))
 960     _nextMarkBitMap->parMark(addr);
 961 }
 962 
 963 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 964   // The objects on the region have already been marked "in bulk" by
 965   // the caller. We only need to decide whether to push the region on
 966   // the region stack or not.
 967 
 968   if (!concurrent_marking_in_progress() || !_should_gray_objects)
 969     // We're done with marking and waiting for remark. We do not need to
 970     // push anything else on the region stack.
 971     return;
 972 
 973   HeapWord* finger = _finger;
 974 
 975   if (verbose_low())
 976     gclog_or_tty->print_cr("[global] attempting to push "
 977                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 978                            PTR_FORMAT, mr.start(), mr.end(), finger);
 979 
 980   if (mr.start() < finger) {
 981     // The finger is always heap region aligned and it is not possible
 982     // for mr to span heap regions.
 983     tmp_guarantee_CM( mr.end() <= finger, "invariant" );
 984 
 985     tmp_guarantee_CM( mr.start() <= mr.end() &&
 986                       _heap_start <= mr.start() &&
 987                       mr.end() <= _heap_end,
 988                   "region boundaries should fall within the committed space" );
 989     if (verbose_low())
 990       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
 991                              "below the finger, pushing it",
 992                              mr.start(), mr.end());
 993 
 994     if (!region_stack_push(mr)) {
 995       if (verbose_low())
 996         gclog_or_tty->print_cr("[global] region stack has overflown.");
 997     }
 998   }
 999 }
1000 
1001 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1002   // The object is not marked by the caller. We need to at least mark
1003   // it and maybe push in on the stack.
1004 
1005   HeapWord* addr = (HeapWord*)p;
1006   if (!_nextMarkBitMap->isMarked(addr)) {
1007     // We definitely need to mark it, irrespective whether we bail out
1008     // because we're done with marking.
1009     if (_nextMarkBitMap->parMark(addr)) {
1010       if (!concurrent_marking_in_progress() || !_should_gray_objects)
1011         // If we're done with concurrent marking and we're waiting for
1012         // remark, then we're not pushing anything on the stack.
1013         return;
1014 
1015       // No OrderAccess:store_load() is needed. It is implicit in the
1016       // CAS done in parMark(addr) above
1017       HeapWord* finger = _finger;
1018 
1019       if (addr < finger) {
1020         if (!mark_stack_push(oop(addr))) {
1021           if (verbose_low())
1022             gclog_or_tty->print_cr("[global] global stack overflow "
1023                                    "during parMark");
1024         }
1025       }
1026     }
1027   }
1028 }
1029 
1030 class CMConcurrentMarkingTask: public AbstractGangTask {
1031 private:
1032   ConcurrentMark*       _cm;
1033   ConcurrentMarkThread* _cmt;
1034 
1035 public:
1036   void work(int worker_i) {
1037     guarantee( Thread::current()->is_ConcurrentGC_thread(),
1038                "this should only be done by a conc GC thread" );
1039 
1040     double start_vtime = os::elapsedVTime();
1041 
1042     ConcurrentGCThread::stsJoin();
1043 
1044     guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
1045     CMTask* the_task = _cm->task(worker_i);
1046     the_task->start_co_tracker();
1047     the_task->record_start_time();
1048     if (!_cm->has_aborted()) {
1049       do {
1050         double start_vtime_sec = os::elapsedVTime();
1051         double start_time_sec = os::elapsedTime();
1052         the_task->do_marking_step(10.0);
1053         double end_time_sec = os::elapsedTime();
1054         double end_vtime_sec = os::elapsedVTime();
1055         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1056         double elapsed_time_sec = end_time_sec - start_time_sec;
1057         _cm->clear_has_overflown();
1058 
1059         bool ret = _cm->do_yield_check(worker_i);
1060 
1061         jlong sleep_time_ms;
1062         if (!_cm->has_aborted() && the_task->has_aborted()) {
1063           sleep_time_ms =
1064             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1065           ConcurrentGCThread::stsLeave();
1066           os::sleep(Thread::current(), sleep_time_ms, false);
1067           ConcurrentGCThread::stsJoin();
1068         }
1069         double end_time2_sec = os::elapsedTime();
1070         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1071 
1072         the_task->update_co_tracker();
1073 
1074 #if 0
1075           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1076                                  "overhead %1.4lf",
1077                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1078                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1079           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1080                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1081 #endif
1082       } while (!_cm->has_aborted() && the_task->has_aborted());
1083     }
1084     the_task->record_end_time();
1085     guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
1086 
1087     ConcurrentGCThread::stsLeave();
1088 
1089     double end_vtime = os::elapsedVTime();
1090     the_task->update_co_tracker(true);
1091     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1092   }
1093 
1094   CMConcurrentMarkingTask(ConcurrentMark* cm,
1095                           ConcurrentMarkThread* cmt) :
1096       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1097 
1098   ~CMConcurrentMarkingTask() { }
1099 };
1100 
1101 void ConcurrentMark::markFromRoots() {
1102   // we might be tempted to assert that:
1103   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1104   //        "inconsistent argument?");
1105   // However that wouldn't be right, because it's possible that
1106   // a safepoint is indeed in progress as a younger generation
1107   // stop-the-world GC happens even as we mark in this generation.
1108 
1109   _restart_for_overflow = false;
1110 
1111   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1112 
1113   CMConcurrentMarkingTask markingTask(this, cmThread());
1114   if (parallel_marking_threads() > 0)
1115     _parallel_workers->run_task(&markingTask);
1116   else
1117     markingTask.work(0);
1118   print_stats();
1119 }
1120 
1121 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1122   // world is stopped at this checkpoint
1123   assert(SafepointSynchronize::is_at_safepoint(),
1124          "world should be stopped");
1125   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1126 
1127   // If a full collection has happened, we shouldn't do this.
1128   if (has_aborted()) {
1129     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1130     return;
1131   }
1132 
1133   G1CollectorPolicy* g1p = g1h->g1_policy();
1134   g1p->record_concurrent_mark_remark_start();
1135 
1136   double start = os::elapsedTime();
1137   GCOverheadReporter::recordSTWStart(start);
1138 
1139   checkpointRootsFinalWork();
1140 
1141   double mark_work_end = os::elapsedTime();
1142 
1143   weakRefsWork(clear_all_soft_refs);
1144 
1145   if (has_overflown()) {
1146     // Oops.  We overflowed.  Restart concurrent marking.
1147     _restart_for_overflow = true;
1148     // Clear the flag. We do not need it any more.
1149     clear_has_overflown();
1150     if (G1TraceMarkStackOverflow)
1151       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1152   } else {
1153     // We're done with marking.
1154     JavaThread::satb_mark_queue_set().set_active_all_threads(false);
1155   }
1156 
1157 #if VERIFY_OBJS_PROCESSED
1158   _scan_obj_cl.objs_processed = 0;
1159   ThreadLocalObjQueue::objs_enqueued = 0;
1160 #endif
1161 
1162   // Statistics
1163   double now = os::elapsedTime();
1164   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1165   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1166   _remark_times.add((now - start) * 1000.0);
1167 
1168   GCOverheadReporter::recordSTWEnd(now);
1169   for (int i = 0; i < (int)_max_task_num; ++i)
1170     _tasks[i]->disable_co_tracker();
1171   _cleanup_co_tracker.enable();
1172   _cleanup_co_tracker.reset(cleanup_task_overhead());
1173   g1p->record_concurrent_mark_remark_end();
1174 }
1175 
1176 
1177 #define CARD_BM_TEST_MODE 0
1178 
1179 class CalcLiveObjectsClosure: public HeapRegionClosure {
1180 
1181   CMBitMapRO* _bm;
1182   ConcurrentMark* _cm;
1183   COTracker* _co_tracker;
1184   bool _changed;
1185   bool _yield;
1186   size_t _words_done;
1187   size_t _tot_live;
1188   size_t _tot_used;
1189   size_t _regions_done;
1190   double _start_vtime_sec;
1191 
1192   BitMap* _region_bm;
1193   BitMap* _card_bm;
1194   intptr_t _bottom_card_num;
1195   bool _final;
1196 
1197   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1198     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1199 #if CARD_BM_TEST_MODE
1200       guarantee(_card_bm->at(i - _bottom_card_num),
1201                 "Should already be set.");
1202 #else
1203       _card_bm->par_at_put(i - _bottom_card_num, 1);
1204 #endif
1205     }
1206   }
1207 
1208 public:
1209   CalcLiveObjectsClosure(bool final,
1210                          CMBitMapRO *bm, ConcurrentMark *cm,
1211                          BitMap* region_bm, BitMap* card_bm,
1212                          COTracker* co_tracker) :
1213     _bm(bm), _cm(cm), _changed(false), _yield(true),
1214     _words_done(0), _tot_live(0), _tot_used(0),
1215     _region_bm(region_bm), _card_bm(card_bm),
1216     _final(final), _co_tracker(co_tracker),
1217     _regions_done(0), _start_vtime_sec(0.0)
1218   {
1219     _bottom_card_num =
1220       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1221                CardTableModRefBS::card_shift);
1222   }
1223 
1224   bool doHeapRegion(HeapRegion* hr) {
1225     if (_co_tracker != NULL)
1226       _co_tracker->update();
1227 
1228     if (!_final && _regions_done == 0)
1229       _start_vtime_sec = os::elapsedVTime();
1230 
1231     if (hr->continuesHumongous()) return false;
1232 
1233     HeapWord* nextTop = hr->next_top_at_mark_start();
1234     HeapWord* start   = hr->top_at_conc_mark_count();
1235     assert(hr->bottom() <= start && start <= hr->end() &&
1236            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1237            start <= nextTop,
1238            "Preconditions.");
1239     // Otherwise, record the number of word's we'll examine.
1240     size_t words_done = (nextTop - start);
1241     // Find the first marked object at or after "start".
1242     start = _bm->getNextMarkedWordAddress(start, nextTop);
1243     size_t marked_bytes = 0;
1244 
1245     // Below, the term "card num" means the result of shifting an address
1246     // by the card shift -- address 0 corresponds to card number 0.  One
1247     // must subtract the card num of the bottom of the heap to obtain a
1248     // card table index.
1249     // The first card num of the sequence of live cards currently being
1250     // constructed.  -1 ==> no sequence.
1251     intptr_t start_card_num = -1;
1252     // The last card num of the sequence of live cards currently being
1253     // constructed.  -1 ==> no sequence.
1254     intptr_t last_card_num = -1;
1255 
1256     while (start < nextTop) {
1257       if (_yield && _cm->do_yield_check()) {
1258         // We yielded.  It might be for a full collection, in which case
1259         // all bets are off; terminate the traversal.
1260         if (_cm->has_aborted()) {
1261           _changed = false;
1262           return true;
1263         } else {
1264           // Otherwise, it might be a collection pause, and the region
1265           // we're looking at might be in the collection set.  We'll
1266           // abandon this region.
1267           return false;
1268         }
1269       }
1270       oop obj = oop(start);
1271       int obj_sz = obj->size();
1272       // The card num of the start of the current object.
1273       intptr_t obj_card_num =
1274         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1275 
1276       HeapWord* obj_last = start + obj_sz - 1;
1277       intptr_t obj_last_card_num =
1278         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1279 
1280       if (obj_card_num != last_card_num) {
1281         if (start_card_num == -1) {
1282           assert(last_card_num == -1, "Both or neither.");
1283           start_card_num = obj_card_num;
1284         } else {
1285           assert(last_card_num != -1, "Both or neither.");
1286           assert(obj_card_num >= last_card_num, "Inv");
1287           if ((obj_card_num - last_card_num) > 1) {
1288             // Mark the last run, and start a new one.
1289             mark_card_num_range(start_card_num, last_card_num);
1290             start_card_num = obj_card_num;
1291           }
1292         }
1293 #if CARD_BM_TEST_MODE
1294         /*
1295         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1296                                obj_card_num - _bottom_card_num,
1297                                obj_last_card_num - _bottom_card_num);
1298         */
1299         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1300           _card_bm->par_at_put(j - _bottom_card_num, 1);
1301         }
1302 #endif
1303       }
1304       // In any case, we set the last card num.
1305       last_card_num = obj_last_card_num;
1306 
1307       marked_bytes += obj_sz * HeapWordSize;
1308       // Find the next marked object after this one.
1309       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1310       _changed = true;
1311     }
1312     // Handle the last range, if any.
1313     if (start_card_num != -1)
1314       mark_card_num_range(start_card_num, last_card_num);
1315     if (_final) {
1316       // Mark the allocated-since-marking portion...
1317       HeapWord* tp = hr->top();
1318       if (nextTop < tp) {
1319         start_card_num =
1320           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1321         last_card_num =
1322           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1323         mark_card_num_range(start_card_num, last_card_num);
1324         // This definitely means the region has live objects.
1325         _region_bm->par_at_put(hr->hrs_index(), 1);
1326       }
1327     }
1328 
1329     hr->add_to_marked_bytes(marked_bytes);
1330     // Update the live region bitmap.
1331     if (marked_bytes > 0) {
1332       _region_bm->par_at_put(hr->hrs_index(), 1);
1333     }
1334     hr->set_top_at_conc_mark_count(nextTop);
1335     _tot_live += hr->next_live_bytes();
1336     _tot_used += hr->used();
1337     _words_done = words_done;
1338 
1339     if (!_final) {
1340       ++_regions_done;
1341       if (_regions_done % 10 == 0) {
1342         double end_vtime_sec = os::elapsedVTime();
1343         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1344         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1345           jlong sleep_time_ms =
1346             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1347 #if 0
1348           gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
1349                                  "overhead %1.4lf",
1350                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1351                                  _co_tracker->concOverhead(os::elapsedTime()));
1352 #endif
1353           os::sleep(Thread::current(), sleep_time_ms, false);
1354           _start_vtime_sec = end_vtime_sec;
1355         }
1356       }
1357     }
1358 
1359     return false;
1360   }
1361 
1362   bool changed() { return _changed;  }
1363   void reset()   { _changed = false; _words_done = 0; }
1364   void no_yield() { _yield = false; }
1365   size_t words_done() { return _words_done; }
1366   size_t tot_live() { return _tot_live; }
1367   size_t tot_used() { return _tot_used; }
1368 };
1369 
1370 
1371 void ConcurrentMark::calcDesiredRegions() {
1372   guarantee( _cleanup_co_tracker.enabled(), "invariant" );
1373   _cleanup_co_tracker.start();
1374 
1375   _region_bm.clear();
1376   _card_bm.clear();
1377   CalcLiveObjectsClosure calccl(false /*final*/,
1378                                 nextMarkBitMap(), this,
1379                                 &_region_bm, &_card_bm,
1380                                 &_cleanup_co_tracker);
1381   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1382   g1h->heap_region_iterate(&calccl);
1383 
1384   do {
1385     calccl.reset();
1386     g1h->heap_region_iterate(&calccl);
1387   } while (calccl.changed());
1388 
1389   _cleanup_co_tracker.update(true);
1390 }
1391 
1392 class G1ParFinalCountTask: public AbstractGangTask {
1393 protected:
1394   G1CollectedHeap* _g1h;
1395   CMBitMap* _bm;
1396   size_t _n_workers;
1397   size_t *_live_bytes;
1398   size_t *_used_bytes;
1399   BitMap* _region_bm;
1400   BitMap* _card_bm;
1401 public:
1402   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1403                       BitMap* region_bm, BitMap* card_bm) :
1404     AbstractGangTask("G1 final counting"), _g1h(g1h),
1405     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1406   {
1407     if (ParallelGCThreads > 0)
1408       _n_workers = _g1h->workers()->total_workers();
1409     else
1410       _n_workers = 1;
1411     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1412     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1413   }
1414 
1415   ~G1ParFinalCountTask() {
1416     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1417     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1418   }
1419 
1420   void work(int i) {
1421     CalcLiveObjectsClosure calccl(true /*final*/,
1422                                   _bm, _g1h->concurrent_mark(),
1423                                   _region_bm, _card_bm,
1424                                   NULL /* CO tracker */);
1425     calccl.no_yield();
1426     if (ParallelGCThreads > 0) {
1427       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1428                                             HeapRegion::FinalCountClaimValue);
1429     } else {
1430       _g1h->heap_region_iterate(&calccl);
1431     }
1432     assert(calccl.complete(), "Shouldn't have yielded!");
1433 
1434     guarantee( (size_t)i < _n_workers, "invariant" );
1435     _live_bytes[i] = calccl.tot_live();
1436     _used_bytes[i] = calccl.tot_used();
1437   }
1438   size_t live_bytes()  {
1439     size_t live_bytes = 0;
1440     for (size_t i = 0; i < _n_workers; ++i)
1441       live_bytes += _live_bytes[i];
1442     return live_bytes;
1443   }
1444   size_t used_bytes()  {
1445     size_t used_bytes = 0;
1446     for (size_t i = 0; i < _n_workers; ++i)
1447       used_bytes += _used_bytes[i];
1448     return used_bytes;
1449   }
1450 };
1451 
1452 class G1ParNoteEndTask;
1453 
1454 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1455   G1CollectedHeap* _g1;
1456   int _worker_num;
1457   size_t _max_live_bytes;
1458   size_t _regions_claimed;
1459   size_t _freed_bytes;
1460   size_t _cleared_h_regions;
1461   size_t _freed_regions;
1462   UncleanRegionList* _unclean_region_list;
1463   double _claimed_region_time;
1464   double _max_region_time;
1465 
1466 public:
1467   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1468                              UncleanRegionList* list,
1469                              int worker_num);
1470   size_t freed_bytes() { return _freed_bytes; }
1471   size_t cleared_h_regions() { return _cleared_h_regions; }
1472   size_t freed_regions() { return  _freed_regions; }
1473   UncleanRegionList* unclean_region_list() {
1474     return _unclean_region_list;
1475   }
1476 
1477   bool doHeapRegion(HeapRegion *r);
1478 
1479   size_t max_live_bytes() { return _max_live_bytes; }
1480   size_t regions_claimed() { return _regions_claimed; }
1481   double claimed_region_time_sec() { return _claimed_region_time; }
1482   double max_region_time_sec() { return _max_region_time; }
1483 };
1484 
1485 class G1ParNoteEndTask: public AbstractGangTask {
1486   friend class G1NoteEndOfConcMarkClosure;
1487 protected:
1488   G1CollectedHeap* _g1h;
1489   size_t _max_live_bytes;
1490   size_t _freed_bytes;
1491   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1492 public:
1493   G1ParNoteEndTask(G1CollectedHeap* g1h,
1494                    ConcurrentMark::ParCleanupThreadState**
1495                    par_cleanup_thread_state) :
1496     AbstractGangTask("G1 note end"), _g1h(g1h),
1497     _max_live_bytes(0), _freed_bytes(0),
1498     _par_cleanup_thread_state(par_cleanup_thread_state)
1499   {}
1500 
1501   void work(int i) {
1502     double start = os::elapsedTime();
1503     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1504                                            &_par_cleanup_thread_state[i]->list,
1505                                            i);
1506     if (ParallelGCThreads > 0) {
1507       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1508                                             HeapRegion::NoteEndClaimValue);
1509     } else {
1510       _g1h->heap_region_iterate(&g1_note_end);
1511     }
1512     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1513 
1514     // Now finish up freeing the current thread's regions.
1515     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1516                                   g1_note_end.cleared_h_regions(),
1517                                   0, NULL);
1518     {
1519       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1520       _max_live_bytes += g1_note_end.max_live_bytes();
1521       _freed_bytes += g1_note_end.freed_bytes();
1522     }
1523     double end = os::elapsedTime();
1524     if (G1PrintParCleanupStats) {
1525       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1526                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1527                           i, start, end, (end-start)*1000.0,
1528                           g1_note_end.regions_claimed(),
1529                           g1_note_end.claimed_region_time_sec()*1000.0,
1530                           g1_note_end.max_region_time_sec()*1000.0);
1531     }
1532   }
1533   size_t max_live_bytes() { return _max_live_bytes; }
1534   size_t freed_bytes() { return _freed_bytes; }
1535 };
1536 
1537 class G1ParScrubRemSetTask: public AbstractGangTask {
1538 protected:
1539   G1RemSet* _g1rs;
1540   BitMap* _region_bm;
1541   BitMap* _card_bm;
1542 public:
1543   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1544                        BitMap* region_bm, BitMap* card_bm) :
1545     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1546     _region_bm(region_bm), _card_bm(card_bm)
1547   {}
1548 
1549   void work(int i) {
1550     if (ParallelGCThreads > 0) {
1551       _g1rs->scrub_par(_region_bm, _card_bm, i,
1552                        HeapRegion::ScrubRemSetClaimValue);
1553     } else {
1554       _g1rs->scrub(_region_bm, _card_bm);
1555     }
1556   }
1557 
1558 };
1559 
1560 G1NoteEndOfConcMarkClosure::
1561 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1562                            UncleanRegionList* list,
1563                            int worker_num)
1564   : _g1(g1), _worker_num(worker_num),
1565     _max_live_bytes(0), _regions_claimed(0),
1566     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1567     _claimed_region_time(0.0), _max_region_time(0.0),
1568     _unclean_region_list(list)
1569 {}
1570 
1571 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1572   // We use a claim value of zero here because all regions
1573   // were claimed with value 1 in the FinalCount task.
1574   r->reset_gc_time_stamp();
1575   if (!r->continuesHumongous()) {
1576     double start = os::elapsedTime();
1577     _regions_claimed++;
1578     r->note_end_of_marking();
1579     _max_live_bytes += r->max_live_bytes();
1580     _g1->free_region_if_totally_empty_work(r,
1581                                            _freed_bytes,
1582                                            _cleared_h_regions,
1583                                            _freed_regions,
1584                                            _unclean_region_list,
1585                                            true /*par*/);
1586     double region_time = (os::elapsedTime() - start);
1587     _claimed_region_time += region_time;
1588     if (region_time > _max_region_time) _max_region_time = region_time;
1589   }
1590   return false;
1591 }
1592 
1593 void ConcurrentMark::cleanup() {
1594   // world is stopped at this checkpoint
1595   assert(SafepointSynchronize::is_at_safepoint(),
1596          "world should be stopped");
1597   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1598 
1599   // If a full collection has happened, we shouldn't do this.
1600   if (has_aborted()) {
1601     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1602     return;
1603   }
1604 
1605   _cleanup_co_tracker.disable();
1606 
1607   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1608   g1p->record_concurrent_mark_cleanup_start();
1609 
1610   double start = os::elapsedTime();
1611   GCOverheadReporter::recordSTWStart(start);
1612 
1613   // Do counting once more with the world stopped for good measure.
1614   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1615                                         &_region_bm, &_card_bm);
1616   if (ParallelGCThreads > 0) {
1617     assert(g1h->check_heap_region_claim_values(
1618                                                HeapRegion::InitialClaimValue),
1619            "sanity check");
1620 
1621     int n_workers = g1h->workers()->total_workers();
1622     g1h->set_par_threads(n_workers);
1623     g1h->workers()->run_task(&g1_par_count_task);
1624     g1h->set_par_threads(0);
1625 
1626     assert(g1h->check_heap_region_claim_values(
1627                                              HeapRegion::FinalCountClaimValue),
1628            "sanity check");
1629   } else {
1630     g1_par_count_task.work(0);
1631   }
1632 
1633   size_t known_garbage_bytes =
1634     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1635 #if 0
1636   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1637                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1638                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1639                          (double) known_garbage_bytes / (double) (1024 * 1024));
1640 #endif // 0
1641   g1p->set_known_garbage_bytes(known_garbage_bytes);
1642 
1643   size_t start_used_bytes = g1h->used();
1644   _at_least_one_mark_complete = true;
1645   g1h->set_marking_complete();
1646 
1647   double count_end = os::elapsedTime();
1648   double this_final_counting_time = (count_end - start);
1649   if (G1PrintParCleanupStats) {
1650     gclog_or_tty->print_cr("Cleanup:");
1651     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1652                            this_final_counting_time*1000.0);
1653   }
1654   _total_counting_time += this_final_counting_time;
1655 
1656   // Install newly created mark bitMap as "prev".
1657   swapMarkBitMaps();
1658 
1659   g1h->reset_gc_time_stamp();
1660 
1661   // Note end of marking in all heap regions.
1662   double note_end_start = os::elapsedTime();
1663   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1664   if (ParallelGCThreads > 0) {
1665     int n_workers = g1h->workers()->total_workers();
1666     g1h->set_par_threads(n_workers);
1667     g1h->workers()->run_task(&g1_par_note_end_task);
1668     g1h->set_par_threads(0);
1669 
1670     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1671            "sanity check");
1672   } else {
1673     g1_par_note_end_task.work(0);
1674   }
1675   g1h->set_unclean_regions_coming(true);
1676   double note_end_end = os::elapsedTime();
1677   // Tell the mutators that there might be unclean regions coming...
1678   if (G1PrintParCleanupStats) {
1679     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1680                            (note_end_end - note_end_start)*1000.0);
1681   }
1682 
1683 
1684   // call below, since it affects the metric by which we sort the heap
1685   // regions.
1686   if (G1ScrubRemSets) {
1687     double rs_scrub_start = os::elapsedTime();
1688     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1689     if (ParallelGCThreads > 0) {
1690       int n_workers = g1h->workers()->total_workers();
1691       g1h->set_par_threads(n_workers);
1692       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1693       g1h->set_par_threads(0);
1694 
1695       assert(g1h->check_heap_region_claim_values(
1696                                             HeapRegion::ScrubRemSetClaimValue),
1697              "sanity check");
1698     } else {
1699       g1_par_scrub_rs_task.work(0);
1700     }
1701 
1702     double rs_scrub_end = os::elapsedTime();
1703     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1704     _total_rs_scrub_time += this_rs_scrub_time;
1705   }
1706 
1707   // this will also free any regions totally full of garbage objects,
1708   // and sort the regions.
1709   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1710                         g1_par_note_end_task.freed_bytes(),
1711                         g1_par_note_end_task.max_live_bytes());
1712 
1713   // Statistics.
1714   double end = os::elapsedTime();
1715   _cleanup_times.add((end - start) * 1000.0);
1716   GCOverheadReporter::recordSTWEnd(end);
1717 
1718   // G1CollectedHeap::heap()->print();
1719   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1720   // G1CollectedHeap::heap()->get_gc_time_stamp());
1721 
1722   if (PrintGC || PrintGCDetails) {
1723     g1h->print_size_transition(gclog_or_tty,
1724                                start_used_bytes,
1725                                g1h->used(),
1726                                g1h->capacity());
1727   }
1728 
1729   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1730   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1731 
1732   // We need to make this be a "collection" so any collection pause that
1733   // races with it goes around and waits for completeCleanup to finish.
1734   g1h->increment_total_collections();
1735 
1736 #ifndef PRODUCT
1737   if (G1VerifyConcMark) {
1738     G1CollectedHeap::heap()->prepare_for_verify();
1739     G1CollectedHeap::heap()->verify(true,false);
1740   }
1741 #endif
1742 }
1743 
1744 void ConcurrentMark::completeCleanup() {
1745   // A full collection intervened.
1746   if (has_aborted()) return;
1747 
1748   int first = 0;
1749   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1750   for (int t = 0; t < last; t++) {
1751     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1752     assert(list->well_formed(), "Inv");
1753     HeapRegion* hd = list->hd();
1754     while (hd != NULL) {
1755       // Now finish up the other stuff.
1756       hd->rem_set()->clear();
1757       HeapRegion* next_hd = hd->next_from_unclean_list();
1758       (void)list->pop();
1759       guarantee(list->hd() == next_hd, "how not?");
1760       _g1h->put_region_on_unclean_list(hd);
1761       if (!hd->isHumongous()) {
1762         // Add this to the _free_regions count by 1.
1763         _g1h->finish_free_region_work(0, 0, 1, NULL);
1764       }
1765       hd = list->hd();
1766       guarantee(hd == next_hd, "how not?");
1767     }
1768   }
1769 }
1770 
1771 
1772 class G1CMIsAliveClosure: public BoolObjectClosure {
1773   G1CollectedHeap* _g1;
1774  public:
1775   G1CMIsAliveClosure(G1CollectedHeap* g1) :
1776     _g1(g1)
1777   {}
1778 
1779   void do_object(oop obj) {
1780     assert(false, "not to be invoked");
1781   }
1782   bool do_object_b(oop obj) {
1783     HeapWord* addr = (HeapWord*)obj;
1784     return addr != NULL &&
1785            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1786   }
1787 };
1788 
1789 class G1CMKeepAliveClosure: public OopClosure {
1790   G1CollectedHeap* _g1;
1791   ConcurrentMark*  _cm;
1792   CMBitMap*        _bitMap;
1793  public:
1794   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1795                        CMBitMap* bitMap) :
1796     _g1(g1), _cm(cm),
1797     _bitMap(bitMap) {}
1798 
1799   void do_oop(narrowOop* p) {
1800     guarantee(false, "NYI");
1801   }
1802 
1803   void do_oop(oop* p) {
1804     oop thisOop = *p;
1805     HeapWord* addr = (HeapWord*)thisOop;
1806     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1807       _bitMap->mark(addr);
1808       _cm->mark_stack_push(thisOop);
1809     }
1810   }
1811 };
1812 
1813 class G1CMDrainMarkingStackClosure: public VoidClosure {
1814   CMMarkStack*                  _markStack;
1815   CMBitMap*                     _bitMap;
1816   G1CMKeepAliveClosure*         _oopClosure;
1817  public:
1818   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1819                                G1CMKeepAliveClosure* oopClosure) :
1820     _bitMap(bitMap),
1821     _markStack(markStack),
1822     _oopClosure(oopClosure)
1823   {}
1824 
1825   void do_void() {
1826     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1827   }
1828 };
1829 
1830 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1831   ResourceMark rm;
1832   HandleMark   hm;
1833   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
1834   ReferenceProcessor* rp = g1h->ref_processor();
1835 
1836   // Process weak references.
1837   rp->snap_policy(clear_all_soft_refs);
1838   assert(_markStack.isEmpty(), "mark stack should be empty");
1839 
1840   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
1841   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1842   G1CMDrainMarkingStackClosure
1843     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1844                                &g1KeepAliveClosure);
1845 
1846   // XXXYYY  Also: copy the parallel ref processing code from CMS.
1847   rp->process_discovered_references(&g1IsAliveClosure,
1848                                     &g1KeepAliveClosure,
1849                                     &g1DrainMarkingStackClosure,
1850                                     NULL);
1851   assert(_markStack.overflow() || _markStack.isEmpty(),
1852          "mark stack should be empty (unless it overflowed)");
1853   if (_markStack.overflow()) {
1854     set_has_overflown();
1855   }
1856 
1857   rp->enqueue_discovered_references();
1858   rp->verify_no_references_recorded();
1859   assert(!rp->discovery_enabled(), "should have been disabled");
1860 
1861   // Now clean up stale oops in SymbolTable and StringTable
1862   SymbolTable::unlink(&g1IsAliveClosure);
1863   StringTable::unlink(&g1IsAliveClosure);
1864 }
1865 
1866 void ConcurrentMark::swapMarkBitMaps() {
1867   CMBitMapRO* temp = _prevMarkBitMap;
1868   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
1869   _nextMarkBitMap  = (CMBitMap*)  temp;
1870 }
1871 
1872 class CMRemarkTask: public AbstractGangTask {
1873 private:
1874   ConcurrentMark *_cm;
1875 
1876 public:
1877   void work(int worker_i) {
1878     // Since all available tasks are actually started, we should
1879     // only proceed if we're supposed to be actived.
1880     if ((size_t)worker_i < _cm->active_tasks()) {
1881       CMTask* task = _cm->task(worker_i);
1882       task->record_start_time();
1883       do {
1884         task->do_marking_step(1000000000.0 /* something very large */);
1885       } while (task->has_aborted() && !_cm->has_overflown());
1886       // If we overflow, then we do not want to restart. We instead
1887       // want to abort remark and do concurrent marking again.
1888       task->record_end_time();
1889     }
1890   }
1891 
1892   CMRemarkTask(ConcurrentMark* cm) :
1893     AbstractGangTask("Par Remark"), _cm(cm) { }
1894 };
1895 
1896 void ConcurrentMark::checkpointRootsFinalWork() {
1897   ResourceMark rm;
1898   HandleMark   hm;
1899   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1900 
1901   g1h->ensure_parsability(false);
1902 
1903   if (ParallelGCThreads > 0) {
1904     g1h->change_strong_roots_parity();
1905     // this is remark, so we'll use up all available threads
1906     int active_workers = ParallelGCThreads;
1907     set_phase(active_workers, false);
1908 
1909     CMRemarkTask remarkTask(this);
1910     // We will start all available threads, even if we decide that the
1911     // active_workers will be fewer. The extra ones will just bail out
1912     // immediately.
1913     int n_workers = g1h->workers()->total_workers();
1914     g1h->set_par_threads(n_workers);
1915     g1h->workers()->run_task(&remarkTask);
1916     g1h->set_par_threads(0);
1917 
1918     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1919     guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1920   } else {
1921     g1h->change_strong_roots_parity();
1922     // this is remark, so we'll use up all available threads
1923     int active_workers = 1;
1924     set_phase(active_workers, false);
1925 
1926     CMRemarkTask remarkTask(this);
1927     // We will start all available threads, even if we decide that the
1928     // active_workers will be fewer. The extra ones will just bail out
1929     // immediately.
1930     remarkTask.work(0);
1931 
1932     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1933     guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1934   }
1935 
1936   print_stats();
1937 
1938   if (!restart_for_overflow())
1939     set_non_marking_state();
1940 
1941 #if VERIFY_OBJS_PROCESSED
1942   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1943     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1944                            _scan_obj_cl.objs_processed,
1945                            ThreadLocalObjQueue::objs_enqueued);
1946     guarantee(_scan_obj_cl.objs_processed ==
1947               ThreadLocalObjQueue::objs_enqueued,
1948               "Different number of objs processed and enqueued.");
1949   }
1950 #endif
1951 }
1952 
1953 class ReachablePrinterOopClosure: public OopClosure {
1954 private:
1955   G1CollectedHeap* _g1h;
1956   CMBitMapRO*      _bitmap;
1957   outputStream*    _out;
1958 
1959 public:
1960   ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
1961     _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
1962 
1963   void do_oop(narrowOop* p) {
1964     guarantee(false, "NYI");
1965   }
1966 
1967   void do_oop(oop* p) {
1968     oop         obj = *p;
1969     const char* str = NULL;
1970     const char* str2 = "";
1971 
1972     if (!_g1h->is_in_g1_reserved(obj))
1973       str = "outside G1 reserved";
1974     else {
1975       HeapRegion* hr  = _g1h->heap_region_containing(obj);
1976       guarantee( hr != NULL, "invariant" );
1977       if (hr->obj_allocated_since_prev_marking(obj)) {
1978         str = "over TAMS";
1979         if (_bitmap->isMarked((HeapWord*) obj))
1980           str2 = " AND MARKED";
1981       } else if (_bitmap->isMarked((HeapWord*) obj))
1982         str = "marked";
1983       else
1984         str = "#### NOT MARKED ####";
1985     }
1986 
1987     _out->print_cr("    "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
1988                    p, (void*) obj, str, str2);
1989   }
1990 };
1991 
1992 class ReachablePrinterClosure: public BitMapClosure {
1993 private:
1994   CMBitMapRO* _bitmap;
1995   outputStream* _out;
1996 
1997 public:
1998   ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
1999     _bitmap(bitmap), _out(out) { }
2000 
2001   bool do_bit(size_t offset) {
2002     HeapWord* addr = _bitmap->offsetToHeapWord(offset);
2003     ReachablePrinterOopClosure oopCl(_bitmap, _out);
2004 
2005     _out->print_cr("  obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
2006     oop(addr)->oop_iterate(&oopCl);
2007     _out->print_cr("");
2008 
2009     return true;
2010   }
2011 };
2012 
2013 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2014 private:
2015   CMBitMapRO* _bitmap;
2016   outputStream* _out;
2017 
2018 public:
2019   void do_object(oop o) {
2020     ReachablePrinterOopClosure oopCl(_bitmap, _out);
2021 
2022     _out->print_cr("  obj "PTR_FORMAT" (over TAMS)", (void*) o);
2023     o->oop_iterate(&oopCl);
2024     _out->print_cr("");
2025   }
2026 
2027   ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2028     _bitmap(bitmap), _out(out) { }
2029 };
2030 
2031 class RegionReachablePrinterClosure : public HeapRegionClosure {
2032 private:
2033   CMBitMapRO* _bitmap;
2034   outputStream* _out;
2035 
2036 public:
2037   bool doHeapRegion(HeapRegion* hr) {
2038     HeapWord* b = hr->bottom();
2039     HeapWord* e = hr->end();
2040     HeapWord* t = hr->top();
2041     HeapWord* p = hr->prev_top_at_mark_start();
2042     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2043                    "PTAMS: "PTR_FORMAT, b, e, t, p);
2044     _out->print_cr("");
2045 
2046     ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
2047     hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2048 
2049     return false;
2050   }
2051 
2052   RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2053                                 outputStream* out) :
2054     _bitmap(bitmap), _out(out) { }
2055 };
2056 
2057 void ConcurrentMark::print_prev_bitmap_reachable() {
2058   outputStream* out = gclog_or_tty;
2059 
2060 #if SEND_HEAP_DUMP_TO_FILE
2061   guarantee(heap_dump_file == NULL, "Protocol");
2062   char fn_buf[100];
2063   sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
2064   heap_dump_file = fopen(fn_buf, "w");
2065   fileStream fstream(heap_dump_file);
2066   out = &fstream;
2067 #endif // SEND_HEAP_DUMP_TO_FILE
2068 
2069   RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
2070   out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
2071   _g1h->heap_region_iterate(&rcl);
2072   out->print_cr("");
2073 
2074   ReachablePrinterClosure cl(_prevMarkBitMap, out);
2075   out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
2076   _prevMarkBitMap->iterate(&cl);
2077   out->print_cr("");
2078 
2079 #if SEND_HEAP_DUMP_TO_FILE
2080   fclose(heap_dump_file);
2081   heap_dump_file = NULL;
2082 #endif // SEND_HEAP_DUMP_TO_FILE
2083 }
2084 
2085 // This note is for drainAllSATBBuffers and the code in between.
2086 // In the future we could reuse a task to do this work during an
2087 // evacuation pause (since now tasks are not active and can be claimed
2088 // during an evacuation pause). This was a late change to the code and
2089 // is currently not being taken advantage of.
2090 
2091 class CMGlobalObjectClosure : public ObjectClosure {
2092 private:
2093   ConcurrentMark* _cm;
2094 
2095 public:
2096   void do_object(oop obj) {
2097     _cm->deal_with_reference(obj);
2098   }
2099 
2100   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2101 };
2102 
2103 void ConcurrentMark::deal_with_reference(oop obj) {
2104   if (verbose_high())
2105     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2106                            (void*) obj);
2107 
2108 
2109   HeapWord* objAddr = (HeapWord*) obj;
2110   if (_g1h->is_in_g1_reserved(objAddr)) {
2111     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2112     HeapRegion* hr = _g1h->heap_region_containing(obj);
2113     if (_g1h->is_obj_ill(obj, hr)) {
2114       if (verbose_high())
2115         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2116                                "marked", (void*) obj);
2117 
2118       // we need to mark it first
2119       if (_nextMarkBitMap->parMark(objAddr)) {
2120         // No OrderAccess:store_load() is needed. It is implicit in the
2121         // CAS done in parMark(objAddr) above
2122         HeapWord* finger = _finger;
2123         if (objAddr < finger) {
2124           if (verbose_high())
2125             gclog_or_tty->print_cr("[global] below the global finger "
2126                                    "("PTR_FORMAT"), pushing it", finger);
2127           if (!mark_stack_push(obj)) {
2128             if (verbose_low())
2129               gclog_or_tty->print_cr("[global] global stack overflow during "
2130                                      "deal_with_reference");
2131           }
2132         }
2133       }
2134     }
2135   }
2136 }
2137 
2138 void ConcurrentMark::drainAllSATBBuffers() {
2139   CMGlobalObjectClosure oc(this);
2140   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2141   satb_mq_set.set_closure(&oc);
2142 
2143   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2144     if (verbose_medium())
2145       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2146   }
2147 
2148   // no need to check whether we should do this, as this is only
2149   // called during an evacuation pause
2150   satb_mq_set.iterate_closure_all_threads();
2151 
2152   satb_mq_set.set_closure(NULL);
2153   guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
2154 }
2155 
2156 void ConcurrentMark::markPrev(oop p) {
2157   // Note we are overriding the read-only view of the prev map here, via
2158   // the cast.
2159   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2160 }
2161 
2162 void ConcurrentMark::clear(oop p) {
2163   assert(p != NULL && p->is_oop(), "expected an oop");
2164   HeapWord* addr = (HeapWord*)p;
2165   assert(addr >= _nextMarkBitMap->startWord() ||
2166          addr < _nextMarkBitMap->endWord(), "in a region");
2167 
2168   _nextMarkBitMap->clear(addr);
2169 }
2170 
2171 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2172   // Note we are overriding the read-only view of the prev map here, via
2173   // the cast.
2174   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2175   _nextMarkBitMap->clearRange(mr);
2176 }
2177 
2178 HeapRegion*
2179 ConcurrentMark::claim_region(int task_num) {
2180   // "checkpoint" the finger
2181   HeapWord* finger = _finger;
2182 
2183   // _heap_end will not change underneath our feet; it only changes at
2184   // yield points.
2185   while (finger < _heap_end) {
2186     tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
2187 
2188     // is the gap between reading the finger and doing the CAS too long?
2189 
2190     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
2191     HeapWord*   bottom        = curr_region->bottom();
2192     HeapWord*   end           = curr_region->end();
2193     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2194 
2195     if (verbose_low())
2196       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2197                              "["PTR_FORMAT", "PTR_FORMAT"), "
2198                              "limit = "PTR_FORMAT,
2199                              task_num, curr_region, bottom, end, limit);
2200 
2201     HeapWord* res =
2202       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2203     if (res == finger) {
2204       // we succeeded
2205 
2206       // notice that _finger == end cannot be guaranteed here since,
2207       // someone else might have moved the finger even further
2208       guarantee( _finger >= end, "the finger should have moved forward" );
2209 
2210       if (verbose_low())
2211         gclog_or_tty->print_cr("[%d] we were successful with region = "
2212                                PTR_FORMAT, task_num, curr_region);
2213 
2214       if (limit > bottom) {
2215         if (verbose_low())
2216           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2217                                  "returning it ", task_num, curr_region);
2218         return curr_region;
2219       } else {
2220         tmp_guarantee_CM( limit == bottom,
2221                           "the region limit should be at bottom" );
2222         if (verbose_low())
2223           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2224                                  "returning NULL", task_num, curr_region);
2225         // we return NULL and the caller should try calling
2226         // claim_region() again.
2227         return NULL;
2228       }
2229     } else {
2230       guarantee( _finger > finger, "the finger should have moved forward" );
2231       if (verbose_low())
2232         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2233                                "global finger = "PTR_FORMAT", "
2234                                "our finger = "PTR_FORMAT,
2235                                task_num, _finger, finger);
2236 
2237       // read it again
2238       finger = _finger;
2239     }
2240   }
2241 
2242   return NULL;
2243 }
2244 
2245 void ConcurrentMark::oops_do(OopClosure* cl) {
2246   if (_markStack.size() > 0 && verbose_low())
2247     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2248                            "size = %d", _markStack.size());
2249   // we first iterate over the contents of the mark stack...
2250   _markStack.oops_do(cl);
2251 
2252   for (int i = 0; i < (int)_max_task_num; ++i) {
2253     OopTaskQueue* queue = _task_queues->queue((int)i);
2254 
2255     if (queue->size() > 0 && verbose_low())
2256       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2257                              "size = %d", i, queue->size());
2258 
2259     // ...then over the contents of the all the task queues.
2260     queue->oops_do(cl);
2261   }
2262 
2263   // finally, invalidate any entries that in the region stack that
2264   // point into the collection set
2265   if (_regionStack.invalidate_entries_into_cset()) {
2266     // otherwise, any gray objects copied during the evacuation pause
2267     // might not be visited.
2268     guarantee( _should_gray_objects, "invariant" );
2269   }
2270 }
2271 
2272 void ConcurrentMark::clear_marking_state() {
2273   _markStack.setEmpty();
2274   _markStack.clear_overflow();
2275   _regionStack.setEmpty();
2276   _regionStack.clear_overflow();
2277   clear_has_overflown();
2278   _finger = _heap_start;
2279 
2280   for (int i = 0; i < (int)_max_task_num; ++i) {
2281     OopTaskQueue* queue = _task_queues->queue(i);
2282     queue->set_empty();
2283   }
2284 }
2285 
2286 void ConcurrentMark::print_stats() {
2287   if (verbose_stats()) {
2288     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2289     for (size_t i = 0; i < _active_tasks; ++i) {
2290       _tasks[i]->print_stats();
2291       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2292     }
2293   }
2294 }
2295 
2296 class CSMarkOopClosure: public OopClosure {
2297   friend class CSMarkBitMapClosure;
2298 
2299   G1CollectedHeap* _g1h;
2300   CMBitMap*        _bm;
2301   ConcurrentMark*  _cm;
2302   oop*             _ms;
2303   jint*            _array_ind_stack;
2304   int              _ms_size;
2305   int              _ms_ind;
2306   int              _array_increment;
2307 
2308   bool push(oop obj, int arr_ind = 0) {
2309     if (_ms_ind == _ms_size) {
2310       gclog_or_tty->print_cr("Mark stack is full.");
2311       return false;
2312     }
2313     _ms[_ms_ind] = obj;
2314     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2315     _ms_ind++;
2316     return true;
2317   }
2318 
2319   oop pop() {
2320     if (_ms_ind == 0) return NULL;
2321     else {
2322       _ms_ind--;
2323       return _ms[_ms_ind];
2324     }
2325   }
2326 
2327   bool drain() {
2328     while (_ms_ind > 0) {
2329       oop obj = pop();
2330       assert(obj != NULL, "Since index was non-zero.");
2331       if (obj->is_objArray()) {
2332         jint arr_ind = _array_ind_stack[_ms_ind];
2333         objArrayOop aobj = objArrayOop(obj);
2334         jint len = aobj->length();
2335         jint next_arr_ind = arr_ind + _array_increment;
2336         if (next_arr_ind < len) {
2337           push(obj, next_arr_ind);
2338         }
2339         // Now process this portion of this one.
2340         int lim = MIN2(next_arr_ind, len);
2341         assert(!UseCompressedOops, "This needs to be fixed");
2342         for (int j = arr_ind; j < lim; j++) {
2343           do_oop(aobj->obj_at_addr<oop>(j));
2344         }
2345 
2346       } else {
2347         obj->oop_iterate(this);
2348       }
2349       if (abort()) return false;
2350     }
2351     return true;
2352   }
2353 
2354 public:
2355   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2356     _g1h(G1CollectedHeap::heap()),
2357     _cm(cm),
2358     _bm(cm->nextMarkBitMap()),
2359     _ms_size(ms_size), _ms_ind(0),
2360     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2361     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2362     _array_increment(MAX2(ms_size/8, 16))
2363   {}
2364 
2365   ~CSMarkOopClosure() {
2366     FREE_C_HEAP_ARRAY(oop, _ms);
2367     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2368   }
2369 
2370   void do_oop(narrowOop* p) {
2371     guarantee(false, "NYI");
2372   }
2373 
2374   void do_oop(oop* p) {
2375     oop obj = *p;
2376     if (obj == NULL) return;
2377     if (obj->is_forwarded()) {
2378       // If the object has already been forwarded, we have to make sure
2379       // that it's marked.  So follow the forwarding pointer.  Note that
2380       // this does the right thing for self-forwarding pointers in the
2381       // evacuation failure case.
2382       obj = obj->forwardee();
2383     }
2384     HeapRegion* hr = _g1h->heap_region_containing(obj);
2385     if (hr != NULL) {
2386       if (hr->in_collection_set()) {
2387         if (_g1h->is_obj_ill(obj)) {
2388           _bm->mark((HeapWord*)obj);
2389           if (!push(obj)) {
2390             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2391             set_abort();
2392           }
2393         }
2394       } else {
2395         // Outside the collection set; we need to gray it
2396         _cm->deal_with_reference(obj);
2397       }
2398     }
2399   }
2400 };
2401 
2402 class CSMarkBitMapClosure: public BitMapClosure {
2403   G1CollectedHeap* _g1h;
2404   CMBitMap*        _bitMap;
2405   ConcurrentMark*  _cm;
2406   CSMarkOopClosure _oop_cl;
2407 public:
2408   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2409     _g1h(G1CollectedHeap::heap()),
2410     _bitMap(cm->nextMarkBitMap()),
2411     _oop_cl(cm, ms_size)
2412   {}
2413 
2414   ~CSMarkBitMapClosure() {}
2415 
2416   bool do_bit(size_t offset) {
2417     // convert offset into a HeapWord*
2418     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2419     assert(_bitMap->endWord() && addr < _bitMap->endWord(),
2420            "address out of range");
2421     assert(_bitMap->isMarked(addr), "tautology");
2422     oop obj = oop(addr);
2423     if (!obj->is_forwarded()) {
2424       if (!_oop_cl.push(obj)) return false;
2425       if (!_oop_cl.drain()) return false;
2426     }
2427     // Otherwise...
2428     return true;
2429   }
2430 };
2431 
2432 
2433 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2434   CMBitMap* _bm;
2435   CSMarkBitMapClosure _bit_cl;
2436   enum SomePrivateConstants {
2437     MSSize = 1000
2438   };
2439   bool _completed;
2440 public:
2441   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2442     _bm(cm->nextMarkBitMap()),
2443     _bit_cl(cm, MSSize),
2444     _completed(true)
2445   {}
2446 
2447   ~CompleteMarkingInCSHRClosure() {}
2448 
2449   bool doHeapRegion(HeapRegion* r) {
2450     if (!r->evacuation_failed()) {
2451       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2452       if (!mr.is_empty()) {
2453         if (!_bm->iterate(&_bit_cl, mr)) {
2454           _completed = false;
2455           return true;
2456         }
2457       }
2458     }
2459     return false;
2460   }
2461 
2462   bool completed() { return _completed; }
2463 };
2464 
2465 class ClearMarksInHRClosure: public HeapRegionClosure {
2466   CMBitMap* _bm;
2467 public:
2468   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2469 
2470   bool doHeapRegion(HeapRegion* r) {
2471     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2472       MemRegion usedMR = r->used_region();
2473       _bm->clearRange(r->used_region());
2474     }
2475     return false;
2476   }
2477 };
2478 
2479 void ConcurrentMark::complete_marking_in_collection_set() {
2480   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
2481 
2482   if (!g1h->mark_in_progress()) {
2483     g1h->g1_policy()->record_mark_closure_time(0.0);
2484     return;
2485   }
2486 
2487   int i = 1;
2488   double start = os::elapsedTime();
2489   while (true) {
2490     i++;
2491     CompleteMarkingInCSHRClosure cmplt(this);
2492     g1h->collection_set_iterate(&cmplt);
2493     if (cmplt.completed()) break;
2494   }
2495   double end_time = os::elapsedTime();
2496   double elapsed_time_ms = (end_time - start) * 1000.0;
2497   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2498   if (PrintGCDetails) {
2499     gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2500   }
2501 
2502   ClearMarksInHRClosure clr(nextMarkBitMap());
2503   g1h->collection_set_iterate(&clr);
2504 }
2505 
2506 // The next two methods deal with the following optimisation. Some
2507 // objects are gray by being marked and located above the finger. If
2508 // they are copied, during an evacuation pause, below the finger then
2509 // the need to be pushed on the stack. The observation is that, if
2510 // there are no regions in the collection set located above the
2511 // finger, then the above cannot happen, hence we do not need to
2512 // explicitly gray any objects when copying them to below the
2513 // finger. The global stack will be scanned to ensure that, if it
2514 // points to objects being copied, it will update their
2515 // location. There is a tricky situation with the gray objects in
2516 // region stack that are being coped, however. See the comment in
2517 // newCSet().
2518 
2519 void ConcurrentMark::newCSet() {
2520   if (!concurrent_marking_in_progress())
2521     // nothing to do if marking is not in progress
2522     return;
2523 
2524   // find what the lowest finger is among the global and local fingers
2525   _min_finger = _finger;
2526   for (int i = 0; i < (int)_max_task_num; ++i) {
2527     CMTask* task = _tasks[i];
2528     HeapWord* task_finger = task->finger();
2529     if (task_finger != NULL && task_finger < _min_finger)
2530       _min_finger = task_finger;
2531   }
2532 
2533   _should_gray_objects = false;
2534 
2535   // This fixes a very subtle and fustrating bug. It might be the case
2536   // that, during en evacuation pause, heap regions that contain
2537   // objects that are gray (by being in regions contained in the
2538   // region stack) are included in the collection set. Since such gray
2539   // objects will be moved, and because it's not easy to redirect
2540   // region stack entries to point to a new location (because objects
2541   // in one region might be scattered to multiple regions after they
2542   // are copied), one option is to ensure that all marked objects
2543   // copied during a pause are pushed on the stack. Notice, however,
2544   // that this problem can only happen when the region stack is not
2545   // empty during an evacuation pause. So, we make the fix a bit less
2546   // conservative and ensure that regions are pushed on the stack,
2547   // irrespective whether all collection set regions are below the
2548   // finger, if the region stack is not empty. This is expected to be
2549   // a rare case, so I don't think it's necessary to be smarted about it.
2550   if (!region_stack_empty())
2551     _should_gray_objects = true;
2552 }
2553 
2554 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2555   if (!concurrent_marking_in_progress())
2556     return;
2557 
2558   HeapWord* region_end = hr->end();
2559   if (region_end > _min_finger)
2560     _should_gray_objects = true;
2561 }
2562 
2563 void ConcurrentMark::disable_co_trackers() {
2564   if (has_aborted()) {
2565     if (_cleanup_co_tracker.enabled())
2566       _cleanup_co_tracker.disable();
2567     for (int i = 0; i < (int)_max_task_num; ++i) {
2568       CMTask* task = _tasks[i];
2569       if (task->co_tracker_enabled())
2570         task->disable_co_tracker();
2571     }
2572   } else {
2573     guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
2574     for (int i = 0; i < (int)_max_task_num; ++i) {
2575       CMTask* task = _tasks[i];
2576       guarantee( !task->co_tracker_enabled(), "invariant" );
2577     }
2578   }
2579 }
2580 
2581 // abandon current marking iteration due to a Full GC
2582 void ConcurrentMark::abort() {
2583   // If we're not marking, nothing to do.
2584   if (!G1ConcMark) return;
2585 
2586   // Clear all marks to force marking thread to do nothing
2587   _nextMarkBitMap->clearAll();
2588   // Empty mark stack
2589   clear_marking_state();
2590   for (int i = 0; i < (int)_max_task_num; ++i)
2591     _tasks[i]->clear_region_fields();
2592   _has_aborted = true;
2593 
2594   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2595   satb_mq_set.abandon_partial_marking();
2596   satb_mq_set.set_active_all_threads(false);
2597 }
2598 
2599 static void print_ms_time_info(const char* prefix, const char* name,
2600                                NumberSeq& ns) {
2601   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2602                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2603   if (ns.num() > 0) {
2604     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2605                            prefix, ns.sd(), ns.maximum());
2606   }
2607 }
2608 
2609 void ConcurrentMark::print_summary_info() {
2610   gclog_or_tty->print_cr(" Concurrent marking:");
2611   print_ms_time_info("  ", "init marks", _init_times);
2612   print_ms_time_info("  ", "remarks", _remark_times);
2613   {
2614     print_ms_time_info("     ", "final marks", _remark_mark_times);
2615     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2616 
2617   }
2618   print_ms_time_info("  ", "cleanups", _cleanup_times);
2619   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2620                          _total_counting_time,
2621                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2622                           (double)_cleanup_times.num()
2623                          : 0.0));
2624   if (G1ScrubRemSets) {
2625     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2626                            _total_rs_scrub_time,
2627                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2628                             (double)_cleanup_times.num()
2629                            : 0.0));
2630   }
2631   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2632                          (_init_times.sum() + _remark_times.sum() +
2633                           _cleanup_times.sum())/1000.0);
2634   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2635                 "(%8.2f s marking, %8.2f s counting).",
2636                 cmThread()->vtime_accum(),
2637                 cmThread()->vtime_mark_accum(),
2638                 cmThread()->vtime_count_accum());
2639 }
2640 
2641 // Closures
2642 // XXX: there seems to be a lot of code  duplication here;
2643 // should refactor and consolidate the shared code.
2644 
2645 // This closure is used to mark refs into the CMS generation in
2646 // the CMS bit map. Called at the first checkpoint.
2647 
2648 // We take a break if someone is trying to stop the world.
2649 bool ConcurrentMark::do_yield_check(int worker_i) {
2650   if (should_yield()) {
2651     if (worker_i == 0)
2652       _g1h->g1_policy()->record_concurrent_pause();
2653     cmThread()->yield();
2654     if (worker_i == 0)
2655       _g1h->g1_policy()->record_concurrent_pause_end();
2656     return true;
2657   } else {
2658     return false;
2659   }
2660 }
2661 
2662 bool ConcurrentMark::should_yield() {
2663   return cmThread()->should_yield();
2664 }
2665 
2666 bool ConcurrentMark::containing_card_is_marked(void* p) {
2667   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2668   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2669 }
2670 
2671 bool ConcurrentMark::containing_cards_are_marked(void* start,
2672                                                  void* last) {
2673   return
2674     containing_card_is_marked(start) &&
2675     containing_card_is_marked(last);
2676 }
2677 
2678 #ifndef PRODUCT
2679 // for debugging purposes
2680 void ConcurrentMark::print_finger() {
2681   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2682                          _heap_start, _heap_end, _finger);
2683   for (int i = 0; i < (int) _max_task_num; ++i) {
2684     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
2685   }
2686   gclog_or_tty->print_cr("");
2687 }
2688 #endif
2689 
2690 // Closure for iteration over bitmaps
2691 class CMBitMapClosure : public BitMapClosure {
2692 private:
2693   // the bitmap that is being iterated over
2694   CMBitMap*                   _nextMarkBitMap;
2695   ConcurrentMark*             _cm;
2696   CMTask*                     _task;
2697   // true if we're scanning a heap region claimed by the task (so that
2698   // we move the finger along), false if we're not, i.e. currently when
2699   // scanning a heap region popped from the region stack (so that we
2700   // do not move the task finger along; it'd be a mistake if we did so).
2701   bool                        _scanning_heap_region;
2702 
2703 public:
2704   CMBitMapClosure(CMTask *task,
2705                   ConcurrentMark* cm,
2706                   CMBitMap* nextMarkBitMap)
2707     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2708 
2709   void set_scanning_heap_region(bool scanning_heap_region) {
2710     _scanning_heap_region = scanning_heap_region;
2711   }
2712 
2713   bool do_bit(size_t offset) {
2714     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2715     tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
2716     tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
2717 
2718     if (_scanning_heap_region) {
2719       statsOnly( _task->increase_objs_found_on_bitmap() );
2720       tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
2721       // We move that task's local finger along.
2722       _task->move_finger_to(addr);
2723     } else {
2724       // We move the task's region finger along.
2725       _task->move_region_finger_to(addr);
2726     }
2727 
2728     _task->scan_object(oop(addr));
2729     // we only partially drain the local queue and global stack
2730     _task->drain_local_queue(true);
2731     _task->drain_global_stack(true);
2732 
2733     // if the has_aborted flag has been raised, we need to bail out of
2734     // the iteration
2735     return !_task->has_aborted();
2736   }
2737 };
2738 
2739 // Closure for iterating over objects, currently only used for
2740 // processing SATB buffers.
2741 class CMObjectClosure : public ObjectClosure {
2742 private:
2743   CMTask* _task;
2744 
2745 public:
2746   void do_object(oop obj) {
2747     _task->deal_with_reference(obj);
2748   }
2749 
2750   CMObjectClosure(CMTask* task) : _task(task) { }
2751 };
2752 
2753 // Closure for iterating over object fields
2754 class CMOopClosure : public OopClosure {
2755 private:
2756   G1CollectedHeap*   _g1h;
2757   ConcurrentMark*    _cm;
2758   CMTask*            _task;
2759 
2760 public:
2761   void do_oop(narrowOop* p) {
2762     guarantee(false, "NYI");
2763   }
2764 
2765   void do_oop(oop* p) {
2766     tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
2767 
2768     oop obj = *p;
2769     if (_cm->verbose_high())
2770       gclog_or_tty->print_cr("[%d] we're looking at location "
2771                              "*"PTR_FORMAT" = "PTR_FORMAT,
2772                              _task->task_id(), p, (void*) obj);
2773     _task->deal_with_reference(obj);
2774   }
2775 
2776   CMOopClosure(G1CollectedHeap* g1h,
2777                ConcurrentMark* cm,
2778                CMTask* task)
2779     : _g1h(g1h), _cm(cm), _task(task) { }
2780 };
2781 
2782 void CMTask::setup_for_region(HeapRegion* hr) {
2783   tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
2784       "claim_region() should have filtered out continues humongous regions" );
2785 
2786   if (_cm->verbose_low())
2787     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2788                            _task_id, hr);
2789 
2790   _curr_region  = hr;
2791   _finger       = hr->bottom();
2792   update_region_limit();
2793 }
2794 
2795 void CMTask::update_region_limit() {
2796   HeapRegion* hr            = _curr_region;
2797   HeapWord* bottom          = hr->bottom();
2798   HeapWord* limit           = hr->next_top_at_mark_start();
2799 
2800   if (limit == bottom) {
2801     if (_cm->verbose_low())
2802       gclog_or_tty->print_cr("[%d] found an empty region "
2803                              "["PTR_FORMAT", "PTR_FORMAT")",
2804                              _task_id, bottom, limit);
2805     // The region was collected underneath our feet.
2806     // We set the finger to bottom to ensure that the bitmap
2807     // iteration that will follow this will not do anything.
2808     // (this is not a condition that holds when we set the region up,
2809     // as the region is not supposed to be empty in the first place)
2810     _finger = bottom;
2811   } else if (limit >= _region_limit) {
2812     tmp_guarantee_CM( limit >= _finger, "peace of mind" );
2813   } else {
2814     tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
2815     // This can happen under some pretty unusual circumstances.  An
2816     // evacuation pause empties the region underneath our feet (NTAMS
2817     // at bottom). We then do some allocation in the region (NTAMS
2818     // stays at bottom), followed by the region being used as a GC
2819     // alloc region (NTAMS will move to top() and the objects
2820     // originally below it will be grayed). All objects now marked in
2821     // the region are explicitly grayed, if below the global finger,
2822     // and we do not need in fact to scan anything else. So, we simply
2823     // set _finger to be limit to ensure that the bitmap iteration
2824     // doesn't do anything.
2825     _finger = limit;
2826   }
2827 
2828   _region_limit = limit;
2829 }
2830 
2831 void CMTask::giveup_current_region() {
2832   tmp_guarantee_CM( _curr_region != NULL, "invariant" );
2833   if (_cm->verbose_low())
2834     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2835                            _task_id, _curr_region);
2836   clear_region_fields();
2837 }
2838 
2839 void CMTask::clear_region_fields() {
2840   // Values for these three fields that indicate that we're not
2841   // holding on to a region.
2842   _curr_region   = NULL;
2843   _finger        = NULL;
2844   _region_limit  = NULL;
2845 
2846   _region_finger = NULL;
2847 }
2848 
2849 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2850   guarantee( nextMarkBitMap != NULL, "invariant" );
2851 
2852   if (_cm->verbose_low())
2853     gclog_or_tty->print_cr("[%d] resetting", _task_id);
2854 
2855   _nextMarkBitMap                = nextMarkBitMap;
2856   clear_region_fields();
2857 
2858   _calls                         = 0;
2859   _elapsed_time_ms               = 0.0;
2860   _termination_time_ms           = 0.0;
2861   _termination_start_time_ms     = 0.0;
2862 
2863 #if _MARKING_STATS_
2864   _local_pushes                  = 0;
2865   _local_pops                    = 0;
2866   _local_max_size                = 0;
2867   _objs_scanned                  = 0;
2868   _global_pushes                 = 0;
2869   _global_pops                   = 0;
2870   _global_max_size               = 0;
2871   _global_transfers_to           = 0;
2872   _global_transfers_from         = 0;
2873   _region_stack_pops             = 0;
2874   _regions_claimed               = 0;
2875   _objs_found_on_bitmap          = 0;
2876   _satb_buffers_processed        = 0;
2877   _steal_attempts                = 0;
2878   _steals                        = 0;
2879   _aborted                       = 0;
2880   _aborted_overflow              = 0;
2881   _aborted_cm_aborted            = 0;
2882   _aborted_yield                 = 0;
2883   _aborted_timed_out             = 0;
2884   _aborted_satb                  = 0;
2885   _aborted_termination           = 0;
2886 #endif // _MARKING_STATS_
2887 }
2888 
2889 bool CMTask::should_exit_termination() {
2890   regular_clock_call();
2891   // This is called when we are in the termination protocol. We should
2892   // quit if, for some reason, this task wants to abort or the global
2893   // stack is not empty (this means that we can get work from it).
2894   return !_cm->mark_stack_empty() || has_aborted();
2895 }
2896 
2897 // This determines whether the method below will check both the local
2898 // and global fingers when determining whether to push on the stack a
2899 // gray object (value 1) or whether it will only check the global one
2900 // (value 0). The tradeoffs are that the former will be a bit more
2901 // accurate and possibly push less on the stack, but it might also be
2902 // a little bit slower.
2903 
2904 #define _CHECK_BOTH_FINGERS_      1
2905 
2906 void CMTask::deal_with_reference(oop obj) {
2907   if (_cm->verbose_high())
2908     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2909                            _task_id, (void*) obj);
2910 
2911   ++_refs_reached;
2912 
2913   HeapWord* objAddr = (HeapWord*) obj;
2914   if (_g1h->is_in_g1_reserved(objAddr)) {
2915     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2916     HeapRegion* hr =  _g1h->heap_region_containing(obj);
2917     if (_g1h->is_obj_ill(obj, hr)) {
2918       if (_cm->verbose_high())
2919         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2920                                _task_id, (void*) obj);
2921 
2922       // we need to mark it first
2923       if (_nextMarkBitMap->parMark(objAddr)) {
2924         // No OrderAccess:store_load() is needed. It is implicit in the
2925         // CAS done in parMark(objAddr) above
2926         HeapWord* global_finger = _cm->finger();
2927 
2928 #if _CHECK_BOTH_FINGERS_
2929         // we will check both the local and global fingers
2930 
2931         if (_finger != NULL && objAddr < _finger) {
2932           if (_cm->verbose_high())
2933             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2934                                    "pushing it", _task_id, _finger);
2935           push(obj);
2936         } else if (_curr_region != NULL && objAddr < _region_limit) {
2937           // do nothing
2938         } else if (objAddr < global_finger) {
2939           // Notice that the global finger might be moving forward
2940           // concurrently. This is not a problem. In the worst case, we
2941           // mark the object while it is above the global finger and, by
2942           // the time we read the global finger, it has moved forward
2943           // passed this object. In this case, the object will probably
2944           // be visited when a task is scanning the region and will also
2945           // be pushed on the stack. So, some duplicate work, but no
2946           // correctness problems.
2947 
2948           if (_cm->verbose_high())
2949             gclog_or_tty->print_cr("[%d] below the global finger "
2950                                    "("PTR_FORMAT"), pushing it",
2951                                    _task_id, global_finger);
2952           push(obj);
2953         } else {
2954           // do nothing
2955         }
2956 #else // _CHECK_BOTH_FINGERS_
2957       // we will only check the global finger
2958 
2959         if (objAddr < global_finger) {
2960           // see long comment above
2961 
2962           if (_cm->verbose_high())
2963             gclog_or_tty->print_cr("[%d] below the global finger "
2964                                    "("PTR_FORMAT"), pushing it",
2965                                    _task_id, global_finger);
2966           push(obj);
2967         }
2968 #endif // _CHECK_BOTH_FINGERS_
2969       }
2970     }
2971   }
2972 }
2973 
2974 void CMTask::push(oop obj) {
2975   HeapWord* objAddr = (HeapWord*) obj;
2976   tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
2977   tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
2978   tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
2979 
2980   if (_cm->verbose_high())
2981     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
2982 
2983   if (!_task_queue->push(obj)) {
2984     // The local task queue looks full. We need to push some entries
2985     // to the global stack.
2986 
2987     if (_cm->verbose_medium())
2988       gclog_or_tty->print_cr("[%d] task queue overflow, "
2989                              "moving entries to the global stack",
2990                              _task_id);
2991     move_entries_to_global_stack();
2992 
2993     // this should succeed since, even if we overflow the global
2994     // stack, we should have definitely removed some entries from the
2995     // local queue. So, there must be space on it.
2996     bool success = _task_queue->push(obj);
2997     tmp_guarantee_CM( success, "invariant" );
2998   }
2999 
3000   statsOnly( int tmp_size = _task_queue->size();
3001              if (tmp_size > _local_max_size)
3002                _local_max_size = tmp_size;
3003              ++_local_pushes );
3004 }
3005 
3006 void CMTask::reached_limit() {
3007   tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
3008                     _refs_reached >= _refs_reached_limit ,
3009                  "shouldn't have been called otherwise" );
3010   regular_clock_call();
3011 }
3012 
3013 void CMTask::regular_clock_call() {
3014   if (has_aborted())
3015     return;
3016 
3017   // First, we need to recalculate the words scanned and refs reached
3018   // limits for the next clock call.
3019   recalculate_limits();
3020 
3021   // During the regular clock call we do the following
3022 
3023   // (1) If an overflow has been flagged, then we abort.
3024   if (_cm->has_overflown()) {
3025     set_has_aborted();
3026     return;
3027   }
3028 
3029   // If we are not concurrent (i.e. we're doing remark) we don't need
3030   // to check anything else. The other steps are only needed during
3031   // the concurrent marking phase.
3032   if (!concurrent())
3033     return;
3034 
3035   // (2) If marking has been aborted for Full GC, then we also abort.
3036   if (_cm->has_aborted()) {
3037     set_has_aborted();
3038     statsOnly( ++_aborted_cm_aborted );
3039     return;
3040   }
3041 
3042   double curr_time_ms = os::elapsedVTime() * 1000.0;
3043 
3044   // (3) If marking stats are enabled, then we update the step history.
3045 #if _MARKING_STATS_
3046   if (_words_scanned >= _words_scanned_limit)
3047     ++_clock_due_to_scanning;
3048   if (_refs_reached >= _refs_reached_limit)
3049     ++_clock_due_to_marking;
3050 
3051   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3052   _interval_start_time_ms = curr_time_ms;
3053   _all_clock_intervals_ms.add(last_interval_ms);
3054 
3055   if (_cm->verbose_medium()) {
3056     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3057                            "scanned = %d%s, refs reached = %d%s",
3058                            _task_id, last_interval_ms,
3059                            _words_scanned,
3060                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3061                            _refs_reached,
3062                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3063   }
3064 #endif // _MARKING_STATS_
3065 
3066   // (4) We check whether we should yield. If we have to, then we abort.
3067   if (_cm->should_yield()) {
3068     // We should yield. To do this we abort the task. The caller is
3069     // responsible for yielding.
3070     set_has_aborted();
3071     statsOnly( ++_aborted_yield );
3072     return;
3073   }
3074 
3075   // (5) We check whether we've reached our time quota. If we have,
3076   // then we abort.
3077   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3078   if (elapsed_time_ms > _time_target_ms) {
3079     set_has_aborted();
3080     _has_aborted_timed_out = true;
3081     statsOnly( ++_aborted_timed_out );
3082     return;
3083   }
3084 
3085   // (6) Finally, we check whether there are enough completed STAB
3086   // buffers available for processing. If there are, we abort.
3087   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3088   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3089     if (_cm->verbose_low())
3090       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3091                              _task_id);
3092     // we do need to process SATB buffers, we'll abort and restart
3093     // the marking task to do so
3094     set_has_aborted();
3095     statsOnly( ++_aborted_satb );
3096     return;
3097   }
3098 }
3099 
3100 void CMTask::recalculate_limits() {
3101   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3102   _words_scanned_limit      = _real_words_scanned_limit;
3103 
3104   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3105   _refs_reached_limit       = _real_refs_reached_limit;
3106 }
3107 
3108 void CMTask::decrease_limits() {
3109   // This is called when we believe that we're going to do an infrequent
3110   // operation which will increase the per byte scanned cost (i.e. move
3111   // entries to/from the global stack). It basically tries to decrease the
3112   // scanning limit so that the clock is called earlier.
3113 
3114   if (_cm->verbose_medium())
3115     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3116 
3117   _words_scanned_limit = _real_words_scanned_limit -
3118     3 * words_scanned_period / 4;
3119   _refs_reached_limit  = _real_refs_reached_limit -
3120     3 * refs_reached_period / 4;
3121 }
3122 
3123 void CMTask::move_entries_to_global_stack() {
3124   // local array where we'll store the entries that will be popped
3125   // from the local queue
3126   oop buffer[global_stack_transfer_size];
3127 
3128   int n = 0;
3129   oop obj;
3130   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3131     buffer[n] = obj;
3132     ++n;
3133   }
3134 
3135   if (n > 0) {
3136     // we popped at least one entry from the local queue
3137 
3138     statsOnly( ++_global_transfers_to; _local_pops += n );
3139 
3140     if (!_cm->mark_stack_push(buffer, n)) {
3141       if (_cm->verbose_low())
3142         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3143       set_has_aborted();
3144     } else {
3145       // the transfer was successful
3146 
3147       if (_cm->verbose_medium())
3148         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3149                                _task_id, n);
3150       statsOnly( int tmp_size = _cm->mark_stack_size();
3151                  if (tmp_size > _global_max_size)
3152                    _global_max_size = tmp_size;
3153                  _global_pushes += n );
3154     }
3155   }
3156 
3157   // this operation was quite expensive, so decrease the limits
3158   decrease_limits();
3159 }
3160 
3161 void CMTask::get_entries_from_global_stack() {
3162   // local array where we'll store the entries that will be popped
3163   // from the global stack.
3164   oop buffer[global_stack_transfer_size];
3165   int n;
3166   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3167   tmp_guarantee_CM( n <= global_stack_transfer_size,
3168                     "we should not pop more than the given limit" );
3169   if (n > 0) {
3170     // yes, we did actually pop at least one entry
3171 
3172     statsOnly( ++_global_transfers_from; _global_pops += n );
3173     if (_cm->verbose_medium())
3174       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3175                              _task_id, n);
3176     for (int i = 0; i < n; ++i) {
3177       bool success = _task_queue->push(buffer[i]);
3178       // We only call this when the local queue is empty or under a
3179       // given target limit. So, we do not expect this push to fail.
3180       tmp_guarantee_CM( success, "invariant" );
3181     }
3182 
3183     statsOnly( int tmp_size = _task_queue->size();
3184                if (tmp_size > _local_max_size)
3185                  _local_max_size = tmp_size;
3186                _local_pushes += n );
3187   }
3188 
3189   // this operation was quite expensive, so decrease the limits
3190   decrease_limits();
3191 }
3192 
3193 void CMTask::drain_local_queue(bool partially) {
3194   if (has_aborted())
3195     return;
3196 
3197   // Decide what the target size is, depending whether we're going to
3198   // drain it partially (so that other tasks can steal if they run out
3199   // of things to do) or totally (at the very end).
3200   size_t target_size;
3201   if (partially)
3202     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3203   else
3204     target_size = 0;
3205 
3206   if (_task_queue->size() > target_size) {
3207     if (_cm->verbose_high())
3208       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3209                              _task_id, target_size);
3210 
3211     oop obj;
3212     bool ret = _task_queue->pop_local(obj);
3213     while (ret) {
3214       statsOnly( ++_local_pops );
3215 
3216       if (_cm->verbose_high())
3217         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3218                                (void*) obj);
3219 
3220       tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
3221                         "invariant" );
3222 
3223       scan_object(obj);
3224 
3225       if (_task_queue->size() <= target_size || has_aborted())
3226         ret = false;
3227       else
3228         ret = _task_queue->pop_local(obj);
3229     }
3230 
3231     if (_cm->verbose_high())
3232       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3233                              _task_id, _task_queue->size());
3234   }
3235 }
3236 
3237 void CMTask::drain_global_stack(bool partially) {
3238   if (has_aborted())
3239     return;
3240 
3241   // We have a policy to drain the local queue before we attempt to
3242   // drain the global stack.
3243   tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
3244 
3245   // Decide what the target size is, depending whether we're going to
3246   // drain it partially (so that other tasks can steal if they run out
3247   // of things to do) or totally (at the very end).  Notice that,
3248   // because we move entries from the global stack in chunks or
3249   // because another task might be doing the same, we might in fact
3250   // drop below the target. But, this is not a problem.
3251   size_t target_size;
3252   if (partially)
3253     target_size = _cm->partial_mark_stack_size_target();
3254   else
3255     target_size = 0;
3256 
3257   if (_cm->mark_stack_size() > target_size) {
3258     if (_cm->verbose_low())
3259       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3260                              _task_id, target_size);
3261 
3262     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3263       get_entries_from_global_stack();
3264       drain_local_queue(partially);
3265     }
3266 
3267     if (_cm->verbose_low())
3268       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3269                              _task_id, _cm->mark_stack_size());
3270   }
3271 }
3272 
3273 // SATB Queue has several assumptions on whether to call the par or
3274 // non-par versions of the methods. this is why some of the code is
3275 // replicated. We should really get rid of the single-threaded version
3276 // of the code to simplify things.
3277 void CMTask::drain_satb_buffers() {
3278   if (has_aborted())
3279     return;
3280 
3281   // We set this so that the regular clock knows that we're in the
3282   // middle of draining buffers and doesn't set the abort flag when it
3283   // notices that SATB buffers are available for draining. It'd be
3284   // very counter productive if it did that. :-)
3285   _draining_satb_buffers = true;
3286 
3287   CMObjectClosure oc(this);
3288   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3289   if (ParallelGCThreads > 0)
3290     satb_mq_set.set_par_closure(_task_id, &oc);
3291   else
3292     satb_mq_set.set_closure(&oc);
3293 
3294   // This keeps claiming and applying the closure to completed buffers
3295   // until we run out of buffers or we need to abort.
3296   if (ParallelGCThreads > 0) {
3297     while (!has_aborted() &&
3298            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3299       if (_cm->verbose_medium())
3300         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3301       statsOnly( ++_satb_buffers_processed );
3302       regular_clock_call();
3303     }
3304   } else {
3305     while (!has_aborted() &&
3306            satb_mq_set.apply_closure_to_completed_buffer()) {
3307       if (_cm->verbose_medium())
3308         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3309       statsOnly( ++_satb_buffers_processed );
3310       regular_clock_call();
3311     }
3312   }
3313 
3314   if (!concurrent() && !has_aborted()) {
3315     // We should only do this during remark.
3316     if (ParallelGCThreads > 0)
3317       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3318     else
3319       satb_mq_set.iterate_closure_all_threads();
3320   }
3321 
3322   _draining_satb_buffers = false;
3323 
3324   tmp_guarantee_CM( has_aborted() ||
3325                     concurrent() ||
3326                     satb_mq_set.completed_buffers_num() == 0, "invariant" );
3327 
3328   if (ParallelGCThreads > 0)
3329     satb_mq_set.set_par_closure(_task_id, NULL);
3330   else
3331     satb_mq_set.set_closure(NULL);
3332 
3333   // again, this was a potentially expensive operation, decrease the
3334   // limits to get the regular clock call early
3335   decrease_limits();
3336 }
3337 
3338 void CMTask::drain_region_stack(BitMapClosure* bc) {
3339   if (has_aborted())
3340     return;
3341 
3342   tmp_guarantee_CM( _region_finger == NULL,
3343                     "it should be NULL when we're not scanning a region" );
3344 
3345   if (!_cm->region_stack_empty()) {
3346     if (_cm->verbose_low())
3347       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3348                              _task_id, _cm->region_stack_size());
3349 
3350     MemRegion mr = _cm->region_stack_pop();
3351     // it returns MemRegion() if the pop fails
3352     statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3353 
3354     while (mr.start() != NULL) {
3355       if (_cm->verbose_medium())
3356         gclog_or_tty->print_cr("[%d] we are scanning region "
3357                                "["PTR_FORMAT", "PTR_FORMAT")",
3358                                _task_id, mr.start(), mr.end());
3359       tmp_guarantee_CM( mr.end() <= _cm->finger(),
3360                         "otherwise the region shouldn't be on the stack" );
3361       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3362       if (_nextMarkBitMap->iterate(bc, mr)) {
3363         tmp_guarantee_CM( !has_aborted(),
3364                "cannot abort the task without aborting the bitmap iteration" );
3365 
3366         // We finished iterating over the region without aborting.
3367         regular_clock_call();
3368         if (has_aborted())
3369           mr = MemRegion();
3370         else {
3371           mr = _cm->region_stack_pop();
3372           // it returns MemRegion() if the pop fails
3373           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3374         }
3375       } else {
3376         guarantee( has_aborted(), "currently the only way to do so" );
3377 
3378         // The only way to abort the bitmap iteration is to return
3379         // false from the do_bit() method. However, inside the
3380         // do_bit() method we move the _region_finger to point to the
3381         // object currently being looked at. So, if we bail out, we
3382         // have definitely set _region_finger to something non-null.
3383         guarantee( _region_finger != NULL, "invariant" );
3384 
3385         // The iteration was actually aborted. So now _region_finger
3386         // points to the address of the object we last scanned. If we
3387         // leave it there, when we restart this task, we will rescan
3388         // the object. It is easy to avoid this. We move the finger by
3389         // enough to point to the next possible object header (the
3390         // bitmap knows by how much we need to move it as it knows its
3391         // granularity).
3392         MemRegion newRegion =
3393           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3394 
3395         if (!newRegion.is_empty()) {
3396           if (_cm->verbose_low()) {
3397             gclog_or_tty->print_cr("[%d] pushing unscanned region"
3398                                    "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3399                                    _task_id,
3400                                    newRegion.start(), newRegion.end());
3401           }
3402           // Now push the part of the region we didn't scan on the
3403           // region stack to make sure a task scans it later.
3404           _cm->region_stack_push(newRegion);
3405         }
3406         // break from while
3407         mr = MemRegion();
3408       }
3409       _region_finger = NULL;
3410     }
3411 
3412     // We only push regions on the region stack during evacuation
3413     // pauses. So if we come out the above iteration because we region
3414     // stack is empty, it will remain empty until the next yield
3415     // point. So, the guarantee below is safe.
3416     guarantee( has_aborted() || _cm->region_stack_empty(),
3417                "only way to exit the loop" );
3418 
3419     if (_cm->verbose_low())
3420       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3421                              _task_id, _cm->region_stack_size());
3422   }
3423 }
3424 
3425 void CMTask::print_stats() {
3426   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3427                          _task_id, _calls);
3428   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3429                          _elapsed_time_ms, _termination_time_ms);
3430   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3431                          _step_times_ms.num(), _step_times_ms.avg(),
3432                          _step_times_ms.sd());
3433   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3434                          _step_times_ms.maximum(), _step_times_ms.sum());
3435 
3436 #if _MARKING_STATS_
3437   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3438                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3439                          _all_clock_intervals_ms.sd());
3440   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3441                          _all_clock_intervals_ms.maximum(),
3442                          _all_clock_intervals_ms.sum());
3443   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3444                          _clock_due_to_scanning, _clock_due_to_marking);
3445   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3446                          _objs_scanned, _objs_found_on_bitmap);
3447   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3448                          _local_pushes, _local_pops, _local_max_size);
3449   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3450                          _global_pushes, _global_pops, _global_max_size);
3451   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3452                          _global_transfers_to,_global_transfers_from);
3453   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3454                          _regions_claimed, _region_stack_pops);
3455   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3456   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3457                          _steal_attempts, _steals);
3458   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3459   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3460                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3461   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3462                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3463 #endif // _MARKING_STATS_
3464 }
3465 
3466 /*****************************************************************************
3467 
3468     The do_marking_step(time_target_ms) method is the building block
3469     of the parallel marking framework. It can be called in parallel
3470     with other invocations of do_marking_step() on different tasks
3471     (but only one per task, obviously) and concurrently with the
3472     mutator threads, or during remark, hence it eliminates the need
3473     for two versions of the code. When called during remark, it will
3474     pick up from where the task left off during the concurrent marking
3475     phase. Interestingly, tasks are also claimable during evacuation
3476     pauses too, since do_marking_step() ensures that it aborts before
3477     it needs to yield.
3478 
3479     The data structures that is uses to do marking work are the
3480     following:
3481 
3482       (1) Marking Bitmap. If there are gray objects that appear only
3483       on the bitmap (this happens either when dealing with an overflow
3484       or when the initial marking phase has simply marked the roots
3485       and didn't push them on the stack), then tasks claim heap
3486       regions whose bitmap they then scan to find gray objects. A
3487       global finger indicates where the end of the last claimed region
3488       is. A local finger indicates how far into the region a task has
3489       scanned. The two fingers are used to determine how to gray an
3490       object (i.e. whether simply marking it is OK, as it will be
3491       visited by a task in the future, or whether it needs to be also
3492       pushed on a stack).
3493 
3494       (2) Local Queue. The local queue of the task which is accessed
3495       reasonably efficiently by the task. Other tasks can steal from
3496       it when they run out of work. Throughout the marking phase, a
3497       task attempts to keep its local queue short but not totally
3498       empty, so that entries are available for stealing by other
3499       tasks. Only when there is no more work, a task will totally
3500       drain its local queue.
3501 
3502       (3) Global Mark Stack. This handles local queue overflow. During
3503       marking only sets of entries are moved between it and the local
3504       queues, as access to it requires a mutex and more fine-grain
3505       interaction with it which might cause contention. If it
3506       overflows, then the marking phase should restart and iterate
3507       over the bitmap to identify gray objects. Throughout the marking
3508       phase, tasks attempt to keep the global mark stack at a small
3509       length but not totally empty, so that entries are available for
3510       popping by other tasks. Only when there is no more work, tasks
3511       will totally drain the global mark stack.
3512 
3513       (4) Global Region Stack. Entries on it correspond to areas of
3514       the bitmap that need to be scanned since they contain gray
3515       objects. Pushes on the region stack only happen during
3516       evacuation pauses and typically correspond to areas covered by
3517       GC LABS. If it overflows, then the marking phase should restart
3518       and iterate over the bitmap to identify gray objects. Tasks will
3519       try to totally drain the region stack as soon as possible.
3520 
3521       (5) SATB Buffer Queue. This is where completed SATB buffers are
3522       made available. Buffers are regularly removed from this queue
3523       and scanned for roots, so that the queue doesn't get too
3524       long. During remark, all completed buffers are processed, as
3525       well as the filled in parts of any uncompleted buffers.
3526 
3527     The do_marking_step() method tries to abort when the time target
3528     has been reached. There are a few other cases when the
3529     do_marking_step() method also aborts:
3530 
3531       (1) When the marking phase has been aborted (after a Full GC).
3532 
3533       (2) When a global overflow (either on the global stack or the
3534       region stack) has been triggered. Before the task aborts, it
3535       will actually sync up with the other tasks to ensure that all
3536       the marking data structures (local queues, stacks, fingers etc.)
3537       are re-initialised so that when do_marking_step() completes,
3538       the marking phase can immediately restart.
3539 
3540       (3) When enough completed SATB buffers are available. The
3541       do_marking_step() method only tries to drain SATB buffers right
3542       at the beginning. So, if enough buffers are available, the
3543       marking step aborts and the SATB buffers are processed at
3544       the beginning of the next invocation.
3545 
3546       (4) To yield. when we have to yield then we abort and yield
3547       right at the end of do_marking_step(). This saves us from a lot
3548       of hassle as, by yielding we might allow a Full GC. If this
3549       happens then objects will be compacted underneath our feet, the
3550       heap might shrink, etc. We save checking for this by just
3551       aborting and doing the yield right at the end.
3552 
3553     From the above it follows that the do_marking_step() method should
3554     be called in a loop (or, otherwise, regularly) until it completes.
3555 
3556     If a marking step completes without its has_aborted() flag being
3557     true, it means it has completed the current marking phase (and
3558     also all other marking tasks have done so and have all synced up).
3559 
3560     A method called regular_clock_call() is invoked "regularly" (in
3561     sub ms intervals) throughout marking. It is this clock method that
3562     checks all the abort conditions which were mentioned above and
3563     decides when the task should abort. A work-based scheme is used to
3564     trigger this clock method: when the number of object words the
3565     marking phase has scanned or the number of references the marking
3566     phase has visited reach a given limit. Additional invocations to
3567     the method clock have been planted in a few other strategic places
3568     too. The initial reason for the clock method was to avoid calling
3569     vtime too regularly, as it is quite expensive. So, once it was in
3570     place, it was natural to piggy-back all the other conditions on it
3571     too and not constantly check them throughout the code.
3572 
3573  *****************************************************************************/
3574 
3575 void CMTask::do_marking_step(double time_target_ms) {
3576   guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
3577   guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
3578 
3579   guarantee( concurrent() || _cm->region_stack_empty(),
3580              "the region stack should have been cleared before remark" );
3581   guarantee( _region_finger == NULL,
3582              "this should be non-null only when a region is being scanned" );
3583 
3584   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3585   guarantee( _task_queues != NULL, "invariant" );
3586   guarantee( _task_queue != NULL,  "invariant" );
3587   guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
3588 
3589   guarantee( !_claimed,
3590              "only one thread should claim this task at any one time" );
3591 
3592   // OK, this doesn't safeguard again all possible scenarios, as it is
3593   // possible for two threads to set the _claimed flag at the same
3594   // time. But it is only for debugging purposes anyway and it will
3595   // catch most problems.
3596   _claimed = true;
3597 
3598   _start_time_ms = os::elapsedVTime() * 1000.0;
3599   statsOnly( _interval_start_time_ms = _start_time_ms );
3600 
3601   double diff_prediction_ms =
3602     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3603   _time_target_ms = time_target_ms - diff_prediction_ms;
3604 
3605   // set up the variables that are used in the work-based scheme to
3606   // call the regular clock method
3607   _words_scanned = 0;
3608   _refs_reached  = 0;
3609   recalculate_limits();
3610 
3611   // clear all flags
3612   clear_has_aborted();
3613   _has_aborted_timed_out = false;
3614   _draining_satb_buffers = false;
3615 
3616   ++_calls;
3617 
3618   if (_cm->verbose_low())
3619     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3620                            "target = %1.2lfms >>>>>>>>>>",
3621                            _task_id, _calls, _time_target_ms);
3622 
3623   // Set up the bitmap and oop closures. Anything that uses them is
3624   // eventually called from this method, so it is OK to allocate these
3625   // statically.
3626   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3627   CMOopClosure    oop_closure(_g1h, _cm, this);
3628   set_oop_closure(&oop_closure);
3629 
3630   if (_cm->has_overflown()) {
3631     // This can happen if the region stack or the mark stack overflows
3632     // during a GC pause and this task, after a yield point,
3633     // restarts. We have to abort as we need to get into the overflow
3634     // protocol which happens right at the end of this task.
3635     set_has_aborted();
3636   }
3637 
3638   // First drain any available SATB buffers. After this, we will not
3639   // look at SATB buffers before the next invocation of this method.
3640   // If enough completed SATB buffers are queued up, the regular clock
3641   // will abort this task so that it restarts.
3642   drain_satb_buffers();
3643   // ...then partially drain the local queue and the global stack
3644   drain_local_queue(true);
3645   drain_global_stack(true);
3646 
3647   // Then totally drain the region stack.  We will not look at
3648   // it again before the next invocation of this method. Entries on
3649   // the region stack are only added during evacuation pauses, for
3650   // which we have to yield. When we do, we abort the task anyway so
3651   // it will look at the region stack again when it restarts.
3652   bitmap_closure.set_scanning_heap_region(false);
3653   drain_region_stack(&bitmap_closure);
3654   // ...then partially drain the local queue and the global stack
3655   drain_local_queue(true);
3656   drain_global_stack(true);
3657 
3658   do {
3659     if (!has_aborted() && _curr_region != NULL) {
3660       // This means that we're already holding on to a region.
3661       tmp_guarantee_CM( _finger != NULL,
3662                         "if region is not NULL, then the finger "
3663                         "should not be NULL either" );
3664 
3665       // We might have restarted this task after an evacuation pause
3666       // which might have evacuated the region we're holding on to
3667       // underneath our feet. Let's read its limit again to make sure
3668       // that we do not iterate over a region of the heap that
3669       // contains garbage (update_region_limit() will also move
3670       // _finger to the start of the region if it is found empty).
3671       update_region_limit();
3672       // We will start from _finger not from the start of the region,
3673       // as we might be restarting this task after aborting half-way
3674       // through scanning this region. In this case, _finger points to
3675       // the address where we last found a marked object. If this is a
3676       // fresh region, _finger points to start().
3677       MemRegion mr = MemRegion(_finger, _region_limit);
3678 
3679       if (_cm->verbose_low())
3680         gclog_or_tty->print_cr("[%d] we're scanning part "
3681                                "["PTR_FORMAT", "PTR_FORMAT") "
3682                                "of region "PTR_FORMAT,
3683                                _task_id, _finger, _region_limit, _curr_region);
3684 
3685       // Let's iterate over the bitmap of the part of the
3686       // region that is left.
3687       bitmap_closure.set_scanning_heap_region(true);
3688       if (mr.is_empty() ||
3689           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3690         // We successfully completed iterating over the region. Now,
3691         // let's give up the region.
3692         giveup_current_region();
3693         regular_clock_call();
3694       } else {
3695         guarantee( has_aborted(), "currently the only way to do so" );
3696         // The only way to abort the bitmap iteration is to return
3697         // false from the do_bit() method. However, inside the
3698         // do_bit() method we move the _finger to point to the
3699         // object currently being looked at. So, if we bail out, we
3700         // have definitely set _finger to something non-null.
3701         guarantee( _finger != NULL, "invariant" );
3702 
3703         // Region iteration was actually aborted. So now _finger
3704         // points to the address of the object we last scanned. If we
3705         // leave it there, when we restart this task, we will rescan
3706         // the object. It is easy to avoid this. We move the finger by
3707         // enough to point to the next possible object header (the
3708         // bitmap knows by how much we need to move it as it knows its
3709         // granularity).
3710         move_finger_to(_nextMarkBitMap->nextWord(_finger));
3711       }
3712     }
3713     // At this point we have either completed iterating over the
3714     // region we were holding on to, or we have aborted.
3715 
3716     // We then partially drain the local queue and the global stack.
3717     // (Do we really need this?)
3718     drain_local_queue(true);
3719     drain_global_stack(true);
3720 
3721     // Read the note on the claim_region() method on why it might
3722     // return NULL with potentially more regions available for
3723     // claiming and why we have to check out_of_regions() to determine
3724     // whether we're done or not.
3725     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3726       // We are going to try to claim a new region. We should have
3727       // given up on the previous one.
3728       tmp_guarantee_CM( _curr_region  == NULL &&
3729                         _finger       == NULL &&
3730                         _region_limit == NULL, "invariant" );
3731       if (_cm->verbose_low())
3732         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3733       HeapRegion* claimed_region = _cm->claim_region(_task_id);
3734       if (claimed_region != NULL) {
3735         // Yes, we managed to claim one
3736         statsOnly( ++_regions_claimed );
3737 
3738         if (_cm->verbose_low())
3739           gclog_or_tty->print_cr("[%d] we successfully claimed "
3740                                  "region "PTR_FORMAT,
3741                                  _task_id, claimed_region);
3742 
3743         setup_for_region(claimed_region);
3744         tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
3745       }
3746       // It is important to call the regular clock here. It might take
3747       // a while to claim a region if, for example, we hit a large
3748       // block of empty regions. So we need to call the regular clock
3749       // method once round the loop to make sure it's called
3750       // frequently enough.
3751       regular_clock_call();
3752     }
3753 
3754     if (!has_aborted() && _curr_region == NULL) {
3755       tmp_guarantee_CM( _cm->out_of_regions(),
3756                         "at this point we should be out of regions" );
3757     }
3758   } while ( _curr_region != NULL && !has_aborted());
3759 
3760   if (!has_aborted()) {
3761     // We cannot check whether the global stack is empty, since other
3762     // tasks might be pushing objects to it concurrently. We also cannot
3763     // check if the region stack is empty because if a thread is aborting
3764     // it can push a partially done region back.
3765     tmp_guarantee_CM( _cm->out_of_regions(),
3766                       "at this point we should be out of regions" );
3767 
3768     if (_cm->verbose_low())
3769       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3770 
3771     // Try to reduce the number of available SATB buffers so that
3772     // remark has less work to do.
3773     drain_satb_buffers();
3774   }
3775 
3776   // Since we've done everything else, we can now totally drain the
3777   // local queue and global stack.
3778   drain_local_queue(false);
3779   drain_global_stack(false);
3780 
3781   // Attempt at work stealing from other task's queues.
3782   if (!has_aborted()) {
3783     // We have not aborted. This means that we have finished all that
3784     // we could. Let's try to do some stealing...
3785 
3786     // We cannot check whether the global stack is empty, since other
3787     // tasks might be pushing objects to it concurrently. We also cannot
3788     // check if the region stack is empty because if a thread is aborting
3789     // it can push a partially done region back.
3790     guarantee( _cm->out_of_regions() &&
3791                _task_queue->size() == 0, "only way to reach here" );
3792 
3793     if (_cm->verbose_low())
3794       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3795 
3796     while (!has_aborted()) {
3797       oop obj;
3798       statsOnly( ++_steal_attempts );
3799 
3800       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3801         if (_cm->verbose_medium())
3802           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3803                                  _task_id, (void*) obj);
3804 
3805         statsOnly( ++_steals );
3806 
3807         tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
3808                           "any stolen object should be marked" );
3809         scan_object(obj);
3810 
3811         // And since we're towards the end, let's totally drain the
3812         // local queue and global stack.
3813         drain_local_queue(false);
3814         drain_global_stack(false);
3815       } else {
3816         break;
3817       }
3818     }
3819   }
3820 
3821   // We still haven't aborted. Now, let's try to get into the
3822   // termination protocol.
3823   if (!has_aborted()) {
3824     // We cannot check whether the global stack is empty, since other
3825     // tasks might be concurrently pushing objects on it. We also cannot
3826     // check if the region stack is empty because if a thread is aborting
3827     // it can push a partially done region back.
3828     guarantee( _cm->out_of_regions() &&
3829                _task_queue->size() == 0, "only way to reach here" );
3830 
3831     if (_cm->verbose_low())
3832       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3833 
3834     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3835     // The CMTask class also extends the TerminatorTerminator class,
3836     // hence its should_exit_termination() method will also decide
3837     // whether to exit the termination protocol or not.
3838     bool finished = _cm->terminator()->offer_termination(this);
3839     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3840     _termination_time_ms +=
3841       termination_end_time_ms - _termination_start_time_ms;
3842 
3843     if (finished) {
3844       // We're all done.
3845 
3846       if (_task_id == 0) {
3847         // let's allow task 0 to do this
3848         if (concurrent()) {
3849           guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
3850           // we need to set this to false before the next
3851           // safepoint. This way we ensure that the marking phase
3852           // doesn't observe any more heap expansions.
3853           _cm->clear_concurrent_marking_in_progress();
3854         }
3855       }
3856 
3857       // We can now guarantee that the global stack is empty, since
3858       // all other tasks have finished.
3859       guarantee( _cm->out_of_regions() &&
3860                  _cm->region_stack_empty() &&
3861                  _cm->mark_stack_empty() &&
3862                  _task_queue->size() == 0 &&
3863                  !_cm->has_overflown() &&
3864                  !_cm->mark_stack_overflow() &&
3865                  !_cm->region_stack_overflow(),
3866                  "only way to reach here" );
3867 
3868       if (_cm->verbose_low())
3869         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3870     } else {
3871       // Apparently there's more work to do. Let's abort this task. It
3872       // will restart it and we can hopefully find more things to do.
3873 
3874       if (_cm->verbose_low())
3875         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3876 
3877       set_has_aborted();
3878       statsOnly( ++_aborted_termination );
3879     }
3880   }
3881 
3882   // Mainly for debugging purposes to make sure that a pointer to the
3883   // closure which was statically allocated in this frame doesn't
3884   // escape it by accident.
3885   set_oop_closure(NULL);
3886   double end_time_ms = os::elapsedVTime() * 1000.0;
3887   double elapsed_time_ms = end_time_ms - _start_time_ms;
3888   // Update the step history.
3889   _step_times_ms.add(elapsed_time_ms);
3890 
3891   if (has_aborted()) {
3892     // The task was aborted for some reason.
3893 
3894     statsOnly( ++_aborted );
3895 
3896     if (_has_aborted_timed_out) {
3897       double diff_ms = elapsed_time_ms - _time_target_ms;
3898       // Keep statistics of how well we did with respect to hitting
3899       // our target only if we actually timed out (if we aborted for
3900       // other reasons, then the results might get skewed).
3901       _marking_step_diffs_ms.add(diff_ms);
3902     }
3903 
3904     if (_cm->has_overflown()) {
3905       // This is the interesting one. We aborted because a global
3906       // overflow was raised. This means we have to restart the
3907       // marking phase and start iterating over regions. However, in
3908       // order to do this we have to make sure that all tasks stop
3909       // what they are doing and re-initialise in a safe manner. We
3910       // will achieve this with the use of two barrier sync points.
3911 
3912       if (_cm->verbose_low())
3913         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3914 
3915       _cm->enter_first_sync_barrier(_task_id);
3916       // When we exit this sync barrier we know that all tasks have
3917       // stopped doing marking work. So, it's now safe to
3918       // re-initialise our data structures. At the end of this method,
3919       // task 0 will clear the global data structures.
3920 
3921       statsOnly( ++_aborted_overflow );
3922 
3923       // We clear the local state of this task...
3924       clear_region_fields();
3925 
3926       // ...and enter the second barrier.
3927       _cm->enter_second_sync_barrier(_task_id);
3928       // At this point everything has bee re-initialised and we're
3929       // ready to restart.
3930     }
3931 
3932     if (_cm->verbose_low()) {
3933       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3934                              "elapsed = %1.2lfms <<<<<<<<<<",
3935                              _task_id, _time_target_ms, elapsed_time_ms);
3936       if (_cm->has_aborted())
3937         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
3938                                _task_id);
3939     }
3940   } else {
3941     if (_cm->verbose_low())
3942       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
3943                              "elapsed = %1.2lfms <<<<<<<<<<",
3944                              _task_id, _time_target_ms, elapsed_time_ms);
3945   }
3946 
3947   _claimed = false;
3948 }
3949 
3950 CMTask::CMTask(int task_id,
3951                ConcurrentMark* cm,
3952                CMTaskQueue* task_queue,
3953                CMTaskQueueSet* task_queues)
3954   : _g1h(G1CollectedHeap::heap()),
3955     _co_tracker(G1CMGroup),
3956     _task_id(task_id), _cm(cm),
3957     _claimed(false),
3958     _nextMarkBitMap(NULL), _hash_seed(17),
3959     _task_queue(task_queue),
3960     _task_queues(task_queues),
3961     _oop_closure(NULL) {
3962   guarantee( task_queue != NULL, "invariant" );
3963   guarantee( task_queues != NULL, "invariant" );
3964 
3965   statsOnly( _clock_due_to_scanning = 0;
3966              _clock_due_to_marking  = 0 );
3967 
3968   _marking_step_diffs_ms.add(0.5);
3969 }