1 /*
   2  * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20  * CA 95054 USA or visit www.sun.com if you need additional information or
  21  * have any questions.
  22  *
  23  */
  24 
  25 #include "incls/_precompiled.incl"
  26 #include "incls/_concurrentMark.cpp.incl"
  27 
  28 //
  29 // CMS Bit Map Wrapper
  30 
  31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  32   _bm((uintptr_t*)NULL,0),
  33   _shifter(shifter) {
  34   _bmStartWord = (HeapWord*)(rs.base());
  35   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  36   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  37                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  38 
  39   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  40   // For now we'll just commit all of the bit map up fromt.
  41   // Later on we'll try to be more parsimonious with swap.
  42   guarantee(_virtual_space.initialize(brs, brs.size()),
  43             "couldn't reseve backing store for CMS bit map");
  44   assert(_virtual_space.committed_size() == brs.size(),
  45          "didn't reserve backing store for all of CMS bit map?");
  46   _bm.set_map((uintptr_t*)_virtual_space.low());
  47   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  48          _bmWordSize, "inconsistency in bit map sizing");
  49   _bm.set_size(_bmWordSize >> _shifter);
  50 }
  51 
  52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  53                                                HeapWord* limit) const {
  54   // First we must round addr *up* to a possible object boundary.
  55   addr = (HeapWord*)align_size_up((intptr_t)addr,
  56                                   HeapWordSize << _shifter);
  57   size_t addrOffset = heapWordToOffset(addr);
  58   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  59   size_t limitOffset = heapWordToOffset(limit);
  60   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  61   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  62   assert(nextAddr >= addr, "get_next_one postcondition");
  63   assert(nextAddr == limit || isMarked(nextAddr),
  64          "get_next_one postcondition");
  65   return nextAddr;
  66 }
  67 
  68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  69                                                  HeapWord* limit) const {
  70   size_t addrOffset = heapWordToOffset(addr);
  71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  72   size_t limitOffset = heapWordToOffset(limit);
  73   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  75   assert(nextAddr >= addr, "get_next_one postcondition");
  76   assert(nextAddr == limit || !isMarked(nextAddr),
  77          "get_next_one postcondition");
  78   return nextAddr;
  79 }
  80 
  81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  82   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  83   return (int) (diff >> _shifter);
  84 }
  85 
  86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  87   HeapWord* left  = MAX2(_bmStartWord, mr.start());
  88   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  89   if (right > left) {
  90     // Right-open interval [leftOffset, rightOffset).
  91     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  92   } else {
  93     return true;
  94   }
  95 }
  96 
  97 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
  98                                              size_t    from_start_index,
  99                                              HeapWord* to_start_word,
 100                                              size_t    word_num) {
 101   _bm.mostly_disjoint_range_union(from_bitmap,
 102                                   from_start_index,
 103                                   heapWordToOffset(to_start_word),
 104                                   word_num);
 105 }
 106 
 107 #ifndef PRODUCT
 108 bool CMBitMapRO::covers(ReservedSpace rs) const {
 109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 110   assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
 111          "size inconsistency");
 112   return _bmStartWord == (HeapWord*)(rs.base()) &&
 113          _bmWordSize  == rs.size()>>LogHeapWordSize;
 114 }
 115 #endif
 116 
 117 void CMBitMap::clearAll() {
 118   _bm.clear();
 119   return;
 120 }
 121 
 122 void CMBitMap::markRange(MemRegion mr) {
 123   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 124   assert(!mr.is_empty(), "unexpected empty region");
 125   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 126           ((HeapWord *) mr.end())),
 127          "markRange memory region end is not card aligned");
 128   // convert address range into offset range
 129   _bm.at_put_range(heapWordToOffset(mr.start()),
 130                    heapWordToOffset(mr.end()), true);
 131 }
 132 
 133 void CMBitMap::clearRange(MemRegion mr) {
 134   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 135   assert(!mr.is_empty(), "unexpected empty region");
 136   // convert address range into offset range
 137   _bm.at_put_range(heapWordToOffset(mr.start()),
 138                    heapWordToOffset(mr.end()), false);
 139 }
 140 
 141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 142                                             HeapWord* end_addr) {
 143   HeapWord* start = getNextMarkedWordAddress(addr);
 144   start = MIN2(start, end_addr);
 145   HeapWord* end   = getNextUnmarkedWordAddress(start);
 146   end = MIN2(end, end_addr);
 147   assert(start <= end, "Consistency check");
 148   MemRegion mr(start, end);
 149   if (!mr.is_empty()) {
 150     clearRange(mr);
 151   }
 152   return mr;
 153 }
 154 
 155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 156   _base(NULL), _cm(cm)
 157 #ifdef ASSERT
 158   , _drain_in_progress(false)
 159   , _drain_in_progress_yields(false)
 160 #endif
 161 {}
 162 
 163 void CMMarkStack::allocate(size_t size) {
 164   _base = NEW_C_HEAP_ARRAY(oop, size);
 165   if (_base == NULL)
 166     vm_exit_during_initialization("Failed to allocate "
 167                                   "CM region mark stack");
 168   _index = 0;
 169   // QQQQ cast ...
 170   _capacity = (jint) size;
 171   _oops_do_bound = -1;
 172   NOT_PRODUCT(_max_depth = 0);
 173 }
 174 
 175 CMMarkStack::~CMMarkStack() {
 176   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 177 }
 178 
 179 void CMMarkStack::par_push(oop ptr) {
 180   while (true) {
 181     if (isFull()) {
 182       _overflow = true;
 183       return;
 184     }
 185     // Otherwise...
 186     jint index = _index;
 187     jint next_index = index+1;
 188     jint res = Atomic::cmpxchg(next_index, &_index, index);
 189     if (res == index) {
 190       _base[index] = ptr;
 191       // Note that we don't maintain this atomically.  We could, but it
 192       // doesn't seem necessary.
 193       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 194       return;
 195     }
 196     // Otherwise, we need to try again.
 197   }
 198 }
 199 
 200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 201   while (true) {
 202     if (isFull()) {
 203       _overflow = true;
 204       return;
 205     }
 206     // Otherwise...
 207     jint index = _index;
 208     jint next_index = index + n;
 209     if (next_index > _capacity) {
 210       _overflow = true;
 211       return;
 212     }
 213     jint res = Atomic::cmpxchg(next_index, &_index, index);
 214     if (res == index) {
 215       for (int i = 0; i < n; i++) {
 216         int ind = index + i;
 217         assert(ind < _capacity, "By overflow test above.");
 218         _base[ind] = ptr_arr[i];
 219       }
 220       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 221       return;
 222     }
 223     // Otherwise, we need to try again.
 224   }
 225 }
 226 
 227 
 228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 229   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 230   jint start = _index;
 231   jint next_index = start + n;
 232   if (next_index > _capacity) {
 233     _overflow = true;
 234     return;
 235   }
 236   // Otherwise.
 237   _index = next_index;
 238   for (int i = 0; i < n; i++) {
 239     int ind = start + i;
 240     guarantee(ind < _capacity, "By overflow test above.");
 241     _base[ind] = ptr_arr[i];
 242   }
 243 }
 244 
 245 
 246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 247   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 248   jint index = _index;
 249   if (index == 0) {
 250     *n = 0;
 251     return false;
 252   } else {
 253     int k = MIN2(max, index);
 254     jint new_ind = index - k;
 255     for (int j = 0; j < k; j++) {
 256       ptr_arr[j] = _base[new_ind + j];
 257     }
 258     _index = new_ind;
 259     *n = k;
 260     return true;
 261   }
 262 }
 263 
 264 
 265 CMRegionStack::CMRegionStack() : _base(NULL) {}
 266 
 267 void CMRegionStack::allocate(size_t size) {
 268   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 269   if (_base == NULL)
 270     vm_exit_during_initialization("Failed to allocate "
 271                                   "CM region mark stack");
 272   _index = 0;
 273   // QQQQ cast ...
 274   _capacity = (jint) size;
 275 }
 276 
 277 CMRegionStack::~CMRegionStack() {
 278   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 279 }
 280 
 281 void CMRegionStack::push(MemRegion mr) {
 282   assert(mr.word_size() > 0, "Precondition");
 283   while (true) {
 284     if (isFull()) {
 285       _overflow = true;
 286       return;
 287     }
 288     // Otherwise...
 289     jint index = _index;
 290     jint next_index = index+1;
 291     jint res = Atomic::cmpxchg(next_index, &_index, index);
 292     if (res == index) {
 293       _base[index] = mr;
 294       return;
 295     }
 296     // Otherwise, we need to try again.
 297   }
 298 }
 299 
 300 MemRegion CMRegionStack::pop() {
 301   while (true) {
 302     // Otherwise...
 303     jint index = _index;
 304 
 305     if (index == 0) {
 306       return MemRegion();
 307     }
 308     jint next_index = index-1;
 309     jint res = Atomic::cmpxchg(next_index, &_index, index);
 310     if (res == index) {
 311       MemRegion mr = _base[next_index];
 312       if (mr.start() != NULL) {
 313         tmp_guarantee_CM( mr.end() != NULL, "invariant" );
 314         tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
 315         return mr;
 316       } else {
 317         // that entry was invalidated... let's skip it
 318         tmp_guarantee_CM( mr.end() == NULL, "invariant" );
 319       }
 320     }
 321     // Otherwise, we need to try again.
 322   }
 323 }
 324 
 325 bool CMRegionStack::invalidate_entries_into_cset() {
 326   bool result = false;
 327   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 328   for (int i = 0; i < _oops_do_bound; ++i) {
 329     MemRegion mr = _base[i];
 330     if (mr.start() != NULL) {
 331       tmp_guarantee_CM( mr.end() != NULL, "invariant");
 332       tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
 333       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 334       tmp_guarantee_CM( hr != NULL, "invariant" );
 335       if (hr->in_collection_set()) {
 336         // The region points into the collection set
 337         _base[i] = MemRegion();
 338         result = true;
 339       }
 340     } else {
 341       // that entry was invalidated... let's skip it
 342       tmp_guarantee_CM( mr.end() == NULL, "invariant" );
 343     }
 344   }
 345   return result;
 346 }
 347 
 348 template<class OopClosureClass>
 349 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 350   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 351          || SafepointSynchronize::is_at_safepoint(),
 352          "Drain recursion must be yield-safe.");
 353   bool res = true;
 354   debug_only(_drain_in_progress = true);
 355   debug_only(_drain_in_progress_yields = yield_after);
 356   while (!isEmpty()) {
 357     oop newOop = pop();
 358     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 359     assert(newOop->is_oop(), "Expected an oop");
 360     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 361            "only grey objects on this stack");
 362     // iterate over the oops in this oop, marking and pushing
 363     // the ones in CMS generation.
 364     newOop->oop_iterate(cl);
 365     if (yield_after && _cm->do_yield_check()) {
 366       res = false; break;
 367     }
 368   }
 369   debug_only(_drain_in_progress = false);
 370   return res;
 371 }
 372 
 373 void CMMarkStack::oops_do(OopClosure* f) {
 374   if (_index == 0) return;
 375   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 376          "Bound must be set.");
 377   for (int i = 0; i < _oops_do_bound; i++) {
 378     f->do_oop(&_base[i]);
 379   }
 380   _oops_do_bound = -1;
 381 }
 382 
 383 bool ConcurrentMark::not_yet_marked(oop obj) const {
 384   return (_g1h->is_obj_ill(obj)
 385           || (_g1h->is_in_permanent(obj)
 386               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 387 }
 388 
 389 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 390 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 391 #endif // _MSC_VER
 392 
 393 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 394                                int max_regions) :
 395   _markBitMap1(rs, MinObjAlignment - 1),
 396   _markBitMap2(rs, MinObjAlignment - 1),
 397 
 398   _parallel_marking_threads(0),
 399   _sleep_factor(0.0),
 400   _marking_task_overhead(1.0),
 401   _cleanup_sleep_factor(0.0),
 402   _cleanup_task_overhead(1.0),
 403   _region_bm(max_regions, false /* in_resource_area*/),
 404   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 405            CardTableModRefBS::card_shift,
 406            false /* in_resource_area*/),
 407   _prevMarkBitMap(&_markBitMap1),
 408   _nextMarkBitMap(&_markBitMap2),
 409   _at_least_one_mark_complete(false),
 410 
 411   _markStack(this),
 412   _regionStack(),
 413   // _finger set in set_non_marking_state
 414 
 415   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 416   // _active_tasks set in set_non_marking_state
 417   // _tasks set inside the constructor
 418   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 419   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 420 
 421   _has_overflown(false),
 422   _concurrent(false),
 423 
 424   // _verbose_level set below
 425 
 426   _init_times(),
 427   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 428   _cleanup_times(),
 429   _total_counting_time(0.0),
 430   _total_rs_scrub_time(0.0),
 431 
 432   _parallel_workers(NULL),
 433   _cleanup_co_tracker(G1CLGroup)
 434 {
 435   CMVerboseLevel verbose_level =
 436     (CMVerboseLevel) G1MarkingVerboseLevel;
 437   if (verbose_level < no_verbose)
 438     verbose_level = no_verbose;
 439   if (verbose_level > high_verbose)
 440     verbose_level = high_verbose;
 441   _verbose_level = verbose_level;
 442 
 443   if (verbose_low())
 444     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 445                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 446 
 447   _markStack.allocate(G1CMStackSize);
 448   _regionStack.allocate(G1CMRegionStackSize);
 449 
 450   // Create & start a ConcurrentMark thread.
 451   if (G1ConcMark) {
 452     _cmThread = new ConcurrentMarkThread(this);
 453     assert(cmThread() != NULL, "CM Thread should have been created");
 454     assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 455   } else {
 456     _cmThread = NULL;
 457   }
 458   _g1h = G1CollectedHeap::heap();
 459   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 460   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 461   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 462 
 463   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 464   satb_qs.set_buffer_size(G1SATBLogBufferSize);
 465 
 466   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
 467   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
 468   for (int i = 0 ; i < size; i++) {
 469     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
 470   }
 471 
 472   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 473   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 474 
 475   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 476   _active_tasks = _max_task_num;
 477   for (int i = 0; i < (int) _max_task_num; ++i) {
 478     CMTaskQueue* task_queue = new CMTaskQueue();
 479     task_queue->initialize();
 480     _task_queues->register_queue(i, task_queue);
 481 
 482     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 483     _accum_task_vtime[i] = 0.0;
 484   }
 485 
 486   if (ParallelMarkingThreads > ParallelGCThreads) {
 487     vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
 488                                   "than ParallelGCThreads.");
 489   }
 490   if (ParallelGCThreads == 0) {
 491     // if we are not running with any parallel GC threads we will not
 492     // spawn any marking threads either
 493     _parallel_marking_threads =   0;
 494     _sleep_factor             = 0.0;
 495     _marking_task_overhead    = 1.0;
 496   } else {
 497     if (ParallelMarkingThreads > 0) {
 498       // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc
 499       // if both are set
 500 
 501       _parallel_marking_threads = ParallelMarkingThreads;
 502       _sleep_factor             = 0.0;
 503       _marking_task_overhead    = 1.0;
 504     } else if (G1MarkingOverheadPerc > 0) {
 505       // we will calculate the number of parallel marking threads
 506       // based on a target overhead with respect to the soft real-time
 507       // goal
 508 
 509       double marking_overhead = (double) G1MarkingOverheadPerc / 100.0;
 510       double overall_cm_overhead =
 511         (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS;
 512       double cpu_ratio = 1.0 / (double) os::processor_count();
 513       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 514       double marking_task_overhead =
 515         overall_cm_overhead / marking_thread_num *
 516                                                 (double) os::processor_count();
 517       double sleep_factor =
 518                          (1.0 - marking_task_overhead) / marking_task_overhead;
 519 
 520       _parallel_marking_threads = (size_t) marking_thread_num;
 521       _sleep_factor             = sleep_factor;
 522       _marking_task_overhead    = marking_task_overhead;
 523     } else {
 524       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 525       _sleep_factor             = 0.0;
 526       _marking_task_overhead    = 1.0;
 527     }
 528 
 529     if (parallel_marking_threads() > 1)
 530       _cleanup_task_overhead = 1.0;
 531     else
 532       _cleanup_task_overhead = marking_task_overhead();
 533     _cleanup_sleep_factor =
 534                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 535 
 536 #if 0
 537     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 538     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 539     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 540     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 541     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 542 #endif
 543 
 544     guarantee( parallel_marking_threads() > 0, "peace of mind" );
 545     _parallel_workers = new WorkGang("Parallel Marking Threads",
 546                                      (int) parallel_marking_threads(), false, true);
 547     if (_parallel_workers == NULL)
 548       vm_exit_during_initialization("Failed necessary allocation.");
 549   }
 550 
 551   // so that the call below can read a sensible value
 552   _heap_start = (HeapWord*) rs.base();
 553   set_non_marking_state();
 554 }
 555 
 556 void ConcurrentMark::update_g1_committed(bool force) {
 557   // If concurrent marking is not in progress, then we do not need to
 558   // update _heap_end. This has a subtle and important
 559   // side-effect. Imagine that two evacuation pauses happen between
 560   // marking completion and remark. The first one can grow the
 561   // heap (hence now the finger is below the heap end). Then, the
 562   // second one could unnecessarily push regions on the region
 563   // stack. This causes the invariant that the region stack is empty
 564   // at the beginning of remark to be false. By ensuring that we do
 565   // not observe heap expansions after marking is complete, then we do
 566   // not have this problem.
 567   if (!concurrent_marking_in_progress() && !force)
 568     return;
 569 
 570   MemRegion committed = _g1h->g1_committed();
 571   tmp_guarantee_CM( committed.start() == _heap_start,
 572                     "start shouldn't change" );
 573   HeapWord* new_end = committed.end();
 574   if (new_end > _heap_end) {
 575     // The heap has been expanded.
 576 
 577     _heap_end = new_end;
 578   }
 579   // Notice that the heap can also shrink. However, this only happens
 580   // during a Full GC (at least currently) and the entire marking
 581   // phase will bail out and the task will not be restarted. So, let's
 582   // do nothing.
 583 }
 584 
 585 void ConcurrentMark::reset() {
 586   // Starting values for these two. This should be called in a STW
 587   // phase. CM will be notified of any future g1_committed expansions
 588   // will be at the end of evacuation pauses, when tasks are
 589   // inactive.
 590   MemRegion committed = _g1h->g1_committed();
 591   _heap_start = committed.start();
 592   _heap_end   = committed.end();
 593 
 594   guarantee( _heap_start != NULL &&
 595              _heap_end != NULL   &&
 596              _heap_start < _heap_end, "heap bounds should look ok" );
 597 
 598   // reset all the marking data structures and any necessary flags
 599   clear_marking_state();
 600 
 601   if (verbose_low())
 602     gclog_or_tty->print_cr("[global] resetting");
 603 
 604   // We do reset all of them, since different phases will use
 605   // different number of active threads. So, it's easiest to have all
 606   // of them ready.
 607   for (int i = 0; i < (int) _max_task_num; ++i)
 608     _tasks[i]->reset(_nextMarkBitMap);
 609 
 610   // we need this to make sure that the flag is on during the evac
 611   // pause with initial mark piggy-backed
 612   set_concurrent_marking_in_progress();
 613 }
 614 
 615 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 616   guarantee( active_tasks <= _max_task_num, "we should not have more" );
 617 
 618   _active_tasks = active_tasks;
 619   // Need to update the three data structures below according to the
 620   // number of active threads for this phase.
 621   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 622   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 623   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 624 
 625   _concurrent = concurrent;
 626   // We propagate this to all tasks, not just the active ones.
 627   for (int i = 0; i < (int) _max_task_num; ++i)
 628     _tasks[i]->set_concurrent(concurrent);
 629 
 630   if (concurrent) {
 631     set_concurrent_marking_in_progress();
 632   } else {
 633     // We currently assume that the concurrent flag has been set to
 634     // false before we start remark. At this point we should also be
 635     // in a STW phase.
 636     guarantee( !concurrent_marking_in_progress(), "invariant" );
 637     guarantee( _finger == _heap_end, "only way to get here" );
 638     update_g1_committed(true);
 639   }
 640 }
 641 
 642 void ConcurrentMark::set_non_marking_state() {
 643   // We set the global marking state to some default values when we're
 644   // not doing marking.
 645   clear_marking_state();
 646   _active_tasks = 0;
 647   clear_concurrent_marking_in_progress();
 648 }
 649 
 650 ConcurrentMark::~ConcurrentMark() {
 651   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
 652   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
 653   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
 654                     _par_cleanup_thread_state);
 655 
 656   for (int i = 0; i < (int) _max_task_num; ++i) {
 657     delete _task_queues->queue(i);
 658     delete _tasks[i];
 659   }
 660   delete _task_queues;
 661   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
 662 }
 663 
 664 // This closure is used to mark refs into the g1 generation
 665 // from external roots in the CMS bit map.
 666 // Called at the first checkpoint.
 667 //
 668 
 669 #define PRINT_REACHABLE_AT_INITIAL_MARK 0
 670 #if PRINT_REACHABLE_AT_INITIAL_MARK
 671 static FILE* reachable_file = NULL;
 672 
 673 class PrintReachableClosure: public OopsInGenClosure {
 674   CMBitMap* _bm;
 675   int _level;
 676 public:
 677   PrintReachableClosure(CMBitMap* bm) :
 678     _bm(bm), _level(0) {
 679     guarantee(reachable_file != NULL, "pre-condition");
 680   }
 681   void do_oop(oop* p) {
 682     oop obj = *p;
 683     HeapWord* obj_addr = (HeapWord*)obj;
 684     if (obj == NULL) return;
 685     fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
 686             _level, p, (void*) obj, _bm->isMarked(obj_addr));
 687     if (!_bm->isMarked(obj_addr)) {
 688       _bm->mark(obj_addr);
 689       _level++;
 690       obj->oop_iterate(this);
 691       _level--;
 692     }
 693   }
 694 };
 695 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
 696 
 697 #define SEND_HEAP_DUMP_TO_FILE 0
 698 #if SEND_HEAP_DUMP_TO_FILE
 699 static FILE* heap_dump_file = NULL;
 700 #endif // SEND_HEAP_DUMP_TO_FILE
 701 
 702 void ConcurrentMark::clearNextBitmap() {
 703    guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
 704 
 705    // clear the mark bitmap (no grey objects to start with).
 706    // We need to do this in chunks and offer to yield in between
 707    // each chunk.
 708    HeapWord* start  = _nextMarkBitMap->startWord();
 709    HeapWord* end    = _nextMarkBitMap->endWord();
 710    HeapWord* cur    = start;
 711    size_t chunkSize = M;
 712    while (cur < end) {
 713      HeapWord* next = cur + chunkSize;
 714      if (next > end)
 715        next = end;
 716      MemRegion mr(cur,next);
 717      _nextMarkBitMap->clearRange(mr);
 718      cur = next;
 719      do_yield_check();
 720    }
 721 }
 722 
 723 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 724 public:
 725   bool doHeapRegion(HeapRegion* r) {
 726     if (!r->continuesHumongous()) {
 727       r->note_start_of_marking(true);
 728     }
 729     return false;
 730   }
 731 };
 732 
 733 void ConcurrentMark::checkpointRootsInitialPre() {
 734   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 735   G1CollectorPolicy* g1p = g1h->g1_policy();
 736 
 737   _has_aborted = false;
 738 
 739   // Find all the reachable objects...
 740 #if PRINT_REACHABLE_AT_INITIAL_MARK
 741   guarantee(reachable_file == NULL, "Protocol");
 742   char fn_buf[100];
 743   sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
 744   reachable_file = fopen(fn_buf, "w");
 745   // clear the mark bitmap (no grey objects to start with)
 746   _nextMarkBitMap->clearAll();
 747   PrintReachableClosure prcl(_nextMarkBitMap);
 748   g1h->process_strong_roots(
 749                             false,   // fake perm gen collection
 750                             SharedHeap::SO_AllClasses,
 751                             &prcl, // Regular roots
 752                             &prcl    // Perm Gen Roots
 753                             );
 754   // The root iteration above "consumed" dirty cards in the perm gen.
 755   // Therefore, as a shortcut, we dirty all such cards.
 756   g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
 757   fclose(reachable_file);
 758   reachable_file = NULL;
 759   // clear the mark bitmap again.
 760   _nextMarkBitMap->clearAll();
 761   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 762   COMPILER2_PRESENT(DerivedPointerTable::clear());
 763 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
 764 
 765   // Initialise marking structures. This has to be done in a STW phase.
 766   reset();
 767 }
 768 
 769 class CMMarkRootsClosure: public OopsInGenClosure {
 770 private:
 771   ConcurrentMark*  _cm;
 772   G1CollectedHeap* _g1h;
 773   bool             _do_barrier;
 774 
 775 public:
 776   CMMarkRootsClosure(ConcurrentMark* cm,
 777                      G1CollectedHeap* g1h,
 778                      bool do_barrier) : _cm(cm), _g1h(g1h),
 779                                         _do_barrier(do_barrier) { }
 780 
 781   virtual void do_oop(narrowOop* p) {
 782     guarantee(false, "NYI");
 783   }
 784 
 785   virtual void do_oop(oop* p) {
 786     oop thisOop = *p;
 787     if (thisOop != NULL) {
 788       assert(thisOop->is_oop() || thisOop->mark() == NULL,
 789              "expected an oop, possibly with mark word displaced");
 790       HeapWord* addr = (HeapWord*)thisOop;
 791       if (_g1h->is_in_g1_reserved(addr)) {
 792         _cm->grayRoot(thisOop);
 793       }
 794     }
 795     if (_do_barrier) {
 796       assert(!_g1h->is_in_g1_reserved(p),
 797              "Should be called on external roots");
 798       do_barrier(p);
 799     }
 800   }
 801 };
 802 
 803 void ConcurrentMark::checkpointRootsInitialPost() {
 804   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 805 
 806   // For each region note start of marking.
 807   NoteStartOfMarkHRClosure startcl;
 808   g1h->heap_region_iterate(&startcl);
 809 
 810   // Start weak-reference discovery.
 811   ReferenceProcessor* rp = g1h->ref_processor();
 812   rp->verify_no_references_recorded();
 813   rp->enable_discovery(); // enable ("weak") refs discovery
 814 
 815   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 816   satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
 817   satb_mq_set.set_active_all_threads(true);
 818 
 819   // update_g1_committed() will be called at the end of an evac pause
 820   // when marking is on. So, it's also called at the end of the
 821   // initial-mark pause to update the heap end, if the heap expands
 822   // during it. No need to call it here.
 823 
 824   guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
 825 
 826   size_t max_marking_threads =
 827     MAX2((size_t) 1, parallel_marking_threads());
 828   for (int i = 0; i < (int)_max_task_num; ++i) {
 829     _tasks[i]->enable_co_tracker();
 830     if (i < (int) max_marking_threads)
 831       _tasks[i]->reset_co_tracker(marking_task_overhead());
 832     else
 833       _tasks[i]->reset_co_tracker(0.0);
 834   }
 835 }
 836 
 837 // Checkpoint the roots into this generation from outside
 838 // this generation. [Note this initial checkpoint need only
 839 // be approximate -- we'll do a catch up phase subsequently.]
 840 void ConcurrentMark::checkpointRootsInitial() {
 841   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 842   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 843 
 844   double start = os::elapsedTime();
 845   GCOverheadReporter::recordSTWStart(start);
 846 
 847   // If there has not been a GC[n-1] since last GC[n] cycle completed,
 848   // precede our marking with a collection of all
 849   // younger generations to keep floating garbage to a minimum.
 850   // YSR: we won't do this for now -- it's an optimization to be
 851   // done post-beta.
 852 
 853   // YSR:    ignoring weak refs for now; will do at bug fixing stage
 854   // EVM:    assert(discoveredRefsAreClear());
 855 
 856 
 857   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
 858   g1p->record_concurrent_mark_init_start();
 859   checkpointRootsInitialPre();
 860 
 861   // YSR: when concurrent precleaning is in place, we'll
 862   // need to clear the cached card table here
 863 
 864   ResourceMark rm;
 865   HandleMark  hm;
 866 
 867   g1h->ensure_parsability(false);
 868   g1h->perm_gen()->save_marks();
 869 
 870   CMMarkRootsClosure notOlder(this, g1h, false);
 871   CMMarkRootsClosure older(this, g1h, true);
 872 
 873   g1h->set_marking_started();
 874   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
 875 
 876   g1h->process_strong_roots(false,   // fake perm gen collection
 877                             SharedHeap::SO_AllClasses,
 878                             &notOlder, // Regular roots
 879                             &older    // Perm Gen Roots
 880                             );
 881   checkpointRootsInitialPost();
 882 
 883   // Statistics.
 884   double end = os::elapsedTime();
 885   _init_times.add((end - start) * 1000.0);
 886   GCOverheadReporter::recordSTWEnd(end);
 887 
 888   g1p->record_concurrent_mark_init_end();
 889 }
 890 
 891 /*
 892    Notice that in the next two methods, we actually leave the STS
 893    during the barrier sync and join it immediately afterwards. If we
 894    do not do this, this then the following deadlock can occur: one
 895    thread could be in the barrier sync code, waiting for the other
 896    thread to also sync up, whereas another one could be trying to
 897    yield, while also waiting for the other threads to sync up too.
 898 
 899    Because the thread that does the sync barrier has left the STS, it
 900    is possible to be suspended for a Full GC or an evacuation pause
 901    could occur. This is actually safe, since the entering the sync
 902    barrier is one of the last things do_marking_step() does, and it
 903    doesn't manipulate any data structures afterwards.
 904 */
 905 
 906 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 907   if (verbose_low())
 908     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 909 
 910   ConcurrentGCThread::stsLeave();
 911   _first_overflow_barrier_sync.enter();
 912   ConcurrentGCThread::stsJoin();
 913   // at this point everyone should have synced up and not be doing any
 914   // more work
 915 
 916   if (verbose_low())
 917     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 918 
 919   // let task 0 do this
 920   if (task_num == 0) {
 921     // task 0 is responsible for clearing the global data structures
 922     clear_marking_state();
 923 
 924     if (PrintGC) {
 925       gclog_or_tty->date_stamp(PrintGCDateStamps);
 926       gclog_or_tty->stamp(PrintGCTimeStamps);
 927       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 928     }
 929   }
 930 
 931   // after this, each task should reset its own data structures then
 932   // then go into the second barrier
 933 }
 934 
 935 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 936   if (verbose_low())
 937     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 938 
 939   ConcurrentGCThread::stsLeave();
 940   _second_overflow_barrier_sync.enter();
 941   ConcurrentGCThread::stsJoin();
 942   // at this point everything should be re-initialised and ready to go
 943 
 944   if (verbose_low())
 945     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 946 }
 947 
 948 void ConcurrentMark::grayRoot(oop p) {
 949   HeapWord* addr = (HeapWord*) p;
 950   // We can't really check against _heap_start and _heap_end, since it
 951   // is possible during an evacuation pause with piggy-backed
 952   // initial-mark that the committed space is expanded during the
 953   // pause without CM observing this change. So the assertions below
 954   // is a bit conservative; but better than nothing.
 955   tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
 956                     "address should be within the heap bounds" );
 957 
 958   if (!_nextMarkBitMap->isMarked(addr))
 959     _nextMarkBitMap->parMark(addr);
 960 }
 961 
 962 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 963   // The objects on the region have already been marked "in bulk" by
 964   // the caller. We only need to decide whether to push the region on
 965   // the region stack or not.
 966 
 967   if (!concurrent_marking_in_progress() || !_should_gray_objects)
 968     // We're done with marking and waiting for remark. We do not need to
 969     // push anything else on the region stack.
 970     return;
 971 
 972   HeapWord* finger = _finger;
 973 
 974   if (verbose_low())
 975     gclog_or_tty->print_cr("[global] attempting to push "
 976                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 977                            PTR_FORMAT, mr.start(), mr.end(), finger);
 978 
 979   if (mr.start() < finger) {
 980     // The finger is always heap region aligned and it is not possible
 981     // for mr to span heap regions.
 982     tmp_guarantee_CM( mr.end() <= finger, "invariant" );
 983 
 984     tmp_guarantee_CM( mr.start() <= mr.end() &&
 985                       _heap_start <= mr.start() &&
 986                       mr.end() <= _heap_end,
 987                   "region boundaries should fall within the committed space" );
 988     if (verbose_low())
 989       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
 990                              "below the finger, pushing it",
 991                              mr.start(), mr.end());
 992 
 993     if (!region_stack_push(mr)) {
 994       if (verbose_low())
 995         gclog_or_tty->print_cr("[global] region stack has overflown.");
 996     }
 997   }
 998 }
 999 
1000 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1001   // The object is not marked by the caller. We need to at least mark
1002   // it and maybe push in on the stack.
1003 
1004   HeapWord* addr = (HeapWord*)p;
1005   if (!_nextMarkBitMap->isMarked(addr)) {
1006     // We definitely need to mark it, irrespective whether we bail out
1007     // because we're done with marking.
1008     if (_nextMarkBitMap->parMark(addr)) {
1009       if (!concurrent_marking_in_progress() || !_should_gray_objects)
1010         // If we're done with concurrent marking and we're waiting for
1011         // remark, then we're not pushing anything on the stack.
1012         return;
1013 
1014       // No OrderAccess:store_load() is needed. It is implicit in the
1015       // CAS done in parMark(addr) above
1016       HeapWord* finger = _finger;
1017 
1018       if (addr < finger) {
1019         if (!mark_stack_push(oop(addr))) {
1020           if (verbose_low())
1021             gclog_or_tty->print_cr("[global] global stack overflow "
1022                                    "during parMark");
1023         }
1024       }
1025     }
1026   }
1027 }
1028 
1029 class CMConcurrentMarkingTask: public AbstractGangTask {
1030 private:
1031   ConcurrentMark*       _cm;
1032   ConcurrentMarkThread* _cmt;
1033 
1034 public:
1035   void work(int worker_i) {
1036     guarantee( Thread::current()->is_ConcurrentGC_thread(),
1037                "this should only be done by a conc GC thread" );
1038 
1039     double start_vtime = os::elapsedVTime();
1040 
1041     ConcurrentGCThread::stsJoin();
1042 
1043     guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
1044     CMTask* the_task = _cm->task(worker_i);
1045     the_task->start_co_tracker();
1046     the_task->record_start_time();
1047     if (!_cm->has_aborted()) {
1048       do {
1049         double start_vtime_sec = os::elapsedVTime();
1050         double start_time_sec = os::elapsedTime();
1051         the_task->do_marking_step(10.0);
1052         double end_time_sec = os::elapsedTime();
1053         double end_vtime_sec = os::elapsedVTime();
1054         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1055         double elapsed_time_sec = end_time_sec - start_time_sec;
1056         _cm->clear_has_overflown();
1057 
1058         bool ret = _cm->do_yield_check(worker_i);
1059 
1060         jlong sleep_time_ms;
1061         if (!_cm->has_aborted() && the_task->has_aborted()) {
1062           sleep_time_ms =
1063             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1064           ConcurrentGCThread::stsLeave();
1065           os::sleep(Thread::current(), sleep_time_ms, false);
1066           ConcurrentGCThread::stsJoin();
1067         }
1068         double end_time2_sec = os::elapsedTime();
1069         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1070 
1071         the_task->update_co_tracker();
1072 
1073 #if 0
1074           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1075                                  "overhead %1.4lf",
1076                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1077                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1078           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1079                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1080 #endif
1081       } while (!_cm->has_aborted() && the_task->has_aborted());
1082     }
1083     the_task->record_end_time();
1084     guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
1085 
1086     ConcurrentGCThread::stsLeave();
1087 
1088     double end_vtime = os::elapsedVTime();
1089     the_task->update_co_tracker(true);
1090     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1091   }
1092 
1093   CMConcurrentMarkingTask(ConcurrentMark* cm,
1094                           ConcurrentMarkThread* cmt) :
1095       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1096 
1097   ~CMConcurrentMarkingTask() { }
1098 };
1099 
1100 void ConcurrentMark::markFromRoots() {
1101   // we might be tempted to assert that:
1102   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1103   //        "inconsistent argument?");
1104   // However that wouldn't be right, because it's possible that
1105   // a safepoint is indeed in progress as a younger generation
1106   // stop-the-world GC happens even as we mark in this generation.
1107 
1108   _restart_for_overflow = false;
1109 
1110   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1111 
1112   CMConcurrentMarkingTask markingTask(this, cmThread());
1113   if (parallel_marking_threads() > 0)
1114     _parallel_workers->run_task(&markingTask);
1115   else
1116     markingTask.work(0);
1117   print_stats();
1118 }
1119 
1120 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1121   // world is stopped at this checkpoint
1122   assert(SafepointSynchronize::is_at_safepoint(),
1123          "world should be stopped");
1124   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1125 
1126   // If a full collection has happened, we shouldn't do this.
1127   if (has_aborted()) {
1128     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1129     return;
1130   }
1131 
1132   G1CollectorPolicy* g1p = g1h->g1_policy();
1133   g1p->record_concurrent_mark_remark_start();
1134 
1135   double start = os::elapsedTime();
1136   GCOverheadReporter::recordSTWStart(start);
1137 
1138   checkpointRootsFinalWork();
1139 
1140   double mark_work_end = os::elapsedTime();
1141 
1142   weakRefsWork(clear_all_soft_refs);
1143 
1144   if (has_overflown()) {
1145     // Oops.  We overflowed.  Restart concurrent marking.
1146     _restart_for_overflow = true;
1147     // Clear the flag. We do not need it any more.
1148     clear_has_overflown();
1149     if (G1TraceMarkStackOverflow)
1150       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1151   } else {
1152     // We're done with marking.
1153     JavaThread::satb_mark_queue_set().set_active_all_threads(false);
1154   }
1155 
1156 #if VERIFY_OBJS_PROCESSED
1157   _scan_obj_cl.objs_processed = 0;
1158   ThreadLocalObjQueue::objs_enqueued = 0;
1159 #endif
1160 
1161   // Statistics
1162   double now = os::elapsedTime();
1163   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1164   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1165   _remark_times.add((now - start) * 1000.0);
1166 
1167   GCOverheadReporter::recordSTWEnd(now);
1168   for (int i = 0; i < (int)_max_task_num; ++i)
1169     _tasks[i]->disable_co_tracker();
1170   _cleanup_co_tracker.enable();
1171   _cleanup_co_tracker.reset(cleanup_task_overhead());
1172   g1p->record_concurrent_mark_remark_end();
1173 }
1174 
1175 
1176 #define CARD_BM_TEST_MODE 0
1177 
1178 class CalcLiveObjectsClosure: public HeapRegionClosure {
1179 
1180   CMBitMapRO* _bm;
1181   ConcurrentMark* _cm;
1182   COTracker* _co_tracker;
1183   bool _changed;
1184   bool _yield;
1185   size_t _words_done;
1186   size_t _tot_live;
1187   size_t _tot_used;
1188   size_t _regions_done;
1189   double _start_vtime_sec;
1190 
1191   BitMap* _region_bm;
1192   BitMap* _card_bm;
1193   intptr_t _bottom_card_num;
1194   bool _final;
1195 
1196   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1197     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1198 #if CARD_BM_TEST_MODE
1199       guarantee(_card_bm->at(i - _bottom_card_num),
1200                 "Should already be set.");
1201 #else
1202       _card_bm->par_at_put(i - _bottom_card_num, 1);
1203 #endif
1204     }
1205   }
1206 
1207 public:
1208   CalcLiveObjectsClosure(bool final,
1209                          CMBitMapRO *bm, ConcurrentMark *cm,
1210                          BitMap* region_bm, BitMap* card_bm,
1211                          COTracker* co_tracker) :
1212     _bm(bm), _cm(cm), _changed(false), _yield(true),
1213     _words_done(0), _tot_live(0), _tot_used(0),
1214     _region_bm(region_bm), _card_bm(card_bm),
1215     _final(final), _co_tracker(co_tracker),
1216     _regions_done(0), _start_vtime_sec(0.0)
1217   {
1218     _bottom_card_num =
1219       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1220                CardTableModRefBS::card_shift);
1221   }
1222 
1223   bool doHeapRegion(HeapRegion* hr) {
1224     if (_co_tracker != NULL)
1225       _co_tracker->update();
1226 
1227     if (!_final && _regions_done == 0)
1228       _start_vtime_sec = os::elapsedVTime();
1229 
1230     if (hr->continuesHumongous()) return false;
1231 
1232     HeapWord* nextTop = hr->next_top_at_mark_start();
1233     HeapWord* start   = hr->top_at_conc_mark_count();
1234     assert(hr->bottom() <= start && start <= hr->end() &&
1235            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1236            start <= nextTop,
1237            "Preconditions.");
1238     // Otherwise, record the number of word's we'll examine.
1239     size_t words_done = (nextTop - start);
1240     // Find the first marked object at or after "start".
1241     start = _bm->getNextMarkedWordAddress(start, nextTop);
1242     size_t marked_bytes = 0;
1243 
1244     // Below, the term "card num" means the result of shifting an address
1245     // by the card shift -- address 0 corresponds to card number 0.  One
1246     // must subtract the card num of the bottom of the heap to obtain a
1247     // card table index.
1248     // The first card num of the sequence of live cards currently being
1249     // constructed.  -1 ==> no sequence.
1250     intptr_t start_card_num = -1;
1251     // The last card num of the sequence of live cards currently being
1252     // constructed.  -1 ==> no sequence.
1253     intptr_t last_card_num = -1;
1254 
1255     while (start < nextTop) {
1256       if (_yield && _cm->do_yield_check()) {
1257         // We yielded.  It might be for a full collection, in which case
1258         // all bets are off; terminate the traversal.
1259         if (_cm->has_aborted()) {
1260           _changed = false;
1261           return true;
1262         } else {
1263           // Otherwise, it might be a collection pause, and the region
1264           // we're looking at might be in the collection set.  We'll
1265           // abandon this region.
1266           return false;
1267         }
1268       }
1269       oop obj = oop(start);
1270       int obj_sz = obj->size();
1271       // The card num of the start of the current object.
1272       intptr_t obj_card_num =
1273         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1274 
1275       HeapWord* obj_last = start + obj_sz - 1;
1276       intptr_t obj_last_card_num =
1277         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1278 
1279       if (obj_card_num != last_card_num) {
1280         if (start_card_num == -1) {
1281           assert(last_card_num == -1, "Both or neither.");
1282           start_card_num = obj_card_num;
1283         } else {
1284           assert(last_card_num != -1, "Both or neither.");
1285           assert(obj_card_num >= last_card_num, "Inv");
1286           if ((obj_card_num - last_card_num) > 1) {
1287             // Mark the last run, and start a new one.
1288             mark_card_num_range(start_card_num, last_card_num);
1289             start_card_num = obj_card_num;
1290           }
1291         }
1292 #if CARD_BM_TEST_MODE
1293         /*
1294         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1295                                obj_card_num - _bottom_card_num,
1296                                obj_last_card_num - _bottom_card_num);
1297         */
1298         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1299           _card_bm->par_at_put(j - _bottom_card_num, 1);
1300         }
1301 #endif
1302       }
1303       // In any case, we set the last card num.
1304       last_card_num = obj_last_card_num;
1305 
1306       marked_bytes += obj_sz * HeapWordSize;
1307       // Find the next marked object after this one.
1308       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1309       _changed = true;
1310     }
1311     // Handle the last range, if any.
1312     if (start_card_num != -1)
1313       mark_card_num_range(start_card_num, last_card_num);
1314     if (_final) {
1315       // Mark the allocated-since-marking portion...
1316       HeapWord* tp = hr->top();
1317       if (nextTop < tp) {
1318         start_card_num =
1319           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1320         last_card_num =
1321           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1322         mark_card_num_range(start_card_num, last_card_num);
1323         // This definitely means the region has live objects.
1324         _region_bm->par_at_put(hr->hrs_index(), 1);
1325       }
1326     }
1327 
1328     hr->add_to_marked_bytes(marked_bytes);
1329     // Update the live region bitmap.
1330     if (marked_bytes > 0) {
1331       _region_bm->par_at_put(hr->hrs_index(), 1);
1332     }
1333     hr->set_top_at_conc_mark_count(nextTop);
1334     _tot_live += hr->next_live_bytes();
1335     _tot_used += hr->used();
1336     _words_done = words_done;
1337 
1338     if (!_final) {
1339       ++_regions_done;
1340       if (_regions_done % 10 == 0) {
1341         double end_vtime_sec = os::elapsedVTime();
1342         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1343         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1344           jlong sleep_time_ms =
1345             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1346 #if 0
1347           gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
1348                                  "overhead %1.4lf",
1349                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1350                                  _co_tracker->concOverhead(os::elapsedTime()));
1351 #endif
1352           os::sleep(Thread::current(), sleep_time_ms, false);
1353           _start_vtime_sec = end_vtime_sec;
1354         }
1355       }
1356     }
1357 
1358     return false;
1359   }
1360 
1361   bool changed() { return _changed;  }
1362   void reset()   { _changed = false; _words_done = 0; }
1363   void no_yield() { _yield = false; }
1364   size_t words_done() { return _words_done; }
1365   size_t tot_live() { return _tot_live; }
1366   size_t tot_used() { return _tot_used; }
1367 };
1368 
1369 
1370 void ConcurrentMark::calcDesiredRegions() {
1371   guarantee( _cleanup_co_tracker.enabled(), "invariant" );
1372   _cleanup_co_tracker.start();
1373 
1374   _region_bm.clear();
1375   _card_bm.clear();
1376   CalcLiveObjectsClosure calccl(false /*final*/,
1377                                 nextMarkBitMap(), this,
1378                                 &_region_bm, &_card_bm,
1379                                 &_cleanup_co_tracker);
1380   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1381   g1h->heap_region_iterate(&calccl);
1382 
1383   do {
1384     calccl.reset();
1385     g1h->heap_region_iterate(&calccl);
1386   } while (calccl.changed());
1387 
1388   _cleanup_co_tracker.update(true);
1389 }
1390 
1391 class G1ParFinalCountTask: public AbstractGangTask {
1392 protected:
1393   G1CollectedHeap* _g1h;
1394   CMBitMap* _bm;
1395   size_t _n_workers;
1396   size_t *_live_bytes;
1397   size_t *_used_bytes;
1398   BitMap* _region_bm;
1399   BitMap* _card_bm;
1400 public:
1401   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1402                       BitMap* region_bm, BitMap* card_bm) :
1403     AbstractGangTask("G1 final counting"), _g1h(g1h),
1404     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1405   {
1406     if (ParallelGCThreads > 0)
1407       _n_workers = _g1h->workers()->total_workers();
1408     else
1409       _n_workers = 1;
1410     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1411     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1412   }
1413 
1414   ~G1ParFinalCountTask() {
1415     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1416     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1417   }
1418 
1419   void work(int i) {
1420     CalcLiveObjectsClosure calccl(true /*final*/,
1421                                   _bm, _g1h->concurrent_mark(),
1422                                   _region_bm, _card_bm,
1423                                   NULL /* CO tracker */);
1424     calccl.no_yield();
1425     if (ParallelGCThreads > 0) {
1426       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1427                                             HeapRegion::FinalCountClaimValue);
1428     } else {
1429       _g1h->heap_region_iterate(&calccl);
1430     }
1431     assert(calccl.complete(), "Shouldn't have yielded!");
1432 
1433     guarantee( (size_t)i < _n_workers, "invariant" );
1434     _live_bytes[i] = calccl.tot_live();
1435     _used_bytes[i] = calccl.tot_used();
1436   }
1437   size_t live_bytes()  {
1438     size_t live_bytes = 0;
1439     for (size_t i = 0; i < _n_workers; ++i)
1440       live_bytes += _live_bytes[i];
1441     return live_bytes;
1442   }
1443   size_t used_bytes()  {
1444     size_t used_bytes = 0;
1445     for (size_t i = 0; i < _n_workers; ++i)
1446       used_bytes += _used_bytes[i];
1447     return used_bytes;
1448   }
1449 };
1450 
1451 class G1ParNoteEndTask;
1452 
1453 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1454   G1CollectedHeap* _g1;
1455   int _worker_num;
1456   size_t _max_live_bytes;
1457   size_t _regions_claimed;
1458   size_t _freed_bytes;
1459   size_t _cleared_h_regions;
1460   size_t _freed_regions;
1461   UncleanRegionList* _unclean_region_list;
1462   double _claimed_region_time;
1463   double _max_region_time;
1464 
1465 public:
1466   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1467                              UncleanRegionList* list,
1468                              int worker_num);
1469   size_t freed_bytes() { return _freed_bytes; }
1470   size_t cleared_h_regions() { return _cleared_h_regions; }
1471   size_t freed_regions() { return  _freed_regions; }
1472   UncleanRegionList* unclean_region_list() {
1473     return _unclean_region_list;
1474   }
1475 
1476   bool doHeapRegion(HeapRegion *r);
1477 
1478   size_t max_live_bytes() { return _max_live_bytes; }
1479   size_t regions_claimed() { return _regions_claimed; }
1480   double claimed_region_time_sec() { return _claimed_region_time; }
1481   double max_region_time_sec() { return _max_region_time; }
1482 };
1483 
1484 class G1ParNoteEndTask: public AbstractGangTask {
1485   friend class G1NoteEndOfConcMarkClosure;
1486 protected:
1487   G1CollectedHeap* _g1h;
1488   size_t _max_live_bytes;
1489   size_t _freed_bytes;
1490   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1491 public:
1492   G1ParNoteEndTask(G1CollectedHeap* g1h,
1493                    ConcurrentMark::ParCleanupThreadState**
1494                    par_cleanup_thread_state) :
1495     AbstractGangTask("G1 note end"), _g1h(g1h),
1496     _max_live_bytes(0), _freed_bytes(0),
1497     _par_cleanup_thread_state(par_cleanup_thread_state)
1498   {}
1499 
1500   void work(int i) {
1501     double start = os::elapsedTime();
1502     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1503                                            &_par_cleanup_thread_state[i]->list,
1504                                            i);
1505     if (ParallelGCThreads > 0) {
1506       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1507                                             HeapRegion::NoteEndClaimValue);
1508     } else {
1509       _g1h->heap_region_iterate(&g1_note_end);
1510     }
1511     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1512 
1513     // Now finish up freeing the current thread's regions.
1514     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1515                                   g1_note_end.cleared_h_regions(),
1516                                   0, NULL);
1517     {
1518       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1519       _max_live_bytes += g1_note_end.max_live_bytes();
1520       _freed_bytes += g1_note_end.freed_bytes();
1521     }
1522     double end = os::elapsedTime();
1523     if (G1PrintParCleanupStats) {
1524       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1525                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1526                           i, start, end, (end-start)*1000.0,
1527                           g1_note_end.regions_claimed(),
1528                           g1_note_end.claimed_region_time_sec()*1000.0,
1529                           g1_note_end.max_region_time_sec()*1000.0);
1530     }
1531   }
1532   size_t max_live_bytes() { return _max_live_bytes; }
1533   size_t freed_bytes() { return _freed_bytes; }
1534 };
1535 
1536 class G1ParScrubRemSetTask: public AbstractGangTask {
1537 protected:
1538   G1RemSet* _g1rs;
1539   BitMap* _region_bm;
1540   BitMap* _card_bm;
1541 public:
1542   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1543                        BitMap* region_bm, BitMap* card_bm) :
1544     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1545     _region_bm(region_bm), _card_bm(card_bm)
1546   {}
1547 
1548   void work(int i) {
1549     if (ParallelGCThreads > 0) {
1550       _g1rs->scrub_par(_region_bm, _card_bm, i,
1551                        HeapRegion::ScrubRemSetClaimValue);
1552     } else {
1553       _g1rs->scrub(_region_bm, _card_bm);
1554     }
1555   }
1556 
1557 };
1558 
1559 G1NoteEndOfConcMarkClosure::
1560 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1561                            UncleanRegionList* list,
1562                            int worker_num)
1563   : _g1(g1), _worker_num(worker_num),
1564     _max_live_bytes(0), _regions_claimed(0),
1565     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1566     _claimed_region_time(0.0), _max_region_time(0.0),
1567     _unclean_region_list(list)
1568 {}
1569 
1570 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1571   // We use a claim value of zero here because all regions
1572   // were claimed with value 1 in the FinalCount task.
1573   r->reset_gc_time_stamp();
1574   if (!r->continuesHumongous()) {
1575     double start = os::elapsedTime();
1576     _regions_claimed++;
1577     r->note_end_of_marking();
1578     _max_live_bytes += r->max_live_bytes();
1579     _g1->free_region_if_totally_empty_work(r,
1580                                            _freed_bytes,
1581                                            _cleared_h_regions,
1582                                            _freed_regions,
1583                                            _unclean_region_list,
1584                                            true /*par*/);
1585     double region_time = (os::elapsedTime() - start);
1586     _claimed_region_time += region_time;
1587     if (region_time > _max_region_time) _max_region_time = region_time;
1588   }
1589   return false;
1590 }
1591 
1592 void ConcurrentMark::cleanup() {
1593   // world is stopped at this checkpoint
1594   assert(SafepointSynchronize::is_at_safepoint(),
1595          "world should be stopped");
1596   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1597 
1598   // If a full collection has happened, we shouldn't do this.
1599   if (has_aborted()) {
1600     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1601     return;
1602   }
1603 
1604   _cleanup_co_tracker.disable();
1605 
1606   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1607   g1p->record_concurrent_mark_cleanup_start();
1608 
1609   double start = os::elapsedTime();
1610   GCOverheadReporter::recordSTWStart(start);
1611 
1612   // Do counting once more with the world stopped for good measure.
1613   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1614                                         &_region_bm, &_card_bm);
1615   if (ParallelGCThreads > 0) {
1616     assert(g1h->check_heap_region_claim_values(
1617                                                HeapRegion::InitialClaimValue),
1618            "sanity check");
1619 
1620     int n_workers = g1h->workers()->total_workers();
1621     g1h->set_par_threads(n_workers);
1622     g1h->workers()->run_task(&g1_par_count_task);
1623     g1h->set_par_threads(0);
1624 
1625     assert(g1h->check_heap_region_claim_values(
1626                                              HeapRegion::FinalCountClaimValue),
1627            "sanity check");
1628   } else {
1629     g1_par_count_task.work(0);
1630   }
1631 
1632   size_t known_garbage_bytes =
1633     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1634 #if 0
1635   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1636                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1637                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1638                          (double) known_garbage_bytes / (double) (1024 * 1024));
1639 #endif // 0
1640   g1p->set_known_garbage_bytes(known_garbage_bytes);
1641 
1642   size_t start_used_bytes = g1h->used();
1643   _at_least_one_mark_complete = true;
1644   g1h->set_marking_complete();
1645 
1646   double count_end = os::elapsedTime();
1647   double this_final_counting_time = (count_end - start);
1648   if (G1PrintParCleanupStats) {
1649     gclog_or_tty->print_cr("Cleanup:");
1650     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1651                            this_final_counting_time*1000.0);
1652   }
1653   _total_counting_time += this_final_counting_time;
1654 
1655   // Install newly created mark bitMap as "prev".
1656   swapMarkBitMaps();
1657 
1658   g1h->reset_gc_time_stamp();
1659 
1660   // Note end of marking in all heap regions.
1661   double note_end_start = os::elapsedTime();
1662   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1663   if (ParallelGCThreads > 0) {
1664     int n_workers = g1h->workers()->total_workers();
1665     g1h->set_par_threads(n_workers);
1666     g1h->workers()->run_task(&g1_par_note_end_task);
1667     g1h->set_par_threads(0);
1668 
1669     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1670            "sanity check");
1671   } else {
1672     g1_par_note_end_task.work(0);
1673   }
1674   g1h->set_unclean_regions_coming(true);
1675   double note_end_end = os::elapsedTime();
1676   // Tell the mutators that there might be unclean regions coming...
1677   if (G1PrintParCleanupStats) {
1678     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1679                            (note_end_end - note_end_start)*1000.0);
1680   }
1681 
1682 
1683   // call below, since it affects the metric by which we sort the heap
1684   // regions.
1685   if (G1ScrubRemSets) {
1686     double rs_scrub_start = os::elapsedTime();
1687     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1688     if (ParallelGCThreads > 0) {
1689       int n_workers = g1h->workers()->total_workers();
1690       g1h->set_par_threads(n_workers);
1691       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1692       g1h->set_par_threads(0);
1693 
1694       assert(g1h->check_heap_region_claim_values(
1695                                             HeapRegion::ScrubRemSetClaimValue),
1696              "sanity check");
1697     } else {
1698       g1_par_scrub_rs_task.work(0);
1699     }
1700 
1701     double rs_scrub_end = os::elapsedTime();
1702     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1703     _total_rs_scrub_time += this_rs_scrub_time;
1704   }
1705 
1706   // this will also free any regions totally full of garbage objects,
1707   // and sort the regions.
1708   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1709                         g1_par_note_end_task.freed_bytes(),
1710                         g1_par_note_end_task.max_live_bytes());
1711 
1712   // Statistics.
1713   double end = os::elapsedTime();
1714   _cleanup_times.add((end - start) * 1000.0);
1715   GCOverheadReporter::recordSTWEnd(end);
1716 
1717   // G1CollectedHeap::heap()->print();
1718   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1719   // G1CollectedHeap::heap()->get_gc_time_stamp());
1720 
1721   if (PrintGC || PrintGCDetails) {
1722     g1h->print_size_transition(gclog_or_tty,
1723                                start_used_bytes,
1724                                g1h->used(),
1725                                g1h->capacity());
1726   }
1727 
1728   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1729   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1730 
1731   // We need to make this be a "collection" so any collection pause that
1732   // races with it goes around and waits for completeCleanup to finish.
1733   g1h->increment_total_collections();
1734 
1735 #ifndef PRODUCT
1736   if (G1VerifyConcMark) {
1737     G1CollectedHeap::heap()->prepare_for_verify();
1738     G1CollectedHeap::heap()->verify(true,false);
1739   }
1740 #endif
1741 }
1742 
1743 void ConcurrentMark::completeCleanup() {
1744   // A full collection intervened.
1745   if (has_aborted()) return;
1746 
1747   int first = 0;
1748   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1749   for (int t = 0; t < last; t++) {
1750     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1751     assert(list->well_formed(), "Inv");
1752     HeapRegion* hd = list->hd();
1753     while (hd != NULL) {
1754       // Now finish up the other stuff.
1755       hd->rem_set()->clear();
1756       HeapRegion* next_hd = hd->next_from_unclean_list();
1757       (void)list->pop();
1758       guarantee(list->hd() == next_hd, "how not?");
1759       _g1h->put_region_on_unclean_list(hd);
1760       if (!hd->isHumongous()) {
1761         // Add this to the _free_regions count by 1.
1762         _g1h->finish_free_region_work(0, 0, 1, NULL);
1763       }
1764       hd = list->hd();
1765       guarantee(hd == next_hd, "how not?");
1766     }
1767   }
1768 }
1769 
1770 
1771 class G1CMIsAliveClosure: public BoolObjectClosure {
1772   G1CollectedHeap* _g1;
1773  public:
1774   G1CMIsAliveClosure(G1CollectedHeap* g1) :
1775     _g1(g1)
1776   {}
1777 
1778   void do_object(oop obj) {
1779     assert(false, "not to be invoked");
1780   }
1781   bool do_object_b(oop obj) {
1782     HeapWord* addr = (HeapWord*)obj;
1783     return addr != NULL &&
1784            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1785   }
1786 };
1787 
1788 class G1CMKeepAliveClosure: public OopClosure {
1789   G1CollectedHeap* _g1;
1790   ConcurrentMark*  _cm;
1791   CMBitMap*        _bitMap;
1792  public:
1793   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1794                        CMBitMap* bitMap) :
1795     _g1(g1), _cm(cm),
1796     _bitMap(bitMap) {}
1797 
1798   void do_oop(narrowOop* p) {
1799     guarantee(false, "NYI");
1800   }
1801 
1802   void do_oop(oop* p) {
1803     oop thisOop = *p;
1804     HeapWord* addr = (HeapWord*)thisOop;
1805     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1806       _bitMap->mark(addr);
1807       _cm->mark_stack_push(thisOop);
1808     }
1809   }
1810 };
1811 
1812 class G1CMDrainMarkingStackClosure: public VoidClosure {
1813   CMMarkStack*                  _markStack;
1814   CMBitMap*                     _bitMap;
1815   G1CMKeepAliveClosure*         _oopClosure;
1816  public:
1817   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1818                                G1CMKeepAliveClosure* oopClosure) :
1819     _bitMap(bitMap),
1820     _markStack(markStack),
1821     _oopClosure(oopClosure)
1822   {}
1823 
1824   void do_void() {
1825     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1826   }
1827 };
1828 
1829 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1830   ResourceMark rm;
1831   HandleMark   hm;
1832   ReferencePolicy* soft_ref_policy;
1833 
1834   // Process weak references.
1835   if (clear_all_soft_refs) {
1836     soft_ref_policy = new AlwaysClearPolicy();
1837   } else {
1838 #ifdef COMPILER2
1839     soft_ref_policy = new LRUMaxHeapPolicy();
1840 #else
1841     soft_ref_policy = new LRUCurrentHeapPolicy();
1842 #endif
1843   }
1844   assert(_markStack.isEmpty(), "mark stack should be empty");
1845 
1846   G1CollectedHeap* g1 = G1CollectedHeap::heap();
1847   G1CMIsAliveClosure g1IsAliveClosure(g1);
1848 
1849   G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap());
1850   G1CMDrainMarkingStackClosure
1851     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1852                                &g1KeepAliveClosure);
1853 
1854   // XXXYYY  Also: copy the parallel ref processing code from CMS.
1855   ReferenceProcessor* rp = g1->ref_processor();
1856   rp->process_discovered_references(soft_ref_policy,
1857                                     &g1IsAliveClosure,
1858                                     &g1KeepAliveClosure,
1859                                     &g1DrainMarkingStackClosure,
1860                                     NULL);
1861   assert(_markStack.overflow() || _markStack.isEmpty(),
1862          "mark stack should be empty (unless it overflowed)");
1863   if (_markStack.overflow()) {
1864     set_has_overflown();
1865   }
1866 
1867   rp->enqueue_discovered_references();
1868   rp->verify_no_references_recorded();
1869   assert(!rp->discovery_enabled(), "should have been disabled");
1870 
1871   // Now clean up stale oops in SymbolTable and StringTable
1872   SymbolTable::unlink(&g1IsAliveClosure);
1873   StringTable::unlink(&g1IsAliveClosure);
1874 }
1875 
1876 void ConcurrentMark::swapMarkBitMaps() {
1877   CMBitMapRO* temp = _prevMarkBitMap;
1878   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
1879   _nextMarkBitMap  = (CMBitMap*)  temp;
1880 }
1881 
1882 class CMRemarkTask: public AbstractGangTask {
1883 private:
1884   ConcurrentMark *_cm;
1885 
1886 public:
1887   void work(int worker_i) {
1888     // Since all available tasks are actually started, we should
1889     // only proceed if we're supposed to be actived.
1890     if ((size_t)worker_i < _cm->active_tasks()) {
1891       CMTask* task = _cm->task(worker_i);
1892       task->record_start_time();
1893       do {
1894         task->do_marking_step(1000000000.0 /* something very large */);
1895       } while (task->has_aborted() && !_cm->has_overflown());
1896       // If we overflow, then we do not want to restart. We instead
1897       // want to abort remark and do concurrent marking again.
1898       task->record_end_time();
1899     }
1900   }
1901 
1902   CMRemarkTask(ConcurrentMark* cm) :
1903     AbstractGangTask("Par Remark"), _cm(cm) { }
1904 };
1905 
1906 void ConcurrentMark::checkpointRootsFinalWork() {
1907   ResourceMark rm;
1908   HandleMark   hm;
1909   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1910 
1911   g1h->ensure_parsability(false);
1912 
1913   if (ParallelGCThreads > 0) {
1914     g1h->change_strong_roots_parity();
1915     // this is remark, so we'll use up all available threads
1916     int active_workers = ParallelGCThreads;
1917     set_phase(active_workers, false);
1918 
1919     CMRemarkTask remarkTask(this);
1920     // We will start all available threads, even if we decide that the
1921     // active_workers will be fewer. The extra ones will just bail out
1922     // immediately.
1923     int n_workers = g1h->workers()->total_workers();
1924     g1h->set_par_threads(n_workers);
1925     g1h->workers()->run_task(&remarkTask);
1926     g1h->set_par_threads(0);
1927 
1928     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1929     guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1930   } else {
1931     g1h->change_strong_roots_parity();
1932     // this is remark, so we'll use up all available threads
1933     int active_workers = 1;
1934     set_phase(active_workers, false);
1935 
1936     CMRemarkTask remarkTask(this);
1937     // We will start all available threads, even if we decide that the
1938     // active_workers will be fewer. The extra ones will just bail out
1939     // immediately.
1940     remarkTask.work(0);
1941 
1942     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1943     guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1944   }
1945 
1946   print_stats();
1947 
1948   if (!restart_for_overflow())
1949     set_non_marking_state();
1950 
1951 #if VERIFY_OBJS_PROCESSED
1952   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1953     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1954                            _scan_obj_cl.objs_processed,
1955                            ThreadLocalObjQueue::objs_enqueued);
1956     guarantee(_scan_obj_cl.objs_processed ==
1957               ThreadLocalObjQueue::objs_enqueued,
1958               "Different number of objs processed and enqueued.");
1959   }
1960 #endif
1961 }
1962 
1963 class ReachablePrinterOopClosure: public OopClosure {
1964 private:
1965   G1CollectedHeap* _g1h;
1966   CMBitMapRO*      _bitmap;
1967   outputStream*    _out;
1968 
1969 public:
1970   ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
1971     _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
1972 
1973   void do_oop(narrowOop* p) {
1974     guarantee(false, "NYI");
1975   }
1976 
1977   void do_oop(oop* p) {
1978     oop         obj = *p;
1979     const char* str = NULL;
1980     const char* str2 = "";
1981 
1982     if (!_g1h->is_in_g1_reserved(obj))
1983       str = "outside G1 reserved";
1984     else {
1985       HeapRegion* hr  = _g1h->heap_region_containing(obj);
1986       guarantee( hr != NULL, "invariant" );
1987       if (hr->obj_allocated_since_prev_marking(obj)) {
1988         str = "over TAMS";
1989         if (_bitmap->isMarked((HeapWord*) obj))
1990           str2 = " AND MARKED";
1991       } else if (_bitmap->isMarked((HeapWord*) obj))
1992         str = "marked";
1993       else
1994         str = "#### NOT MARKED ####";
1995     }
1996 
1997     _out->print_cr("    "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
1998                    p, (void*) obj, str, str2);
1999   }
2000 };
2001 
2002 class ReachablePrinterClosure: public BitMapClosure {
2003 private:
2004   CMBitMapRO* _bitmap;
2005   outputStream* _out;
2006 
2007 public:
2008   ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2009     _bitmap(bitmap), _out(out) { }
2010 
2011   bool do_bit(size_t offset) {
2012     HeapWord* addr = _bitmap->offsetToHeapWord(offset);
2013     ReachablePrinterOopClosure oopCl(_bitmap, _out);
2014 
2015     _out->print_cr("  obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
2016     oop(addr)->oop_iterate(&oopCl);
2017     _out->print_cr("");
2018 
2019     return true;
2020   }
2021 };
2022 
2023 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2024 private:
2025   CMBitMapRO* _bitmap;
2026   outputStream* _out;
2027 
2028 public:
2029   void do_object(oop o) {
2030     ReachablePrinterOopClosure oopCl(_bitmap, _out);
2031 
2032     _out->print_cr("  obj "PTR_FORMAT" (over TAMS)", (void*) o);
2033     o->oop_iterate(&oopCl);
2034     _out->print_cr("");
2035   }
2036 
2037   ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2038     _bitmap(bitmap), _out(out) { }
2039 };
2040 
2041 class RegionReachablePrinterClosure : public HeapRegionClosure {
2042 private:
2043   CMBitMapRO* _bitmap;
2044   outputStream* _out;
2045 
2046 public:
2047   bool doHeapRegion(HeapRegion* hr) {
2048     HeapWord* b = hr->bottom();
2049     HeapWord* e = hr->end();
2050     HeapWord* t = hr->top();
2051     HeapWord* p = hr->prev_top_at_mark_start();
2052     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2053                    "PTAMS: "PTR_FORMAT, b, e, t, p);
2054     _out->print_cr("");
2055 
2056     ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
2057     hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2058 
2059     return false;
2060   }
2061 
2062   RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2063                                 outputStream* out) :
2064     _bitmap(bitmap), _out(out) { }
2065 };
2066 
2067 void ConcurrentMark::print_prev_bitmap_reachable() {
2068   outputStream* out = gclog_or_tty;
2069 
2070 #if SEND_HEAP_DUMP_TO_FILE
2071   guarantee(heap_dump_file == NULL, "Protocol");
2072   char fn_buf[100];
2073   sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
2074   heap_dump_file = fopen(fn_buf, "w");
2075   fileStream fstream(heap_dump_file);
2076   out = &fstream;
2077 #endif // SEND_HEAP_DUMP_TO_FILE
2078 
2079   RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
2080   out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
2081   _g1h->heap_region_iterate(&rcl);
2082   out->print_cr("");
2083 
2084   ReachablePrinterClosure cl(_prevMarkBitMap, out);
2085   out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
2086   _prevMarkBitMap->iterate(&cl);
2087   out->print_cr("");
2088 
2089 #if SEND_HEAP_DUMP_TO_FILE
2090   fclose(heap_dump_file);
2091   heap_dump_file = NULL;
2092 #endif // SEND_HEAP_DUMP_TO_FILE
2093 }
2094 
2095 // This note is for drainAllSATBBuffers and the code in between.
2096 // In the future we could reuse a task to do this work during an
2097 // evacuation pause (since now tasks are not active and can be claimed
2098 // during an evacuation pause). This was a late change to the code and
2099 // is currently not being taken advantage of.
2100 
2101 class CMGlobalObjectClosure : public ObjectClosure {
2102 private:
2103   ConcurrentMark* _cm;
2104 
2105 public:
2106   void do_object(oop obj) {
2107     _cm->deal_with_reference(obj);
2108   }
2109 
2110   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2111 };
2112 
2113 void ConcurrentMark::deal_with_reference(oop obj) {
2114   if (verbose_high())
2115     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2116                            (void*) obj);
2117 
2118 
2119   HeapWord* objAddr = (HeapWord*) obj;
2120   if (_g1h->is_in_g1_reserved(objAddr)) {
2121     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2122     HeapRegion* hr = _g1h->heap_region_containing(obj);
2123     if (_g1h->is_obj_ill(obj, hr)) {
2124       if (verbose_high())
2125         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2126                                "marked", (void*) obj);
2127 
2128       // we need to mark it first
2129       if (_nextMarkBitMap->parMark(objAddr)) {
2130         // No OrderAccess:store_load() is needed. It is implicit in the
2131         // CAS done in parMark(objAddr) above
2132         HeapWord* finger = _finger;
2133         if (objAddr < finger) {
2134           if (verbose_high())
2135             gclog_or_tty->print_cr("[global] below the global finger "
2136                                    "("PTR_FORMAT"), pushing it", finger);
2137           if (!mark_stack_push(obj)) {
2138             if (verbose_low())
2139               gclog_or_tty->print_cr("[global] global stack overflow during "
2140                                      "deal_with_reference");
2141           }
2142         }
2143       }
2144     }
2145   }
2146 }
2147 
2148 void ConcurrentMark::drainAllSATBBuffers() {
2149   CMGlobalObjectClosure oc(this);
2150   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2151   satb_mq_set.set_closure(&oc);
2152 
2153   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2154     if (verbose_medium())
2155       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2156   }
2157 
2158   // no need to check whether we should do this, as this is only
2159   // called during an evacuation pause
2160   satb_mq_set.iterate_closure_all_threads();
2161 
2162   satb_mq_set.set_closure(NULL);
2163   guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
2164 }
2165 
2166 void ConcurrentMark::markPrev(oop p) {
2167   // Note we are overriding the read-only view of the prev map here, via
2168   // the cast.
2169   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2170 }
2171 
2172 void ConcurrentMark::clear(oop p) {
2173   assert(p != NULL && p->is_oop(), "expected an oop");
2174   HeapWord* addr = (HeapWord*)p;
2175   assert(addr >= _nextMarkBitMap->startWord() ||
2176          addr < _nextMarkBitMap->endWord(), "in a region");
2177 
2178   _nextMarkBitMap->clear(addr);
2179 }
2180 
2181 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2182   // Note we are overriding the read-only view of the prev map here, via
2183   // the cast.
2184   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2185   _nextMarkBitMap->clearRange(mr);
2186 }
2187 
2188 HeapRegion*
2189 ConcurrentMark::claim_region(int task_num) {
2190   // "checkpoint" the finger
2191   HeapWord* finger = _finger;
2192 
2193   // _heap_end will not change underneath our feet; it only changes at
2194   // yield points.
2195   while (finger < _heap_end) {
2196     tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
2197 
2198     // is the gap between reading the finger and doing the CAS too long?
2199 
2200     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
2201     HeapWord*   bottom        = curr_region->bottom();
2202     HeapWord*   end           = curr_region->end();
2203     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2204 
2205     if (verbose_low())
2206       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2207                              "["PTR_FORMAT", "PTR_FORMAT"), "
2208                              "limit = "PTR_FORMAT,
2209                              task_num, curr_region, bottom, end, limit);
2210 
2211     HeapWord* res =
2212       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2213     if (res == finger) {
2214       // we succeeded
2215 
2216       // notice that _finger == end cannot be guaranteed here since,
2217       // someone else might have moved the finger even further
2218       guarantee( _finger >= end, "the finger should have moved forward" );
2219 
2220       if (verbose_low())
2221         gclog_or_tty->print_cr("[%d] we were successful with region = "
2222                                PTR_FORMAT, task_num, curr_region);
2223 
2224       if (limit > bottom) {
2225         if (verbose_low())
2226           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2227                                  "returning it ", task_num, curr_region);
2228         return curr_region;
2229       } else {
2230         tmp_guarantee_CM( limit == bottom,
2231                           "the region limit should be at bottom" );
2232         if (verbose_low())
2233           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2234                                  "returning NULL", task_num, curr_region);
2235         // we return NULL and the caller should try calling
2236         // claim_region() again.
2237         return NULL;
2238       }
2239     } else {
2240       guarantee( _finger > finger, "the finger should have moved forward" );
2241       if (verbose_low())
2242         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2243                                "global finger = "PTR_FORMAT", "
2244                                "our finger = "PTR_FORMAT,
2245                                task_num, _finger, finger);
2246 
2247       // read it again
2248       finger = _finger;
2249     }
2250   }
2251 
2252   return NULL;
2253 }
2254 
2255 void ConcurrentMark::oops_do(OopClosure* cl) {
2256   if (_markStack.size() > 0 && verbose_low())
2257     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2258                            "size = %d", _markStack.size());
2259   // we first iterate over the contents of the mark stack...
2260   _markStack.oops_do(cl);
2261 
2262   for (int i = 0; i < (int)_max_task_num; ++i) {
2263     OopTaskQueue* queue = _task_queues->queue((int)i);
2264 
2265     if (queue->size() > 0 && verbose_low())
2266       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2267                              "size = %d", i, queue->size());
2268 
2269     // ...then over the contents of the all the task queues.
2270     queue->oops_do(cl);
2271   }
2272 
2273   // finally, invalidate any entries that in the region stack that
2274   // point into the collection set
2275   if (_regionStack.invalidate_entries_into_cset()) {
2276     // otherwise, any gray objects copied during the evacuation pause
2277     // might not be visited.
2278     guarantee( _should_gray_objects, "invariant" );
2279   }
2280 }
2281 
2282 void ConcurrentMark::clear_marking_state() {
2283   _markStack.setEmpty();
2284   _markStack.clear_overflow();
2285   _regionStack.setEmpty();
2286   _regionStack.clear_overflow();
2287   clear_has_overflown();
2288   _finger = _heap_start;
2289 
2290   for (int i = 0; i < (int)_max_task_num; ++i) {
2291     OopTaskQueue* queue = _task_queues->queue(i);
2292     queue->set_empty();
2293   }
2294 }
2295 
2296 void ConcurrentMark::print_stats() {
2297   if (verbose_stats()) {
2298     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2299     for (size_t i = 0; i < _active_tasks; ++i) {
2300       _tasks[i]->print_stats();
2301       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2302     }
2303   }
2304 }
2305 
2306 class CSMarkOopClosure: public OopClosure {
2307   friend class CSMarkBitMapClosure;
2308 
2309   G1CollectedHeap* _g1h;
2310   CMBitMap*        _bm;
2311   ConcurrentMark*  _cm;
2312   oop*             _ms;
2313   jint*            _array_ind_stack;
2314   int              _ms_size;
2315   int              _ms_ind;
2316   int              _array_increment;
2317 
2318   bool push(oop obj, int arr_ind = 0) {
2319     if (_ms_ind == _ms_size) {
2320       gclog_or_tty->print_cr("Mark stack is full.");
2321       return false;
2322     }
2323     _ms[_ms_ind] = obj;
2324     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2325     _ms_ind++;
2326     return true;
2327   }
2328 
2329   oop pop() {
2330     if (_ms_ind == 0) return NULL;
2331     else {
2332       _ms_ind--;
2333       return _ms[_ms_ind];
2334     }
2335   }
2336 
2337   bool drain() {
2338     while (_ms_ind > 0) {
2339       oop obj = pop();
2340       assert(obj != NULL, "Since index was non-zero.");
2341       if (obj->is_objArray()) {
2342         jint arr_ind = _array_ind_stack[_ms_ind];
2343         objArrayOop aobj = objArrayOop(obj);
2344         jint len = aobj->length();
2345         jint next_arr_ind = arr_ind + _array_increment;
2346         if (next_arr_ind < len) {
2347           push(obj, next_arr_ind);
2348         }
2349         // Now process this portion of this one.
2350         int lim = MIN2(next_arr_ind, len);
2351         assert(!UseCompressedOops, "This needs to be fixed");
2352         for (int j = arr_ind; j < lim; j++) {
2353           do_oop(aobj->obj_at_addr<oop>(j));
2354         }
2355 
2356       } else {
2357         obj->oop_iterate(this);
2358       }
2359       if (abort()) return false;
2360     }
2361     return true;
2362   }
2363 
2364 public:
2365   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2366     _g1h(G1CollectedHeap::heap()),
2367     _cm(cm),
2368     _bm(cm->nextMarkBitMap()),
2369     _ms_size(ms_size), _ms_ind(0),
2370     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2371     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2372     _array_increment(MAX2(ms_size/8, 16))
2373   {}
2374 
2375   ~CSMarkOopClosure() {
2376     FREE_C_HEAP_ARRAY(oop, _ms);
2377     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2378   }
2379 
2380   void do_oop(narrowOop* p) {
2381     guarantee(false, "NYI");
2382   }
2383 
2384   void do_oop(oop* p) {
2385     oop obj = *p;
2386     if (obj == NULL) return;
2387     if (obj->is_forwarded()) {
2388       // If the object has already been forwarded, we have to make sure
2389       // that it's marked.  So follow the forwarding pointer.  Note that
2390       // this does the right thing for self-forwarding pointers in the
2391       // evacuation failure case.
2392       obj = obj->forwardee();
2393     }
2394     HeapRegion* hr = _g1h->heap_region_containing(obj);
2395     if (hr != NULL) {
2396       if (hr->in_collection_set()) {
2397         if (_g1h->is_obj_ill(obj)) {
2398           _bm->mark((HeapWord*)obj);
2399           if (!push(obj)) {
2400             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2401             set_abort();
2402           }
2403         }
2404       } else {
2405         // Outside the collection set; we need to gray it
2406         _cm->deal_with_reference(obj);
2407       }
2408     }
2409   }
2410 };
2411 
2412 class CSMarkBitMapClosure: public BitMapClosure {
2413   G1CollectedHeap* _g1h;
2414   CMBitMap*        _bitMap;
2415   ConcurrentMark*  _cm;
2416   CSMarkOopClosure _oop_cl;
2417 public:
2418   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2419     _g1h(G1CollectedHeap::heap()),
2420     _bitMap(cm->nextMarkBitMap()),
2421     _oop_cl(cm, ms_size)
2422   {}
2423 
2424   ~CSMarkBitMapClosure() {}
2425 
2426   bool do_bit(size_t offset) {
2427     // convert offset into a HeapWord*
2428     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2429     assert(_bitMap->endWord() && addr < _bitMap->endWord(),
2430            "address out of range");
2431     assert(_bitMap->isMarked(addr), "tautology");
2432     oop obj = oop(addr);
2433     if (!obj->is_forwarded()) {
2434       if (!_oop_cl.push(obj)) return false;
2435       if (!_oop_cl.drain()) return false;
2436     }
2437     // Otherwise...
2438     return true;
2439   }
2440 };
2441 
2442 
2443 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2444   CMBitMap* _bm;
2445   CSMarkBitMapClosure _bit_cl;
2446   enum SomePrivateConstants {
2447     MSSize = 1000
2448   };
2449   bool _completed;
2450 public:
2451   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2452     _bm(cm->nextMarkBitMap()),
2453     _bit_cl(cm, MSSize),
2454     _completed(true)
2455   {}
2456 
2457   ~CompleteMarkingInCSHRClosure() {}
2458 
2459   bool doHeapRegion(HeapRegion* r) {
2460     if (!r->evacuation_failed()) {
2461       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2462       if (!mr.is_empty()) {
2463         if (!_bm->iterate(&_bit_cl, mr)) {
2464           _completed = false;
2465           return true;
2466         }
2467       }
2468     }
2469     return false;
2470   }
2471 
2472   bool completed() { return _completed; }
2473 };
2474 
2475 class ClearMarksInHRClosure: public HeapRegionClosure {
2476   CMBitMap* _bm;
2477 public:
2478   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2479 
2480   bool doHeapRegion(HeapRegion* r) {
2481     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2482       MemRegion usedMR = r->used_region();
2483       _bm->clearRange(r->used_region());
2484     }
2485     return false;
2486   }
2487 };
2488 
2489 void ConcurrentMark::complete_marking_in_collection_set() {
2490   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
2491 
2492   if (!g1h->mark_in_progress()) {
2493     g1h->g1_policy()->record_mark_closure_time(0.0);
2494     return;
2495   }
2496 
2497   int i = 1;
2498   double start = os::elapsedTime();
2499   while (true) {
2500     i++;
2501     CompleteMarkingInCSHRClosure cmplt(this);
2502     g1h->collection_set_iterate(&cmplt);
2503     if (cmplt.completed()) break;
2504   }
2505   double end_time = os::elapsedTime();
2506   double elapsed_time_ms = (end_time - start) * 1000.0;
2507   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2508   if (PrintGCDetails) {
2509     gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2510   }
2511 
2512   ClearMarksInHRClosure clr(nextMarkBitMap());
2513   g1h->collection_set_iterate(&clr);
2514 }
2515 
2516 // The next two methods deal with the following optimisation. Some
2517 // objects are gray by being marked and located above the finger. If
2518 // they are copied, during an evacuation pause, below the finger then
2519 // the need to be pushed on the stack. The observation is that, if
2520 // there are no regions in the collection set located above the
2521 // finger, then the above cannot happen, hence we do not need to
2522 // explicitly gray any objects when copying them to below the
2523 // finger. The global stack will be scanned to ensure that, if it
2524 // points to objects being copied, it will update their
2525 // location. There is a tricky situation with the gray objects in
2526 // region stack that are being coped, however. See the comment in
2527 // newCSet().
2528 
2529 void ConcurrentMark::newCSet() {
2530   if (!concurrent_marking_in_progress())
2531     // nothing to do if marking is not in progress
2532     return;
2533 
2534   // find what the lowest finger is among the global and local fingers
2535   _min_finger = _finger;
2536   for (int i = 0; i < (int)_max_task_num; ++i) {
2537     CMTask* task = _tasks[i];
2538     HeapWord* task_finger = task->finger();
2539     if (task_finger != NULL && task_finger < _min_finger)
2540       _min_finger = task_finger;
2541   }
2542 
2543   _should_gray_objects = false;
2544 
2545   // This fixes a very subtle and fustrating bug. It might be the case
2546   // that, during en evacuation pause, heap regions that contain
2547   // objects that are gray (by being in regions contained in the
2548   // region stack) are included in the collection set. Since such gray
2549   // objects will be moved, and because it's not easy to redirect
2550   // region stack entries to point to a new location (because objects
2551   // in one region might be scattered to multiple regions after they
2552   // are copied), one option is to ensure that all marked objects
2553   // copied during a pause are pushed on the stack. Notice, however,
2554   // that this problem can only happen when the region stack is not
2555   // empty during an evacuation pause. So, we make the fix a bit less
2556   // conservative and ensure that regions are pushed on the stack,
2557   // irrespective whether all collection set regions are below the
2558   // finger, if the region stack is not empty. This is expected to be
2559   // a rare case, so I don't think it's necessary to be smarted about it.
2560   if (!region_stack_empty())
2561     _should_gray_objects = true;
2562 }
2563 
2564 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2565   if (!concurrent_marking_in_progress())
2566     return;
2567 
2568   HeapWord* region_end = hr->end();
2569   if (region_end > _min_finger)
2570     _should_gray_objects = true;
2571 }
2572 
2573 void ConcurrentMark::disable_co_trackers() {
2574   if (has_aborted()) {
2575     if (_cleanup_co_tracker.enabled())
2576       _cleanup_co_tracker.disable();
2577     for (int i = 0; i < (int)_max_task_num; ++i) {
2578       CMTask* task = _tasks[i];
2579       if (task->co_tracker_enabled())
2580         task->disable_co_tracker();
2581     }
2582   } else {
2583     guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
2584     for (int i = 0; i < (int)_max_task_num; ++i) {
2585       CMTask* task = _tasks[i];
2586       guarantee( !task->co_tracker_enabled(), "invariant" );
2587     }
2588   }
2589 }
2590 
2591 // abandon current marking iteration due to a Full GC
2592 void ConcurrentMark::abort() {
2593   // If we're not marking, nothing to do.
2594   if (!G1ConcMark) return;
2595 
2596   // Clear all marks to force marking thread to do nothing
2597   _nextMarkBitMap->clearAll();
2598   // Empty mark stack
2599   clear_marking_state();
2600   for (int i = 0; i < (int)_max_task_num; ++i)
2601     _tasks[i]->clear_region_fields();
2602   _has_aborted = true;
2603 
2604   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2605   satb_mq_set.abandon_partial_marking();
2606   satb_mq_set.set_active_all_threads(false);
2607 }
2608 
2609 static void print_ms_time_info(const char* prefix, const char* name,
2610                                NumberSeq& ns) {
2611   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2612                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2613   if (ns.num() > 0) {
2614     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2615                            prefix, ns.sd(), ns.maximum());
2616   }
2617 }
2618 
2619 void ConcurrentMark::print_summary_info() {
2620   gclog_or_tty->print_cr(" Concurrent marking:");
2621   print_ms_time_info("  ", "init marks", _init_times);
2622   print_ms_time_info("  ", "remarks", _remark_times);
2623   {
2624     print_ms_time_info("     ", "final marks", _remark_mark_times);
2625     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2626 
2627   }
2628   print_ms_time_info("  ", "cleanups", _cleanup_times);
2629   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2630                          _total_counting_time,
2631                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2632                           (double)_cleanup_times.num()
2633                          : 0.0));
2634   if (G1ScrubRemSets) {
2635     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2636                            _total_rs_scrub_time,
2637                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2638                             (double)_cleanup_times.num()
2639                            : 0.0));
2640   }
2641   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2642                          (_init_times.sum() + _remark_times.sum() +
2643                           _cleanup_times.sum())/1000.0);
2644   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2645                 "(%8.2f s marking, %8.2f s counting).",
2646                 cmThread()->vtime_accum(),
2647                 cmThread()->vtime_mark_accum(),
2648                 cmThread()->vtime_count_accum());
2649 }
2650 
2651 // Closures
2652 // XXX: there seems to be a lot of code  duplication here;
2653 // should refactor and consolidate the shared code.
2654 
2655 // This closure is used to mark refs into the CMS generation in
2656 // the CMS bit map. Called at the first checkpoint.
2657 
2658 // We take a break if someone is trying to stop the world.
2659 bool ConcurrentMark::do_yield_check(int worker_i) {
2660   if (should_yield()) {
2661     if (worker_i == 0)
2662       _g1h->g1_policy()->record_concurrent_pause();
2663     cmThread()->yield();
2664     if (worker_i == 0)
2665       _g1h->g1_policy()->record_concurrent_pause_end();
2666     return true;
2667   } else {
2668     return false;
2669   }
2670 }
2671 
2672 bool ConcurrentMark::should_yield() {
2673   return cmThread()->should_yield();
2674 }
2675 
2676 bool ConcurrentMark::containing_card_is_marked(void* p) {
2677   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2678   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2679 }
2680 
2681 bool ConcurrentMark::containing_cards_are_marked(void* start,
2682                                                  void* last) {
2683   return
2684     containing_card_is_marked(start) &&
2685     containing_card_is_marked(last);
2686 }
2687 
2688 #ifndef PRODUCT
2689 // for debugging purposes
2690 void ConcurrentMark::print_finger() {
2691   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2692                          _heap_start, _heap_end, _finger);
2693   for (int i = 0; i < (int) _max_task_num; ++i) {
2694     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
2695   }
2696   gclog_or_tty->print_cr("");
2697 }
2698 #endif
2699 
2700 // Closure for iteration over bitmaps
2701 class CMBitMapClosure : public BitMapClosure {
2702 private:
2703   // the bitmap that is being iterated over
2704   CMBitMap*                   _nextMarkBitMap;
2705   ConcurrentMark*             _cm;
2706   CMTask*                     _task;
2707   // true if we're scanning a heap region claimed by the task (so that
2708   // we move the finger along), false if we're not, i.e. currently when
2709   // scanning a heap region popped from the region stack (so that we
2710   // do not move the task finger along; it'd be a mistake if we did so).
2711   bool                        _scanning_heap_region;
2712 
2713 public:
2714   CMBitMapClosure(CMTask *task,
2715                   ConcurrentMark* cm,
2716                   CMBitMap* nextMarkBitMap)
2717     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2718 
2719   void set_scanning_heap_region(bool scanning_heap_region) {
2720     _scanning_heap_region = scanning_heap_region;
2721   }
2722 
2723   bool do_bit(size_t offset) {
2724     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2725     tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
2726     tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
2727 
2728     if (_scanning_heap_region) {
2729       statsOnly( _task->increase_objs_found_on_bitmap() );
2730       tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
2731       // We move that task's local finger along.
2732       _task->move_finger_to(addr);
2733     } else {
2734       // We move the task's region finger along.
2735       _task->move_region_finger_to(addr);
2736     }
2737 
2738     _task->scan_object(oop(addr));
2739     // we only partially drain the local queue and global stack
2740     _task->drain_local_queue(true);
2741     _task->drain_global_stack(true);
2742 
2743     // if the has_aborted flag has been raised, we need to bail out of
2744     // the iteration
2745     return !_task->has_aborted();
2746   }
2747 };
2748 
2749 // Closure for iterating over objects, currently only used for
2750 // processing SATB buffers.
2751 class CMObjectClosure : public ObjectClosure {
2752 private:
2753   CMTask* _task;
2754 
2755 public:
2756   void do_object(oop obj) {
2757     _task->deal_with_reference(obj);
2758   }
2759 
2760   CMObjectClosure(CMTask* task) : _task(task) { }
2761 };
2762 
2763 // Closure for iterating over object fields
2764 class CMOopClosure : public OopClosure {
2765 private:
2766   G1CollectedHeap*   _g1h;
2767   ConcurrentMark*    _cm;
2768   CMTask*            _task;
2769 
2770 public:
2771   void do_oop(narrowOop* p) {
2772     guarantee(false, "NYI");
2773   }
2774 
2775   void do_oop(oop* p) {
2776     tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
2777 
2778     oop obj = *p;
2779     if (_cm->verbose_high())
2780       gclog_or_tty->print_cr("[%d] we're looking at location "
2781                              "*"PTR_FORMAT" = "PTR_FORMAT,
2782                              _task->task_id(), p, (void*) obj);
2783     _task->deal_with_reference(obj);
2784   }
2785 
2786   CMOopClosure(G1CollectedHeap* g1h,
2787                ConcurrentMark* cm,
2788                CMTask* task)
2789     : _g1h(g1h), _cm(cm), _task(task) { }
2790 };
2791 
2792 void CMTask::setup_for_region(HeapRegion* hr) {
2793   tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
2794       "claim_region() should have filtered out continues humongous regions" );
2795 
2796   if (_cm->verbose_low())
2797     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2798                            _task_id, hr);
2799 
2800   _curr_region  = hr;
2801   _finger       = hr->bottom();
2802   update_region_limit();
2803 }
2804 
2805 void CMTask::update_region_limit() {
2806   HeapRegion* hr            = _curr_region;
2807   HeapWord* bottom          = hr->bottom();
2808   HeapWord* limit           = hr->next_top_at_mark_start();
2809 
2810   if (limit == bottom) {
2811     if (_cm->verbose_low())
2812       gclog_or_tty->print_cr("[%d] found an empty region "
2813                              "["PTR_FORMAT", "PTR_FORMAT")",
2814                              _task_id, bottom, limit);
2815     // The region was collected underneath our feet.
2816     // We set the finger to bottom to ensure that the bitmap
2817     // iteration that will follow this will not do anything.
2818     // (this is not a condition that holds when we set the region up,
2819     // as the region is not supposed to be empty in the first place)
2820     _finger = bottom;
2821   } else if (limit >= _region_limit) {
2822     tmp_guarantee_CM( limit >= _finger, "peace of mind" );
2823   } else {
2824     tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
2825     // This can happen under some pretty unusual circumstances.  An
2826     // evacuation pause empties the region underneath our feet (NTAMS
2827     // at bottom). We then do some allocation in the region (NTAMS
2828     // stays at bottom), followed by the region being used as a GC
2829     // alloc region (NTAMS will move to top() and the objects
2830     // originally below it will be grayed). All objects now marked in
2831     // the region are explicitly grayed, if below the global finger,
2832     // and we do not need in fact to scan anything else. So, we simply
2833     // set _finger to be limit to ensure that the bitmap iteration
2834     // doesn't do anything.
2835     _finger = limit;
2836   }
2837 
2838   _region_limit = limit;
2839 }
2840 
2841 void CMTask::giveup_current_region() {
2842   tmp_guarantee_CM( _curr_region != NULL, "invariant" );
2843   if (_cm->verbose_low())
2844     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2845                            _task_id, _curr_region);
2846   clear_region_fields();
2847 }
2848 
2849 void CMTask::clear_region_fields() {
2850   // Values for these three fields that indicate that we're not
2851   // holding on to a region.
2852   _curr_region   = NULL;
2853   _finger        = NULL;
2854   _region_limit  = NULL;
2855 
2856   _region_finger = NULL;
2857 }
2858 
2859 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2860   guarantee( nextMarkBitMap != NULL, "invariant" );
2861 
2862   if (_cm->verbose_low())
2863     gclog_or_tty->print_cr("[%d] resetting", _task_id);
2864 
2865   _nextMarkBitMap                = nextMarkBitMap;
2866   clear_region_fields();
2867 
2868   _calls                         = 0;
2869   _elapsed_time_ms               = 0.0;
2870   _termination_time_ms           = 0.0;
2871   _termination_start_time_ms     = 0.0;
2872 
2873 #if _MARKING_STATS_
2874   _local_pushes                  = 0;
2875   _local_pops                    = 0;
2876   _local_max_size                = 0;
2877   _objs_scanned                  = 0;
2878   _global_pushes                 = 0;
2879   _global_pops                   = 0;
2880   _global_max_size               = 0;
2881   _global_transfers_to           = 0;
2882   _global_transfers_from         = 0;
2883   _region_stack_pops             = 0;
2884   _regions_claimed               = 0;
2885   _objs_found_on_bitmap          = 0;
2886   _satb_buffers_processed        = 0;
2887   _steal_attempts                = 0;
2888   _steals                        = 0;
2889   _aborted                       = 0;
2890   _aborted_overflow              = 0;
2891   _aborted_cm_aborted            = 0;
2892   _aborted_yield                 = 0;
2893   _aborted_timed_out             = 0;
2894   _aborted_satb                  = 0;
2895   _aborted_termination           = 0;
2896 #endif // _MARKING_STATS_
2897 }
2898 
2899 bool CMTask::should_exit_termination() {
2900   regular_clock_call();
2901   // This is called when we are in the termination protocol. We should
2902   // quit if, for some reason, this task wants to abort or the global
2903   // stack is not empty (this means that we can get work from it).
2904   return !_cm->mark_stack_empty() || has_aborted();
2905 }
2906 
2907 // This determines whether the method below will check both the local
2908 // and global fingers when determining whether to push on the stack a
2909 // gray object (value 1) or whether it will only check the global one
2910 // (value 0). The tradeoffs are that the former will be a bit more
2911 // accurate and possibly push less on the stack, but it might also be
2912 // a little bit slower.
2913 
2914 #define _CHECK_BOTH_FINGERS_      1
2915 
2916 void CMTask::deal_with_reference(oop obj) {
2917   if (_cm->verbose_high())
2918     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2919                            _task_id, (void*) obj);
2920 
2921   ++_refs_reached;
2922 
2923   HeapWord* objAddr = (HeapWord*) obj;
2924   if (_g1h->is_in_g1_reserved(objAddr)) {
2925     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2926     HeapRegion* hr =  _g1h->heap_region_containing(obj);
2927     if (_g1h->is_obj_ill(obj, hr)) {
2928       if (_cm->verbose_high())
2929         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2930                                _task_id, (void*) obj);
2931 
2932       // we need to mark it first
2933       if (_nextMarkBitMap->parMark(objAddr)) {
2934         // No OrderAccess:store_load() is needed. It is implicit in the
2935         // CAS done in parMark(objAddr) above
2936         HeapWord* global_finger = _cm->finger();
2937 
2938 #if _CHECK_BOTH_FINGERS_
2939         // we will check both the local and global fingers
2940 
2941         if (_finger != NULL && objAddr < _finger) {
2942           if (_cm->verbose_high())
2943             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2944                                    "pushing it", _task_id, _finger);
2945           push(obj);
2946         } else if (_curr_region != NULL && objAddr < _region_limit) {
2947           // do nothing
2948         } else if (objAddr < global_finger) {
2949           // Notice that the global finger might be moving forward
2950           // concurrently. This is not a problem. In the worst case, we
2951           // mark the object while it is above the global finger and, by
2952           // the time we read the global finger, it has moved forward
2953           // passed this object. In this case, the object will probably
2954           // be visited when a task is scanning the region and will also
2955           // be pushed on the stack. So, some duplicate work, but no
2956           // correctness problems.
2957 
2958           if (_cm->verbose_high())
2959             gclog_or_tty->print_cr("[%d] below the global finger "
2960                                    "("PTR_FORMAT"), pushing it",
2961                                    _task_id, global_finger);
2962           push(obj);
2963         } else {
2964           // do nothing
2965         }
2966 #else // _CHECK_BOTH_FINGERS_
2967       // we will only check the global finger
2968 
2969         if (objAddr < global_finger) {
2970           // see long comment above
2971 
2972           if (_cm->verbose_high())
2973             gclog_or_tty->print_cr("[%d] below the global finger "
2974                                    "("PTR_FORMAT"), pushing it",
2975                                    _task_id, global_finger);
2976           push(obj);
2977         }
2978 #endif // _CHECK_BOTH_FINGERS_
2979       }
2980     }
2981   }
2982 }
2983 
2984 void CMTask::push(oop obj) {
2985   HeapWord* objAddr = (HeapWord*) obj;
2986   tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
2987   tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
2988   tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
2989 
2990   if (_cm->verbose_high())
2991     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
2992 
2993   if (!_task_queue->push(obj)) {
2994     // The local task queue looks full. We need to push some entries
2995     // to the global stack.
2996 
2997     if (_cm->verbose_medium())
2998       gclog_or_tty->print_cr("[%d] task queue overflow, "
2999                              "moving entries to the global stack",
3000                              _task_id);
3001     move_entries_to_global_stack();
3002 
3003     // this should succeed since, even if we overflow the global
3004     // stack, we should have definitely removed some entries from the
3005     // local queue. So, there must be space on it.
3006     bool success = _task_queue->push(obj);
3007     tmp_guarantee_CM( success, "invariant" );
3008   }
3009 
3010   statsOnly( int tmp_size = _task_queue->size();
3011              if (tmp_size > _local_max_size)
3012                _local_max_size = tmp_size;
3013              ++_local_pushes );
3014 }
3015 
3016 void CMTask::reached_limit() {
3017   tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
3018                     _refs_reached >= _refs_reached_limit ,
3019                  "shouldn't have been called otherwise" );
3020   regular_clock_call();
3021 }
3022 
3023 void CMTask::regular_clock_call() {
3024   if (has_aborted())
3025     return;
3026 
3027   // First, we need to recalculate the words scanned and refs reached
3028   // limits for the next clock call.
3029   recalculate_limits();
3030 
3031   // During the regular clock call we do the following
3032 
3033   // (1) If an overflow has been flagged, then we abort.
3034   if (_cm->has_overflown()) {
3035     set_has_aborted();
3036     return;
3037   }
3038 
3039   // If we are not concurrent (i.e. we're doing remark) we don't need
3040   // to check anything else. The other steps are only needed during
3041   // the concurrent marking phase.
3042   if (!concurrent())
3043     return;
3044 
3045   // (2) If marking has been aborted for Full GC, then we also abort.
3046   if (_cm->has_aborted()) {
3047     set_has_aborted();
3048     statsOnly( ++_aborted_cm_aborted );
3049     return;
3050   }
3051 
3052   double curr_time_ms = os::elapsedVTime() * 1000.0;
3053 
3054   // (3) If marking stats are enabled, then we update the step history.
3055 #if _MARKING_STATS_
3056   if (_words_scanned >= _words_scanned_limit)
3057     ++_clock_due_to_scanning;
3058   if (_refs_reached >= _refs_reached_limit)
3059     ++_clock_due_to_marking;
3060 
3061   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3062   _interval_start_time_ms = curr_time_ms;
3063   _all_clock_intervals_ms.add(last_interval_ms);
3064 
3065   if (_cm->verbose_medium()) {
3066     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3067                            "scanned = %d%s, refs reached = %d%s",
3068                            _task_id, last_interval_ms,
3069                            _words_scanned,
3070                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3071                            _refs_reached,
3072                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3073   }
3074 #endif // _MARKING_STATS_
3075 
3076   // (4) We check whether we should yield. If we have to, then we abort.
3077   if (_cm->should_yield()) {
3078     // We should yield. To do this we abort the task. The caller is
3079     // responsible for yielding.
3080     set_has_aborted();
3081     statsOnly( ++_aborted_yield );
3082     return;
3083   }
3084 
3085   // (5) We check whether we've reached our time quota. If we have,
3086   // then we abort.
3087   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3088   if (elapsed_time_ms > _time_target_ms) {
3089     set_has_aborted();
3090     _has_aborted_timed_out = true;
3091     statsOnly( ++_aborted_timed_out );
3092     return;
3093   }
3094 
3095   // (6) Finally, we check whether there are enough completed STAB
3096   // buffers available for processing. If there are, we abort.
3097   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3098   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3099     if (_cm->verbose_low())
3100       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3101                              _task_id);
3102     // we do need to process SATB buffers, we'll abort and restart
3103     // the marking task to do so
3104     set_has_aborted();
3105     statsOnly( ++_aborted_satb );
3106     return;
3107   }
3108 }
3109 
3110 void CMTask::recalculate_limits() {
3111   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3112   _words_scanned_limit      = _real_words_scanned_limit;
3113 
3114   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3115   _refs_reached_limit       = _real_refs_reached_limit;
3116 }
3117 
3118 void CMTask::decrease_limits() {
3119   // This is called when we believe that we're going to do an infrequent
3120   // operation which will increase the per byte scanned cost (i.e. move
3121   // entries to/from the global stack). It basically tries to decrease the
3122   // scanning limit so that the clock is called earlier.
3123 
3124   if (_cm->verbose_medium())
3125     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3126 
3127   _words_scanned_limit = _real_words_scanned_limit -
3128     3 * words_scanned_period / 4;
3129   _refs_reached_limit  = _real_refs_reached_limit -
3130     3 * refs_reached_period / 4;
3131 }
3132 
3133 void CMTask::move_entries_to_global_stack() {
3134   // local array where we'll store the entries that will be popped
3135   // from the local queue
3136   oop buffer[global_stack_transfer_size];
3137 
3138   int n = 0;
3139   oop obj;
3140   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3141     buffer[n] = obj;
3142     ++n;
3143   }
3144 
3145   if (n > 0) {
3146     // we popped at least one entry from the local queue
3147 
3148     statsOnly( ++_global_transfers_to; _local_pops += n );
3149 
3150     if (!_cm->mark_stack_push(buffer, n)) {
3151       if (_cm->verbose_low())
3152         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3153       set_has_aborted();
3154     } else {
3155       // the transfer was successful
3156 
3157       if (_cm->verbose_medium())
3158         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3159                                _task_id, n);
3160       statsOnly( int tmp_size = _cm->mark_stack_size();
3161                  if (tmp_size > _global_max_size)
3162                    _global_max_size = tmp_size;
3163                  _global_pushes += n );
3164     }
3165   }
3166 
3167   // this operation was quite expensive, so decrease the limits
3168   decrease_limits();
3169 }
3170 
3171 void CMTask::get_entries_from_global_stack() {
3172   // local array where we'll store the entries that will be popped
3173   // from the global stack.
3174   oop buffer[global_stack_transfer_size];
3175   int n;
3176   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3177   tmp_guarantee_CM( n <= global_stack_transfer_size,
3178                     "we should not pop more than the given limit" );
3179   if (n > 0) {
3180     // yes, we did actually pop at least one entry
3181 
3182     statsOnly( ++_global_transfers_from; _global_pops += n );
3183     if (_cm->verbose_medium())
3184       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3185                              _task_id, n);
3186     for (int i = 0; i < n; ++i) {
3187       bool success = _task_queue->push(buffer[i]);
3188       // We only call this when the local queue is empty or under a
3189       // given target limit. So, we do not expect this push to fail.
3190       tmp_guarantee_CM( success, "invariant" );
3191     }
3192 
3193     statsOnly( int tmp_size = _task_queue->size();
3194                if (tmp_size > _local_max_size)
3195                  _local_max_size = tmp_size;
3196                _local_pushes += n );
3197   }
3198 
3199   // this operation was quite expensive, so decrease the limits
3200   decrease_limits();
3201 }
3202 
3203 void CMTask::drain_local_queue(bool partially) {
3204   if (has_aborted())
3205     return;
3206 
3207   // Decide what the target size is, depending whether we're going to
3208   // drain it partially (so that other tasks can steal if they run out
3209   // of things to do) or totally (at the very end).
3210   size_t target_size;
3211   if (partially)
3212     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3213   else
3214     target_size = 0;
3215 
3216   if (_task_queue->size() > target_size) {
3217     if (_cm->verbose_high())
3218       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3219                              _task_id, target_size);
3220 
3221     oop obj;
3222     bool ret = _task_queue->pop_local(obj);
3223     while (ret) {
3224       statsOnly( ++_local_pops );
3225 
3226       if (_cm->verbose_high())
3227         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3228                                (void*) obj);
3229 
3230       tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
3231                         "invariant" );
3232 
3233       scan_object(obj);
3234 
3235       if (_task_queue->size() <= target_size || has_aborted())
3236         ret = false;
3237       else
3238         ret = _task_queue->pop_local(obj);
3239     }
3240 
3241     if (_cm->verbose_high())
3242       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3243                              _task_id, _task_queue->size());
3244   }
3245 }
3246 
3247 void CMTask::drain_global_stack(bool partially) {
3248   if (has_aborted())
3249     return;
3250 
3251   // We have a policy to drain the local queue before we attempt to
3252   // drain the global stack.
3253   tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
3254 
3255   // Decide what the target size is, depending whether we're going to
3256   // drain it partially (so that other tasks can steal if they run out
3257   // of things to do) or totally (at the very end).  Notice that,
3258   // because we move entries from the global stack in chunks or
3259   // because another task might be doing the same, we might in fact
3260   // drop below the target. But, this is not a problem.
3261   size_t target_size;
3262   if (partially)
3263     target_size = _cm->partial_mark_stack_size_target();
3264   else
3265     target_size = 0;
3266 
3267   if (_cm->mark_stack_size() > target_size) {
3268     if (_cm->verbose_low())
3269       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3270                              _task_id, target_size);
3271 
3272     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3273       get_entries_from_global_stack();
3274       drain_local_queue(partially);
3275     }
3276 
3277     if (_cm->verbose_low())
3278       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3279                              _task_id, _cm->mark_stack_size());
3280   }
3281 }
3282 
3283 // SATB Queue has several assumptions on whether to call the par or
3284 // non-par versions of the methods. this is why some of the code is
3285 // replicated. We should really get rid of the single-threaded version
3286 // of the code to simplify things.
3287 void CMTask::drain_satb_buffers() {
3288   if (has_aborted())
3289     return;
3290 
3291   // We set this so that the regular clock knows that we're in the
3292   // middle of draining buffers and doesn't set the abort flag when it
3293   // notices that SATB buffers are available for draining. It'd be
3294   // very counter productive if it did that. :-)
3295   _draining_satb_buffers = true;
3296 
3297   CMObjectClosure oc(this);
3298   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3299   if (ParallelGCThreads > 0)
3300     satb_mq_set.set_par_closure(_task_id, &oc);
3301   else
3302     satb_mq_set.set_closure(&oc);
3303 
3304   // This keeps claiming and applying the closure to completed buffers
3305   // until we run out of buffers or we need to abort.
3306   if (ParallelGCThreads > 0) {
3307     while (!has_aborted() &&
3308            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3309       if (_cm->verbose_medium())
3310         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3311       statsOnly( ++_satb_buffers_processed );
3312       regular_clock_call();
3313     }
3314   } else {
3315     while (!has_aborted() &&
3316            satb_mq_set.apply_closure_to_completed_buffer()) {
3317       if (_cm->verbose_medium())
3318         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3319       statsOnly( ++_satb_buffers_processed );
3320       regular_clock_call();
3321     }
3322   }
3323 
3324   if (!concurrent() && !has_aborted()) {
3325     // We should only do this during remark.
3326     if (ParallelGCThreads > 0)
3327       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3328     else
3329       satb_mq_set.iterate_closure_all_threads();
3330   }
3331 
3332   _draining_satb_buffers = false;
3333 
3334   tmp_guarantee_CM( has_aborted() ||
3335                     concurrent() ||
3336                     satb_mq_set.completed_buffers_num() == 0, "invariant" );
3337 
3338   if (ParallelGCThreads > 0)
3339     satb_mq_set.set_par_closure(_task_id, NULL);
3340   else
3341     satb_mq_set.set_closure(NULL);
3342 
3343   // again, this was a potentially expensive operation, decrease the
3344   // limits to get the regular clock call early
3345   decrease_limits();
3346 }
3347 
3348 void CMTask::drain_region_stack(BitMapClosure* bc) {
3349   if (has_aborted())
3350     return;
3351 
3352   tmp_guarantee_CM( _region_finger == NULL,
3353                     "it should be NULL when we're not scanning a region" );
3354 
3355   if (!_cm->region_stack_empty()) {
3356     if (_cm->verbose_low())
3357       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3358                              _task_id, _cm->region_stack_size());
3359 
3360     MemRegion mr = _cm->region_stack_pop();
3361     // it returns MemRegion() if the pop fails
3362     statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3363 
3364     while (mr.start() != NULL) {
3365       if (_cm->verbose_medium())
3366         gclog_or_tty->print_cr("[%d] we are scanning region "
3367                                "["PTR_FORMAT", "PTR_FORMAT")",
3368                                _task_id, mr.start(), mr.end());
3369       tmp_guarantee_CM( mr.end() <= _cm->finger(),
3370                         "otherwise the region shouldn't be on the stack" );
3371       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3372       if (_nextMarkBitMap->iterate(bc, mr)) {
3373         tmp_guarantee_CM( !has_aborted(),
3374                "cannot abort the task without aborting the bitmap iteration" );
3375 
3376         // We finished iterating over the region without aborting.
3377         regular_clock_call();
3378         if (has_aborted())
3379           mr = MemRegion();
3380         else {
3381           mr = _cm->region_stack_pop();
3382           // it returns MemRegion() if the pop fails
3383           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3384         }
3385       } else {
3386         guarantee( has_aborted(), "currently the only way to do so" );
3387 
3388         // The only way to abort the bitmap iteration is to return
3389         // false from the do_bit() method. However, inside the
3390         // do_bit() method we move the _region_finger to point to the
3391         // object currently being looked at. So, if we bail out, we
3392         // have definitely set _region_finger to something non-null.
3393         guarantee( _region_finger != NULL, "invariant" );
3394 
3395         // The iteration was actually aborted. So now _region_finger
3396         // points to the address of the object we last scanned. If we
3397         // leave it there, when we restart this task, we will rescan
3398         // the object. It is easy to avoid this. We move the finger by
3399         // enough to point to the next possible object header (the
3400         // bitmap knows by how much we need to move it as it knows its
3401         // granularity).
3402         MemRegion newRegion =
3403           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3404 
3405         if (!newRegion.is_empty()) {
3406           if (_cm->verbose_low()) {
3407             gclog_or_tty->print_cr("[%d] pushing unscanned region"
3408                                    "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3409                                    _task_id,
3410                                    newRegion.start(), newRegion.end());
3411           }
3412           // Now push the part of the region we didn't scan on the
3413           // region stack to make sure a task scans it later.
3414           _cm->region_stack_push(newRegion);
3415         }
3416         // break from while
3417         mr = MemRegion();
3418       }
3419       _region_finger = NULL;
3420     }
3421 
3422     // We only push regions on the region stack during evacuation
3423     // pauses. So if we come out the above iteration because we region
3424     // stack is empty, it will remain empty until the next yield
3425     // point. So, the guarantee below is safe.
3426     guarantee( has_aborted() || _cm->region_stack_empty(),
3427                "only way to exit the loop" );
3428 
3429     if (_cm->verbose_low())
3430       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3431                              _task_id, _cm->region_stack_size());
3432   }
3433 }
3434 
3435 void CMTask::print_stats() {
3436   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3437                          _task_id, _calls);
3438   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3439                          _elapsed_time_ms, _termination_time_ms);
3440   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3441                          _step_times_ms.num(), _step_times_ms.avg(),
3442                          _step_times_ms.sd());
3443   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3444                          _step_times_ms.maximum(), _step_times_ms.sum());
3445 
3446 #if _MARKING_STATS_
3447   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3448                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3449                          _all_clock_intervals_ms.sd());
3450   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3451                          _all_clock_intervals_ms.maximum(),
3452                          _all_clock_intervals_ms.sum());
3453   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3454                          _clock_due_to_scanning, _clock_due_to_marking);
3455   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3456                          _objs_scanned, _objs_found_on_bitmap);
3457   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3458                          _local_pushes, _local_pops, _local_max_size);
3459   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3460                          _global_pushes, _global_pops, _global_max_size);
3461   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3462                          _global_transfers_to,_global_transfers_from);
3463   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3464                          _regions_claimed, _region_stack_pops);
3465   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3466   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3467                          _steal_attempts, _steals);
3468   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3469   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3470                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3471   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3472                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3473 #endif // _MARKING_STATS_
3474 }
3475 
3476 /*****************************************************************************
3477 
3478     The do_marking_step(time_target_ms) method is the building block
3479     of the parallel marking framework. It can be called in parallel
3480     with other invocations of do_marking_step() on different tasks
3481     (but only one per task, obviously) and concurrently with the
3482     mutator threads, or during remark, hence it eliminates the need
3483     for two versions of the code. When called during remark, it will
3484     pick up from where the task left off during the concurrent marking
3485     phase. Interestingly, tasks are also claimable during evacuation
3486     pauses too, since do_marking_step() ensures that it aborts before
3487     it needs to yield.
3488 
3489     The data structures that is uses to do marking work are the
3490     following:
3491 
3492       (1) Marking Bitmap. If there are gray objects that appear only
3493       on the bitmap (this happens either when dealing with an overflow
3494       or when the initial marking phase has simply marked the roots
3495       and didn't push them on the stack), then tasks claim heap
3496       regions whose bitmap they then scan to find gray objects. A
3497       global finger indicates where the end of the last claimed region
3498       is. A local finger indicates how far into the region a task has
3499       scanned. The two fingers are used to determine how to gray an
3500       object (i.e. whether simply marking it is OK, as it will be
3501       visited by a task in the future, or whether it needs to be also
3502       pushed on a stack).
3503 
3504       (2) Local Queue. The local queue of the task which is accessed
3505       reasonably efficiently by the task. Other tasks can steal from
3506       it when they run out of work. Throughout the marking phase, a
3507       task attempts to keep its local queue short but not totally
3508       empty, so that entries are available for stealing by other
3509       tasks. Only when there is no more work, a task will totally
3510       drain its local queue.
3511 
3512       (3) Global Mark Stack. This handles local queue overflow. During
3513       marking only sets of entries are moved between it and the local
3514       queues, as access to it requires a mutex and more fine-grain
3515       interaction with it which might cause contention. If it
3516       overflows, then the marking phase should restart and iterate
3517       over the bitmap to identify gray objects. Throughout the marking
3518       phase, tasks attempt to keep the global mark stack at a small
3519       length but not totally empty, so that entries are available for
3520       popping by other tasks. Only when there is no more work, tasks
3521       will totally drain the global mark stack.
3522 
3523       (4) Global Region Stack. Entries on it correspond to areas of
3524       the bitmap that need to be scanned since they contain gray
3525       objects. Pushes on the region stack only happen during
3526       evacuation pauses and typically correspond to areas covered by
3527       GC LABS. If it overflows, then the marking phase should restart
3528       and iterate over the bitmap to identify gray objects. Tasks will
3529       try to totally drain the region stack as soon as possible.
3530 
3531       (5) SATB Buffer Queue. This is where completed SATB buffers are
3532       made available. Buffers are regularly removed from this queue
3533       and scanned for roots, so that the queue doesn't get too
3534       long. During remark, all completed buffers are processed, as
3535       well as the filled in parts of any uncompleted buffers.
3536 
3537     The do_marking_step() method tries to abort when the time target
3538     has been reached. There are a few other cases when the
3539     do_marking_step() method also aborts:
3540 
3541       (1) When the marking phase has been aborted (after a Full GC).
3542 
3543       (2) When a global overflow (either on the global stack or the
3544       region stack) has been triggered. Before the task aborts, it
3545       will actually sync up with the other tasks to ensure that all
3546       the marking data structures (local queues, stacks, fingers etc.)
3547       are re-initialised so that when do_marking_step() completes,
3548       the marking phase can immediately restart.
3549 
3550       (3) When enough completed SATB buffers are available. The
3551       do_marking_step() method only tries to drain SATB buffers right
3552       at the beginning. So, if enough buffers are available, the
3553       marking step aborts and the SATB buffers are processed at
3554       the beginning of the next invocation.
3555 
3556       (4) To yield. when we have to yield then we abort and yield
3557       right at the end of do_marking_step(). This saves us from a lot
3558       of hassle as, by yielding we might allow a Full GC. If this
3559       happens then objects will be compacted underneath our feet, the
3560       heap might shrink, etc. We save checking for this by just
3561       aborting and doing the yield right at the end.
3562 
3563     From the above it follows that the do_marking_step() method should
3564     be called in a loop (or, otherwise, regularly) until it completes.
3565 
3566     If a marking step completes without its has_aborted() flag being
3567     true, it means it has completed the current marking phase (and
3568     also all other marking tasks have done so and have all synced up).
3569 
3570     A method called regular_clock_call() is invoked "regularly" (in
3571     sub ms intervals) throughout marking. It is this clock method that
3572     checks all the abort conditions which were mentioned above and
3573     decides when the task should abort. A work-based scheme is used to
3574     trigger this clock method: when the number of object words the
3575     marking phase has scanned or the number of references the marking
3576     phase has visited reach a given limit. Additional invocations to
3577     the method clock have been planted in a few other strategic places
3578     too. The initial reason for the clock method was to avoid calling
3579     vtime too regularly, as it is quite expensive. So, once it was in
3580     place, it was natural to piggy-back all the other conditions on it
3581     too and not constantly check them throughout the code.
3582 
3583  *****************************************************************************/
3584 
3585 void CMTask::do_marking_step(double time_target_ms) {
3586   guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
3587   guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
3588 
3589   guarantee( concurrent() || _cm->region_stack_empty(),
3590              "the region stack should have been cleared before remark" );
3591   guarantee( _region_finger == NULL,
3592              "this should be non-null only when a region is being scanned" );
3593 
3594   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3595   guarantee( _task_queues != NULL, "invariant" );
3596   guarantee( _task_queue != NULL,  "invariant" );
3597   guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
3598 
3599   guarantee( !_claimed,
3600              "only one thread should claim this task at any one time" );
3601 
3602   // OK, this doesn't safeguard again all possible scenarios, as it is
3603   // possible for two threads to set the _claimed flag at the same
3604   // time. But it is only for debugging purposes anyway and it will
3605   // catch most problems.
3606   _claimed = true;
3607 
3608   _start_time_ms = os::elapsedVTime() * 1000.0;
3609   statsOnly( _interval_start_time_ms = _start_time_ms );
3610 
3611   double diff_prediction_ms =
3612     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3613   _time_target_ms = time_target_ms - diff_prediction_ms;
3614 
3615   // set up the variables that are used in the work-based scheme to
3616   // call the regular clock method
3617   _words_scanned = 0;
3618   _refs_reached  = 0;
3619   recalculate_limits();
3620 
3621   // clear all flags
3622   clear_has_aborted();
3623   _has_aborted_timed_out = false;
3624   _draining_satb_buffers = false;
3625 
3626   ++_calls;
3627 
3628   if (_cm->verbose_low())
3629     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3630                            "target = %1.2lfms >>>>>>>>>>",
3631                            _task_id, _calls, _time_target_ms);
3632 
3633   // Set up the bitmap and oop closures. Anything that uses them is
3634   // eventually called from this method, so it is OK to allocate these
3635   // statically.
3636   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3637   CMOopClosure    oop_closure(_g1h, _cm, this);
3638   set_oop_closure(&oop_closure);
3639 
3640   if (_cm->has_overflown()) {
3641     // This can happen if the region stack or the mark stack overflows
3642     // during a GC pause and this task, after a yield point,
3643     // restarts. We have to abort as we need to get into the overflow
3644     // protocol which happens right at the end of this task.
3645     set_has_aborted();
3646   }
3647 
3648   // First drain any available SATB buffers. After this, we will not
3649   // look at SATB buffers before the next invocation of this method.
3650   // If enough completed SATB buffers are queued up, the regular clock
3651   // will abort this task so that it restarts.
3652   drain_satb_buffers();
3653   // ...then partially drain the local queue and the global stack
3654   drain_local_queue(true);
3655   drain_global_stack(true);
3656 
3657   // Then totally drain the region stack.  We will not look at
3658   // it again before the next invocation of this method. Entries on
3659   // the region stack are only added during evacuation pauses, for
3660   // which we have to yield. When we do, we abort the task anyway so
3661   // it will look at the region stack again when it restarts.
3662   bitmap_closure.set_scanning_heap_region(false);
3663   drain_region_stack(&bitmap_closure);
3664   // ...then partially drain the local queue and the global stack
3665   drain_local_queue(true);
3666   drain_global_stack(true);
3667 
3668   do {
3669     if (!has_aborted() && _curr_region != NULL) {
3670       // This means that we're already holding on to a region.
3671       tmp_guarantee_CM( _finger != NULL,
3672                         "if region is not NULL, then the finger "
3673                         "should not be NULL either" );
3674 
3675       // We might have restarted this task after an evacuation pause
3676       // which might have evacuated the region we're holding on to
3677       // underneath our feet. Let's read its limit again to make sure
3678       // that we do not iterate over a region of the heap that
3679       // contains garbage (update_region_limit() will also move
3680       // _finger to the start of the region if it is found empty).
3681       update_region_limit();
3682       // We will start from _finger not from the start of the region,
3683       // as we might be restarting this task after aborting half-way
3684       // through scanning this region. In this case, _finger points to
3685       // the address where we last found a marked object. If this is a
3686       // fresh region, _finger points to start().
3687       MemRegion mr = MemRegion(_finger, _region_limit);
3688 
3689       if (_cm->verbose_low())
3690         gclog_or_tty->print_cr("[%d] we're scanning part "
3691                                "["PTR_FORMAT", "PTR_FORMAT") "
3692                                "of region "PTR_FORMAT,
3693                                _task_id, _finger, _region_limit, _curr_region);
3694 
3695       // Let's iterate over the bitmap of the part of the
3696       // region that is left.
3697       bitmap_closure.set_scanning_heap_region(true);
3698       if (mr.is_empty() ||
3699           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3700         // We successfully completed iterating over the region. Now,
3701         // let's give up the region.
3702         giveup_current_region();
3703         regular_clock_call();
3704       } else {
3705         guarantee( has_aborted(), "currently the only way to do so" );
3706         // The only way to abort the bitmap iteration is to return
3707         // false from the do_bit() method. However, inside the
3708         // do_bit() method we move the _finger to point to the
3709         // object currently being looked at. So, if we bail out, we
3710         // have definitely set _finger to something non-null.
3711         guarantee( _finger != NULL, "invariant" );
3712 
3713         // Region iteration was actually aborted. So now _finger
3714         // points to the address of the object we last scanned. If we
3715         // leave it there, when we restart this task, we will rescan
3716         // the object. It is easy to avoid this. We move the finger by
3717         // enough to point to the next possible object header (the
3718         // bitmap knows by how much we need to move it as it knows its
3719         // granularity).
3720         move_finger_to(_nextMarkBitMap->nextWord(_finger));
3721       }
3722     }
3723     // At this point we have either completed iterating over the
3724     // region we were holding on to, or we have aborted.
3725 
3726     // We then partially drain the local queue and the global stack.
3727     // (Do we really need this?)
3728     drain_local_queue(true);
3729     drain_global_stack(true);
3730 
3731     // Read the note on the claim_region() method on why it might
3732     // return NULL with potentially more regions available for
3733     // claiming and why we have to check out_of_regions() to determine
3734     // whether we're done or not.
3735     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3736       // We are going to try to claim a new region. We should have
3737       // given up on the previous one.
3738       tmp_guarantee_CM( _curr_region  == NULL &&
3739                         _finger       == NULL &&
3740                         _region_limit == NULL, "invariant" );
3741       if (_cm->verbose_low())
3742         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3743       HeapRegion* claimed_region = _cm->claim_region(_task_id);
3744       if (claimed_region != NULL) {
3745         // Yes, we managed to claim one
3746         statsOnly( ++_regions_claimed );
3747 
3748         if (_cm->verbose_low())
3749           gclog_or_tty->print_cr("[%d] we successfully claimed "
3750                                  "region "PTR_FORMAT,
3751                                  _task_id, claimed_region);
3752 
3753         setup_for_region(claimed_region);
3754         tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
3755       }
3756       // It is important to call the regular clock here. It might take
3757       // a while to claim a region if, for example, we hit a large
3758       // block of empty regions. So we need to call the regular clock
3759       // method once round the loop to make sure it's called
3760       // frequently enough.
3761       regular_clock_call();
3762     }
3763 
3764     if (!has_aborted() && _curr_region == NULL) {
3765       tmp_guarantee_CM( _cm->out_of_regions(),
3766                         "at this point we should be out of regions" );
3767     }
3768   } while ( _curr_region != NULL && !has_aborted());
3769 
3770   if (!has_aborted()) {
3771     // We cannot check whether the global stack is empty, since other
3772     // tasks might be pushing objects to it concurrently. We also cannot
3773     // check if the region stack is empty because if a thread is aborting
3774     // it can push a partially done region back.
3775     tmp_guarantee_CM( _cm->out_of_regions(),
3776                       "at this point we should be out of regions" );
3777 
3778     if (_cm->verbose_low())
3779       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3780 
3781     // Try to reduce the number of available SATB buffers so that
3782     // remark has less work to do.
3783     drain_satb_buffers();
3784   }
3785 
3786   // Since we've done everything else, we can now totally drain the
3787   // local queue and global stack.
3788   drain_local_queue(false);
3789   drain_global_stack(false);
3790 
3791   // Attempt at work stealing from other task's queues.
3792   if (!has_aborted()) {
3793     // We have not aborted. This means that we have finished all that
3794     // we could. Let's try to do some stealing...
3795 
3796     // We cannot check whether the global stack is empty, since other
3797     // tasks might be pushing objects to it concurrently. We also cannot
3798     // check if the region stack is empty because if a thread is aborting
3799     // it can push a partially done region back.
3800     guarantee( _cm->out_of_regions() &&
3801                _task_queue->size() == 0, "only way to reach here" );
3802 
3803     if (_cm->verbose_low())
3804       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3805 
3806     while (!has_aborted()) {
3807       oop obj;
3808       statsOnly( ++_steal_attempts );
3809 
3810       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3811         if (_cm->verbose_medium())
3812           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3813                                  _task_id, (void*) obj);
3814 
3815         statsOnly( ++_steals );
3816 
3817         tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
3818                           "any stolen object should be marked" );
3819         scan_object(obj);
3820 
3821         // And since we're towards the end, let's totally drain the
3822         // local queue and global stack.
3823         drain_local_queue(false);
3824         drain_global_stack(false);
3825       } else {
3826         break;
3827       }
3828     }
3829   }
3830 
3831   // We still haven't aborted. Now, let's try to get into the
3832   // termination protocol.
3833   if (!has_aborted()) {
3834     // We cannot check whether the global stack is empty, since other
3835     // tasks might be concurrently pushing objects on it. We also cannot
3836     // check if the region stack is empty because if a thread is aborting
3837     // it can push a partially done region back.
3838     guarantee( _cm->out_of_regions() &&
3839                _task_queue->size() == 0, "only way to reach here" );
3840 
3841     if (_cm->verbose_low())
3842       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3843 
3844     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3845     // The CMTask class also extends the TerminatorTerminator class,
3846     // hence its should_exit_termination() method will also decide
3847     // whether to exit the termination protocol or not.
3848     bool finished = _cm->terminator()->offer_termination(this);
3849     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3850     _termination_time_ms +=
3851       termination_end_time_ms - _termination_start_time_ms;
3852 
3853     if (finished) {
3854       // We're all done.
3855 
3856       if (_task_id == 0) {
3857         // let's allow task 0 to do this
3858         if (concurrent()) {
3859           guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
3860           // we need to set this to false before the next
3861           // safepoint. This way we ensure that the marking phase
3862           // doesn't observe any more heap expansions.
3863           _cm->clear_concurrent_marking_in_progress();
3864         }
3865       }
3866 
3867       // We can now guarantee that the global stack is empty, since
3868       // all other tasks have finished.
3869       guarantee( _cm->out_of_regions() &&
3870                  _cm->region_stack_empty() &&
3871                  _cm->mark_stack_empty() &&
3872                  _task_queue->size() == 0 &&
3873                  !_cm->has_overflown() &&
3874                  !_cm->mark_stack_overflow() &&
3875                  !_cm->region_stack_overflow(),
3876                  "only way to reach here" );
3877 
3878       if (_cm->verbose_low())
3879         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3880     } else {
3881       // Apparently there's more work to do. Let's abort this task. It
3882       // will restart it and we can hopefully find more things to do.
3883 
3884       if (_cm->verbose_low())
3885         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3886 
3887       set_has_aborted();
3888       statsOnly( ++_aborted_termination );
3889     }
3890   }
3891 
3892   // Mainly for debugging purposes to make sure that a pointer to the
3893   // closure which was statically allocated in this frame doesn't
3894   // escape it by accident.
3895   set_oop_closure(NULL);
3896   double end_time_ms = os::elapsedVTime() * 1000.0;
3897   double elapsed_time_ms = end_time_ms - _start_time_ms;
3898   // Update the step history.
3899   _step_times_ms.add(elapsed_time_ms);
3900 
3901   if (has_aborted()) {
3902     // The task was aborted for some reason.
3903 
3904     statsOnly( ++_aborted );
3905 
3906     if (_has_aborted_timed_out) {
3907       double diff_ms = elapsed_time_ms - _time_target_ms;
3908       // Keep statistics of how well we did with respect to hitting
3909       // our target only if we actually timed out (if we aborted for
3910       // other reasons, then the results might get skewed).
3911       _marking_step_diffs_ms.add(diff_ms);
3912     }
3913 
3914     if (_cm->has_overflown()) {
3915       // This is the interesting one. We aborted because a global
3916       // overflow was raised. This means we have to restart the
3917       // marking phase and start iterating over regions. However, in
3918       // order to do this we have to make sure that all tasks stop
3919       // what they are doing and re-initialise in a safe manner. We
3920       // will achieve this with the use of two barrier sync points.
3921 
3922       if (_cm->verbose_low())
3923         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3924 
3925       _cm->enter_first_sync_barrier(_task_id);
3926       // When we exit this sync barrier we know that all tasks have
3927       // stopped doing marking work. So, it's now safe to
3928       // re-initialise our data structures. At the end of this method,
3929       // task 0 will clear the global data structures.
3930 
3931       statsOnly( ++_aborted_overflow );
3932 
3933       // We clear the local state of this task...
3934       clear_region_fields();
3935 
3936       // ...and enter the second barrier.
3937       _cm->enter_second_sync_barrier(_task_id);
3938       // At this point everything has bee re-initialised and we're
3939       // ready to restart.
3940     }
3941 
3942     if (_cm->verbose_low()) {
3943       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3944                              "elapsed = %1.2lfms <<<<<<<<<<",
3945                              _task_id, _time_target_ms, elapsed_time_ms);
3946       if (_cm->has_aborted())
3947         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
3948                                _task_id);
3949     }
3950   } else {
3951     if (_cm->verbose_low())
3952       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
3953                              "elapsed = %1.2lfms <<<<<<<<<<",
3954                              _task_id, _time_target_ms, elapsed_time_ms);
3955   }
3956 
3957   _claimed = false;
3958 }
3959 
3960 CMTask::CMTask(int task_id,
3961                ConcurrentMark* cm,
3962                CMTaskQueue* task_queue,
3963                CMTaskQueueSet* task_queues)
3964   : _g1h(G1CollectedHeap::heap()),
3965     _co_tracker(G1CMGroup),
3966     _task_id(task_id), _cm(cm),
3967     _claimed(false),
3968     _nextMarkBitMap(NULL), _hash_seed(17),
3969     _task_queue(task_queue),
3970     _task_queues(task_queues),
3971     _oop_closure(NULL) {
3972   guarantee( task_queue != NULL, "invariant" );
3973   guarantee( task_queues != NULL, "invariant" );
3974 
3975   statsOnly( _clock_due_to_scanning = 0;
3976              _clock_due_to_marking  = 0 );
3977 
3978   _marking_step_diffs_ms.add(0.5);
3979 }