1 /*
2 * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25 #include "incls/_precompiled.incl"
26 #include "incls/_concurrentMark.cpp.incl"
27
28 //
29 // CMS Bit Map Wrapper
30
31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
32 _bm((uintptr_t*)NULL,0),
33 _shifter(shifter) {
34 _bmStartWord = (HeapWord*)(rs.base());
35 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
36 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
37 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
38
39 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
40 // For now we'll just commit all of the bit map up fromt.
41 // Later on we'll try to be more parsimonious with swap.
42 guarantee(_virtual_space.initialize(brs, brs.size()),
43 "couldn't reseve backing store for CMS bit map");
44 assert(_virtual_space.committed_size() == brs.size(),
45 "didn't reserve backing store for all of CMS bit map?");
46 _bm.set_map((uintptr_t*)_virtual_space.low());
47 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
48 _bmWordSize, "inconsistency in bit map sizing");
49 _bm.set_size(_bmWordSize >> _shifter);
50 }
51
52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
53 HeapWord* limit) const {
54 // First we must round addr *up* to a possible object boundary.
55 addr = (HeapWord*)align_size_up((intptr_t)addr,
56 HeapWordSize << _shifter);
57 size_t addrOffset = heapWordToOffset(addr);
58 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
59 size_t limitOffset = heapWordToOffset(limit);
60 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
61 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
62 assert(nextAddr >= addr, "get_next_one postcondition");
63 assert(nextAddr == limit || isMarked(nextAddr),
64 "get_next_one postcondition");
65 return nextAddr;
66 }
67
68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
69 HeapWord* limit) const {
70 size_t addrOffset = heapWordToOffset(addr);
71 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
72 size_t limitOffset = heapWordToOffset(limit);
73 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
74 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
75 assert(nextAddr >= addr, "get_next_one postcondition");
76 assert(nextAddr == limit || !isMarked(nextAddr),
77 "get_next_one postcondition");
78 return nextAddr;
79 }
80
81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
82 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
83 return (int) (diff >> _shifter);
84 }
85
86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
87 HeapWord* left = MAX2(_bmStartWord, mr.start());
88 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
89 if (right > left) {
90 // Right-open interval [leftOffset, rightOffset).
91 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
92 } else {
93 return true;
94 }
95 }
96
97 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
98 size_t from_start_index,
99 HeapWord* to_start_word,
100 size_t word_num) {
101 _bm.mostly_disjoint_range_union(from_bitmap,
102 from_start_index,
103 heapWordToOffset(to_start_word),
104 word_num);
105 }
106
107 #ifndef PRODUCT
108 bool CMBitMapRO::covers(ReservedSpace rs) const {
109 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
110 assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
111 "size inconsistency");
112 return _bmStartWord == (HeapWord*)(rs.base()) &&
113 _bmWordSize == rs.size()>>LogHeapWordSize;
114 }
115 #endif
116
117 void CMBitMap::clearAll() {
118 _bm.clear();
119 return;
120 }
121
122 void CMBitMap::markRange(MemRegion mr) {
123 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
124 assert(!mr.is_empty(), "unexpected empty region");
125 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
126 ((HeapWord *) mr.end())),
127 "markRange memory region end is not card aligned");
128 // convert address range into offset range
129 _bm.at_put_range(heapWordToOffset(mr.start()),
130 heapWordToOffset(mr.end()), true);
131 }
132
133 void CMBitMap::clearRange(MemRegion mr) {
134 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
135 assert(!mr.is_empty(), "unexpected empty region");
136 // convert address range into offset range
137 _bm.at_put_range(heapWordToOffset(mr.start()),
138 heapWordToOffset(mr.end()), false);
139 }
140
141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
142 HeapWord* end_addr) {
143 HeapWord* start = getNextMarkedWordAddress(addr);
144 start = MIN2(start, end_addr);
145 HeapWord* end = getNextUnmarkedWordAddress(start);
146 end = MIN2(end, end_addr);
147 assert(start <= end, "Consistency check");
148 MemRegion mr(start, end);
149 if (!mr.is_empty()) {
150 clearRange(mr);
151 }
152 return mr;
153 }
154
155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
156 _base(NULL), _cm(cm)
157 #ifdef ASSERT
158 , _drain_in_progress(false)
159 , _drain_in_progress_yields(false)
160 #endif
161 {}
162
163 void CMMarkStack::allocate(size_t size) {
164 _base = NEW_C_HEAP_ARRAY(oop, size);
165 if (_base == NULL)
166 vm_exit_during_initialization("Failed to allocate "
167 "CM region mark stack");
168 _index = 0;
169 // QQQQ cast ...
170 _capacity = (jint) size;
171 _oops_do_bound = -1;
172 NOT_PRODUCT(_max_depth = 0);
173 }
174
175 CMMarkStack::~CMMarkStack() {
176 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
177 }
178
179 void CMMarkStack::par_push(oop ptr) {
180 while (true) {
181 if (isFull()) {
182 _overflow = true;
183 return;
184 }
185 // Otherwise...
186 jint index = _index;
187 jint next_index = index+1;
188 jint res = Atomic::cmpxchg(next_index, &_index, index);
189 if (res == index) {
190 _base[index] = ptr;
191 // Note that we don't maintain this atomically. We could, but it
192 // doesn't seem necessary.
193 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
194 return;
195 }
196 // Otherwise, we need to try again.
197 }
198 }
199
200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
201 while (true) {
202 if (isFull()) {
203 _overflow = true;
204 return;
205 }
206 // Otherwise...
207 jint index = _index;
208 jint next_index = index + n;
209 if (next_index > _capacity) {
210 _overflow = true;
211 return;
212 }
213 jint res = Atomic::cmpxchg(next_index, &_index, index);
214 if (res == index) {
215 for (int i = 0; i < n; i++) {
216 int ind = index + i;
217 assert(ind < _capacity, "By overflow test above.");
218 _base[ind] = ptr_arr[i];
219 }
220 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
221 return;
222 }
223 // Otherwise, we need to try again.
224 }
225 }
226
227
228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
229 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
230 jint start = _index;
231 jint next_index = start + n;
232 if (next_index > _capacity) {
233 _overflow = true;
234 return;
235 }
236 // Otherwise.
237 _index = next_index;
238 for (int i = 0; i < n; i++) {
239 int ind = start + i;
240 guarantee(ind < _capacity, "By overflow test above.");
241 _base[ind] = ptr_arr[i];
242 }
243 }
244
245
246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
247 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
248 jint index = _index;
249 if (index == 0) {
250 *n = 0;
251 return false;
252 } else {
253 int k = MIN2(max, index);
254 jint new_ind = index - k;
255 for (int j = 0; j < k; j++) {
256 ptr_arr[j] = _base[new_ind + j];
257 }
258 _index = new_ind;
259 *n = k;
260 return true;
261 }
262 }
263
264
265 CMRegionStack::CMRegionStack() : _base(NULL) {}
266
267 void CMRegionStack::allocate(size_t size) {
268 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
269 if (_base == NULL)
270 vm_exit_during_initialization("Failed to allocate "
271 "CM region mark stack");
272 _index = 0;
273 // QQQQ cast ...
274 _capacity = (jint) size;
275 }
276
277 CMRegionStack::~CMRegionStack() {
278 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
279 }
280
281 void CMRegionStack::push(MemRegion mr) {
282 assert(mr.word_size() > 0, "Precondition");
283 while (true) {
284 if (isFull()) {
285 _overflow = true;
286 return;
287 }
288 // Otherwise...
289 jint index = _index;
290 jint next_index = index+1;
291 jint res = Atomic::cmpxchg(next_index, &_index, index);
292 if (res == index) {
293 _base[index] = mr;
294 return;
295 }
296 // Otherwise, we need to try again.
297 }
298 }
299
300 MemRegion CMRegionStack::pop() {
301 while (true) {
302 // Otherwise...
303 jint index = _index;
304
305 if (index == 0) {
306 return MemRegion();
307 }
308 jint next_index = index-1;
309 jint res = Atomic::cmpxchg(next_index, &_index, index);
310 if (res == index) {
311 MemRegion mr = _base[next_index];
312 if (mr.start() != NULL) {
313 tmp_guarantee_CM( mr.end() != NULL, "invariant" );
314 tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
315 return mr;
316 } else {
317 // that entry was invalidated... let's skip it
318 tmp_guarantee_CM( mr.end() == NULL, "invariant" );
319 }
320 }
321 // Otherwise, we need to try again.
322 }
323 }
324
325 bool CMRegionStack::invalidate_entries_into_cset() {
326 bool result = false;
327 G1CollectedHeap* g1h = G1CollectedHeap::heap();
328 for (int i = 0; i < _oops_do_bound; ++i) {
329 MemRegion mr = _base[i];
330 if (mr.start() != NULL) {
331 tmp_guarantee_CM( mr.end() != NULL, "invariant");
332 tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
333 HeapRegion* hr = g1h->heap_region_containing(mr.start());
334 tmp_guarantee_CM( hr != NULL, "invariant" );
335 if (hr->in_collection_set()) {
336 // The region points into the collection set
337 _base[i] = MemRegion();
338 result = true;
339 }
340 } else {
341 // that entry was invalidated... let's skip it
342 tmp_guarantee_CM( mr.end() == NULL, "invariant" );
343 }
344 }
345 return result;
346 }
347
348 template<class OopClosureClass>
349 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
350 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
351 || SafepointSynchronize::is_at_safepoint(),
352 "Drain recursion must be yield-safe.");
353 bool res = true;
354 debug_only(_drain_in_progress = true);
355 debug_only(_drain_in_progress_yields = yield_after);
356 while (!isEmpty()) {
357 oop newOop = pop();
358 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
359 assert(newOop->is_oop(), "Expected an oop");
360 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
361 "only grey objects on this stack");
362 // iterate over the oops in this oop, marking and pushing
363 // the ones in CMS generation.
364 newOop->oop_iterate(cl);
365 if (yield_after && _cm->do_yield_check()) {
366 res = false; break;
367 }
368 }
369 debug_only(_drain_in_progress = false);
370 return res;
371 }
372
373 void CMMarkStack::oops_do(OopClosure* f) {
374 if (_index == 0) return;
375 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
376 "Bound must be set.");
377 for (int i = 0; i < _oops_do_bound; i++) {
378 f->do_oop(&_base[i]);
379 }
380 _oops_do_bound = -1;
381 }
382
383 bool ConcurrentMark::not_yet_marked(oop obj) const {
384 return (_g1h->is_obj_ill(obj)
385 || (_g1h->is_in_permanent(obj)
386 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
387 }
388
389 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
390 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
391 #endif // _MSC_VER
392
393 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
394 int max_regions) :
395 _markBitMap1(rs, MinObjAlignment - 1),
396 _markBitMap2(rs, MinObjAlignment - 1),
397
398 _parallel_marking_threads(0),
399 _sleep_factor(0.0),
400 _marking_task_overhead(1.0),
401 _cleanup_sleep_factor(0.0),
402 _cleanup_task_overhead(1.0),
403 _region_bm(max_regions, false /* in_resource_area*/),
404 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
405 CardTableModRefBS::card_shift,
406 false /* in_resource_area*/),
407 _prevMarkBitMap(&_markBitMap1),
408 _nextMarkBitMap(&_markBitMap2),
409 _at_least_one_mark_complete(false),
410
411 _markStack(this),
412 _regionStack(),
413 // _finger set in set_non_marking_state
414
415 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
416 // _active_tasks set in set_non_marking_state
417 // _tasks set inside the constructor
418 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
419 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
420
421 _has_overflown(false),
422 _concurrent(false),
423
424 // _verbose_level set below
425
426 _init_times(),
427 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
428 _cleanup_times(),
429 _total_counting_time(0.0),
430 _total_rs_scrub_time(0.0),
431
432 _parallel_workers(NULL),
433 _cleanup_co_tracker(G1CLGroup)
434 {
435 CMVerboseLevel verbose_level =
436 (CMVerboseLevel) G1MarkingVerboseLevel;
437 if (verbose_level < no_verbose)
438 verbose_level = no_verbose;
439 if (verbose_level > high_verbose)
440 verbose_level = high_verbose;
441 _verbose_level = verbose_level;
442
443 if (verbose_low())
444 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
445 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
446
447 _markStack.allocate(G1CMStackSize);
448 _regionStack.allocate(G1CMRegionStackSize);
449
450 // Create & start a ConcurrentMark thread.
451 if (G1ConcMark) {
452 _cmThread = new ConcurrentMarkThread(this);
453 assert(cmThread() != NULL, "CM Thread should have been created");
454 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
455 } else {
456 _cmThread = NULL;
457 }
458 _g1h = G1CollectedHeap::heap();
459 assert(CGC_lock != NULL, "Where's the CGC_lock?");
460 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
461 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
462
463 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
464 satb_qs.set_buffer_size(G1SATBLogBufferSize);
465
466 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
467 _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
468 for (int i = 0 ; i < size; i++) {
469 _par_cleanup_thread_state[i] = new ParCleanupThreadState;
470 }
471
472 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
473 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
474
475 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
476 _active_tasks = _max_task_num;
477 for (int i = 0; i < (int) _max_task_num; ++i) {
478 CMTaskQueue* task_queue = new CMTaskQueue();
479 task_queue->initialize();
480 _task_queues->register_queue(i, task_queue);
481
482 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
483 _accum_task_vtime[i] = 0.0;
484 }
485
486 if (ParallelMarkingThreads > ParallelGCThreads) {
487 vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
488 "than ParallelGCThreads.");
489 }
490 if (ParallelGCThreads == 0) {
491 // if we are not running with any parallel GC threads we will not
492 // spawn any marking threads either
493 _parallel_marking_threads = 0;
494 _sleep_factor = 0.0;
495 _marking_task_overhead = 1.0;
496 } else {
497 if (ParallelMarkingThreads > 0) {
498 // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc
499 // if both are set
500
501 _parallel_marking_threads = ParallelMarkingThreads;
502 _sleep_factor = 0.0;
503 _marking_task_overhead = 1.0;
504 } else if (G1MarkingOverheadPerc > 0) {
505 // we will calculate the number of parallel marking threads
506 // based on a target overhead with respect to the soft real-time
507 // goal
508
509 double marking_overhead = (double) G1MarkingOverheadPerc / 100.0;
510 double overall_cm_overhead =
511 (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS;
512 double cpu_ratio = 1.0 / (double) os::processor_count();
513 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
514 double marking_task_overhead =
515 overall_cm_overhead / marking_thread_num *
516 (double) os::processor_count();
517 double sleep_factor =
518 (1.0 - marking_task_overhead) / marking_task_overhead;
519
520 _parallel_marking_threads = (size_t) marking_thread_num;
521 _sleep_factor = sleep_factor;
522 _marking_task_overhead = marking_task_overhead;
523 } else {
524 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
525 _sleep_factor = 0.0;
526 _marking_task_overhead = 1.0;
527 }
528
529 if (parallel_marking_threads() > 1)
530 _cleanup_task_overhead = 1.0;
531 else
532 _cleanup_task_overhead = marking_task_overhead();
533 _cleanup_sleep_factor =
534 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
535
536 #if 0
537 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
538 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
539 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
540 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
541 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
542 #endif
543
544 guarantee( parallel_marking_threads() > 0, "peace of mind" );
545 _parallel_workers = new WorkGang("Parallel Marking Threads",
546 (int) parallel_marking_threads(), false, true);
547 if (_parallel_workers == NULL)
548 vm_exit_during_initialization("Failed necessary allocation.");
549 }
550
551 // so that the call below can read a sensible value
552 _heap_start = (HeapWord*) rs.base();
553 set_non_marking_state();
554 }
555
556 void ConcurrentMark::update_g1_committed(bool force) {
557 // If concurrent marking is not in progress, then we do not need to
558 // update _heap_end. This has a subtle and important
559 // side-effect. Imagine that two evacuation pauses happen between
560 // marking completion and remark. The first one can grow the
561 // heap (hence now the finger is below the heap end). Then, the
562 // second one could unnecessarily push regions on the region
563 // stack. This causes the invariant that the region stack is empty
564 // at the beginning of remark to be false. By ensuring that we do
565 // not observe heap expansions after marking is complete, then we do
566 // not have this problem.
567 if (!concurrent_marking_in_progress() && !force)
568 return;
569
570 MemRegion committed = _g1h->g1_committed();
571 tmp_guarantee_CM( committed.start() == _heap_start,
572 "start shouldn't change" );
573 HeapWord* new_end = committed.end();
574 if (new_end > _heap_end) {
575 // The heap has been expanded.
576
577 _heap_end = new_end;
578 }
579 // Notice that the heap can also shrink. However, this only happens
580 // during a Full GC (at least currently) and the entire marking
581 // phase will bail out and the task will not be restarted. So, let's
582 // do nothing.
583 }
584
585 void ConcurrentMark::reset() {
586 // Starting values for these two. This should be called in a STW
587 // phase. CM will be notified of any future g1_committed expansions
588 // will be at the end of evacuation pauses, when tasks are
589 // inactive.
590 MemRegion committed = _g1h->g1_committed();
591 _heap_start = committed.start();
592 _heap_end = committed.end();
593
594 guarantee( _heap_start != NULL &&
595 _heap_end != NULL &&
596 _heap_start < _heap_end, "heap bounds should look ok" );
597
598 // reset all the marking data structures and any necessary flags
599 clear_marking_state();
600
601 if (verbose_low())
602 gclog_or_tty->print_cr("[global] resetting");
603
604 // We do reset all of them, since different phases will use
605 // different number of active threads. So, it's easiest to have all
606 // of them ready.
607 for (int i = 0; i < (int) _max_task_num; ++i)
608 _tasks[i]->reset(_nextMarkBitMap);
609
610 // we need this to make sure that the flag is on during the evac
611 // pause with initial mark piggy-backed
612 set_concurrent_marking_in_progress();
613 }
614
615 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
616 guarantee( active_tasks <= _max_task_num, "we should not have more" );
617
618 _active_tasks = active_tasks;
619 // Need to update the three data structures below according to the
620 // number of active threads for this phase.
621 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
622 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
623 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
624
625 _concurrent = concurrent;
626 // We propagate this to all tasks, not just the active ones.
627 for (int i = 0; i < (int) _max_task_num; ++i)
628 _tasks[i]->set_concurrent(concurrent);
629
630 if (concurrent) {
631 set_concurrent_marking_in_progress();
632 } else {
633 // We currently assume that the concurrent flag has been set to
634 // false before we start remark. At this point we should also be
635 // in a STW phase.
636 guarantee( !concurrent_marking_in_progress(), "invariant" );
637 guarantee( _finger == _heap_end, "only way to get here" );
638 update_g1_committed(true);
639 }
640 }
641
642 void ConcurrentMark::set_non_marking_state() {
643 // We set the global marking state to some default values when we're
644 // not doing marking.
645 clear_marking_state();
646 _active_tasks = 0;
647 clear_concurrent_marking_in_progress();
648 }
649
650 ConcurrentMark::~ConcurrentMark() {
651 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
652 for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
653 FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
654 _par_cleanup_thread_state);
655
656 for (int i = 0; i < (int) _max_task_num; ++i) {
657 delete _task_queues->queue(i);
658 delete _tasks[i];
659 }
660 delete _task_queues;
661 FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
662 }
663
664 // This closure is used to mark refs into the g1 generation
665 // from external roots in the CMS bit map.
666 // Called at the first checkpoint.
667 //
668
669 #define PRINT_REACHABLE_AT_INITIAL_MARK 0
670 #if PRINT_REACHABLE_AT_INITIAL_MARK
671 static FILE* reachable_file = NULL;
672
673 class PrintReachableClosure: public OopsInGenClosure {
674 CMBitMap* _bm;
675 int _level;
676 public:
677 PrintReachableClosure(CMBitMap* bm) :
678 _bm(bm), _level(0) {
679 guarantee(reachable_file != NULL, "pre-condition");
680 }
681 void do_oop(oop* p) {
682 oop obj = *p;
683 HeapWord* obj_addr = (HeapWord*)obj;
684 if (obj == NULL) return;
685 fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
686 _level, p, (void*) obj, _bm->isMarked(obj_addr));
687 if (!_bm->isMarked(obj_addr)) {
688 _bm->mark(obj_addr);
689 _level++;
690 obj->oop_iterate(this);
691 _level--;
692 }
693 }
694 };
695 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
696
697 #define SEND_HEAP_DUMP_TO_FILE 0
698 #if SEND_HEAP_DUMP_TO_FILE
699 static FILE* heap_dump_file = NULL;
700 #endif // SEND_HEAP_DUMP_TO_FILE
701
702 void ConcurrentMark::clearNextBitmap() {
703 guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
704
705 // clear the mark bitmap (no grey objects to start with).
706 // We need to do this in chunks and offer to yield in between
707 // each chunk.
708 HeapWord* start = _nextMarkBitMap->startWord();
709 HeapWord* end = _nextMarkBitMap->endWord();
710 HeapWord* cur = start;
711 size_t chunkSize = M;
712 while (cur < end) {
713 HeapWord* next = cur + chunkSize;
714 if (next > end)
715 next = end;
716 MemRegion mr(cur,next);
717 _nextMarkBitMap->clearRange(mr);
718 cur = next;
719 do_yield_check();
720 }
721 }
722
723 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
724 public:
725 bool doHeapRegion(HeapRegion* r) {
726 if (!r->continuesHumongous()) {
727 r->note_start_of_marking(true);
728 }
729 return false;
730 }
731 };
732
733 void ConcurrentMark::checkpointRootsInitialPre() {
734 G1CollectedHeap* g1h = G1CollectedHeap::heap();
735 G1CollectorPolicy* g1p = g1h->g1_policy();
736
737 _has_aborted = false;
738
739 // Find all the reachable objects...
740 #if PRINT_REACHABLE_AT_INITIAL_MARK
741 guarantee(reachable_file == NULL, "Protocol");
742 char fn_buf[100];
743 sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
744 reachable_file = fopen(fn_buf, "w");
745 // clear the mark bitmap (no grey objects to start with)
746 _nextMarkBitMap->clearAll();
747 PrintReachableClosure prcl(_nextMarkBitMap);
748 g1h->process_strong_roots(
749 false, // fake perm gen collection
750 SharedHeap::SO_AllClasses,
751 &prcl, // Regular roots
752 &prcl // Perm Gen Roots
753 );
754 // The root iteration above "consumed" dirty cards in the perm gen.
755 // Therefore, as a shortcut, we dirty all such cards.
756 g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
757 fclose(reachable_file);
758 reachable_file = NULL;
759 // clear the mark bitmap again.
760 _nextMarkBitMap->clearAll();
761 COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
762 COMPILER2_PRESENT(DerivedPointerTable::clear());
763 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
764
765 // Initialise marking structures. This has to be done in a STW phase.
766 reset();
767 }
768
769 class CMMarkRootsClosure: public OopsInGenClosure {
770 private:
771 ConcurrentMark* _cm;
772 G1CollectedHeap* _g1h;
773 bool _do_barrier;
774
775 public:
776 CMMarkRootsClosure(ConcurrentMark* cm,
777 G1CollectedHeap* g1h,
778 bool do_barrier) : _cm(cm), _g1h(g1h),
779 _do_barrier(do_barrier) { }
780
781 virtual void do_oop(narrowOop* p) {
782 guarantee(false, "NYI");
783 }
784
785 virtual void do_oop(oop* p) {
786 oop thisOop = *p;
787 if (thisOop != NULL) {
788 assert(thisOop->is_oop() || thisOop->mark() == NULL,
789 "expected an oop, possibly with mark word displaced");
790 HeapWord* addr = (HeapWord*)thisOop;
791 if (_g1h->is_in_g1_reserved(addr)) {
792 _cm->grayRoot(thisOop);
793 }
794 }
795 if (_do_barrier) {
796 assert(!_g1h->is_in_g1_reserved(p),
797 "Should be called on external roots");
798 do_barrier(p);
799 }
800 }
801 };
802
803 void ConcurrentMark::checkpointRootsInitialPost() {
804 G1CollectedHeap* g1h = G1CollectedHeap::heap();
805
806 // For each region note start of marking.
807 NoteStartOfMarkHRClosure startcl;
808 g1h->heap_region_iterate(&startcl);
809
810 // Start weak-reference discovery.
811 ReferenceProcessor* rp = g1h->ref_processor();
812 rp->verify_no_references_recorded();
813 rp->enable_discovery(); // enable ("weak") refs discovery
814 rp->snap_policy(false); // snapshot the soft ref policy to be used in this cycle
815
816 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
817 satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
818 satb_mq_set.set_active_all_threads(true);
819
820 // update_g1_committed() will be called at the end of an evac pause
821 // when marking is on. So, it's also called at the end of the
822 // initial-mark pause to update the heap end, if the heap expands
823 // during it. No need to call it here.
824
825 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
826
827 size_t max_marking_threads =
828 MAX2((size_t) 1, parallel_marking_threads());
829 for (int i = 0; i < (int)_max_task_num; ++i) {
830 _tasks[i]->enable_co_tracker();
831 if (i < (int) max_marking_threads)
832 _tasks[i]->reset_co_tracker(marking_task_overhead());
833 else
834 _tasks[i]->reset_co_tracker(0.0);
835 }
836 }
837
838 // Checkpoint the roots into this generation from outside
839 // this generation. [Note this initial checkpoint need only
840 // be approximate -- we'll do a catch up phase subsequently.]
841 void ConcurrentMark::checkpointRootsInitial() {
842 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
843 G1CollectedHeap* g1h = G1CollectedHeap::heap();
844
845 double start = os::elapsedTime();
846 GCOverheadReporter::recordSTWStart(start);
847
848 // If there has not been a GC[n-1] since last GC[n] cycle completed,
849 // precede our marking with a collection of all
850 // younger generations to keep floating garbage to a minimum.
851 // YSR: we won't do this for now -- it's an optimization to be
852 // done post-beta.
853
854 // YSR: ignoring weak refs for now; will do at bug fixing stage
855 // EVM: assert(discoveredRefsAreClear());
856
857
858 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
859 g1p->record_concurrent_mark_init_start();
860 checkpointRootsInitialPre();
861
862 // YSR: when concurrent precleaning is in place, we'll
863 // need to clear the cached card table here
864
865 ResourceMark rm;
866 HandleMark hm;
867
868 g1h->ensure_parsability(false);
869 g1h->perm_gen()->save_marks();
870
871 CMMarkRootsClosure notOlder(this, g1h, false);
872 CMMarkRootsClosure older(this, g1h, true);
873
874 g1h->set_marking_started();
875 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
876
877 g1h->process_strong_roots(false, // fake perm gen collection
878 SharedHeap::SO_AllClasses,
879 ¬Older, // Regular roots
880 &older // Perm Gen Roots
881 );
882 checkpointRootsInitialPost();
883
884 // Statistics.
885 double end = os::elapsedTime();
886 _init_times.add((end - start) * 1000.0);
887 GCOverheadReporter::recordSTWEnd(end);
888
889 g1p->record_concurrent_mark_init_end();
890 }
891
892 /*
893 Notice that in the next two methods, we actually leave the STS
894 during the barrier sync and join it immediately afterwards. If we
895 do not do this, this then the following deadlock can occur: one
896 thread could be in the barrier sync code, waiting for the other
897 thread to also sync up, whereas another one could be trying to
898 yield, while also waiting for the other threads to sync up too.
899
900 Because the thread that does the sync barrier has left the STS, it
901 is possible to be suspended for a Full GC or an evacuation pause
902 could occur. This is actually safe, since the entering the sync
903 barrier is one of the last things do_marking_step() does, and it
904 doesn't manipulate any data structures afterwards.
905 */
906
907 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
908 if (verbose_low())
909 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
910
911 ConcurrentGCThread::stsLeave();
912 _first_overflow_barrier_sync.enter();
913 ConcurrentGCThread::stsJoin();
914 // at this point everyone should have synced up and not be doing any
915 // more work
916
917 if (verbose_low())
918 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
919
920 // let task 0 do this
921 if (task_num == 0) {
922 // task 0 is responsible for clearing the global data structures
923 clear_marking_state();
924
925 if (PrintGC) {
926 gclog_or_tty->date_stamp(PrintGCDateStamps);
927 gclog_or_tty->stamp(PrintGCTimeStamps);
928 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
929 }
930 }
931
932 // after this, each task should reset its own data structures then
933 // then go into the second barrier
934 }
935
936 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
937 if (verbose_low())
938 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
939
940 ConcurrentGCThread::stsLeave();
941 _second_overflow_barrier_sync.enter();
942 ConcurrentGCThread::stsJoin();
943 // at this point everything should be re-initialised and ready to go
944
945 if (verbose_low())
946 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
947 }
948
949 void ConcurrentMark::grayRoot(oop p) {
950 HeapWord* addr = (HeapWord*) p;
951 // We can't really check against _heap_start and _heap_end, since it
952 // is possible during an evacuation pause with piggy-backed
953 // initial-mark that the committed space is expanded during the
954 // pause without CM observing this change. So the assertions below
955 // is a bit conservative; but better than nothing.
956 tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
957 "address should be within the heap bounds" );
958
959 if (!_nextMarkBitMap->isMarked(addr))
960 _nextMarkBitMap->parMark(addr);
961 }
962
963 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
964 // The objects on the region have already been marked "in bulk" by
965 // the caller. We only need to decide whether to push the region on
966 // the region stack or not.
967
968 if (!concurrent_marking_in_progress() || !_should_gray_objects)
969 // We're done with marking and waiting for remark. We do not need to
970 // push anything else on the region stack.
971 return;
972
973 HeapWord* finger = _finger;
974
975 if (verbose_low())
976 gclog_or_tty->print_cr("[global] attempting to push "
977 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
978 PTR_FORMAT, mr.start(), mr.end(), finger);
979
980 if (mr.start() < finger) {
981 // The finger is always heap region aligned and it is not possible
982 // for mr to span heap regions.
983 tmp_guarantee_CM( mr.end() <= finger, "invariant" );
984
985 tmp_guarantee_CM( mr.start() <= mr.end() &&
986 _heap_start <= mr.start() &&
987 mr.end() <= _heap_end,
988 "region boundaries should fall within the committed space" );
989 if (verbose_low())
990 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
991 "below the finger, pushing it",
992 mr.start(), mr.end());
993
994 if (!region_stack_push(mr)) {
995 if (verbose_low())
996 gclog_or_tty->print_cr("[global] region stack has overflown.");
997 }
998 }
999 }
1000
1001 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1002 // The object is not marked by the caller. We need to at least mark
1003 // it and maybe push in on the stack.
1004
1005 HeapWord* addr = (HeapWord*)p;
1006 if (!_nextMarkBitMap->isMarked(addr)) {
1007 // We definitely need to mark it, irrespective whether we bail out
1008 // because we're done with marking.
1009 if (_nextMarkBitMap->parMark(addr)) {
1010 if (!concurrent_marking_in_progress() || !_should_gray_objects)
1011 // If we're done with concurrent marking and we're waiting for
1012 // remark, then we're not pushing anything on the stack.
1013 return;
1014
1015 // No OrderAccess:store_load() is needed. It is implicit in the
1016 // CAS done in parMark(addr) above
1017 HeapWord* finger = _finger;
1018
1019 if (addr < finger) {
1020 if (!mark_stack_push(oop(addr))) {
1021 if (verbose_low())
1022 gclog_or_tty->print_cr("[global] global stack overflow "
1023 "during parMark");
1024 }
1025 }
1026 }
1027 }
1028 }
1029
1030 class CMConcurrentMarkingTask: public AbstractGangTask {
1031 private:
1032 ConcurrentMark* _cm;
1033 ConcurrentMarkThread* _cmt;
1034
1035 public:
1036 void work(int worker_i) {
1037 guarantee( Thread::current()->is_ConcurrentGC_thread(),
1038 "this should only be done by a conc GC thread" );
1039
1040 double start_vtime = os::elapsedVTime();
1041
1042 ConcurrentGCThread::stsJoin();
1043
1044 guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
1045 CMTask* the_task = _cm->task(worker_i);
1046 the_task->start_co_tracker();
1047 the_task->record_start_time();
1048 if (!_cm->has_aborted()) {
1049 do {
1050 double start_vtime_sec = os::elapsedVTime();
1051 double start_time_sec = os::elapsedTime();
1052 the_task->do_marking_step(10.0);
1053 double end_time_sec = os::elapsedTime();
1054 double end_vtime_sec = os::elapsedVTime();
1055 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1056 double elapsed_time_sec = end_time_sec - start_time_sec;
1057 _cm->clear_has_overflown();
1058
1059 bool ret = _cm->do_yield_check(worker_i);
1060
1061 jlong sleep_time_ms;
1062 if (!_cm->has_aborted() && the_task->has_aborted()) {
1063 sleep_time_ms =
1064 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1065 ConcurrentGCThread::stsLeave();
1066 os::sleep(Thread::current(), sleep_time_ms, false);
1067 ConcurrentGCThread::stsJoin();
1068 }
1069 double end_time2_sec = os::elapsedTime();
1070 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1071
1072 the_task->update_co_tracker();
1073
1074 #if 0
1075 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1076 "overhead %1.4lf",
1077 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1078 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1079 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1080 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1081 #endif
1082 } while (!_cm->has_aborted() && the_task->has_aborted());
1083 }
1084 the_task->record_end_time();
1085 guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
1086
1087 ConcurrentGCThread::stsLeave();
1088
1089 double end_vtime = os::elapsedVTime();
1090 the_task->update_co_tracker(true);
1091 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1092 }
1093
1094 CMConcurrentMarkingTask(ConcurrentMark* cm,
1095 ConcurrentMarkThread* cmt) :
1096 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1097
1098 ~CMConcurrentMarkingTask() { }
1099 };
1100
1101 void ConcurrentMark::markFromRoots() {
1102 // we might be tempted to assert that:
1103 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1104 // "inconsistent argument?");
1105 // However that wouldn't be right, because it's possible that
1106 // a safepoint is indeed in progress as a younger generation
1107 // stop-the-world GC happens even as we mark in this generation.
1108
1109 _restart_for_overflow = false;
1110
1111 set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1112
1113 CMConcurrentMarkingTask markingTask(this, cmThread());
1114 if (parallel_marking_threads() > 0)
1115 _parallel_workers->run_task(&markingTask);
1116 else
1117 markingTask.work(0);
1118 print_stats();
1119 }
1120
1121 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1122 // world is stopped at this checkpoint
1123 assert(SafepointSynchronize::is_at_safepoint(),
1124 "world should be stopped");
1125 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1126
1127 // If a full collection has happened, we shouldn't do this.
1128 if (has_aborted()) {
1129 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1130 return;
1131 }
1132
1133 G1CollectorPolicy* g1p = g1h->g1_policy();
1134 g1p->record_concurrent_mark_remark_start();
1135
1136 double start = os::elapsedTime();
1137 GCOverheadReporter::recordSTWStart(start);
1138
1139 checkpointRootsFinalWork();
1140
1141 double mark_work_end = os::elapsedTime();
1142
1143 weakRefsWork(clear_all_soft_refs);
1144
1145 if (has_overflown()) {
1146 // Oops. We overflowed. Restart concurrent marking.
1147 _restart_for_overflow = true;
1148 // Clear the flag. We do not need it any more.
1149 clear_has_overflown();
1150 if (G1TraceMarkStackOverflow)
1151 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1152 } else {
1153 // We're done with marking.
1154 JavaThread::satb_mark_queue_set().set_active_all_threads(false);
1155 }
1156
1157 #if VERIFY_OBJS_PROCESSED
1158 _scan_obj_cl.objs_processed = 0;
1159 ThreadLocalObjQueue::objs_enqueued = 0;
1160 #endif
1161
1162 // Statistics
1163 double now = os::elapsedTime();
1164 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1165 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1166 _remark_times.add((now - start) * 1000.0);
1167
1168 GCOverheadReporter::recordSTWEnd(now);
1169 for (int i = 0; i < (int)_max_task_num; ++i)
1170 _tasks[i]->disable_co_tracker();
1171 _cleanup_co_tracker.enable();
1172 _cleanup_co_tracker.reset(cleanup_task_overhead());
1173 g1p->record_concurrent_mark_remark_end();
1174 }
1175
1176
1177 #define CARD_BM_TEST_MODE 0
1178
1179 class CalcLiveObjectsClosure: public HeapRegionClosure {
1180
1181 CMBitMapRO* _bm;
1182 ConcurrentMark* _cm;
1183 COTracker* _co_tracker;
1184 bool _changed;
1185 bool _yield;
1186 size_t _words_done;
1187 size_t _tot_live;
1188 size_t _tot_used;
1189 size_t _regions_done;
1190 double _start_vtime_sec;
1191
1192 BitMap* _region_bm;
1193 BitMap* _card_bm;
1194 intptr_t _bottom_card_num;
1195 bool _final;
1196
1197 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1198 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1199 #if CARD_BM_TEST_MODE
1200 guarantee(_card_bm->at(i - _bottom_card_num),
1201 "Should already be set.");
1202 #else
1203 _card_bm->par_at_put(i - _bottom_card_num, 1);
1204 #endif
1205 }
1206 }
1207
1208 public:
1209 CalcLiveObjectsClosure(bool final,
1210 CMBitMapRO *bm, ConcurrentMark *cm,
1211 BitMap* region_bm, BitMap* card_bm,
1212 COTracker* co_tracker) :
1213 _bm(bm), _cm(cm), _changed(false), _yield(true),
1214 _words_done(0), _tot_live(0), _tot_used(0),
1215 _region_bm(region_bm), _card_bm(card_bm),
1216 _final(final), _co_tracker(co_tracker),
1217 _regions_done(0), _start_vtime_sec(0.0)
1218 {
1219 _bottom_card_num =
1220 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1221 CardTableModRefBS::card_shift);
1222 }
1223
1224 bool doHeapRegion(HeapRegion* hr) {
1225 if (_co_tracker != NULL)
1226 _co_tracker->update();
1227
1228 if (!_final && _regions_done == 0)
1229 _start_vtime_sec = os::elapsedVTime();
1230
1231 if (hr->continuesHumongous()) return false;
1232
1233 HeapWord* nextTop = hr->next_top_at_mark_start();
1234 HeapWord* start = hr->top_at_conc_mark_count();
1235 assert(hr->bottom() <= start && start <= hr->end() &&
1236 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1237 start <= nextTop,
1238 "Preconditions.");
1239 // Otherwise, record the number of word's we'll examine.
1240 size_t words_done = (nextTop - start);
1241 // Find the first marked object at or after "start".
1242 start = _bm->getNextMarkedWordAddress(start, nextTop);
1243 size_t marked_bytes = 0;
1244
1245 // Below, the term "card num" means the result of shifting an address
1246 // by the card shift -- address 0 corresponds to card number 0. One
1247 // must subtract the card num of the bottom of the heap to obtain a
1248 // card table index.
1249 // The first card num of the sequence of live cards currently being
1250 // constructed. -1 ==> no sequence.
1251 intptr_t start_card_num = -1;
1252 // The last card num of the sequence of live cards currently being
1253 // constructed. -1 ==> no sequence.
1254 intptr_t last_card_num = -1;
1255
1256 while (start < nextTop) {
1257 if (_yield && _cm->do_yield_check()) {
1258 // We yielded. It might be for a full collection, in which case
1259 // all bets are off; terminate the traversal.
1260 if (_cm->has_aborted()) {
1261 _changed = false;
1262 return true;
1263 } else {
1264 // Otherwise, it might be a collection pause, and the region
1265 // we're looking at might be in the collection set. We'll
1266 // abandon this region.
1267 return false;
1268 }
1269 }
1270 oop obj = oop(start);
1271 int obj_sz = obj->size();
1272 // The card num of the start of the current object.
1273 intptr_t obj_card_num =
1274 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1275
1276 HeapWord* obj_last = start + obj_sz - 1;
1277 intptr_t obj_last_card_num =
1278 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1279
1280 if (obj_card_num != last_card_num) {
1281 if (start_card_num == -1) {
1282 assert(last_card_num == -1, "Both or neither.");
1283 start_card_num = obj_card_num;
1284 } else {
1285 assert(last_card_num != -1, "Both or neither.");
1286 assert(obj_card_num >= last_card_num, "Inv");
1287 if ((obj_card_num - last_card_num) > 1) {
1288 // Mark the last run, and start a new one.
1289 mark_card_num_range(start_card_num, last_card_num);
1290 start_card_num = obj_card_num;
1291 }
1292 }
1293 #if CARD_BM_TEST_MODE
1294 /*
1295 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1296 obj_card_num - _bottom_card_num,
1297 obj_last_card_num - _bottom_card_num);
1298 */
1299 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1300 _card_bm->par_at_put(j - _bottom_card_num, 1);
1301 }
1302 #endif
1303 }
1304 // In any case, we set the last card num.
1305 last_card_num = obj_last_card_num;
1306
1307 marked_bytes += obj_sz * HeapWordSize;
1308 // Find the next marked object after this one.
1309 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1310 _changed = true;
1311 }
1312 // Handle the last range, if any.
1313 if (start_card_num != -1)
1314 mark_card_num_range(start_card_num, last_card_num);
1315 if (_final) {
1316 // Mark the allocated-since-marking portion...
1317 HeapWord* tp = hr->top();
1318 if (nextTop < tp) {
1319 start_card_num =
1320 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1321 last_card_num =
1322 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1323 mark_card_num_range(start_card_num, last_card_num);
1324 // This definitely means the region has live objects.
1325 _region_bm->par_at_put(hr->hrs_index(), 1);
1326 }
1327 }
1328
1329 hr->add_to_marked_bytes(marked_bytes);
1330 // Update the live region bitmap.
1331 if (marked_bytes > 0) {
1332 _region_bm->par_at_put(hr->hrs_index(), 1);
1333 }
1334 hr->set_top_at_conc_mark_count(nextTop);
1335 _tot_live += hr->next_live_bytes();
1336 _tot_used += hr->used();
1337 _words_done = words_done;
1338
1339 if (!_final) {
1340 ++_regions_done;
1341 if (_regions_done % 10 == 0) {
1342 double end_vtime_sec = os::elapsedVTime();
1343 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1344 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1345 jlong sleep_time_ms =
1346 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1347 #if 0
1348 gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
1349 "overhead %1.4lf",
1350 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1351 _co_tracker->concOverhead(os::elapsedTime()));
1352 #endif
1353 os::sleep(Thread::current(), sleep_time_ms, false);
1354 _start_vtime_sec = end_vtime_sec;
1355 }
1356 }
1357 }
1358
1359 return false;
1360 }
1361
1362 bool changed() { return _changed; }
1363 void reset() { _changed = false; _words_done = 0; }
1364 void no_yield() { _yield = false; }
1365 size_t words_done() { return _words_done; }
1366 size_t tot_live() { return _tot_live; }
1367 size_t tot_used() { return _tot_used; }
1368 };
1369
1370
1371 void ConcurrentMark::calcDesiredRegions() {
1372 guarantee( _cleanup_co_tracker.enabled(), "invariant" );
1373 _cleanup_co_tracker.start();
1374
1375 _region_bm.clear();
1376 _card_bm.clear();
1377 CalcLiveObjectsClosure calccl(false /*final*/,
1378 nextMarkBitMap(), this,
1379 &_region_bm, &_card_bm,
1380 &_cleanup_co_tracker);
1381 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1382 g1h->heap_region_iterate(&calccl);
1383
1384 do {
1385 calccl.reset();
1386 g1h->heap_region_iterate(&calccl);
1387 } while (calccl.changed());
1388
1389 _cleanup_co_tracker.update(true);
1390 }
1391
1392 class G1ParFinalCountTask: public AbstractGangTask {
1393 protected:
1394 G1CollectedHeap* _g1h;
1395 CMBitMap* _bm;
1396 size_t _n_workers;
1397 size_t *_live_bytes;
1398 size_t *_used_bytes;
1399 BitMap* _region_bm;
1400 BitMap* _card_bm;
1401 public:
1402 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1403 BitMap* region_bm, BitMap* card_bm) :
1404 AbstractGangTask("G1 final counting"), _g1h(g1h),
1405 _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1406 {
1407 if (ParallelGCThreads > 0)
1408 _n_workers = _g1h->workers()->total_workers();
1409 else
1410 _n_workers = 1;
1411 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1412 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1413 }
1414
1415 ~G1ParFinalCountTask() {
1416 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1417 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1418 }
1419
1420 void work(int i) {
1421 CalcLiveObjectsClosure calccl(true /*final*/,
1422 _bm, _g1h->concurrent_mark(),
1423 _region_bm, _card_bm,
1424 NULL /* CO tracker */);
1425 calccl.no_yield();
1426 if (ParallelGCThreads > 0) {
1427 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1428 HeapRegion::FinalCountClaimValue);
1429 } else {
1430 _g1h->heap_region_iterate(&calccl);
1431 }
1432 assert(calccl.complete(), "Shouldn't have yielded!");
1433
1434 guarantee( (size_t)i < _n_workers, "invariant" );
1435 _live_bytes[i] = calccl.tot_live();
1436 _used_bytes[i] = calccl.tot_used();
1437 }
1438 size_t live_bytes() {
1439 size_t live_bytes = 0;
1440 for (size_t i = 0; i < _n_workers; ++i)
1441 live_bytes += _live_bytes[i];
1442 return live_bytes;
1443 }
1444 size_t used_bytes() {
1445 size_t used_bytes = 0;
1446 for (size_t i = 0; i < _n_workers; ++i)
1447 used_bytes += _used_bytes[i];
1448 return used_bytes;
1449 }
1450 };
1451
1452 class G1ParNoteEndTask;
1453
1454 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1455 G1CollectedHeap* _g1;
1456 int _worker_num;
1457 size_t _max_live_bytes;
1458 size_t _regions_claimed;
1459 size_t _freed_bytes;
1460 size_t _cleared_h_regions;
1461 size_t _freed_regions;
1462 UncleanRegionList* _unclean_region_list;
1463 double _claimed_region_time;
1464 double _max_region_time;
1465
1466 public:
1467 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1468 UncleanRegionList* list,
1469 int worker_num);
1470 size_t freed_bytes() { return _freed_bytes; }
1471 size_t cleared_h_regions() { return _cleared_h_regions; }
1472 size_t freed_regions() { return _freed_regions; }
1473 UncleanRegionList* unclean_region_list() {
1474 return _unclean_region_list;
1475 }
1476
1477 bool doHeapRegion(HeapRegion *r);
1478
1479 size_t max_live_bytes() { return _max_live_bytes; }
1480 size_t regions_claimed() { return _regions_claimed; }
1481 double claimed_region_time_sec() { return _claimed_region_time; }
1482 double max_region_time_sec() { return _max_region_time; }
1483 };
1484
1485 class G1ParNoteEndTask: public AbstractGangTask {
1486 friend class G1NoteEndOfConcMarkClosure;
1487 protected:
1488 G1CollectedHeap* _g1h;
1489 size_t _max_live_bytes;
1490 size_t _freed_bytes;
1491 ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1492 public:
1493 G1ParNoteEndTask(G1CollectedHeap* g1h,
1494 ConcurrentMark::ParCleanupThreadState**
1495 par_cleanup_thread_state) :
1496 AbstractGangTask("G1 note end"), _g1h(g1h),
1497 _max_live_bytes(0), _freed_bytes(0),
1498 _par_cleanup_thread_state(par_cleanup_thread_state)
1499 {}
1500
1501 void work(int i) {
1502 double start = os::elapsedTime();
1503 G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1504 &_par_cleanup_thread_state[i]->list,
1505 i);
1506 if (ParallelGCThreads > 0) {
1507 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1508 HeapRegion::NoteEndClaimValue);
1509 } else {
1510 _g1h->heap_region_iterate(&g1_note_end);
1511 }
1512 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1513
1514 // Now finish up freeing the current thread's regions.
1515 _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1516 g1_note_end.cleared_h_regions(),
1517 0, NULL);
1518 {
1519 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1520 _max_live_bytes += g1_note_end.max_live_bytes();
1521 _freed_bytes += g1_note_end.freed_bytes();
1522 }
1523 double end = os::elapsedTime();
1524 if (G1PrintParCleanupStats) {
1525 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1526 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1527 i, start, end, (end-start)*1000.0,
1528 g1_note_end.regions_claimed(),
1529 g1_note_end.claimed_region_time_sec()*1000.0,
1530 g1_note_end.max_region_time_sec()*1000.0);
1531 }
1532 }
1533 size_t max_live_bytes() { return _max_live_bytes; }
1534 size_t freed_bytes() { return _freed_bytes; }
1535 };
1536
1537 class G1ParScrubRemSetTask: public AbstractGangTask {
1538 protected:
1539 G1RemSet* _g1rs;
1540 BitMap* _region_bm;
1541 BitMap* _card_bm;
1542 public:
1543 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1544 BitMap* region_bm, BitMap* card_bm) :
1545 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1546 _region_bm(region_bm), _card_bm(card_bm)
1547 {}
1548
1549 void work(int i) {
1550 if (ParallelGCThreads > 0) {
1551 _g1rs->scrub_par(_region_bm, _card_bm, i,
1552 HeapRegion::ScrubRemSetClaimValue);
1553 } else {
1554 _g1rs->scrub(_region_bm, _card_bm);
1555 }
1556 }
1557
1558 };
1559
1560 G1NoteEndOfConcMarkClosure::
1561 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1562 UncleanRegionList* list,
1563 int worker_num)
1564 : _g1(g1), _worker_num(worker_num),
1565 _max_live_bytes(0), _regions_claimed(0),
1566 _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1567 _claimed_region_time(0.0), _max_region_time(0.0),
1568 _unclean_region_list(list)
1569 {}
1570
1571 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1572 // We use a claim value of zero here because all regions
1573 // were claimed with value 1 in the FinalCount task.
1574 r->reset_gc_time_stamp();
1575 if (!r->continuesHumongous()) {
1576 double start = os::elapsedTime();
1577 _regions_claimed++;
1578 r->note_end_of_marking();
1579 _max_live_bytes += r->max_live_bytes();
1580 _g1->free_region_if_totally_empty_work(r,
1581 _freed_bytes,
1582 _cleared_h_regions,
1583 _freed_regions,
1584 _unclean_region_list,
1585 true /*par*/);
1586 double region_time = (os::elapsedTime() - start);
1587 _claimed_region_time += region_time;
1588 if (region_time > _max_region_time) _max_region_time = region_time;
1589 }
1590 return false;
1591 }
1592
1593 void ConcurrentMark::cleanup() {
1594 // world is stopped at this checkpoint
1595 assert(SafepointSynchronize::is_at_safepoint(),
1596 "world should be stopped");
1597 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1598
1599 // If a full collection has happened, we shouldn't do this.
1600 if (has_aborted()) {
1601 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1602 return;
1603 }
1604
1605 _cleanup_co_tracker.disable();
1606
1607 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1608 g1p->record_concurrent_mark_cleanup_start();
1609
1610 double start = os::elapsedTime();
1611 GCOverheadReporter::recordSTWStart(start);
1612
1613 // Do counting once more with the world stopped for good measure.
1614 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1615 &_region_bm, &_card_bm);
1616 if (ParallelGCThreads > 0) {
1617 assert(g1h->check_heap_region_claim_values(
1618 HeapRegion::InitialClaimValue),
1619 "sanity check");
1620
1621 int n_workers = g1h->workers()->total_workers();
1622 g1h->set_par_threads(n_workers);
1623 g1h->workers()->run_task(&g1_par_count_task);
1624 g1h->set_par_threads(0);
1625
1626 assert(g1h->check_heap_region_claim_values(
1627 HeapRegion::FinalCountClaimValue),
1628 "sanity check");
1629 } else {
1630 g1_par_count_task.work(0);
1631 }
1632
1633 size_t known_garbage_bytes =
1634 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1635 #if 0
1636 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1637 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1638 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1639 (double) known_garbage_bytes / (double) (1024 * 1024));
1640 #endif // 0
1641 g1p->set_known_garbage_bytes(known_garbage_bytes);
1642
1643 size_t start_used_bytes = g1h->used();
1644 _at_least_one_mark_complete = true;
1645 g1h->set_marking_complete();
1646
1647 double count_end = os::elapsedTime();
1648 double this_final_counting_time = (count_end - start);
1649 if (G1PrintParCleanupStats) {
1650 gclog_or_tty->print_cr("Cleanup:");
1651 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1652 this_final_counting_time*1000.0);
1653 }
1654 _total_counting_time += this_final_counting_time;
1655
1656 // Install newly created mark bitMap as "prev".
1657 swapMarkBitMaps();
1658
1659 g1h->reset_gc_time_stamp();
1660
1661 // Note end of marking in all heap regions.
1662 double note_end_start = os::elapsedTime();
1663 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1664 if (ParallelGCThreads > 0) {
1665 int n_workers = g1h->workers()->total_workers();
1666 g1h->set_par_threads(n_workers);
1667 g1h->workers()->run_task(&g1_par_note_end_task);
1668 g1h->set_par_threads(0);
1669
1670 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1671 "sanity check");
1672 } else {
1673 g1_par_note_end_task.work(0);
1674 }
1675 g1h->set_unclean_regions_coming(true);
1676 double note_end_end = os::elapsedTime();
1677 // Tell the mutators that there might be unclean regions coming...
1678 if (G1PrintParCleanupStats) {
1679 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1680 (note_end_end - note_end_start)*1000.0);
1681 }
1682
1683
1684 // call below, since it affects the metric by which we sort the heap
1685 // regions.
1686 if (G1ScrubRemSets) {
1687 double rs_scrub_start = os::elapsedTime();
1688 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1689 if (ParallelGCThreads > 0) {
1690 int n_workers = g1h->workers()->total_workers();
1691 g1h->set_par_threads(n_workers);
1692 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1693 g1h->set_par_threads(0);
1694
1695 assert(g1h->check_heap_region_claim_values(
1696 HeapRegion::ScrubRemSetClaimValue),
1697 "sanity check");
1698 } else {
1699 g1_par_scrub_rs_task.work(0);
1700 }
1701
1702 double rs_scrub_end = os::elapsedTime();
1703 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1704 _total_rs_scrub_time += this_rs_scrub_time;
1705 }
1706
1707 // this will also free any regions totally full of garbage objects,
1708 // and sort the regions.
1709 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1710 g1_par_note_end_task.freed_bytes(),
1711 g1_par_note_end_task.max_live_bytes());
1712
1713 // Statistics.
1714 double end = os::elapsedTime();
1715 _cleanup_times.add((end - start) * 1000.0);
1716 GCOverheadReporter::recordSTWEnd(end);
1717
1718 // G1CollectedHeap::heap()->print();
1719 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1720 // G1CollectedHeap::heap()->get_gc_time_stamp());
1721
1722 if (PrintGC || PrintGCDetails) {
1723 g1h->print_size_transition(gclog_or_tty,
1724 start_used_bytes,
1725 g1h->used(),
1726 g1h->capacity());
1727 }
1728
1729 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1730 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1731
1732 // We need to make this be a "collection" so any collection pause that
1733 // races with it goes around and waits for completeCleanup to finish.
1734 g1h->increment_total_collections();
1735
1736 #ifndef PRODUCT
1737 if (G1VerifyConcMark) {
1738 G1CollectedHeap::heap()->prepare_for_verify();
1739 G1CollectedHeap::heap()->verify(true,false);
1740 }
1741 #endif
1742 }
1743
1744 void ConcurrentMark::completeCleanup() {
1745 // A full collection intervened.
1746 if (has_aborted()) return;
1747
1748 int first = 0;
1749 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1750 for (int t = 0; t < last; t++) {
1751 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1752 assert(list->well_formed(), "Inv");
1753 HeapRegion* hd = list->hd();
1754 while (hd != NULL) {
1755 // Now finish up the other stuff.
1756 hd->rem_set()->clear();
1757 HeapRegion* next_hd = hd->next_from_unclean_list();
1758 (void)list->pop();
1759 guarantee(list->hd() == next_hd, "how not?");
1760 _g1h->put_region_on_unclean_list(hd);
1761 if (!hd->isHumongous()) {
1762 // Add this to the _free_regions count by 1.
1763 _g1h->finish_free_region_work(0, 0, 1, NULL);
1764 }
1765 hd = list->hd();
1766 guarantee(hd == next_hd, "how not?");
1767 }
1768 }
1769 }
1770
1771
1772 class G1CMIsAliveClosure: public BoolObjectClosure {
1773 G1CollectedHeap* _g1;
1774 public:
1775 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1776 _g1(g1)
1777 {}
1778
1779 void do_object(oop obj) {
1780 assert(false, "not to be invoked");
1781 }
1782 bool do_object_b(oop obj) {
1783 HeapWord* addr = (HeapWord*)obj;
1784 return addr != NULL &&
1785 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1786 }
1787 };
1788
1789 class G1CMKeepAliveClosure: public OopClosure {
1790 G1CollectedHeap* _g1;
1791 ConcurrentMark* _cm;
1792 CMBitMap* _bitMap;
1793 public:
1794 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1795 CMBitMap* bitMap) :
1796 _g1(g1), _cm(cm),
1797 _bitMap(bitMap) {}
1798
1799 void do_oop(narrowOop* p) {
1800 guarantee(false, "NYI");
1801 }
1802
1803 void do_oop(oop* p) {
1804 oop thisOop = *p;
1805 HeapWord* addr = (HeapWord*)thisOop;
1806 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1807 _bitMap->mark(addr);
1808 _cm->mark_stack_push(thisOop);
1809 }
1810 }
1811 };
1812
1813 class G1CMDrainMarkingStackClosure: public VoidClosure {
1814 CMMarkStack* _markStack;
1815 CMBitMap* _bitMap;
1816 G1CMKeepAliveClosure* _oopClosure;
1817 public:
1818 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1819 G1CMKeepAliveClosure* oopClosure) :
1820 _bitMap(bitMap),
1821 _markStack(markStack),
1822 _oopClosure(oopClosure)
1823 {}
1824
1825 void do_void() {
1826 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1827 }
1828 };
1829
1830 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1831 ResourceMark rm;
1832 HandleMark hm;
1833 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1834 ReferenceProcessor* rp = g1h->ref_processor();
1835
1836 // Process weak references.
1837 rp->snap_policy(clear_all_soft_refs);
1838 assert(_markStack.isEmpty(), "mark stack should be empty");
1839
1840 G1CMIsAliveClosure g1IsAliveClosure (g1h);
1841 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1842 G1CMDrainMarkingStackClosure
1843 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1844 &g1KeepAliveClosure);
1845
1846 // XXXYYY Also: copy the parallel ref processing code from CMS.
1847 rp->process_discovered_references(&g1IsAliveClosure,
1848 &g1KeepAliveClosure,
1849 &g1DrainMarkingStackClosure,
1850 NULL);
1851 assert(_markStack.overflow() || _markStack.isEmpty(),
1852 "mark stack should be empty (unless it overflowed)");
1853 if (_markStack.overflow()) {
1854 set_has_overflown();
1855 }
1856
1857 rp->enqueue_discovered_references();
1858 rp->verify_no_references_recorded();
1859 assert(!rp->discovery_enabled(), "should have been disabled");
1860
1861 // Now clean up stale oops in SymbolTable and StringTable
1862 SymbolTable::unlink(&g1IsAliveClosure);
1863 StringTable::unlink(&g1IsAliveClosure);
1864 }
1865
1866 void ConcurrentMark::swapMarkBitMaps() {
1867 CMBitMapRO* temp = _prevMarkBitMap;
1868 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1869 _nextMarkBitMap = (CMBitMap*) temp;
1870 }
1871
1872 class CMRemarkTask: public AbstractGangTask {
1873 private:
1874 ConcurrentMark *_cm;
1875
1876 public:
1877 void work(int worker_i) {
1878 // Since all available tasks are actually started, we should
1879 // only proceed if we're supposed to be actived.
1880 if ((size_t)worker_i < _cm->active_tasks()) {
1881 CMTask* task = _cm->task(worker_i);
1882 task->record_start_time();
1883 do {
1884 task->do_marking_step(1000000000.0 /* something very large */);
1885 } while (task->has_aborted() && !_cm->has_overflown());
1886 // If we overflow, then we do not want to restart. We instead
1887 // want to abort remark and do concurrent marking again.
1888 task->record_end_time();
1889 }
1890 }
1891
1892 CMRemarkTask(ConcurrentMark* cm) :
1893 AbstractGangTask("Par Remark"), _cm(cm) { }
1894 };
1895
1896 void ConcurrentMark::checkpointRootsFinalWork() {
1897 ResourceMark rm;
1898 HandleMark hm;
1899 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1900
1901 g1h->ensure_parsability(false);
1902
1903 if (ParallelGCThreads > 0) {
1904 g1h->change_strong_roots_parity();
1905 // this is remark, so we'll use up all available threads
1906 int active_workers = ParallelGCThreads;
1907 set_phase(active_workers, false);
1908
1909 CMRemarkTask remarkTask(this);
1910 // We will start all available threads, even if we decide that the
1911 // active_workers will be fewer. The extra ones will just bail out
1912 // immediately.
1913 int n_workers = g1h->workers()->total_workers();
1914 g1h->set_par_threads(n_workers);
1915 g1h->workers()->run_task(&remarkTask);
1916 g1h->set_par_threads(0);
1917
1918 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1919 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1920 } else {
1921 g1h->change_strong_roots_parity();
1922 // this is remark, so we'll use up all available threads
1923 int active_workers = 1;
1924 set_phase(active_workers, false);
1925
1926 CMRemarkTask remarkTask(this);
1927 // We will start all available threads, even if we decide that the
1928 // active_workers will be fewer. The extra ones will just bail out
1929 // immediately.
1930 remarkTask.work(0);
1931
1932 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1933 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1934 }
1935
1936 print_stats();
1937
1938 if (!restart_for_overflow())
1939 set_non_marking_state();
1940
1941 #if VERIFY_OBJS_PROCESSED
1942 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1943 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1944 _scan_obj_cl.objs_processed,
1945 ThreadLocalObjQueue::objs_enqueued);
1946 guarantee(_scan_obj_cl.objs_processed ==
1947 ThreadLocalObjQueue::objs_enqueued,
1948 "Different number of objs processed and enqueued.");
1949 }
1950 #endif
1951 }
1952
1953 class ReachablePrinterOopClosure: public OopClosure {
1954 private:
1955 G1CollectedHeap* _g1h;
1956 CMBitMapRO* _bitmap;
1957 outputStream* _out;
1958
1959 public:
1960 ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
1961 _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
1962
1963 void do_oop(narrowOop* p) {
1964 guarantee(false, "NYI");
1965 }
1966
1967 void do_oop(oop* p) {
1968 oop obj = *p;
1969 const char* str = NULL;
1970 const char* str2 = "";
1971
1972 if (!_g1h->is_in_g1_reserved(obj))
1973 str = "outside G1 reserved";
1974 else {
1975 HeapRegion* hr = _g1h->heap_region_containing(obj);
1976 guarantee( hr != NULL, "invariant" );
1977 if (hr->obj_allocated_since_prev_marking(obj)) {
1978 str = "over TAMS";
1979 if (_bitmap->isMarked((HeapWord*) obj))
1980 str2 = " AND MARKED";
1981 } else if (_bitmap->isMarked((HeapWord*) obj))
1982 str = "marked";
1983 else
1984 str = "#### NOT MARKED ####";
1985 }
1986
1987 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
1988 p, (void*) obj, str, str2);
1989 }
1990 };
1991
1992 class ReachablePrinterClosure: public BitMapClosure {
1993 private:
1994 CMBitMapRO* _bitmap;
1995 outputStream* _out;
1996
1997 public:
1998 ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
1999 _bitmap(bitmap), _out(out) { }
2000
2001 bool do_bit(size_t offset) {
2002 HeapWord* addr = _bitmap->offsetToHeapWord(offset);
2003 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2004
2005 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
2006 oop(addr)->oop_iterate(&oopCl);
2007 _out->print_cr("");
2008
2009 return true;
2010 }
2011 };
2012
2013 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2014 private:
2015 CMBitMapRO* _bitmap;
2016 outputStream* _out;
2017
2018 public:
2019 void do_object(oop o) {
2020 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2021
2022 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
2023 o->oop_iterate(&oopCl);
2024 _out->print_cr("");
2025 }
2026
2027 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2028 _bitmap(bitmap), _out(out) { }
2029 };
2030
2031 class RegionReachablePrinterClosure : public HeapRegionClosure {
2032 private:
2033 CMBitMapRO* _bitmap;
2034 outputStream* _out;
2035
2036 public:
2037 bool doHeapRegion(HeapRegion* hr) {
2038 HeapWord* b = hr->bottom();
2039 HeapWord* e = hr->end();
2040 HeapWord* t = hr->top();
2041 HeapWord* p = hr->prev_top_at_mark_start();
2042 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2043 "PTAMS: "PTR_FORMAT, b, e, t, p);
2044 _out->print_cr("");
2045
2046 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
2047 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2048
2049 return false;
2050 }
2051
2052 RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2053 outputStream* out) :
2054 _bitmap(bitmap), _out(out) { }
2055 };
2056
2057 void ConcurrentMark::print_prev_bitmap_reachable() {
2058 outputStream* out = gclog_or_tty;
2059
2060 #if SEND_HEAP_DUMP_TO_FILE
2061 guarantee(heap_dump_file == NULL, "Protocol");
2062 char fn_buf[100];
2063 sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
2064 heap_dump_file = fopen(fn_buf, "w");
2065 fileStream fstream(heap_dump_file);
2066 out = &fstream;
2067 #endif // SEND_HEAP_DUMP_TO_FILE
2068
2069 RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
2070 out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
2071 _g1h->heap_region_iterate(&rcl);
2072 out->print_cr("");
2073
2074 ReachablePrinterClosure cl(_prevMarkBitMap, out);
2075 out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
2076 _prevMarkBitMap->iterate(&cl);
2077 out->print_cr("");
2078
2079 #if SEND_HEAP_DUMP_TO_FILE
2080 fclose(heap_dump_file);
2081 heap_dump_file = NULL;
2082 #endif // SEND_HEAP_DUMP_TO_FILE
2083 }
2084
2085 // This note is for drainAllSATBBuffers and the code in between.
2086 // In the future we could reuse a task to do this work during an
2087 // evacuation pause (since now tasks are not active and can be claimed
2088 // during an evacuation pause). This was a late change to the code and
2089 // is currently not being taken advantage of.
2090
2091 class CMGlobalObjectClosure : public ObjectClosure {
2092 private:
2093 ConcurrentMark* _cm;
2094
2095 public:
2096 void do_object(oop obj) {
2097 _cm->deal_with_reference(obj);
2098 }
2099
2100 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2101 };
2102
2103 void ConcurrentMark::deal_with_reference(oop obj) {
2104 if (verbose_high())
2105 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2106 (void*) obj);
2107
2108
2109 HeapWord* objAddr = (HeapWord*) obj;
2110 if (_g1h->is_in_g1_reserved(objAddr)) {
2111 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2112 HeapRegion* hr = _g1h->heap_region_containing(obj);
2113 if (_g1h->is_obj_ill(obj, hr)) {
2114 if (verbose_high())
2115 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2116 "marked", (void*) obj);
2117
2118 // we need to mark it first
2119 if (_nextMarkBitMap->parMark(objAddr)) {
2120 // No OrderAccess:store_load() is needed. It is implicit in the
2121 // CAS done in parMark(objAddr) above
2122 HeapWord* finger = _finger;
2123 if (objAddr < finger) {
2124 if (verbose_high())
2125 gclog_or_tty->print_cr("[global] below the global finger "
2126 "("PTR_FORMAT"), pushing it", finger);
2127 if (!mark_stack_push(obj)) {
2128 if (verbose_low())
2129 gclog_or_tty->print_cr("[global] global stack overflow during "
2130 "deal_with_reference");
2131 }
2132 }
2133 }
2134 }
2135 }
2136 }
2137
2138 void ConcurrentMark::drainAllSATBBuffers() {
2139 CMGlobalObjectClosure oc(this);
2140 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2141 satb_mq_set.set_closure(&oc);
2142
2143 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2144 if (verbose_medium())
2145 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2146 }
2147
2148 // no need to check whether we should do this, as this is only
2149 // called during an evacuation pause
2150 satb_mq_set.iterate_closure_all_threads();
2151
2152 satb_mq_set.set_closure(NULL);
2153 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
2154 }
2155
2156 void ConcurrentMark::markPrev(oop p) {
2157 // Note we are overriding the read-only view of the prev map here, via
2158 // the cast.
2159 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2160 }
2161
2162 void ConcurrentMark::clear(oop p) {
2163 assert(p != NULL && p->is_oop(), "expected an oop");
2164 HeapWord* addr = (HeapWord*)p;
2165 assert(addr >= _nextMarkBitMap->startWord() ||
2166 addr < _nextMarkBitMap->endWord(), "in a region");
2167
2168 _nextMarkBitMap->clear(addr);
2169 }
2170
2171 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2172 // Note we are overriding the read-only view of the prev map here, via
2173 // the cast.
2174 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2175 _nextMarkBitMap->clearRange(mr);
2176 }
2177
2178 HeapRegion*
2179 ConcurrentMark::claim_region(int task_num) {
2180 // "checkpoint" the finger
2181 HeapWord* finger = _finger;
2182
2183 // _heap_end will not change underneath our feet; it only changes at
2184 // yield points.
2185 while (finger < _heap_end) {
2186 tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
2187
2188 // is the gap between reading the finger and doing the CAS too long?
2189
2190 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2191 HeapWord* bottom = curr_region->bottom();
2192 HeapWord* end = curr_region->end();
2193 HeapWord* limit = curr_region->next_top_at_mark_start();
2194
2195 if (verbose_low())
2196 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2197 "["PTR_FORMAT", "PTR_FORMAT"), "
2198 "limit = "PTR_FORMAT,
2199 task_num, curr_region, bottom, end, limit);
2200
2201 HeapWord* res =
2202 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2203 if (res == finger) {
2204 // we succeeded
2205
2206 // notice that _finger == end cannot be guaranteed here since,
2207 // someone else might have moved the finger even further
2208 guarantee( _finger >= end, "the finger should have moved forward" );
2209
2210 if (verbose_low())
2211 gclog_or_tty->print_cr("[%d] we were successful with region = "
2212 PTR_FORMAT, task_num, curr_region);
2213
2214 if (limit > bottom) {
2215 if (verbose_low())
2216 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2217 "returning it ", task_num, curr_region);
2218 return curr_region;
2219 } else {
2220 tmp_guarantee_CM( limit == bottom,
2221 "the region limit should be at bottom" );
2222 if (verbose_low())
2223 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2224 "returning NULL", task_num, curr_region);
2225 // we return NULL and the caller should try calling
2226 // claim_region() again.
2227 return NULL;
2228 }
2229 } else {
2230 guarantee( _finger > finger, "the finger should have moved forward" );
2231 if (verbose_low())
2232 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2233 "global finger = "PTR_FORMAT", "
2234 "our finger = "PTR_FORMAT,
2235 task_num, _finger, finger);
2236
2237 // read it again
2238 finger = _finger;
2239 }
2240 }
2241
2242 return NULL;
2243 }
2244
2245 void ConcurrentMark::oops_do(OopClosure* cl) {
2246 if (_markStack.size() > 0 && verbose_low())
2247 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2248 "size = %d", _markStack.size());
2249 // we first iterate over the contents of the mark stack...
2250 _markStack.oops_do(cl);
2251
2252 for (int i = 0; i < (int)_max_task_num; ++i) {
2253 OopTaskQueue* queue = _task_queues->queue((int)i);
2254
2255 if (queue->size() > 0 && verbose_low())
2256 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2257 "size = %d", i, queue->size());
2258
2259 // ...then over the contents of the all the task queues.
2260 queue->oops_do(cl);
2261 }
2262
2263 // finally, invalidate any entries that in the region stack that
2264 // point into the collection set
2265 if (_regionStack.invalidate_entries_into_cset()) {
2266 // otherwise, any gray objects copied during the evacuation pause
2267 // might not be visited.
2268 guarantee( _should_gray_objects, "invariant" );
2269 }
2270 }
2271
2272 void ConcurrentMark::clear_marking_state() {
2273 _markStack.setEmpty();
2274 _markStack.clear_overflow();
2275 _regionStack.setEmpty();
2276 _regionStack.clear_overflow();
2277 clear_has_overflown();
2278 _finger = _heap_start;
2279
2280 for (int i = 0; i < (int)_max_task_num; ++i) {
2281 OopTaskQueue* queue = _task_queues->queue(i);
2282 queue->set_empty();
2283 }
2284 }
2285
2286 void ConcurrentMark::print_stats() {
2287 if (verbose_stats()) {
2288 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2289 for (size_t i = 0; i < _active_tasks; ++i) {
2290 _tasks[i]->print_stats();
2291 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2292 }
2293 }
2294 }
2295
2296 class CSMarkOopClosure: public OopClosure {
2297 friend class CSMarkBitMapClosure;
2298
2299 G1CollectedHeap* _g1h;
2300 CMBitMap* _bm;
2301 ConcurrentMark* _cm;
2302 oop* _ms;
2303 jint* _array_ind_stack;
2304 int _ms_size;
2305 int _ms_ind;
2306 int _array_increment;
2307
2308 bool push(oop obj, int arr_ind = 0) {
2309 if (_ms_ind == _ms_size) {
2310 gclog_or_tty->print_cr("Mark stack is full.");
2311 return false;
2312 }
2313 _ms[_ms_ind] = obj;
2314 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2315 _ms_ind++;
2316 return true;
2317 }
2318
2319 oop pop() {
2320 if (_ms_ind == 0) return NULL;
2321 else {
2322 _ms_ind--;
2323 return _ms[_ms_ind];
2324 }
2325 }
2326
2327 bool drain() {
2328 while (_ms_ind > 0) {
2329 oop obj = pop();
2330 assert(obj != NULL, "Since index was non-zero.");
2331 if (obj->is_objArray()) {
2332 jint arr_ind = _array_ind_stack[_ms_ind];
2333 objArrayOop aobj = objArrayOop(obj);
2334 jint len = aobj->length();
2335 jint next_arr_ind = arr_ind + _array_increment;
2336 if (next_arr_ind < len) {
2337 push(obj, next_arr_ind);
2338 }
2339 // Now process this portion of this one.
2340 int lim = MIN2(next_arr_ind, len);
2341 assert(!UseCompressedOops, "This needs to be fixed");
2342 for (int j = arr_ind; j < lim; j++) {
2343 do_oop(aobj->obj_at_addr<oop>(j));
2344 }
2345
2346 } else {
2347 obj->oop_iterate(this);
2348 }
2349 if (abort()) return false;
2350 }
2351 return true;
2352 }
2353
2354 public:
2355 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2356 _g1h(G1CollectedHeap::heap()),
2357 _cm(cm),
2358 _bm(cm->nextMarkBitMap()),
2359 _ms_size(ms_size), _ms_ind(0),
2360 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2361 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2362 _array_increment(MAX2(ms_size/8, 16))
2363 {}
2364
2365 ~CSMarkOopClosure() {
2366 FREE_C_HEAP_ARRAY(oop, _ms);
2367 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2368 }
2369
2370 void do_oop(narrowOop* p) {
2371 guarantee(false, "NYI");
2372 }
2373
2374 void do_oop(oop* p) {
2375 oop obj = *p;
2376 if (obj == NULL) return;
2377 if (obj->is_forwarded()) {
2378 // If the object has already been forwarded, we have to make sure
2379 // that it's marked. So follow the forwarding pointer. Note that
2380 // this does the right thing for self-forwarding pointers in the
2381 // evacuation failure case.
2382 obj = obj->forwardee();
2383 }
2384 HeapRegion* hr = _g1h->heap_region_containing(obj);
2385 if (hr != NULL) {
2386 if (hr->in_collection_set()) {
2387 if (_g1h->is_obj_ill(obj)) {
2388 _bm->mark((HeapWord*)obj);
2389 if (!push(obj)) {
2390 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2391 set_abort();
2392 }
2393 }
2394 } else {
2395 // Outside the collection set; we need to gray it
2396 _cm->deal_with_reference(obj);
2397 }
2398 }
2399 }
2400 };
2401
2402 class CSMarkBitMapClosure: public BitMapClosure {
2403 G1CollectedHeap* _g1h;
2404 CMBitMap* _bitMap;
2405 ConcurrentMark* _cm;
2406 CSMarkOopClosure _oop_cl;
2407 public:
2408 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2409 _g1h(G1CollectedHeap::heap()),
2410 _bitMap(cm->nextMarkBitMap()),
2411 _oop_cl(cm, ms_size)
2412 {}
2413
2414 ~CSMarkBitMapClosure() {}
2415
2416 bool do_bit(size_t offset) {
2417 // convert offset into a HeapWord*
2418 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2419 assert(_bitMap->endWord() && addr < _bitMap->endWord(),
2420 "address out of range");
2421 assert(_bitMap->isMarked(addr), "tautology");
2422 oop obj = oop(addr);
2423 if (!obj->is_forwarded()) {
2424 if (!_oop_cl.push(obj)) return false;
2425 if (!_oop_cl.drain()) return false;
2426 }
2427 // Otherwise...
2428 return true;
2429 }
2430 };
2431
2432
2433 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2434 CMBitMap* _bm;
2435 CSMarkBitMapClosure _bit_cl;
2436 enum SomePrivateConstants {
2437 MSSize = 1000
2438 };
2439 bool _completed;
2440 public:
2441 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2442 _bm(cm->nextMarkBitMap()),
2443 _bit_cl(cm, MSSize),
2444 _completed(true)
2445 {}
2446
2447 ~CompleteMarkingInCSHRClosure() {}
2448
2449 bool doHeapRegion(HeapRegion* r) {
2450 if (!r->evacuation_failed()) {
2451 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2452 if (!mr.is_empty()) {
2453 if (!_bm->iterate(&_bit_cl, mr)) {
2454 _completed = false;
2455 return true;
2456 }
2457 }
2458 }
2459 return false;
2460 }
2461
2462 bool completed() { return _completed; }
2463 };
2464
2465 class ClearMarksInHRClosure: public HeapRegionClosure {
2466 CMBitMap* _bm;
2467 public:
2468 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2469
2470 bool doHeapRegion(HeapRegion* r) {
2471 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2472 MemRegion usedMR = r->used_region();
2473 _bm->clearRange(r->used_region());
2474 }
2475 return false;
2476 }
2477 };
2478
2479 void ConcurrentMark::complete_marking_in_collection_set() {
2480 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2481
2482 if (!g1h->mark_in_progress()) {
2483 g1h->g1_policy()->record_mark_closure_time(0.0);
2484 return;
2485 }
2486
2487 int i = 1;
2488 double start = os::elapsedTime();
2489 while (true) {
2490 i++;
2491 CompleteMarkingInCSHRClosure cmplt(this);
2492 g1h->collection_set_iterate(&cmplt);
2493 if (cmplt.completed()) break;
2494 }
2495 double end_time = os::elapsedTime();
2496 double elapsed_time_ms = (end_time - start) * 1000.0;
2497 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2498 if (PrintGCDetails) {
2499 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2500 }
2501
2502 ClearMarksInHRClosure clr(nextMarkBitMap());
2503 g1h->collection_set_iterate(&clr);
2504 }
2505
2506 // The next two methods deal with the following optimisation. Some
2507 // objects are gray by being marked and located above the finger. If
2508 // they are copied, during an evacuation pause, below the finger then
2509 // the need to be pushed on the stack. The observation is that, if
2510 // there are no regions in the collection set located above the
2511 // finger, then the above cannot happen, hence we do not need to
2512 // explicitly gray any objects when copying them to below the
2513 // finger. The global stack will be scanned to ensure that, if it
2514 // points to objects being copied, it will update their
2515 // location. There is a tricky situation with the gray objects in
2516 // region stack that are being coped, however. See the comment in
2517 // newCSet().
2518
2519 void ConcurrentMark::newCSet() {
2520 if (!concurrent_marking_in_progress())
2521 // nothing to do if marking is not in progress
2522 return;
2523
2524 // find what the lowest finger is among the global and local fingers
2525 _min_finger = _finger;
2526 for (int i = 0; i < (int)_max_task_num; ++i) {
2527 CMTask* task = _tasks[i];
2528 HeapWord* task_finger = task->finger();
2529 if (task_finger != NULL && task_finger < _min_finger)
2530 _min_finger = task_finger;
2531 }
2532
2533 _should_gray_objects = false;
2534
2535 // This fixes a very subtle and fustrating bug. It might be the case
2536 // that, during en evacuation pause, heap regions that contain
2537 // objects that are gray (by being in regions contained in the
2538 // region stack) are included in the collection set. Since such gray
2539 // objects will be moved, and because it's not easy to redirect
2540 // region stack entries to point to a new location (because objects
2541 // in one region might be scattered to multiple regions after they
2542 // are copied), one option is to ensure that all marked objects
2543 // copied during a pause are pushed on the stack. Notice, however,
2544 // that this problem can only happen when the region stack is not
2545 // empty during an evacuation pause. So, we make the fix a bit less
2546 // conservative and ensure that regions are pushed on the stack,
2547 // irrespective whether all collection set regions are below the
2548 // finger, if the region stack is not empty. This is expected to be
2549 // a rare case, so I don't think it's necessary to be smarted about it.
2550 if (!region_stack_empty())
2551 _should_gray_objects = true;
2552 }
2553
2554 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2555 if (!concurrent_marking_in_progress())
2556 return;
2557
2558 HeapWord* region_end = hr->end();
2559 if (region_end > _min_finger)
2560 _should_gray_objects = true;
2561 }
2562
2563 void ConcurrentMark::disable_co_trackers() {
2564 if (has_aborted()) {
2565 if (_cleanup_co_tracker.enabled())
2566 _cleanup_co_tracker.disable();
2567 for (int i = 0; i < (int)_max_task_num; ++i) {
2568 CMTask* task = _tasks[i];
2569 if (task->co_tracker_enabled())
2570 task->disable_co_tracker();
2571 }
2572 } else {
2573 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
2574 for (int i = 0; i < (int)_max_task_num; ++i) {
2575 CMTask* task = _tasks[i];
2576 guarantee( !task->co_tracker_enabled(), "invariant" );
2577 }
2578 }
2579 }
2580
2581 // abandon current marking iteration due to a Full GC
2582 void ConcurrentMark::abort() {
2583 // If we're not marking, nothing to do.
2584 if (!G1ConcMark) return;
2585
2586 // Clear all marks to force marking thread to do nothing
2587 _nextMarkBitMap->clearAll();
2588 // Empty mark stack
2589 clear_marking_state();
2590 for (int i = 0; i < (int)_max_task_num; ++i)
2591 _tasks[i]->clear_region_fields();
2592 _has_aborted = true;
2593
2594 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2595 satb_mq_set.abandon_partial_marking();
2596 satb_mq_set.set_active_all_threads(false);
2597 }
2598
2599 static void print_ms_time_info(const char* prefix, const char* name,
2600 NumberSeq& ns) {
2601 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2602 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2603 if (ns.num() > 0) {
2604 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2605 prefix, ns.sd(), ns.maximum());
2606 }
2607 }
2608
2609 void ConcurrentMark::print_summary_info() {
2610 gclog_or_tty->print_cr(" Concurrent marking:");
2611 print_ms_time_info(" ", "init marks", _init_times);
2612 print_ms_time_info(" ", "remarks", _remark_times);
2613 {
2614 print_ms_time_info(" ", "final marks", _remark_mark_times);
2615 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2616
2617 }
2618 print_ms_time_info(" ", "cleanups", _cleanup_times);
2619 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2620 _total_counting_time,
2621 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2622 (double)_cleanup_times.num()
2623 : 0.0));
2624 if (G1ScrubRemSets) {
2625 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2626 _total_rs_scrub_time,
2627 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2628 (double)_cleanup_times.num()
2629 : 0.0));
2630 }
2631 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2632 (_init_times.sum() + _remark_times.sum() +
2633 _cleanup_times.sum())/1000.0);
2634 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2635 "(%8.2f s marking, %8.2f s counting).",
2636 cmThread()->vtime_accum(),
2637 cmThread()->vtime_mark_accum(),
2638 cmThread()->vtime_count_accum());
2639 }
2640
2641 // Closures
2642 // XXX: there seems to be a lot of code duplication here;
2643 // should refactor and consolidate the shared code.
2644
2645 // This closure is used to mark refs into the CMS generation in
2646 // the CMS bit map. Called at the first checkpoint.
2647
2648 // We take a break if someone is trying to stop the world.
2649 bool ConcurrentMark::do_yield_check(int worker_i) {
2650 if (should_yield()) {
2651 if (worker_i == 0)
2652 _g1h->g1_policy()->record_concurrent_pause();
2653 cmThread()->yield();
2654 if (worker_i == 0)
2655 _g1h->g1_policy()->record_concurrent_pause_end();
2656 return true;
2657 } else {
2658 return false;
2659 }
2660 }
2661
2662 bool ConcurrentMark::should_yield() {
2663 return cmThread()->should_yield();
2664 }
2665
2666 bool ConcurrentMark::containing_card_is_marked(void* p) {
2667 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2668 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2669 }
2670
2671 bool ConcurrentMark::containing_cards_are_marked(void* start,
2672 void* last) {
2673 return
2674 containing_card_is_marked(start) &&
2675 containing_card_is_marked(last);
2676 }
2677
2678 #ifndef PRODUCT
2679 // for debugging purposes
2680 void ConcurrentMark::print_finger() {
2681 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2682 _heap_start, _heap_end, _finger);
2683 for (int i = 0; i < (int) _max_task_num; ++i) {
2684 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2685 }
2686 gclog_or_tty->print_cr("");
2687 }
2688 #endif
2689
2690 // Closure for iteration over bitmaps
2691 class CMBitMapClosure : public BitMapClosure {
2692 private:
2693 // the bitmap that is being iterated over
2694 CMBitMap* _nextMarkBitMap;
2695 ConcurrentMark* _cm;
2696 CMTask* _task;
2697 // true if we're scanning a heap region claimed by the task (so that
2698 // we move the finger along), false if we're not, i.e. currently when
2699 // scanning a heap region popped from the region stack (so that we
2700 // do not move the task finger along; it'd be a mistake if we did so).
2701 bool _scanning_heap_region;
2702
2703 public:
2704 CMBitMapClosure(CMTask *task,
2705 ConcurrentMark* cm,
2706 CMBitMap* nextMarkBitMap)
2707 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2708
2709 void set_scanning_heap_region(bool scanning_heap_region) {
2710 _scanning_heap_region = scanning_heap_region;
2711 }
2712
2713 bool do_bit(size_t offset) {
2714 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2715 tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
2716 tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
2717
2718 if (_scanning_heap_region) {
2719 statsOnly( _task->increase_objs_found_on_bitmap() );
2720 tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
2721 // We move that task's local finger along.
2722 _task->move_finger_to(addr);
2723 } else {
2724 // We move the task's region finger along.
2725 _task->move_region_finger_to(addr);
2726 }
2727
2728 _task->scan_object(oop(addr));
2729 // we only partially drain the local queue and global stack
2730 _task->drain_local_queue(true);
2731 _task->drain_global_stack(true);
2732
2733 // if the has_aborted flag has been raised, we need to bail out of
2734 // the iteration
2735 return !_task->has_aborted();
2736 }
2737 };
2738
2739 // Closure for iterating over objects, currently only used for
2740 // processing SATB buffers.
2741 class CMObjectClosure : public ObjectClosure {
2742 private:
2743 CMTask* _task;
2744
2745 public:
2746 void do_object(oop obj) {
2747 _task->deal_with_reference(obj);
2748 }
2749
2750 CMObjectClosure(CMTask* task) : _task(task) { }
2751 };
2752
2753 // Closure for iterating over object fields
2754 class CMOopClosure : public OopClosure {
2755 private:
2756 G1CollectedHeap* _g1h;
2757 ConcurrentMark* _cm;
2758 CMTask* _task;
2759
2760 public:
2761 void do_oop(narrowOop* p) {
2762 guarantee(false, "NYI");
2763 }
2764
2765 void do_oop(oop* p) {
2766 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
2767
2768 oop obj = *p;
2769 if (_cm->verbose_high())
2770 gclog_or_tty->print_cr("[%d] we're looking at location "
2771 "*"PTR_FORMAT" = "PTR_FORMAT,
2772 _task->task_id(), p, (void*) obj);
2773 _task->deal_with_reference(obj);
2774 }
2775
2776 CMOopClosure(G1CollectedHeap* g1h,
2777 ConcurrentMark* cm,
2778 CMTask* task)
2779 : _g1h(g1h), _cm(cm), _task(task) { }
2780 };
2781
2782 void CMTask::setup_for_region(HeapRegion* hr) {
2783 tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
2784 "claim_region() should have filtered out continues humongous regions" );
2785
2786 if (_cm->verbose_low())
2787 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2788 _task_id, hr);
2789
2790 _curr_region = hr;
2791 _finger = hr->bottom();
2792 update_region_limit();
2793 }
2794
2795 void CMTask::update_region_limit() {
2796 HeapRegion* hr = _curr_region;
2797 HeapWord* bottom = hr->bottom();
2798 HeapWord* limit = hr->next_top_at_mark_start();
2799
2800 if (limit == bottom) {
2801 if (_cm->verbose_low())
2802 gclog_or_tty->print_cr("[%d] found an empty region "
2803 "["PTR_FORMAT", "PTR_FORMAT")",
2804 _task_id, bottom, limit);
2805 // The region was collected underneath our feet.
2806 // We set the finger to bottom to ensure that the bitmap
2807 // iteration that will follow this will not do anything.
2808 // (this is not a condition that holds when we set the region up,
2809 // as the region is not supposed to be empty in the first place)
2810 _finger = bottom;
2811 } else if (limit >= _region_limit) {
2812 tmp_guarantee_CM( limit >= _finger, "peace of mind" );
2813 } else {
2814 tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
2815 // This can happen under some pretty unusual circumstances. An
2816 // evacuation pause empties the region underneath our feet (NTAMS
2817 // at bottom). We then do some allocation in the region (NTAMS
2818 // stays at bottom), followed by the region being used as a GC
2819 // alloc region (NTAMS will move to top() and the objects
2820 // originally below it will be grayed). All objects now marked in
2821 // the region are explicitly grayed, if below the global finger,
2822 // and we do not need in fact to scan anything else. So, we simply
2823 // set _finger to be limit to ensure that the bitmap iteration
2824 // doesn't do anything.
2825 _finger = limit;
2826 }
2827
2828 _region_limit = limit;
2829 }
2830
2831 void CMTask::giveup_current_region() {
2832 tmp_guarantee_CM( _curr_region != NULL, "invariant" );
2833 if (_cm->verbose_low())
2834 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2835 _task_id, _curr_region);
2836 clear_region_fields();
2837 }
2838
2839 void CMTask::clear_region_fields() {
2840 // Values for these three fields that indicate that we're not
2841 // holding on to a region.
2842 _curr_region = NULL;
2843 _finger = NULL;
2844 _region_limit = NULL;
2845
2846 _region_finger = NULL;
2847 }
2848
2849 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2850 guarantee( nextMarkBitMap != NULL, "invariant" );
2851
2852 if (_cm->verbose_low())
2853 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2854
2855 _nextMarkBitMap = nextMarkBitMap;
2856 clear_region_fields();
2857
2858 _calls = 0;
2859 _elapsed_time_ms = 0.0;
2860 _termination_time_ms = 0.0;
2861 _termination_start_time_ms = 0.0;
2862
2863 #if _MARKING_STATS_
2864 _local_pushes = 0;
2865 _local_pops = 0;
2866 _local_max_size = 0;
2867 _objs_scanned = 0;
2868 _global_pushes = 0;
2869 _global_pops = 0;
2870 _global_max_size = 0;
2871 _global_transfers_to = 0;
2872 _global_transfers_from = 0;
2873 _region_stack_pops = 0;
2874 _regions_claimed = 0;
2875 _objs_found_on_bitmap = 0;
2876 _satb_buffers_processed = 0;
2877 _steal_attempts = 0;
2878 _steals = 0;
2879 _aborted = 0;
2880 _aborted_overflow = 0;
2881 _aborted_cm_aborted = 0;
2882 _aborted_yield = 0;
2883 _aborted_timed_out = 0;
2884 _aborted_satb = 0;
2885 _aborted_termination = 0;
2886 #endif // _MARKING_STATS_
2887 }
2888
2889 bool CMTask::should_exit_termination() {
2890 regular_clock_call();
2891 // This is called when we are in the termination protocol. We should
2892 // quit if, for some reason, this task wants to abort or the global
2893 // stack is not empty (this means that we can get work from it).
2894 return !_cm->mark_stack_empty() || has_aborted();
2895 }
2896
2897 // This determines whether the method below will check both the local
2898 // and global fingers when determining whether to push on the stack a
2899 // gray object (value 1) or whether it will only check the global one
2900 // (value 0). The tradeoffs are that the former will be a bit more
2901 // accurate and possibly push less on the stack, but it might also be
2902 // a little bit slower.
2903
2904 #define _CHECK_BOTH_FINGERS_ 1
2905
2906 void CMTask::deal_with_reference(oop obj) {
2907 if (_cm->verbose_high())
2908 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2909 _task_id, (void*) obj);
2910
2911 ++_refs_reached;
2912
2913 HeapWord* objAddr = (HeapWord*) obj;
2914 if (_g1h->is_in_g1_reserved(objAddr)) {
2915 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2916 HeapRegion* hr = _g1h->heap_region_containing(obj);
2917 if (_g1h->is_obj_ill(obj, hr)) {
2918 if (_cm->verbose_high())
2919 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2920 _task_id, (void*) obj);
2921
2922 // we need to mark it first
2923 if (_nextMarkBitMap->parMark(objAddr)) {
2924 // No OrderAccess:store_load() is needed. It is implicit in the
2925 // CAS done in parMark(objAddr) above
2926 HeapWord* global_finger = _cm->finger();
2927
2928 #if _CHECK_BOTH_FINGERS_
2929 // we will check both the local and global fingers
2930
2931 if (_finger != NULL && objAddr < _finger) {
2932 if (_cm->verbose_high())
2933 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2934 "pushing it", _task_id, _finger);
2935 push(obj);
2936 } else if (_curr_region != NULL && objAddr < _region_limit) {
2937 // do nothing
2938 } else if (objAddr < global_finger) {
2939 // Notice that the global finger might be moving forward
2940 // concurrently. This is not a problem. In the worst case, we
2941 // mark the object while it is above the global finger and, by
2942 // the time we read the global finger, it has moved forward
2943 // passed this object. In this case, the object will probably
2944 // be visited when a task is scanning the region and will also
2945 // be pushed on the stack. So, some duplicate work, but no
2946 // correctness problems.
2947
2948 if (_cm->verbose_high())
2949 gclog_or_tty->print_cr("[%d] below the global finger "
2950 "("PTR_FORMAT"), pushing it",
2951 _task_id, global_finger);
2952 push(obj);
2953 } else {
2954 // do nothing
2955 }
2956 #else // _CHECK_BOTH_FINGERS_
2957 // we will only check the global finger
2958
2959 if (objAddr < global_finger) {
2960 // see long comment above
2961
2962 if (_cm->verbose_high())
2963 gclog_or_tty->print_cr("[%d] below the global finger "
2964 "("PTR_FORMAT"), pushing it",
2965 _task_id, global_finger);
2966 push(obj);
2967 }
2968 #endif // _CHECK_BOTH_FINGERS_
2969 }
2970 }
2971 }
2972 }
2973
2974 void CMTask::push(oop obj) {
2975 HeapWord* objAddr = (HeapWord*) obj;
2976 tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
2977 tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
2978 tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
2979
2980 if (_cm->verbose_high())
2981 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
2982
2983 if (!_task_queue->push(obj)) {
2984 // The local task queue looks full. We need to push some entries
2985 // to the global stack.
2986
2987 if (_cm->verbose_medium())
2988 gclog_or_tty->print_cr("[%d] task queue overflow, "
2989 "moving entries to the global stack",
2990 _task_id);
2991 move_entries_to_global_stack();
2992
2993 // this should succeed since, even if we overflow the global
2994 // stack, we should have definitely removed some entries from the
2995 // local queue. So, there must be space on it.
2996 bool success = _task_queue->push(obj);
2997 tmp_guarantee_CM( success, "invariant" );
2998 }
2999
3000 statsOnly( int tmp_size = _task_queue->size();
3001 if (tmp_size > _local_max_size)
3002 _local_max_size = tmp_size;
3003 ++_local_pushes );
3004 }
3005
3006 void CMTask::reached_limit() {
3007 tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
3008 _refs_reached >= _refs_reached_limit ,
3009 "shouldn't have been called otherwise" );
3010 regular_clock_call();
3011 }
3012
3013 void CMTask::regular_clock_call() {
3014 if (has_aborted())
3015 return;
3016
3017 // First, we need to recalculate the words scanned and refs reached
3018 // limits for the next clock call.
3019 recalculate_limits();
3020
3021 // During the regular clock call we do the following
3022
3023 // (1) If an overflow has been flagged, then we abort.
3024 if (_cm->has_overflown()) {
3025 set_has_aborted();
3026 return;
3027 }
3028
3029 // If we are not concurrent (i.e. we're doing remark) we don't need
3030 // to check anything else. The other steps are only needed during
3031 // the concurrent marking phase.
3032 if (!concurrent())
3033 return;
3034
3035 // (2) If marking has been aborted for Full GC, then we also abort.
3036 if (_cm->has_aborted()) {
3037 set_has_aborted();
3038 statsOnly( ++_aborted_cm_aborted );
3039 return;
3040 }
3041
3042 double curr_time_ms = os::elapsedVTime() * 1000.0;
3043
3044 // (3) If marking stats are enabled, then we update the step history.
3045 #if _MARKING_STATS_
3046 if (_words_scanned >= _words_scanned_limit)
3047 ++_clock_due_to_scanning;
3048 if (_refs_reached >= _refs_reached_limit)
3049 ++_clock_due_to_marking;
3050
3051 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3052 _interval_start_time_ms = curr_time_ms;
3053 _all_clock_intervals_ms.add(last_interval_ms);
3054
3055 if (_cm->verbose_medium()) {
3056 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3057 "scanned = %d%s, refs reached = %d%s",
3058 _task_id, last_interval_ms,
3059 _words_scanned,
3060 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3061 _refs_reached,
3062 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3063 }
3064 #endif // _MARKING_STATS_
3065
3066 // (4) We check whether we should yield. If we have to, then we abort.
3067 if (_cm->should_yield()) {
3068 // We should yield. To do this we abort the task. The caller is
3069 // responsible for yielding.
3070 set_has_aborted();
3071 statsOnly( ++_aborted_yield );
3072 return;
3073 }
3074
3075 // (5) We check whether we've reached our time quota. If we have,
3076 // then we abort.
3077 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3078 if (elapsed_time_ms > _time_target_ms) {
3079 set_has_aborted();
3080 _has_aborted_timed_out = true;
3081 statsOnly( ++_aborted_timed_out );
3082 return;
3083 }
3084
3085 // (6) Finally, we check whether there are enough completed STAB
3086 // buffers available for processing. If there are, we abort.
3087 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3088 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3089 if (_cm->verbose_low())
3090 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3091 _task_id);
3092 // we do need to process SATB buffers, we'll abort and restart
3093 // the marking task to do so
3094 set_has_aborted();
3095 statsOnly( ++_aborted_satb );
3096 return;
3097 }
3098 }
3099
3100 void CMTask::recalculate_limits() {
3101 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3102 _words_scanned_limit = _real_words_scanned_limit;
3103
3104 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3105 _refs_reached_limit = _real_refs_reached_limit;
3106 }
3107
3108 void CMTask::decrease_limits() {
3109 // This is called when we believe that we're going to do an infrequent
3110 // operation which will increase the per byte scanned cost (i.e. move
3111 // entries to/from the global stack). It basically tries to decrease the
3112 // scanning limit so that the clock is called earlier.
3113
3114 if (_cm->verbose_medium())
3115 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3116
3117 _words_scanned_limit = _real_words_scanned_limit -
3118 3 * words_scanned_period / 4;
3119 _refs_reached_limit = _real_refs_reached_limit -
3120 3 * refs_reached_period / 4;
3121 }
3122
3123 void CMTask::move_entries_to_global_stack() {
3124 // local array where we'll store the entries that will be popped
3125 // from the local queue
3126 oop buffer[global_stack_transfer_size];
3127
3128 int n = 0;
3129 oop obj;
3130 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3131 buffer[n] = obj;
3132 ++n;
3133 }
3134
3135 if (n > 0) {
3136 // we popped at least one entry from the local queue
3137
3138 statsOnly( ++_global_transfers_to; _local_pops += n );
3139
3140 if (!_cm->mark_stack_push(buffer, n)) {
3141 if (_cm->verbose_low())
3142 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3143 set_has_aborted();
3144 } else {
3145 // the transfer was successful
3146
3147 if (_cm->verbose_medium())
3148 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3149 _task_id, n);
3150 statsOnly( int tmp_size = _cm->mark_stack_size();
3151 if (tmp_size > _global_max_size)
3152 _global_max_size = tmp_size;
3153 _global_pushes += n );
3154 }
3155 }
3156
3157 // this operation was quite expensive, so decrease the limits
3158 decrease_limits();
3159 }
3160
3161 void CMTask::get_entries_from_global_stack() {
3162 // local array where we'll store the entries that will be popped
3163 // from the global stack.
3164 oop buffer[global_stack_transfer_size];
3165 int n;
3166 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3167 tmp_guarantee_CM( n <= global_stack_transfer_size,
3168 "we should not pop more than the given limit" );
3169 if (n > 0) {
3170 // yes, we did actually pop at least one entry
3171
3172 statsOnly( ++_global_transfers_from; _global_pops += n );
3173 if (_cm->verbose_medium())
3174 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3175 _task_id, n);
3176 for (int i = 0; i < n; ++i) {
3177 bool success = _task_queue->push(buffer[i]);
3178 // We only call this when the local queue is empty or under a
3179 // given target limit. So, we do not expect this push to fail.
3180 tmp_guarantee_CM( success, "invariant" );
3181 }
3182
3183 statsOnly( int tmp_size = _task_queue->size();
3184 if (tmp_size > _local_max_size)
3185 _local_max_size = tmp_size;
3186 _local_pushes += n );
3187 }
3188
3189 // this operation was quite expensive, so decrease the limits
3190 decrease_limits();
3191 }
3192
3193 void CMTask::drain_local_queue(bool partially) {
3194 if (has_aborted())
3195 return;
3196
3197 // Decide what the target size is, depending whether we're going to
3198 // drain it partially (so that other tasks can steal if they run out
3199 // of things to do) or totally (at the very end).
3200 size_t target_size;
3201 if (partially)
3202 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3203 else
3204 target_size = 0;
3205
3206 if (_task_queue->size() > target_size) {
3207 if (_cm->verbose_high())
3208 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3209 _task_id, target_size);
3210
3211 oop obj;
3212 bool ret = _task_queue->pop_local(obj);
3213 while (ret) {
3214 statsOnly( ++_local_pops );
3215
3216 if (_cm->verbose_high())
3217 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3218 (void*) obj);
3219
3220 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
3221 "invariant" );
3222
3223 scan_object(obj);
3224
3225 if (_task_queue->size() <= target_size || has_aborted())
3226 ret = false;
3227 else
3228 ret = _task_queue->pop_local(obj);
3229 }
3230
3231 if (_cm->verbose_high())
3232 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3233 _task_id, _task_queue->size());
3234 }
3235 }
3236
3237 void CMTask::drain_global_stack(bool partially) {
3238 if (has_aborted())
3239 return;
3240
3241 // We have a policy to drain the local queue before we attempt to
3242 // drain the global stack.
3243 tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
3244
3245 // Decide what the target size is, depending whether we're going to
3246 // drain it partially (so that other tasks can steal if they run out
3247 // of things to do) or totally (at the very end). Notice that,
3248 // because we move entries from the global stack in chunks or
3249 // because another task might be doing the same, we might in fact
3250 // drop below the target. But, this is not a problem.
3251 size_t target_size;
3252 if (partially)
3253 target_size = _cm->partial_mark_stack_size_target();
3254 else
3255 target_size = 0;
3256
3257 if (_cm->mark_stack_size() > target_size) {
3258 if (_cm->verbose_low())
3259 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3260 _task_id, target_size);
3261
3262 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3263 get_entries_from_global_stack();
3264 drain_local_queue(partially);
3265 }
3266
3267 if (_cm->verbose_low())
3268 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3269 _task_id, _cm->mark_stack_size());
3270 }
3271 }
3272
3273 // SATB Queue has several assumptions on whether to call the par or
3274 // non-par versions of the methods. this is why some of the code is
3275 // replicated. We should really get rid of the single-threaded version
3276 // of the code to simplify things.
3277 void CMTask::drain_satb_buffers() {
3278 if (has_aborted())
3279 return;
3280
3281 // We set this so that the regular clock knows that we're in the
3282 // middle of draining buffers and doesn't set the abort flag when it
3283 // notices that SATB buffers are available for draining. It'd be
3284 // very counter productive if it did that. :-)
3285 _draining_satb_buffers = true;
3286
3287 CMObjectClosure oc(this);
3288 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3289 if (ParallelGCThreads > 0)
3290 satb_mq_set.set_par_closure(_task_id, &oc);
3291 else
3292 satb_mq_set.set_closure(&oc);
3293
3294 // This keeps claiming and applying the closure to completed buffers
3295 // until we run out of buffers or we need to abort.
3296 if (ParallelGCThreads > 0) {
3297 while (!has_aborted() &&
3298 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3299 if (_cm->verbose_medium())
3300 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3301 statsOnly( ++_satb_buffers_processed );
3302 regular_clock_call();
3303 }
3304 } else {
3305 while (!has_aborted() &&
3306 satb_mq_set.apply_closure_to_completed_buffer()) {
3307 if (_cm->verbose_medium())
3308 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3309 statsOnly( ++_satb_buffers_processed );
3310 regular_clock_call();
3311 }
3312 }
3313
3314 if (!concurrent() && !has_aborted()) {
3315 // We should only do this during remark.
3316 if (ParallelGCThreads > 0)
3317 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3318 else
3319 satb_mq_set.iterate_closure_all_threads();
3320 }
3321
3322 _draining_satb_buffers = false;
3323
3324 tmp_guarantee_CM( has_aborted() ||
3325 concurrent() ||
3326 satb_mq_set.completed_buffers_num() == 0, "invariant" );
3327
3328 if (ParallelGCThreads > 0)
3329 satb_mq_set.set_par_closure(_task_id, NULL);
3330 else
3331 satb_mq_set.set_closure(NULL);
3332
3333 // again, this was a potentially expensive operation, decrease the
3334 // limits to get the regular clock call early
3335 decrease_limits();
3336 }
3337
3338 void CMTask::drain_region_stack(BitMapClosure* bc) {
3339 if (has_aborted())
3340 return;
3341
3342 tmp_guarantee_CM( _region_finger == NULL,
3343 "it should be NULL when we're not scanning a region" );
3344
3345 if (!_cm->region_stack_empty()) {
3346 if (_cm->verbose_low())
3347 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3348 _task_id, _cm->region_stack_size());
3349
3350 MemRegion mr = _cm->region_stack_pop();
3351 // it returns MemRegion() if the pop fails
3352 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3353
3354 while (mr.start() != NULL) {
3355 if (_cm->verbose_medium())
3356 gclog_or_tty->print_cr("[%d] we are scanning region "
3357 "["PTR_FORMAT", "PTR_FORMAT")",
3358 _task_id, mr.start(), mr.end());
3359 tmp_guarantee_CM( mr.end() <= _cm->finger(),
3360 "otherwise the region shouldn't be on the stack" );
3361 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3362 if (_nextMarkBitMap->iterate(bc, mr)) {
3363 tmp_guarantee_CM( !has_aborted(),
3364 "cannot abort the task without aborting the bitmap iteration" );
3365
3366 // We finished iterating over the region without aborting.
3367 regular_clock_call();
3368 if (has_aborted())
3369 mr = MemRegion();
3370 else {
3371 mr = _cm->region_stack_pop();
3372 // it returns MemRegion() if the pop fails
3373 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3374 }
3375 } else {
3376 guarantee( has_aborted(), "currently the only way to do so" );
3377
3378 // The only way to abort the bitmap iteration is to return
3379 // false from the do_bit() method. However, inside the
3380 // do_bit() method we move the _region_finger to point to the
3381 // object currently being looked at. So, if we bail out, we
3382 // have definitely set _region_finger to something non-null.
3383 guarantee( _region_finger != NULL, "invariant" );
3384
3385 // The iteration was actually aborted. So now _region_finger
3386 // points to the address of the object we last scanned. If we
3387 // leave it there, when we restart this task, we will rescan
3388 // the object. It is easy to avoid this. We move the finger by
3389 // enough to point to the next possible object header (the
3390 // bitmap knows by how much we need to move it as it knows its
3391 // granularity).
3392 MemRegion newRegion =
3393 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3394
3395 if (!newRegion.is_empty()) {
3396 if (_cm->verbose_low()) {
3397 gclog_or_tty->print_cr("[%d] pushing unscanned region"
3398 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3399 _task_id,
3400 newRegion.start(), newRegion.end());
3401 }
3402 // Now push the part of the region we didn't scan on the
3403 // region stack to make sure a task scans it later.
3404 _cm->region_stack_push(newRegion);
3405 }
3406 // break from while
3407 mr = MemRegion();
3408 }
3409 _region_finger = NULL;
3410 }
3411
3412 // We only push regions on the region stack during evacuation
3413 // pauses. So if we come out the above iteration because we region
3414 // stack is empty, it will remain empty until the next yield
3415 // point. So, the guarantee below is safe.
3416 guarantee( has_aborted() || _cm->region_stack_empty(),
3417 "only way to exit the loop" );
3418
3419 if (_cm->verbose_low())
3420 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3421 _task_id, _cm->region_stack_size());
3422 }
3423 }
3424
3425 void CMTask::print_stats() {
3426 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3427 _task_id, _calls);
3428 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3429 _elapsed_time_ms, _termination_time_ms);
3430 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3431 _step_times_ms.num(), _step_times_ms.avg(),
3432 _step_times_ms.sd());
3433 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3434 _step_times_ms.maximum(), _step_times_ms.sum());
3435
3436 #if _MARKING_STATS_
3437 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3438 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3439 _all_clock_intervals_ms.sd());
3440 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3441 _all_clock_intervals_ms.maximum(),
3442 _all_clock_intervals_ms.sum());
3443 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3444 _clock_due_to_scanning, _clock_due_to_marking);
3445 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3446 _objs_scanned, _objs_found_on_bitmap);
3447 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3448 _local_pushes, _local_pops, _local_max_size);
3449 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3450 _global_pushes, _global_pops, _global_max_size);
3451 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3452 _global_transfers_to,_global_transfers_from);
3453 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3454 _regions_claimed, _region_stack_pops);
3455 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3456 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3457 _steal_attempts, _steals);
3458 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3459 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3460 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3461 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3462 _aborted_timed_out, _aborted_satb, _aborted_termination);
3463 #endif // _MARKING_STATS_
3464 }
3465
3466 /*****************************************************************************
3467
3468 The do_marking_step(time_target_ms) method is the building block
3469 of the parallel marking framework. It can be called in parallel
3470 with other invocations of do_marking_step() on different tasks
3471 (but only one per task, obviously) and concurrently with the
3472 mutator threads, or during remark, hence it eliminates the need
3473 for two versions of the code. When called during remark, it will
3474 pick up from where the task left off during the concurrent marking
3475 phase. Interestingly, tasks are also claimable during evacuation
3476 pauses too, since do_marking_step() ensures that it aborts before
3477 it needs to yield.
3478
3479 The data structures that is uses to do marking work are the
3480 following:
3481
3482 (1) Marking Bitmap. If there are gray objects that appear only
3483 on the bitmap (this happens either when dealing with an overflow
3484 or when the initial marking phase has simply marked the roots
3485 and didn't push them on the stack), then tasks claim heap
3486 regions whose bitmap they then scan to find gray objects. A
3487 global finger indicates where the end of the last claimed region
3488 is. A local finger indicates how far into the region a task has
3489 scanned. The two fingers are used to determine how to gray an
3490 object (i.e. whether simply marking it is OK, as it will be
3491 visited by a task in the future, or whether it needs to be also
3492 pushed on a stack).
3493
3494 (2) Local Queue. The local queue of the task which is accessed
3495 reasonably efficiently by the task. Other tasks can steal from
3496 it when they run out of work. Throughout the marking phase, a
3497 task attempts to keep its local queue short but not totally
3498 empty, so that entries are available for stealing by other
3499 tasks. Only when there is no more work, a task will totally
3500 drain its local queue.
3501
3502 (3) Global Mark Stack. This handles local queue overflow. During
3503 marking only sets of entries are moved between it and the local
3504 queues, as access to it requires a mutex and more fine-grain
3505 interaction with it which might cause contention. If it
3506 overflows, then the marking phase should restart and iterate
3507 over the bitmap to identify gray objects. Throughout the marking
3508 phase, tasks attempt to keep the global mark stack at a small
3509 length but not totally empty, so that entries are available for
3510 popping by other tasks. Only when there is no more work, tasks
3511 will totally drain the global mark stack.
3512
3513 (4) Global Region Stack. Entries on it correspond to areas of
3514 the bitmap that need to be scanned since they contain gray
3515 objects. Pushes on the region stack only happen during
3516 evacuation pauses and typically correspond to areas covered by
3517 GC LABS. If it overflows, then the marking phase should restart
3518 and iterate over the bitmap to identify gray objects. Tasks will
3519 try to totally drain the region stack as soon as possible.
3520
3521 (5) SATB Buffer Queue. This is where completed SATB buffers are
3522 made available. Buffers are regularly removed from this queue
3523 and scanned for roots, so that the queue doesn't get too
3524 long. During remark, all completed buffers are processed, as
3525 well as the filled in parts of any uncompleted buffers.
3526
3527 The do_marking_step() method tries to abort when the time target
3528 has been reached. There are a few other cases when the
3529 do_marking_step() method also aborts:
3530
3531 (1) When the marking phase has been aborted (after a Full GC).
3532
3533 (2) When a global overflow (either on the global stack or the
3534 region stack) has been triggered. Before the task aborts, it
3535 will actually sync up with the other tasks to ensure that all
3536 the marking data structures (local queues, stacks, fingers etc.)
3537 are re-initialised so that when do_marking_step() completes,
3538 the marking phase can immediately restart.
3539
3540 (3) When enough completed SATB buffers are available. The
3541 do_marking_step() method only tries to drain SATB buffers right
3542 at the beginning. So, if enough buffers are available, the
3543 marking step aborts and the SATB buffers are processed at
3544 the beginning of the next invocation.
3545
3546 (4) To yield. when we have to yield then we abort and yield
3547 right at the end of do_marking_step(). This saves us from a lot
3548 of hassle as, by yielding we might allow a Full GC. If this
3549 happens then objects will be compacted underneath our feet, the
3550 heap might shrink, etc. We save checking for this by just
3551 aborting and doing the yield right at the end.
3552
3553 From the above it follows that the do_marking_step() method should
3554 be called in a loop (or, otherwise, regularly) until it completes.
3555
3556 If a marking step completes without its has_aborted() flag being
3557 true, it means it has completed the current marking phase (and
3558 also all other marking tasks have done so and have all synced up).
3559
3560 A method called regular_clock_call() is invoked "regularly" (in
3561 sub ms intervals) throughout marking. It is this clock method that
3562 checks all the abort conditions which were mentioned above and
3563 decides when the task should abort. A work-based scheme is used to
3564 trigger this clock method: when the number of object words the
3565 marking phase has scanned or the number of references the marking
3566 phase has visited reach a given limit. Additional invocations to
3567 the method clock have been planted in a few other strategic places
3568 too. The initial reason for the clock method was to avoid calling
3569 vtime too regularly, as it is quite expensive. So, once it was in
3570 place, it was natural to piggy-back all the other conditions on it
3571 too and not constantly check them throughout the code.
3572
3573 *****************************************************************************/
3574
3575 void CMTask::do_marking_step(double time_target_ms) {
3576 guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
3577 guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
3578
3579 guarantee( concurrent() || _cm->region_stack_empty(),
3580 "the region stack should have been cleared before remark" );
3581 guarantee( _region_finger == NULL,
3582 "this should be non-null only when a region is being scanned" );
3583
3584 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3585 guarantee( _task_queues != NULL, "invariant" );
3586 guarantee( _task_queue != NULL, "invariant" );
3587 guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
3588
3589 guarantee( !_claimed,
3590 "only one thread should claim this task at any one time" );
3591
3592 // OK, this doesn't safeguard again all possible scenarios, as it is
3593 // possible for two threads to set the _claimed flag at the same
3594 // time. But it is only for debugging purposes anyway and it will
3595 // catch most problems.
3596 _claimed = true;
3597
3598 _start_time_ms = os::elapsedVTime() * 1000.0;
3599 statsOnly( _interval_start_time_ms = _start_time_ms );
3600
3601 double diff_prediction_ms =
3602 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3603 _time_target_ms = time_target_ms - diff_prediction_ms;
3604
3605 // set up the variables that are used in the work-based scheme to
3606 // call the regular clock method
3607 _words_scanned = 0;
3608 _refs_reached = 0;
3609 recalculate_limits();
3610
3611 // clear all flags
3612 clear_has_aborted();
3613 _has_aborted_timed_out = false;
3614 _draining_satb_buffers = false;
3615
3616 ++_calls;
3617
3618 if (_cm->verbose_low())
3619 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3620 "target = %1.2lfms >>>>>>>>>>",
3621 _task_id, _calls, _time_target_ms);
3622
3623 // Set up the bitmap and oop closures. Anything that uses them is
3624 // eventually called from this method, so it is OK to allocate these
3625 // statically.
3626 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3627 CMOopClosure oop_closure(_g1h, _cm, this);
3628 set_oop_closure(&oop_closure);
3629
3630 if (_cm->has_overflown()) {
3631 // This can happen if the region stack or the mark stack overflows
3632 // during a GC pause and this task, after a yield point,
3633 // restarts. We have to abort as we need to get into the overflow
3634 // protocol which happens right at the end of this task.
3635 set_has_aborted();
3636 }
3637
3638 // First drain any available SATB buffers. After this, we will not
3639 // look at SATB buffers before the next invocation of this method.
3640 // If enough completed SATB buffers are queued up, the regular clock
3641 // will abort this task so that it restarts.
3642 drain_satb_buffers();
3643 // ...then partially drain the local queue and the global stack
3644 drain_local_queue(true);
3645 drain_global_stack(true);
3646
3647 // Then totally drain the region stack. We will not look at
3648 // it again before the next invocation of this method. Entries on
3649 // the region stack are only added during evacuation pauses, for
3650 // which we have to yield. When we do, we abort the task anyway so
3651 // it will look at the region stack again when it restarts.
3652 bitmap_closure.set_scanning_heap_region(false);
3653 drain_region_stack(&bitmap_closure);
3654 // ...then partially drain the local queue and the global stack
3655 drain_local_queue(true);
3656 drain_global_stack(true);
3657
3658 do {
3659 if (!has_aborted() && _curr_region != NULL) {
3660 // This means that we're already holding on to a region.
3661 tmp_guarantee_CM( _finger != NULL,
3662 "if region is not NULL, then the finger "
3663 "should not be NULL either" );
3664
3665 // We might have restarted this task after an evacuation pause
3666 // which might have evacuated the region we're holding on to
3667 // underneath our feet. Let's read its limit again to make sure
3668 // that we do not iterate over a region of the heap that
3669 // contains garbage (update_region_limit() will also move
3670 // _finger to the start of the region if it is found empty).
3671 update_region_limit();
3672 // We will start from _finger not from the start of the region,
3673 // as we might be restarting this task after aborting half-way
3674 // through scanning this region. In this case, _finger points to
3675 // the address where we last found a marked object. If this is a
3676 // fresh region, _finger points to start().
3677 MemRegion mr = MemRegion(_finger, _region_limit);
3678
3679 if (_cm->verbose_low())
3680 gclog_or_tty->print_cr("[%d] we're scanning part "
3681 "["PTR_FORMAT", "PTR_FORMAT") "
3682 "of region "PTR_FORMAT,
3683 _task_id, _finger, _region_limit, _curr_region);
3684
3685 // Let's iterate over the bitmap of the part of the
3686 // region that is left.
3687 bitmap_closure.set_scanning_heap_region(true);
3688 if (mr.is_empty() ||
3689 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3690 // We successfully completed iterating over the region. Now,
3691 // let's give up the region.
3692 giveup_current_region();
3693 regular_clock_call();
3694 } else {
3695 guarantee( has_aborted(), "currently the only way to do so" );
3696 // The only way to abort the bitmap iteration is to return
3697 // false from the do_bit() method. However, inside the
3698 // do_bit() method we move the _finger to point to the
3699 // object currently being looked at. So, if we bail out, we
3700 // have definitely set _finger to something non-null.
3701 guarantee( _finger != NULL, "invariant" );
3702
3703 // Region iteration was actually aborted. So now _finger
3704 // points to the address of the object we last scanned. If we
3705 // leave it there, when we restart this task, we will rescan
3706 // the object. It is easy to avoid this. We move the finger by
3707 // enough to point to the next possible object header (the
3708 // bitmap knows by how much we need to move it as it knows its
3709 // granularity).
3710 move_finger_to(_nextMarkBitMap->nextWord(_finger));
3711 }
3712 }
3713 // At this point we have either completed iterating over the
3714 // region we were holding on to, or we have aborted.
3715
3716 // We then partially drain the local queue and the global stack.
3717 // (Do we really need this?)
3718 drain_local_queue(true);
3719 drain_global_stack(true);
3720
3721 // Read the note on the claim_region() method on why it might
3722 // return NULL with potentially more regions available for
3723 // claiming and why we have to check out_of_regions() to determine
3724 // whether we're done or not.
3725 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3726 // We are going to try to claim a new region. We should have
3727 // given up on the previous one.
3728 tmp_guarantee_CM( _curr_region == NULL &&
3729 _finger == NULL &&
3730 _region_limit == NULL, "invariant" );
3731 if (_cm->verbose_low())
3732 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3733 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3734 if (claimed_region != NULL) {
3735 // Yes, we managed to claim one
3736 statsOnly( ++_regions_claimed );
3737
3738 if (_cm->verbose_low())
3739 gclog_or_tty->print_cr("[%d] we successfully claimed "
3740 "region "PTR_FORMAT,
3741 _task_id, claimed_region);
3742
3743 setup_for_region(claimed_region);
3744 tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
3745 }
3746 // It is important to call the regular clock here. It might take
3747 // a while to claim a region if, for example, we hit a large
3748 // block of empty regions. So we need to call the regular clock
3749 // method once round the loop to make sure it's called
3750 // frequently enough.
3751 regular_clock_call();
3752 }
3753
3754 if (!has_aborted() && _curr_region == NULL) {
3755 tmp_guarantee_CM( _cm->out_of_regions(),
3756 "at this point we should be out of regions" );
3757 }
3758 } while ( _curr_region != NULL && !has_aborted());
3759
3760 if (!has_aborted()) {
3761 // We cannot check whether the global stack is empty, since other
3762 // tasks might be pushing objects to it concurrently. We also cannot
3763 // check if the region stack is empty because if a thread is aborting
3764 // it can push a partially done region back.
3765 tmp_guarantee_CM( _cm->out_of_regions(),
3766 "at this point we should be out of regions" );
3767
3768 if (_cm->verbose_low())
3769 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3770
3771 // Try to reduce the number of available SATB buffers so that
3772 // remark has less work to do.
3773 drain_satb_buffers();
3774 }
3775
3776 // Since we've done everything else, we can now totally drain the
3777 // local queue and global stack.
3778 drain_local_queue(false);
3779 drain_global_stack(false);
3780
3781 // Attempt at work stealing from other task's queues.
3782 if (!has_aborted()) {
3783 // We have not aborted. This means that we have finished all that
3784 // we could. Let's try to do some stealing...
3785
3786 // We cannot check whether the global stack is empty, since other
3787 // tasks might be pushing objects to it concurrently. We also cannot
3788 // check if the region stack is empty because if a thread is aborting
3789 // it can push a partially done region back.
3790 guarantee( _cm->out_of_regions() &&
3791 _task_queue->size() == 0, "only way to reach here" );
3792
3793 if (_cm->verbose_low())
3794 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3795
3796 while (!has_aborted()) {
3797 oop obj;
3798 statsOnly( ++_steal_attempts );
3799
3800 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3801 if (_cm->verbose_medium())
3802 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3803 _task_id, (void*) obj);
3804
3805 statsOnly( ++_steals );
3806
3807 tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
3808 "any stolen object should be marked" );
3809 scan_object(obj);
3810
3811 // And since we're towards the end, let's totally drain the
3812 // local queue and global stack.
3813 drain_local_queue(false);
3814 drain_global_stack(false);
3815 } else {
3816 break;
3817 }
3818 }
3819 }
3820
3821 // We still haven't aborted. Now, let's try to get into the
3822 // termination protocol.
3823 if (!has_aborted()) {
3824 // We cannot check whether the global stack is empty, since other
3825 // tasks might be concurrently pushing objects on it. We also cannot
3826 // check if the region stack is empty because if a thread is aborting
3827 // it can push a partially done region back.
3828 guarantee( _cm->out_of_regions() &&
3829 _task_queue->size() == 0, "only way to reach here" );
3830
3831 if (_cm->verbose_low())
3832 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3833
3834 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3835 // The CMTask class also extends the TerminatorTerminator class,
3836 // hence its should_exit_termination() method will also decide
3837 // whether to exit the termination protocol or not.
3838 bool finished = _cm->terminator()->offer_termination(this);
3839 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3840 _termination_time_ms +=
3841 termination_end_time_ms - _termination_start_time_ms;
3842
3843 if (finished) {
3844 // We're all done.
3845
3846 if (_task_id == 0) {
3847 // let's allow task 0 to do this
3848 if (concurrent()) {
3849 guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
3850 // we need to set this to false before the next
3851 // safepoint. This way we ensure that the marking phase
3852 // doesn't observe any more heap expansions.
3853 _cm->clear_concurrent_marking_in_progress();
3854 }
3855 }
3856
3857 // We can now guarantee that the global stack is empty, since
3858 // all other tasks have finished.
3859 guarantee( _cm->out_of_regions() &&
3860 _cm->region_stack_empty() &&
3861 _cm->mark_stack_empty() &&
3862 _task_queue->size() == 0 &&
3863 !_cm->has_overflown() &&
3864 !_cm->mark_stack_overflow() &&
3865 !_cm->region_stack_overflow(),
3866 "only way to reach here" );
3867
3868 if (_cm->verbose_low())
3869 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3870 } else {
3871 // Apparently there's more work to do. Let's abort this task. It
3872 // will restart it and we can hopefully find more things to do.
3873
3874 if (_cm->verbose_low())
3875 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3876
3877 set_has_aborted();
3878 statsOnly( ++_aborted_termination );
3879 }
3880 }
3881
3882 // Mainly for debugging purposes to make sure that a pointer to the
3883 // closure which was statically allocated in this frame doesn't
3884 // escape it by accident.
3885 set_oop_closure(NULL);
3886 double end_time_ms = os::elapsedVTime() * 1000.0;
3887 double elapsed_time_ms = end_time_ms - _start_time_ms;
3888 // Update the step history.
3889 _step_times_ms.add(elapsed_time_ms);
3890
3891 if (has_aborted()) {
3892 // The task was aborted for some reason.
3893
3894 statsOnly( ++_aborted );
3895
3896 if (_has_aborted_timed_out) {
3897 double diff_ms = elapsed_time_ms - _time_target_ms;
3898 // Keep statistics of how well we did with respect to hitting
3899 // our target only if we actually timed out (if we aborted for
3900 // other reasons, then the results might get skewed).
3901 _marking_step_diffs_ms.add(diff_ms);
3902 }
3903
3904 if (_cm->has_overflown()) {
3905 // This is the interesting one. We aborted because a global
3906 // overflow was raised. This means we have to restart the
3907 // marking phase and start iterating over regions. However, in
3908 // order to do this we have to make sure that all tasks stop
3909 // what they are doing and re-initialise in a safe manner. We
3910 // will achieve this with the use of two barrier sync points.
3911
3912 if (_cm->verbose_low())
3913 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3914
3915 _cm->enter_first_sync_barrier(_task_id);
3916 // When we exit this sync barrier we know that all tasks have
3917 // stopped doing marking work. So, it's now safe to
3918 // re-initialise our data structures. At the end of this method,
3919 // task 0 will clear the global data structures.
3920
3921 statsOnly( ++_aborted_overflow );
3922
3923 // We clear the local state of this task...
3924 clear_region_fields();
3925
3926 // ...and enter the second barrier.
3927 _cm->enter_second_sync_barrier(_task_id);
3928 // At this point everything has bee re-initialised and we're
3929 // ready to restart.
3930 }
3931
3932 if (_cm->verbose_low()) {
3933 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3934 "elapsed = %1.2lfms <<<<<<<<<<",
3935 _task_id, _time_target_ms, elapsed_time_ms);
3936 if (_cm->has_aborted())
3937 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
3938 _task_id);
3939 }
3940 } else {
3941 if (_cm->verbose_low())
3942 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
3943 "elapsed = %1.2lfms <<<<<<<<<<",
3944 _task_id, _time_target_ms, elapsed_time_ms);
3945 }
3946
3947 _claimed = false;
3948 }
3949
3950 CMTask::CMTask(int task_id,
3951 ConcurrentMark* cm,
3952 CMTaskQueue* task_queue,
3953 CMTaskQueueSet* task_queues)
3954 : _g1h(G1CollectedHeap::heap()),
3955 _co_tracker(G1CMGroup),
3956 _task_id(task_id), _cm(cm),
3957 _claimed(false),
3958 _nextMarkBitMap(NULL), _hash_seed(17),
3959 _task_queue(task_queue),
3960 _task_queues(task_queues),
3961 _oop_closure(NULL) {
3962 guarantee( task_queue != NULL, "invariant" );
3963 guarantee( task_queues != NULL, "invariant" );
3964
3965 statsOnly( _clock_due_to_scanning = 0;
3966 _clock_due_to_marking = 0 );
3967
3968 _marking_step_diffs_ms.add(0.5);
3969 }