58 patches for repository /home/simonmar/ghc-HEAD: Fri Aug 20 10:31:33 BST 2010 Simon Marlow * fix some shutdown memory leaks Fri Sep 3 14:49:16 BST 2010 Simon Marlow * add comment about locking rules for storage data structures Tue Sep 7 10:11:09 BST 2010 Simon Marlow * comment fix Tue Oct 12 16:50:42 BST 2010 Simon Marlow * sanity checking fix for PAPs Tue Oct 12 16:51:01 BST 2010 Simon Marlow * add a missing forwarding-ptr check to the generic apply code Wed Oct 13 16:42:59 BST 2010 Simon Marlow * GhcDebugged only applies to stage2/3 Wed Oct 13 16:47:57 BST 2010 Simon Marlow * bugfix (missing linker symbol) Wed Oct 13 16:48:24 BST 2010 Simon Marlow * bugfix (debug output) Fri Oct 15 11:00:54 BST 2010 Simon Marlow * add assertion Tue Oct 19 11:59:53 BST 2010 Simon Marlow * fix for +RTS -qg Tue Oct 19 14:07:52 BST 2010 Simon Marlow * comment Thu Oct 21 10:55:27 BST 2010 Simon Marlow * Partially-working snapshot of local GC work, before redesign Thu Oct 21 10:56:02 BST 2010 Simon Marlow * snapshot of local GC work at a relatively stable point Thu Oct 21 10:56:32 BST 2010 Simon Marlow * fix various bugs Thu Oct 21 10:56:34 BST 2010 Simon Marlow * move debugging utils into a separate file Thu Oct 21 10:56:35 BST 2010 Simon Marlow * use globalise, not globalise_ Thu Oct 21 10:56:35 BST 2010 Simon Marlow * bugfix (wrong block counts) Thu Oct 21 10:56:35 BST 2010 Simon Marlow * simplify globalise_maybe_record_mutable Thu Oct 21 10:56:35 BST 2010 Simon Marlow * don't keep mutable arrays on the mutable list all the time Thu Oct 21 10:56:36 BST 2010 Simon Marlow * fix bug Thu Oct 21 10:56:36 BST 2010 Simon Marlow * updates globalise rather than publish (experimental) Thu Oct 21 10:56:36 BST 2010 Simon Marlow * write barriers globalise rather than publish (experimental) Thu Oct 21 11:03:46 BST 2010 Simon Marlow * bugs fixed Thu Oct 21 11:04:07 BST 2010 Simon Marlow * warning fixes Thu Oct 21 11:04:07 BST 2010 Simon Marlow * ws->todo_bd is always non-NULL Thu Oct 21 11:04:07 BST 2010 Simon Marlow * fix bugs with initialisation of ws->todo_bd Thu Oct 21 11:04:07 BST 2010 Simon Marlow * don't globalise thunks (experimental) Thu Oct 21 11:07:15 BST 2010 Simon Marlow * Local GC checkpoint Thu Oct 21 11:07:36 BST 2010 Simon Marlow * correct the write barriers for MUT_VAR and MUT_ARR Thu Oct 21 11:07:37 BST 2010 Simon Marlow * use the result of globalise_ (not actually a bug fix, just a cleanup) Thu Oct 21 11:07:37 BST 2010 Simon Marlow * checkpoint prior to merge Thu Oct 21 11:12:56 BST 2010 Simon Marlow * bugfix: mark a TSO dirty when we globalise it Thu Oct 28 15:01:28 BST 2010 Simon Marlow * stats output tidyup Thu Oct 28 15:01:43 BST 2010 Simon Marlow * merge Thu Oct 28 15:02:40 BST 2010 Simon Marlow * small sanity fix Thu Oct 28 15:03:22 BST 2010 Simon Marlow * dirty_TSO: add assertion Thu Oct 28 15:03:41 BST 2010 Simon Marlow * merge Fri Oct 29 10:28:43 BST 2010 Simon Marlow * sanity: fix places where we weren't filling fresh memory with 0xaa Fri Oct 29 11:22:47 BST 2010 Simon Marlow * fix race condition in atomicModifyIORef Fri Oct 29 11:25:02 BST 2010 Simon Marlow * no need to dirty_TSO in putMVar/tryPutMVar Mon Nov 1 12:41:43 GMT 2010 Simon Marlow * count "dud" sparks (expressions that were already evaluated when sparked) Mon Nov 1 13:51:23 GMT 2010 Simon Marlow * relax ASSERT_PARTIAL_CAPABILITY_INVARIANTS Mon Nov 1 13:51:55 GMT 2010 Simon Marlow * messageGlobalise fixes Mon Nov 1 13:52:17 GMT 2010 Simon Marlow * scheduleProcessInbox fix Wed Oct 13 16:42:00 BST 2010 Simon Marlow * 32-bit fix Fri Nov 5 14:03:13 GMT 2010 Simon Marlow * Fix parallel GC (duh) Thu Nov 11 13:27:27 GMT 2010 Simon Marlow * count fizzled and GC'd sparks separately Thu Nov 11 13:30:21 GMT 2010 Simon Marlow * collect the write-barrier policies into one place (WritePolicy.h) Thu Nov 11 13:30:46 GMT 2010 Simon Marlow * fix a missing RELEASE_LOCK Thu Nov 11 13:31:13 GMT 2010 Simon Marlow * set INBOX_THRESHOLD to zero (not sure if we need this yet) Thu Nov 11 13:32:06 GMT 2010 Simon Marlow * avoid reading non-local memory when looking for work to steal Thu Nov 11 13:32:26 GMT 2010 Simon Marlow * allow IND_LOCALs to be eliminated during LOCAL_GC Thu Nov 11 13:32:34 GMT 2010 Simon Marlow * PLAN updates Mon Nov 1 09:29:17 GMT 2010 Simon Marlow * x86 fixes Fri Nov 12 15:59:46 GMT 2010 Simon Marlow * In processInbox(), use TRY_ACQUIRE_LOCK instead of ACQUIRE_LOCK Seems to avoid getting stuck in slow message ping-pong situations. Mon Nov 15 13:31:19 GMT 2010 Simon Marlow * scheduleProcessInbox: grab the whole queue at once Fri Nov 19 12:51:38 GMT 2010 Simon Marlow * PLAN updates Fri Nov 19 12:52:57 GMT 2010 Simon Marlow * globalise: globalise a top-level thunk, but none further down If we're asked to globalise a thunk, it's not useful to return an IND_LOCAL. New patches: [fix some shutdown memory leaks Simon Marlow **20100820093133 Ignore-this: 3e7b80b5f4846d6c56319c150895953d ] { hunk ./rts/sm/MBlock.c 91 if(map == NULL) { mblock_map_count++; - mblock_maps = realloc(mblock_maps, - sizeof(MBlockMap*) * mblock_map_count); + mblock_maps = stgReallocBytes(mblock_maps, + sizeof(MBlockMap*) * mblock_map_count, + "markHeapAlloced(1)"); map = mblock_maps[mblock_map_count-1] = hunk ./rts/sm/MBlock.c 95 - stgMallocBytes(sizeof(MBlockMap),"markHeapAlloced"); + stgMallocBytes(sizeof(MBlockMap),"markHeapAlloced(2)"); memset(map,0,sizeof(MBlockMap)); map->addrHigh32 = (StgWord32) (((StgWord)p) >> 32); } hunk ./rts/sm/MBlock.c 268 void freeAllMBlocks(void) { + nat n; + debugTrace(DEBUG_gc, "freeing all megablocks"); hunk ./rts/sm/MBlock.c 271 + osFreeAllMBlocks(); hunk ./rts/sm/MBlock.c 273 + + for (n = 0; n < mblock_map_count; n++) { + stgFree(mblock_maps[n]); + } + stgFree(mblock_maps); } void } [add comment about locking rules for storage data structures Simon Marlow **20100903134916 Ignore-this: 4adbcb37aeadff1975bb232c823b4636 ] hunk ./rts/sm/Storage.c 36 #include "ffi.h" -/* - * All these globals require sm_mutex to access in THREADED_RTS mode. +/* ----------------------------------------------------------------------------- + Storage manager state + ------------------------------------------------------------------------- */ + +/* LOCKING RULES + * + * protected by sm_mutex: + * - the block allocator + * - caf_list + * - revertible_caf_list + * - keepCAFs + * - exec_block + * + * protected by gen->sync: + * - all generation structs except when gen->is_local + * + * during parallel GC, the rules are modified: + * - the block allocator requires gc_alloc_block_sync, not sm_mutex + * - even local generations require gen->sync + * + * owned by a Capability: + * - local generations + * - nurseries */ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; [comment fix Simon Marlow **20100907091109 Ignore-this: 43c9d5c5503c4e97b3558290c9d5eda8 ] { hunk ./rts/Task.c 315 taskTimeStamp (Task *task USED_IF_THREADS) { #if defined(THREADED_RTS) - Ticks currentElapsedTime, currentUserTime, elapsedGCTime; + Ticks currentElapsedTime, currentUserTime; currentUserTime = getThreadCPUTime(); currentElapsedTime = getProcessElapsedTime(); hunk ./rts/Task.c 320 - // XXX this is wrong; we want elapsed GC time since the - // Task started. - elapsedGCTime = stat_getElapsedGCTime(); - task->mut_time = currentUserTime - task->muttimestart - task->gc_time; task->mut_etime = hunk ./rts/Task.c 323 - currentElapsedTime - task->elapsedtimestart - elapsedGCTime; + currentElapsedTime - task->elapsedtimestart - task->gc_etime; hunk ./rts/Task.c 325 + if (task->gc_time < 0) { task->gc_time = 0; } + if (task->gc_etime < 0) { task->gc_etime = 0; } if (task->mut_time < 0) { task->mut_time = 0; } if (task->mut_etime < 0) { task->mut_etime = 0; } #endif hunk ./rts/Task.c 332 } +void +taskDoneGC (Task *task, Ticks cpu_time, Ticks elapsed_time) +{ + task->gc_time += cpu_time; + task->gc_etime += elapsed_time; +} + #if defined(THREADED_RTS) void hunk ./rts/Task.c 390 // A worker always gets a fresh Task structure. task = newTask(rtsTrue); - // The lock here is to synchronise with taskStart(), to make sure + // The lock here is to synchronise with workerStart(), to make sure // that we have finished setting up the Task structure before the // worker thread reads it. ACQUIRE_LOCK(&task->lock); } [sanity checking fix for PAPs Simon Marlow **20101012155042 Ignore-this: d04ece2a88190cdbe0293db9aa79e97d ] { hunk ./rts/sm/Sanity.c 213 StgClosure *fun; StgClosure *p; StgFunInfoTable *fun_info; + const StgInfoTable *info; fun = UNTAG_CLOSURE(tagged_fun); ASSERT(LOOKS_LIKE_CLOSURE_PTR(fun)); hunk ./rts/sm/Sanity.c 217 - fun_info = get_fun_itbl(fun); + + info = fun->header.info; + if (IS_FORWARDING_PTR(info)) { + fun = (StgClosure*)UN_FORWARDING_PTR(info); + info = fun->header.info; + } + fun_info = FUN_INFO_PTR_TO_STRUCT(info); p = (StgClosure *)payload; switch (fun_info->f.fun_type) { } [add a missing forwarding-ptr check to the generic apply code Simon Marlow **20101012155101 Ignore-this: 277bc429ed31d1cba0d0be2a14099194 ] hunk ./utils/genapply/GenApply.hs 505 -- Functions can be tagged, so we untag them! text "R1 = UNTAG(R1);", text "info = %INFO_PTR(R1);", + -- check for forwarding pointers + text "if (IS_FORWARDING_PTR(info)) { R1 = UN_FORWARDING_PTR(info); info = %INFO_PTR(R1); }", -- if fast == 1: -- print " goto *lbls[info->type];"; [GhcDebugged only applies to stage2/3 Simon Marlow **20101013154259 Ignore-this: c5eb273f2e17b6c4f08f2e10dbf732e3 ] hunk ./ghc/ghc.mk 35 endif ifeq "$(GhcDebugged)" "YES" -ghc_HC_OPTS += -debug +ghc_stage2_HC_OPTS += -debug +ghc_stage3_HC_OPTS += -debug endif ifeq "$(GhcDynamic)" "YES" [bugfix (missing linker symbol) Simon Marlow **20101013154757 Ignore-this: 97c2aafcdca2ddbf80dae14ec8753bfe ] hunk ./rts/Linker.c 769 SymI_HasProto(stg_deRefWeakzh) \ SymI_HasProto(stg_deRefStablePtrzh) \ SymI_HasProto(dirty_MUT_VAR) \ + SymI_HasProto(dirty_MUT_ARR) \ SymI_HasProto(stg_forkzh) \ SymI_HasProto(stg_forkOnzh) \ SymI_HasProto(forkProcess) \ [bugfix (debug output) Simon Marlow **20101013154824 Ignore-this: f163a7c3f90ed1ab5e1c49ae195f8d59 ] hunk ./rts/Trace.c 132 [ThreadYielding] = "yielding", [ThreadBlocked] = "blocked", [ThreadFinished] = "finished", - [THREAD_SUSPENDED_FOREIGN_CALL] = "suspended while making a foreign call" + [THREAD_SUSPENDED_FOREIGN_CALL] = "suspended while making a foreign call", + [6 + BlockedOnMVar] = "blocked on an MVar", + [6 + BlockedOnBlackHole] = "blocked on a black hole", + [6 + BlockedOnRead] = "blocked on a read operation", + [6 + BlockedOnWrite] = "blocked on a write operation", + [6 + BlockedOnDelay] = "blocked on a delay operation", + [6 + BlockedOnSTM] = "blocked on STM", + [6 + BlockedOnDoProc] = "blocked on asyncDoProc", + [6 + BlockedOnCCall] = "blocked on a foreign call", + [6 + BlockedOnCCall_NoUnblockExc] = "blocked on a foreign call", + [6 + BlockedOnMsgThrowTo] = "blocked on throwTo", + [6 + ThreadMigrating] = "migrating", + [6 + BlockedOnMsgGlobalise] = "blocked waiting for data to be globalised" }; #endif [add assertion Simon Marlow **20101015100054 Ignore-this: ef7bcd17f97c6fbc08d690915e408915 ] hunk ./rts/Schedule.c 415 ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task); ASSERT(t->cap == cap); ASSERT(t->bound ? t->bound->task->cap == cap : 1); + ASSERT(isGlobal((StgClosure*)t) || Bdescr((P_)t)->gen_ix == cap->no); prev_what_next = t->what_next; [fix for +RTS -qg Simon Marlow **20101019105953 Ignore-this: e1ef4bf4a6ee2a8c53687c6dc673b4f5 ] hunk ./rts/sm/GC.c 722 // zero the scavenged static object list if (major_gc) { nat i; - for (i = 0; i < n_gc_threads; i++) { - zero_static_object_list(gc_threads[i]->scavenged_static_objects); + if (gc_type == GC_SEQ) { + zero_static_object_list(gct->scavenged_static_objects); + } else { + for (i = 0; i < n_gc_threads; i++) { + zero_static_object_list(gc_threads[i]->scavenged_static_objects); + } } } [comment Simon Marlow **20101019130752 Ignore-this: a8ff155f3175ef2590446fede80a5838 ] hunk ./rts/sm/GC.c 1351 /* ---------------------------------------------------------------------------- - Initialise a generation that is *not* to be collected + Save the mutable lists in saved_mut_lists ------------------------------------------------------------------------- */ static void [Partially-working snapshot of local GC work, before redesign Simon Marlow **20101021095527 Ignore-this: 8f2a0f08bc4fb6f57e47a8370a74e2e0 ] { addfile ./PLAN hunk ./PLAN 1 +----------------------------------------------------------------------------- +-- Plan + + - conc023(threaded1) + - throwto001 + + - when allocating messages that point to TSOs, we need to globalise + the TSO. + + - is HEAP_ALLOCED_GC() safe to call during local GC? + + - implement messages for requesting globalisation + + - re-enable thread migration + - to migrate a thread: + - flush all its blocking queues, wake up the threads. + (so we can discard all the BLOCKING_QUEUE structure) + - abandon any transactions + (so we can discard all the TREC_CHUNK structures) + - Alternative: when migrating threads, we may have to do a GC and + flush everything to the global heap. Problem is that we may end + up with BLOCKING_QUEUEs that point to local BLACKHOLEs otherwise. + + - count how many words of data we globalise (gct->globalised) + + - add comment to describe tradeoffs in CAF handling. Putting the BH + in the old generation doesn't work out: we have to globalise the + TSO, but then we'd have to SAVE/LOAD_THREAD_STATE(). + + - no need to maintain the global heap invariant for non-THREADED_RTS + and when -N1. + + - sparks: we should look through the IND_LOCAL when deciding whether + to fizzle a spark during GC. + + - sparks: can we do better than just publishing all sparks? + e.g. separate the spark pool into local and global pools + + - STM invariants + +----------------------------------------------------------------------------- +-- STM + + TRecHeader (MUT_PRIM) -> { TRecHeader enclosing, + TRecChunk, + InvariantCheckQueue } + + * TRecHeaders must be visible to all Capabilities, because a TVar is + locked by writing a pointer to the current TRecHeader into the + tvar->current_value field. Other Capabilities need to examine + this closure to check whether the TVar is locked. + + Hence, we allocate all TRecHeaders in the global heap. Their + contents are PRIVATE, they may contain global->local pointers, and + they must be on the mutable list. + + TRecChunk (TREC_CHUNK) -> + { TRecChunk prev_chunk, + TRecEntry -> { TVar, expected, new } + } + + * We consider the TRecChunk to be PRIVATE. They can contain + global->local pointers, and must be on the correct mutable list, + but they do not have to be allocated in the global heap. + + TVar (MUT_PRIM) -> { value, TVarWatchQueue } + + * When we write to a TVar, we must globalise as usual. Both TVars + and TVarWatchQueues are MUT_PRIM, so they both get eagerly promoted. + + TVarWatchQueue (MUT_PRIM) -> { closure, prev, next } + + * When creating a TVarWatchQueue we first need to check whether to + allocate it globally or locally, depending on the TVar. + + AtomicInvariant (MUT_PRIM) -> { code, TrecHeader } + + * TODO + + InvariantCheckQueue (MUT_PRIM) -> { AtomicInvariant, TrecHeader, next } + + * TODO + +----------------------------------------------------------------------------- +-- BLOCKING_QUEUE structures + +typedef struct StgBlockingQueue_ { + StgHeader header; + struct StgBlockingQueue_ *link; // here so it looks like an IND + StgClosure *bh; // the BLACKHOLE + StgTSO *owner; + struct MessageBlackHole_ *queue; +} StgBlockingQueue; + + - BLOCKING_QUEUE and MSG_BLACKHOLE always get eagerly promoted, so we + never end up with global->local pointers in the link and queue + fields. + + - BLOCKING_QUEUE may be promoted ahead of BLACKHOLE, so the BH field + may be global->local. But: a BLOCKING_QUEUE object is only pointed + to by the BH and the TSO that owns it, and the TSO must belong to + the owner of the BH, so this should be safe. + +----------------------------------------------------------------------------- +-- Remembered set overhaul + +If we're going to support more than one local generation, we may want +to simplify the remembered set handling. Here are some thoughts on +that: + + - All remembered sets contain only TSO / IND_LOCAL objects + - every update of an old-gen thunk promotes the transitive closure + - no more recordMutable + - every GC must do globalise_mut_list for all mut lists + - write barriers: + - MUT_VAR/MUT_ARR: publish the written pointer + - TSO: as before, TSO can be in the remembered set + - MVAR + - blocking on a BLACKHOLE (modifies the BH, or the BQ) + - sending a message (modifies its link) + +----------------------------------------------------------------------------- +-- Allocate MSG_BLACKHOLE locally or globally? (also MSG_THROWTO) + + We have a choice here; + 1. allocate the MSG_BLACKHOLE in global memory, + and globalise CurrentTSO immediately. + + 2. allocate the MSG_BLACKHOLE in local memory, leaving it + to messageBlackHole to globalise it (and CurrentTSO) if + necessary. + + We need the MSG_BLACKHOLE to be global if: + - the BLACKHOLE is already in global memory, or + - the MSG_BLACKHOLE will be sent to another CPU + + For now we do (1), ToDo: try (2) and measure + +----------------------------------------------------------------------------- +-- Plan, later + +* MUT_VAR_CLEAN and MUT_VAR_DIRTY aren't being used. Decide what to + do about them. (similarly for other clean/dirty pairs?) + +* Can we do globalise_capability_mut_lists() in the worker threads for + GC_PAR, rather than having the main thread do all of them? The + problem is that mut_list entries might be on the wrong mut_list. + +* Clean up mutable arrays: + - card marking is unnecessary for an array in the global heap, because + we publish every pointer written into it + - a mutable array doesn't need to be on the mut_list, unless we + have more generations (local or global). + +* leaving an IND_LOCAL every time we write to a mutable object in the + global gen is bad, because we'll accumulate floating garbage from + all the old values. Can we do anything at all here? + - for objects that already have a read barrier (e.g. MVar), we + could do better. + +* maybe we should have MUT_VAR_PUBLIC / MUT_VAR_PRIVATE? + +* Push down SM lock in GC.c + * GLOBALS that need locking in GC: + * threads lists attached to old generations (modified in tidyThreadList) + * gen->weak_ptrs lists + * resurrectUnreachableThreads() can be pulled outside the lock, it only + touches the threads lists of generations collected. + +* XXX we broke LDV profiling in CgTailCall +* XXX we probably don't need to duplicate the whole of Scav.c/Evac.c +* XXX slop calculation is wrong: 4,294,967,128 bytes maximum slop +* XXX major_gc is global, we rely on it always being rtsFalse for local GC +* XXX what to do about heapSizeSuggestion and resize_nursery: should + we resize the current nursery accoring to the amount of stuff that + was retained? +* XXX -G1 is not compatible with -N2 and greater, becauser there would + be no global heap. +* XXX should make publish() faster by inlining the allocation + + * ToDo: check whether the interpreter looks at the info table of a + tagged closure + +- do more sanity checking: + - check for proper BF_EVACUATED flags on blocks + + - Things to measure: + - code size impact of extra tag & fwd ptr checking + - mutator impact (just the codeGen changes, no GC changes) + +----------------------------------------------------------------------------- +-- Invariants + +1. Pointers from the global heap to the local heap are allowed only in + two places: IND_LOCAL, or the stack of a TSO + + 1a. TSO stacks point to at most one local heap. + + +2. every global object that points into a Capability's local heap must + be in that Capability's remembered set (cap->mut_lists[g]). + + This means that + * GC_LOCAL need only consult the local mut lists. + * MAINTAINING THE INVARIANT: + * GC_LCCAL needs to do nothing special: the invariant is + automatically maintained. + * GC_SEQ needs to add entries to the appropriate mut list + * GC_PAR may need to save up entries for adding to the appropriate + list. + +3. Objects in a Capability's remembered set may only point to the + local heap of that Capability or the global heap. + + So when doing a local GC, we only visit objects in the global heap + or our local heap. + +----------------------------------------------------------------------------- +-- Why this design? + +1. Why have IND_LOCAL rather than always promoting the transitive + closure to maintain the global heap invariant? + + - we can have some closures that we do not globalise + - closures with identity, e.g. primitive arrays and suchlike + - mutable objects can still be allocated in the local heap + - we probably want to try to keep mutable closures in the private + heap, otherwise we have to do globalisation/publish on every + write. For this we might want to have two generations in the + local heap. + + - might be able to reduce the latency of globalisation requests + from another CPU, by not promoting the whole transitive closure. + + - we can use a shared public spark pool and work-stealing without + globalising the entire transitive closure of its contents. + +----------------------------------------------------------------------------- +-- How to globalise different closure types [GLOBALISE] + +Two operations: + + StgClosure * publish (StgClosure *p) + // make an object representing P in the global heap + + void globalise (StgClosure **p) + // move P from local to global storage + +Every time we write a pointer P into an object in the old generation, +if P points to the local heap, we must write publish(P) instead. This +includes updates. (we cannot overwrite with an IND_LOCAL, because a +race condition between two processors updating the same thunk could +leave an IND_LOCAL pointing to the wrong local heap). + +If we receive a message requesting that we globalise a pointer P, then +we try to copy the transitive closure of the pointer into the global +heap, this is globalise(P). + +class B: BLACKHOLE +class T: THUNK/PAP/AP, untagged +class C: CONSTR, tagged +class F: FUN, tagged or untagged +class L: large or pinned unpointed +class T: TSOs +class U: small unpointed (MVar#, MutVar#, Array#, Weak#, BCO#, + StableName#, TVar#, PRIM, MUT_PRIM) + +publish: + B: IND_LOCAL + T: globalise, or IND_LOCAL + C: globalise, or IND_LOCAL + F: globalise, or IND_LOCAL + L: globalise, or IND_LOCAL + T: globalise, or IND_LOCAL + U: NO + +globalise: + + B: NO (pointed to by stack, will be updated) + T: copy and replace with IND, publish children + C: copy and replace with forwarding ptr, publish children + F: copy and replace with forwarding ptr, publish children + L: re-link the block, publish children + T: copy, replacing with ThreadRelocated, publish children + U: NO + +Note that we can neither publish nor globalise U, hence objects that +point to U can only be published, not globalised. + +Since we cannot globalise U, what happens to the closures that point to U? + + WE NEVER GLOBALISE A CLOSURE THAT POINTS TO U + + Thunks, functions and constructors that point to U are flagged in + their info tables (info->flags & HAS_UNLIFTED_FIELDS), and if + globalise() sees one of these it refuses to globalise the closure. + + +What about during GC? + + U CLOSURES ARE EAGERLY PROMOTED DURING GC + + This is to avoid the situation where we have promoted a closure + that points to U, but not the U itself. There is no way to fix + this up post-GC to reestablish the local heap invariant, so instead + we avoid it happening by automatically promoting U closures. + + L->U: we can promote L without moving it, so everything's fine. + +Alternative: + + do *not* eagerly promote C->U, F->U or T->U. We can guarantee + that the C->U, F->U, T->U pointers are not back-pointers, so as long + as we do not eagerly promote the C or F or T closure, we can be sure + that they stay forward-pointers, and hence we never get into a + state where C/F/T has been promoted but not U. + +----------------------------------------------------------------------------- +How to globalise large-family constructors? + +Problem is that to get the constructor tag we have to read the info +table, but we want to globalise constructors by replacing the info +pointer with a forwarding pointer. + + 1. don't tag large-family constructors at all, + case code checks closure type before entering + - extra 2 memory refs on entry :-( + 1a. like 1, but code to check closure type is at the return pt, + so can share loading of info ptr. + - 1 extra memory ref + - extra test/branch in common case for returns + - need to distinguish tagged from untagged CONSTR in globalise() :-( + 2. tag large-family constructors as before + info table identifies large-family somehow + globalise code replaces with an IND + case return code checks for a special tag value (eg. 0) for IND, + and re-enters + - extra code for re-entry, extra tag in info table :-( + 3. tag large-family constructors as before + case join point checks for forwarding ptr + - extra test/branch at return pt for large-family constrs + 4. globalising a large-family constructor requires building a new + info table instance in the heap. The new itbl has type + CON_GLOBALISED, and contains the address of the relocated + closure. + - complicated, but no runtime overhead + +we currently do (3). + +----------------------------------------------------------------------------- +Globalising the mutable list after GC + +An object may contain back-pointers after scavenging it; in this case +the GC puts a reference to the object on the appropriate mutable list. + +In order to respect the global heap invariant, we must ensure that the +mutable lists of the global generation(s) only contain TSO or +IND_LOCAL objects. + +GC always promotes U objects (see [GLOBALISE]), so that directly after +GC globalising a pointer is always possible. So after GC we can +always globalise all the objects on the mutable list, except for TSO +and IND_LOCAL. + +The remaining problem is whether we might have TSO or IND_LOCAL +objects on the *wrong* mutable list, i.e. pointing into the wrong +local heap. + + TSO: this can certainly happen, we must move the TSO to the correct + mutable list. + + IND_LOCAL: this cannot happen, since local GC begins with all + IND_LOCALs pointing to the right local heap, and global GC always + shorts out the IND_LOCALs. + +----------------------------------------------------------------------------- +More generations + +We would like to be able to expand the heap structure to allow more +generations both local and global. Here I collect notes about the +ramifications of doing so: + +1. More local generations + + - write barriers are more complicated: if the object is local but + in a non-zero generation, then we use the ordinary write barrier + (add it to the mutable list), otherwise globalise. + + - we probably want to keep data in the local old-generation unless + it is required to be global, so the local old-gen's dest points + to itself. Might even want to mark-sweep or compact these. + +2. More global generations + + - mutable lists may contain not only TSO and IND_LOCAL, but also + objects that point to younger global generations. + +----------------------------------------------------------------------------- +* TSO fields + + The only TSO fields that may be read by another Capability are + + tso->cap + - to determine whether the TSO belongs to the current cap or not + + A TSO is special in that it may hold global->local pointers either + in its metadata (e.g. tso->bq) or on its stack. The TSO itself must + still reside in the global heap if it is referenced from there, but + clients other than the owning capability can read only the tso->cap + field. + + TSO fields are subject to the write barrier as usual. A TSO is + marked dirty (tso->dirty) if any of its fields may contain + global->local pointers. If only the tso->_link and + tso->block_info.prev fields are dirty, then instead of setting + tso->dirty, the TSO_LINK_DIRTY bit may be set in tso->flags. When + marking a TSO dirty it is placed on the mutable list of the owning + Capability. + +* ThreadRelocated + + When a TSO is moved, the old tso has tso->what_next set to + ThreadRelocated, and tso->_link points to the new instance. + tso->cap in a ThreadRelocated may still be read, but it might be + incorrect (the TSO may have migrated to another Capability). This + may lead to a message being sent to the wrong Capability, which + shouldn't cause any problems other than a message having to be + redirected. + +* XXX + + The MVar operations still modify a TSO directly, but I think + harmlessly. They modify + + ** tso->_link + To set it to END_TSO_QUEUE, which is fine + + ** the stack + To pass the value back for takeMVar, but if the MVar is global + then the TSO and the value will be global too. + +----------------------------------------------------------------------------- +-- Weak pointers. + + - per-generation weak pointer lists: (WE DO THIS ONE) + + - old-gen weak pointers would need to be on the mut list if they + pointed into a younger gen + - would need to globalise_scavenge them on the mut list + - key/value/finalizer could end up being IND_LOCALs + - link could never be an IND_LOCAL, because the link should + always point only to WEAKs in the same gen. + - in global GC, the leader thread calls traverseWeakPtrList + - in local GC, the thread traverses the weak ptr list for its + local gen(s) + + - alternative: all WEAK objects are in the global heap + + - no need to traverse WEAK objects during local GC + - finalizers do not start until a global GC happens + - must globalise the fields of a WEAK when creating one + - probably terrible for memo tables etc. + - probably terrible for ForeignPtrs + +----------------------------------------------------------------------------- +Shorting out IND_LOCAL + + - better not short out IND_LOCAL during global GC, as then a TSO + might end up pointing to more than one local heap. Instead, the + best we can do is turn them into IND during scavenge if we find + they no longer point to the local heap. + +----------------------------------------------------------------------------- +Next stage + + * async globalisation? allow other caps to globalise from each + other's heaps. + - must synchronise with GC, or else allow GC to happen concurrently + with globalisation + + * use a read barrier, rather than a write barrier on mutable objects? + +----------------------------------------------------------------------------- + +2 steps per capability or 1? +---------------------------- + +If we had 1 step per capability, with step 2 being shared, then + + - local GC would be effectively parallel, because objects in the + shared step would be subject to copying by any Cap running a local + GC. + +If we have 2 steps per capability, then + + - local GC can be done without locking any objects. + +----------------------------------------------------------------------------- + +get rid of steps, just use generations? + +Each block points to its "destination", so in theory we can get rid of +the step structure, merging it with the generation structure. + +The problem is the nursery. We want the nursery to be fixed, that is, +we allocate from the same list of blocks each time. The nursery is +the from-space in generation 0 during GC. If we had no separate +steps, then generation 0 would also have a to-space (the objects we +are going to avoid promoting because they are young). The problem is +what happens after GC in generation 0: we have nursery blocks that we +want to allocate into again, and we also have some full blocks. + +We could keep the nursery blocks on a separate list. But that +introduces some non-orthogonality into the data structures, since +these blocks would not belong to a particular generation. We could +treat the nursery as a free list, and each time we want a new block we +take it off the list and put it in generation 0, but then if gen 0 is +shared we have to take a lock to update its block list. Also we want +to keep these blocks in the right order. + +----------------------------------------------------------------------------- +OLD, I think: + + * what about TSOs that point to other TSOs in their link, + block_info, or blocked_exception fields? Must promote these? + - promoting a TSO just because we need to put it on a linked + list of TSOs (e.g. the run queue) seems wrong. + - we can't have just one dirty bit per TSO, because that doesn't + distinguish between whether the TSO is on this local mut list or + another mut list. + - if we *don't* promote TSOs then + - we may have TSOs on a local mutable list that belong to another cap, + so we can't look at/modify them + - seems that the only TSOs on the local mutable list should be + those that belong to this cap + - proposal: + - dirtyTSO and setTSOLink normally promote TSOs + - except when adding to the run queue, then we set the dirty bit + and recordMutable() + - the TSO dirty bit is only for the stack + - if a TSO is on the mutable list, then its stack is dirty, and + it can only point into locally-accessible memory + + * MAINTAINING INVARIANT 1: + * instead of setting failed_to_evac for global->local pointers: + - if the closure is lifted, create an IND_LOCAL and + recordMutable(). + - so failed_to_evac is only set for global->global pointers, + or when the closure is unlifted. + - Add a new flag, failed_to_evac_unlifted. + * in scavenge_block, if failed_to_evac is set, + - if failed_to_evac_unlifted is set, + - *demote* the current object, and replace it by IND_LOCAL + (the current object must be lifted) + - otherwise, recordMutable as usual. + * migrating a TSO: globalise the stack and state + + +----------------------------------------------------------------------------- +ALTERNATIVE IDEA + +Primitive and/or mutable objects are allocated in a special area of +the global heap (one per HEC), and marked as private or global. e.g. + + MUT_VAR_PRIVATE, MUT_VAR_GLOBAL + +Thus we can globalise one of these objects by marking it global and +globalising its children. All objects can therefore be globalised +(with the possible exception of BLACKHOLEs). + +During a local GC, we must use mark/sweep for this area of the global +heap. Global objects are treated as marked, and we can free any +unreachable private objects (as in Domani et. al. ISMM'00). In GHC we +would either free complete blocks only, or implement Immix-style +region marking. + +Compared to the other scheme: + + - no need for eager promotion of primitive objects! + + - we can globalise everything, globalise is simpler. + + - write barriers are a bit simpler and cheaper: check info pointer, + maybe globalise + + - we need to duplicate all info tables for primitive objects, but + private/global can be a bit in the flags field of the info table, + so don't need to duplicate all the closure types too. + + - we need to set up the per-Capability primitive areas of the global + generation, and arrange that they get marked/swept. Sweeping has + to traverse the primitive heap linearly looking for global objects. + + - TSOs don't need to be globalised, they can be implicitly global, + similarly for BLOCKING_QUEUE and other private objects. + +What about clean/dirty? + + - for TSO: still need clean/dirty + + - for MUT_VAR and others: no need to do this, as the write barrier + globalises. Though we could also have muliple local generations, + and then we would need + + MUT_VAR_CLEAN, MUT_VAR_DIRTY, MUT_VAR_GLOBAL + + and a 3-way write barrier. hunk ./compiler/codeGen/CgCase.lhs 402 ; cgPrimAlts GCMayHappen alt_type reg alts } ; lbl <- emitReturnTarget (idName bndr) abs_c - ; returnFC (CaseAlts lbl Nothing bndr) } + ; returnFC (CaseAlts lbl bndr 0) } cgEvalAlts cc_slot bndr (UbxTupAlt _) [(con,args,_,rhs)] = -- Unboxed tuple case hunk ./compiler/codeGen/CgCase.lhs 424 ; unbxTupleHeapCheck live_regs ptrs nptrs noStmts (cgExpr rhs) } ; lbl <- emitReturnTarget (idName bndr) abs_c - ; returnFC (CaseAlts lbl Nothing bndr) } + ; returnFC (CaseAlts lbl bndr 0) } cgEvalAlts cc_slot bndr alt_type alts = -- Algebraic and polymorphic case hunk ./compiler/codeGen/CgCase.lhs 442 ; (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot alt_type alts - ; (lbl, branches) <- emitAlgReturnTarget (idName bndr) - alts mb_deflt fam_sz + ; lbl <- emitAlgReturnTarget (idName bndr) alts mb_deflt fam_sz hunk ./compiler/codeGen/CgCase.lhs 444 - ; returnFC (CaseAlts lbl branches bndr) } + ; returnFC (CaseAlts lbl bndr fam_sz) } where fam_sz = case alt_type of AlgAlt tc -> tyConFamilySize tc hunk ./compiler/codeGen/CgClosure.lhs 39 import CLabel import StgSyn import CostCentre +import Type import Id import Name import Module hunk ./compiler/codeGen/CgClosure.lhs 80 ; srt_info <- getSRTInfo ; mod_name <- getModuleName ; let descr = closureDescription mod_name name - closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr + closure_info = mkClosureInfo True False id lf_info 0 0 srt_info descr closure_label = mkLocalClosureLabel name $ idCafInfo id cg_id_info = stableIdInfo id (mkLblExpr closure_label) lf_info closure_rep = mkStaticClosureFields closure_info ccs True [] hunk ./compiler/codeGen/CgClosure.lhs 123 descr = closureDescription mod_name (idName bndr) closure_info = mkClosureInfo False -- Not static + False -- no unlifted/pointed fields bndr lf_info tot_wds ptr_wds NoC_SRT -- No SRT for a std-form closure descr hunk ./compiler/codeGen/CgClosure.lhs 174 add_rep info = (cgIdInfoArgRep info, info) + any_unlifted = any unlifted fv_infos + where unlifted info = isFollowableArg (cgIdInfoArgRep info) + && isUnLiftedType (idType (cgIdInfoId info)) + descr = closureDescription mod_name name closure_info = mkClosureInfo False -- Not static hunk ./compiler/codeGen/CgClosure.lhs 180 + any_unlifted bndr lf_info tot_wds ptr_wds srt_info descr hunk ./compiler/codeGen/CgClosure.lhs 578 ; hp_offset <- allocDynClosure bh_cl_info use_cc blame_cc [(tso,fixedHdrSize)] ; hp_rel <- getHpRelOffset hp_offset - -- Call the RTS function newCAF to add the CAF to the CafList - -- so that the garbage collector can find them + -- Call the RTS function newCAF. This: + -- - creates an IND_LOCAL to point to the local BH + -- - updates the CAF with an IND_STATIC pointing to the IND_LOCAL + -- - adds the CAF to the CAF list if necessary -- This must be done *before* the info table pointer is overwritten, -- because the old info table ptr is needed for reversion ; emitRtsCallWithVols rtsPackageId (fsLit "newCAF") hunk ./compiler/codeGen/CgClosure.lhs 586 [ CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint, - CmmHinted (CmmReg nodeReg) AddrHint ] + CmmHinted (CmmReg nodeReg) AddrHint, + CmmHinted hp_rel AddrHint ] [node] False -- node is live, so save it. hunk ./compiler/codeGen/CgClosure.lhs 591 - -- Overwrite the closure with a (static) indirection - -- to the newly-allocated black hole - ; stmtsC [ CmmStore (cmmRegOffW nodeReg off_indirectee) hp_rel - , CmmStore (CmmReg nodeReg) ind_static_info ] - ; returnFC hp_rel } where bh_cl_info :: ClosureInfo hunk ./compiler/codeGen/CgCon.lhs 47 import Type import PrelInfo import Outputable -import ListSetOps import Util import Module import FastString hunk ./compiler/codeGen/CgCon.lhs 82 lf_info = mkConLFInfo con closure_label = mkClosureLabel name $ idCafInfo id caffy = any stgArgHasCafRefs args - (closure_info, amodes_w_offsets) = layOutStaticConstr con amodes + (closure_info, amodes_w_offsets) = layOutStaticConstr False con amodes closure_rep = mkStaticClosureFields closure_info dontCareCCS -- Because it's static data hunk ./compiler/codeGen/CgCon.lhs 209 buildDynCon binder ccs con args = do { ; let - (closure_info, amodes_w_offsets) = layOutDynConstr con args + (closure_info, amodes_w_offsets) = layOutDynConstr unlifted_fields con args ; hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets ; returnFC (taggedHeapIdInfo binder hp_off lf_info con) } hunk ./compiler/codeGen/CgCon.lhs 221 | otherwise = CmmLit (mkCCostCentreStack ccs) blame_cc = use_cc -- cost-centre on which to blame the alloc (same) + + unlifted_fields = dataConHasUnliftedPtrFields con + +dataConHasUnliftedPtrFields :: DataCon -> Bool +dataConHasUnliftedPtrFields con = any unlifted (dataConRepArgTys con) + where unlifted ty = isUnLiftedType ty && typePrimRep ty == PtrRep \end{code} hunk ./compiler/codeGen/CgCon.lhs 250 -- The binding below forces the masking out of the tag bits -- when accessing the constructor field. bind_arg (arg, offset) = bindNewToUntagNode arg offset (mkLFArgument arg) (tagForCon con) - (_, args_w_offsets) = layOutDynConstr con (addIdReps args) + (_, _, args_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} + (addIdReps args) -- ASSERT(not (isUnboxedTupleCon con)) return () mapCs bind_arg args_w_offsets hunk ./compiler/codeGen/CgCon.lhs 325 | opt_SccProfilingOn = build_it_then enter_it | otherwise = ASSERT( amodes `lengthIs` dataConRepArity con ) - do { EndOfBlockInfo _ sequel <- getEndOfBlockInfo - ; case sequel of - CaseAlts _ (Just (alts, deflt_lbl)) bndr - -> -- Ho! We know the constructor so we can - -- go straight to the right alternative - case assocMaybe alts (dataConTagZ con) of { - Just join_lbl -> build_it_then (jump_to join_lbl); - Nothing - -- Special case! We're returning a constructor to the default case - -- of an enclosing case. For example: - -- - -- case (case e of (a,b) -> C a b) of - -- D x -> ... - -- y -> ...... - -- - -- In this case, - -- if the default is a non-bind-default (ie does not use y), - -- then we should simply jump to the default join point; - - | isDeadBinder bndr -> performReturn (jump_to deflt_lbl) - | otherwise -> build_it_then (jump_to deflt_lbl) } - - _otherwise -- The usual case - -> build_it_then emitReturnInstr - } + build_it_then emitReturnInstr where enter_it = stmtsC [ CmmAssign nodeReg (cmmUntag (CmmReg nodeReg)), CmmJump (entryCode (closureInfoPtr (CmmReg nodeReg))) [] ] hunk ./compiler/codeGen/CgCon.lhs 426 -- time), we take care that info-table contains the -- information we need. (static_cl_info, _) = - layOutStaticConstr data_con arg_reps + layOutStaticConstr False data_con arg_reps (dyn_cl_info, arg_things) = hunk ./compiler/codeGen/CgCon.lhs 429 - layOutDynConstr data_con arg_reps + layOutDynConstr unlifted_fields data_con arg_reps + + unlifted_fields = dataConHasUnliftedPtrFields data_con emit_info cl_info ticky_code = do { code_blks <- getCgStmts the_code hunk ./compiler/codeGen/CgExpr.lhs 344 where lf_info = mkSelectorLFInfo bndr offset_into_int (isUpdatable upd_flag) - (_, params_w_offsets) = layOutDynConstr con (addIdReps params) + + (_, _, params_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} + (addIdReps params) + -- Just want the layout maybe_offset = assocMaybe params_w_offsets selectee Just the_offset = maybe_offset hunk ./compiler/codeGen/CgHeapery.lhs 117 \begin{code} layOutDynConstr, layOutStaticConstr - :: DataCon + :: Bool -- any fields are unlifted/pointed? + -> DataCon -> [(CgRep,a)] -> (ClosureInfo, [(a,VirtualHpOffset)]) hunk ./compiler/codeGen/CgHeapery.lhs 126 layOutDynConstr = layOutConstr False layOutStaticConstr = layOutConstr True -layOutConstr :: Bool -> DataCon -> [(CgRep, a)] +layOutConstr :: Bool -> Bool -> DataCon -> [(CgRep, a)] -> (ClosureInfo, [(a, VirtualHpOffset)]) hunk ./compiler/codeGen/CgHeapery.lhs 128 -layOutConstr is_static data_con args - = (mkConInfo is_static data_con tot_wds ptr_wds, +layOutConstr is_static unlifted_fields data_con args + = (mkConInfo is_static unlifted_fields data_con tot_wds ptr_wds, things_w_offsets) where (tot_wds, -- #ptr_wds + #nonptr_wds hunk ./compiler/codeGen/CgInfoTbls.hs 264 -> [(ConTagZ, CgStmts)] -- Tagged branches -> Maybe CgStmts -- Default branch (if any) -> Int -- family size - -> FCode (CLabel, SemiTaggingStuff) + -> FCode CLabel emitAlgReturnTarget name branches mb_deflt fam_sz hunk ./compiler/codeGen/CgInfoTbls.hs 267 - = do { blks <- getCgStmts $ - -- is the constructor tag in the node reg? - if isSmallFamily fam_sz - then do -- yes, node has constr. tag - let tag_expr = cmmConstrTag1 (CmmReg nodeReg) - branches' = [(tag+1,branch)|(tag,branch)<-branches] - emitSwitch tag_expr branches' mb_deflt 1 fam_sz - else do -- no, get tag from info table - let -- Note that ptr _always_ has tag 1 - -- when the family size is big enough - untagged_ptr = cmmRegOffB nodeReg (-1) - tag_expr = getConstrTag (untagged_ptr) - emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) - ; lbl <- emitReturnTarget name blks - ; return (lbl, Nothing) } - -- Nothing: the internal branches in the switch don't have - -- global labels, so we can't use them at the 'call site' + = do + blks <- getCgStmts $ + -- is the constructor tag in the node reg? + if isSmallFamily fam_sz + then do -- yes, node has constr. tag + let tag_expr = cmmConstrTag1 (CmmReg nodeReg) + branches' = [(tag+1,branch)|(tag,branch)<-branches] + emitSwitch tag_expr branches' mb_deflt 1 fam_sz + else do -- no, get tag from info table + redo <- newLabelC + is_fwd <- newLabelC + labelC redo + ip <- newTemp bWord + tag <- newTemp bWord + let iptr = CmmReg (CmmLocal ip) + let untagged_ptr = cmmRegOffB nodeReg (-1) + let one = CmmLit (mkIntCLit 1) + stmtC $ CmmAssign (CmmLocal ip) (closureInfoPtr untagged_ptr) + -- check for a forwarding pointer; if it is, we have to + -- follow it to the real closure + let + cond = CmmMachOp mo_wordNe [ + CmmMachOp mo_wordAnd [iptr, one], + CmmLit zeroCLit + ] + tag_expr = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [ + infoTableConstrTag (infoTable iptr) + ] + stmtC $ CmmCondBranch cond is_fwd + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) + labelC is_fwd + -- iptr is already tagged with 1, no need to do any arithmetic + stmtC $ CmmAssign nodeReg iptr + stmtC $ CmmBranch redo + -- + emitReturnTarget name blks -------------------------------- emitReturnInstr :: Code hunk ./compiler/codeGen/CgMonad.lhs 30 forkLabelledCode, forkClosureBody, forkStatics, forkAlts, forkEval, forkEvalHelp, forkProc, codeOnly, - SemiTaggingStuff, ConTagZ, + ConTagZ, EndOfBlockInfo(..), setEndOfBlockInfo, getEndOfBlockInfo, hunk ./compiler/codeGen/CgMonad.lhs 172 = OnStack -- Continuation is on the stack | CaseAlts - CLabel -- Jump to this; if the continuation is for a vectored - -- case this might be the label of a return vector - SemiTaggingStuff + CLabel -- Jump to this Id -- The case binder, only used to see if it's dead hunk ./compiler/codeGen/CgMonad.lhs 174 - -type SemiTaggingStuff - = Maybe -- Maybe[1] we don't have any semi-tagging stuff... - ([(ConTagZ, CmmLit)], -- Alternatives - CmmLit) -- Default (will be a can't happen RTS label if can't happen) + Int -- family size type ConTagZ = Int -- A *zero-indexed* contructor tag hunk ./compiler/codeGen/CgTailCall.lhs 185 fun_name = idName fun_id lf_info = cgIdInfoLF fun_info fun_has_cafs = idCafInfo fun_id - untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg)) -- Test if closure is a constructor maybeSwitchOnCons enterClosure eob | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob, hunk ./compiler/codeGen/CgTailCall.lhs 197 ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg)) is_constr) -- No, enter the closure. - ; enterClosure + ; _ <- enterClosure ; labelC is_constr ; stmtC (CmmJump (entryCode $ CmmLit (CmmLabel lbl)) []) } hunk ./compiler/codeGen/CgTailCall.lhs 222 -} -- No case expression involved, enter the closure. | otherwise - = do { stmtC untag_node - ; enterClosure + = do { is_constr <- newLabelC + -- Is the pointer tagged? + -- Yes, jump to switch statement + ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg)) + is_constr) + -- No, enter the closure. + ; _ <- enterClosure + ; labelC is_constr + ; emitReturnInstr } where --cond1 tag = cmmULtWord tag lowCons hunk ./compiler/codeGen/ClosureInfo.lhs 332 \begin{code} mkClosureInfo :: Bool -- Is static + -> Bool -- Has one or more unlifted pointer fields -> Id -> LambdaFormInfo -> Int -> Int -- Total and pointer words hunk ./compiler/codeGen/ClosureInfo.lhs 339 -> C_SRT -> String -- String descriptor -> ClosureInfo -mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr +mkClosureInfo is_static unlifted_fields id lf_info tot_wds ptr_wds srt_info descr = ClosureInfo { closureName = name, closureLFInfo = lf_info, closureSMRep = sm_rep, hunk ./compiler/codeGen/ClosureInfo.lhs 348 closureDescr = descr } where name = idName id - sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds + sm_rep = chooseSMRep is_static unlifted_fields lf_info tot_wds ptr_wds mkConInfo :: Bool -- Is static hunk ./compiler/codeGen/ClosureInfo.lhs 351 + -> Bool -- Has one or more unlifted pointer fields -> DataCon -> Int -> Int -- Total and pointer words -> ClosureInfo hunk ./compiler/codeGen/ClosureInfo.lhs 355 -mkConInfo is_static data_con tot_wds ptr_wds +mkConInfo is_static unlifted_fields data_con tot_wds ptr_wds = ConInfo { closureSMRep = sm_rep, closureCon = data_con } where hunk ./compiler/codeGen/ClosureInfo.lhs 359 - sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds + sm_rep = chooseSMRep is_static unlifted_fields (mkConLFInfo data_con) tot_wds ptr_wds \end{code} %************************************************************************ hunk ./compiler/codeGen/ClosureInfo.lhs 397 -- not exported: sizes_from_SMRep :: SMRep -> (WordOff,WordOff) -sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs) +sizes_from_SMRep (GenericRep _ _ ptrs nonptrs _) = (ptrs, nonptrs) sizes_from_SMRep BlackHoleRep = (0, 0) \end{code} hunk ./compiler/codeGen/ClosureInfo.lhs 432 minPayloadSize :: SMRep -> Bool -> WordOff minPayloadSize smrep updatable = case smrep of - BlackHoleRep -> min_upd_size - GenericRep _ _ _ _ | updatable -> min_upd_size - GenericRep True _ _ _ -> 0 -- static - GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE + BlackHoleRep -> min_upd_size + GenericRep _ _ _ _ _ | updatable -> min_upd_size + GenericRep True _ _ _ _ -> 0 -- static + GenericRep False _ _ _ _ -> mIN_PAYLOAD_SIZE -- ^^^^^___ dynamic where min_upd_size = hunk ./compiler/codeGen/ClosureInfo.lhs 454 \begin{code} chooseSMRep :: Bool -- True <=> static closure + -> Bool -- Has one or more unlifted pointer fields -> LambdaFormInfo -> WordOff -> WordOff -- Tot wds, ptr wds -> SMRep hunk ./compiler/codeGen/ClosureInfo.lhs 459 -chooseSMRep is_static lf_info tot_wds ptr_wds +chooseSMRep is_static unlifted_fields lf_info tot_wds ptr_wds = let nonptr_wds = tot_wds - ptr_wds closure_type = getClosureType is_static ptr_wds lf_info hunk ./compiler/codeGen/ClosureInfo.lhs 464 in - GenericRep is_static ptr_wds nonptr_wds closure_type + GenericRep is_static unlifted_fields ptr_wds nonptr_wds closure_type -- We *do* get non-updatable top-level thunks sometimes. eg. f = g -- gets compiled to a jump to g (if g has non-zero arity), instead of hunk ./compiler/codeGen/ClosureInfo.lhs 737 where not_nocaf_constr = case sm_rep of - GenericRep _ _ _ ConstrNoCaf -> False - _other -> True + GenericRep _ _ _ _ ConstrNoCaf -> False + _other -> True \end{code} Note [Entering error thunks] hunk ./compiler/codeGen/SMRep.lhs 242 = GenericRep -- GC routines consult sizes in info tbl Bool -- True <=> This is a static closure. Affects how -- we garbage-collect it + Bool -- True <=> Has any unlifted pointer fields. !Int -- # ptr words !Int -- # non-ptr words ClosureType -- closure type hunk ./compiler/codeGen/SMRep.lhs 283 \begin{code} isStaticRep :: SMRep -> Bool -isStaticRep (GenericRep is_static _ _ _) = is_static +isStaticRep (GenericRep is_static _ _ _ _) = is_static isStaticRep BlackHoleRep = False \end{code} hunk ./compiler/codeGen/SMRep.lhs 294 -- krc: only called by tickyDynAlloc in CgTicky; return -- Nothing for a black hole so we can at least make something work. smRepClosureType :: SMRep -> Maybe ClosureType -smRepClosureType (GenericRep _ _ _ ty) = Just ty +smRepClosureType (GenericRep _ _ _ _ ty) = Just ty smRepClosureType BlackHoleRep = Nothing smRepClosureTypeInt :: SMRep -> StgHalfWord hunk ./compiler/codeGen/SMRep.lhs 298 -smRepClosureTypeInt (GenericRep False 1 0 Constr) = CONSTR_1_0 -smRepClosureTypeInt (GenericRep False 0 1 Constr) = CONSTR_0_1 -smRepClosureTypeInt (GenericRep False 2 0 Constr) = CONSTR_2_0 -smRepClosureTypeInt (GenericRep False 1 1 Constr) = CONSTR_1_1 -smRepClosureTypeInt (GenericRep False 0 2 Constr) = CONSTR_0_2 -smRepClosureTypeInt (GenericRep False _ _ Constr) = CONSTR +smRepClosureTypeInt (GenericRep False False 1 0 Constr) = CONSTR_1_0 +smRepClosureTypeInt (GenericRep False False 0 1 Constr) = CONSTR_0_1 +smRepClosureTypeInt (GenericRep False False 2 0 Constr) = CONSTR_2_0 +smRepClosureTypeInt (GenericRep False False 1 1 Constr) = CONSTR_1_1 +smRepClosureTypeInt (GenericRep False False 0 2 Constr) = CONSTR_0_2 +smRepClosureTypeInt (GenericRep False False _ _ Constr) = CONSTR +smRepClosureTypeInt (GenericRep False True _ _ Constr) = CONSTR_PRIMWRAP hunk ./compiler/codeGen/SMRep.lhs 306 -smRepClosureTypeInt (GenericRep False 1 0 Fun) = FUN_1_0 -smRepClosureTypeInt (GenericRep False 0 1 Fun) = FUN_0_1 -smRepClosureTypeInt (GenericRep False 2 0 Fun) = FUN_2_0 -smRepClosureTypeInt (GenericRep False 1 1 Fun) = FUN_1_1 -smRepClosureTypeInt (GenericRep False 0 2 Fun) = FUN_0_2 -smRepClosureTypeInt (GenericRep False _ _ Fun) = FUN +smRepClosureTypeInt (GenericRep False False 1 0 Fun) = FUN_1_0 +smRepClosureTypeInt (GenericRep False False 0 1 Fun) = FUN_0_1 +smRepClosureTypeInt (GenericRep False False 2 0 Fun) = FUN_2_0 +smRepClosureTypeInt (GenericRep False False 1 1 Fun) = FUN_1_1 +smRepClosureTypeInt (GenericRep False False 0 2 Fun) = FUN_0_2 +smRepClosureTypeInt (GenericRep False False _ _ Fun) = FUN +smRepClosureTypeInt (GenericRep False _ _ _ Fun) = FUN_PRIMWRAP hunk ./compiler/codeGen/SMRep.lhs 314 -smRepClosureTypeInt (GenericRep False 1 0 Thunk) = THUNK_1_0 -smRepClosureTypeInt (GenericRep False 0 1 Thunk) = THUNK_0_1 -smRepClosureTypeInt (GenericRep False 2 0 Thunk) = THUNK_2_0 -smRepClosureTypeInt (GenericRep False 1 1 Thunk) = THUNK_1_1 -smRepClosureTypeInt (GenericRep False 0 2 Thunk) = THUNK_0_2 -smRepClosureTypeInt (GenericRep False _ _ Thunk) = THUNK +smRepClosureTypeInt (GenericRep False _ 1 0 Thunk) = THUNK_1_0 +smRepClosureTypeInt (GenericRep False _ 0 1 Thunk) = THUNK_0_1 +smRepClosureTypeInt (GenericRep False _ 2 0 Thunk) = THUNK_2_0 +smRepClosureTypeInt (GenericRep False _ 1 1 Thunk) = THUNK_1_1 +smRepClosureTypeInt (GenericRep False _ 0 2 Thunk) = THUNK_0_2 +smRepClosureTypeInt (GenericRep False _ _ _ Thunk) = THUNK hunk ./compiler/codeGen/SMRep.lhs 321 -smRepClosureTypeInt (GenericRep False _ _ ThunkSelector) = THUNK_SELECTOR +smRepClosureTypeInt (GenericRep False _ _ _ ThunkSelector) = THUNK_SELECTOR hunk ./compiler/codeGen/SMRep.lhs 323 -smRepClosureTypeInt (GenericRep True _ _ Constr) = CONSTR_STATIC -smRepClosureTypeInt (GenericRep True _ _ ConstrNoCaf) = CONSTR_NOCAF_STATIC -smRepClosureTypeInt (GenericRep True _ _ Fun) = FUN_STATIC -smRepClosureTypeInt (GenericRep True _ _ Thunk) = THUNK_STATIC +smRepClosureTypeInt (GenericRep True _ _ _ Constr) = CONSTR_STATIC +smRepClosureTypeInt (GenericRep True _ _ _ ConstrNoCaf) = CONSTR_NOCAF_STATIC +smRepClosureTypeInt (GenericRep True _ _ _ Fun) = FUN_STATIC +smRepClosureTypeInt (GenericRep True _ _ _ Thunk) = THUNK_STATIC smRepClosureTypeInt BlackHoleRep = BLACKHOLE hunk ./compiler/codeGen/StgCmm.hs 328 -- static data structures (ie those built at compile -- time), we take care that info-table contains the -- information we need. - (static_cl_info, _) = layOutStaticConstr data_con arg_reps - (dyn_cl_info, arg_things) = layOutDynConstr data_con arg_reps + (static_cl_info, _) = layOutStaticConstr False data_con arg_reps + (dyn_cl_info, arg_things) = + layOutDynConstr unlifted_fields data_con arg_reps + + unlifted_fields = dataConHasUnliftedPtrFields data_con emit_info cl_info ticky_code = emitClosureAndInfoTable cl_info NativeDirectCall [] hunk ./compiler/codeGen/StgCmmBind.hs 38 import CLabel import StgSyn import CostCentre +import Type import Id import Control.Monad import Name hunk ./compiler/codeGen/StgCmmBind.hs 74 ; srt_info <- getSRTInfo srt ; mod_name <- getModuleName ; let descr = closureDescription mod_name name - closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr + closure_info = mkClosureInfo True False id lf_info 0 0 srt_info descr closure_label = mkLocalClosureLabel name (idCafInfo id) cg_id_info = litIdInfo id lf_info (CmmLabel closure_label) closure_rep = mkStaticClosureFields closure_info ccs True [] hunk ./compiler/codeGen/StgCmmBind.hs 211 body@(StgCase (StgApp scrutinee [{-no args-}]) _ _ _ _ -- ignore uniq, etc. (AlgAlt _) - [(DataAlt con, params, _use_mask, + [(DataAlt _, params, _use_mask, (StgApp selectee [{-no args-}]))]) | the_fv == scrutinee -- Scrutinee is the only free variable && maybeToBool maybe_offset -- Selectee is a component of the tuple hunk ./compiler/codeGen/StgCmmBind.hs 228 where lf_info = mkSelectorLFInfo bndr offset_into_int (isUpdatable upd_flag) - (_, params_w_offsets) = layOutDynConstr con (addIdReps params) - -- Just want the layout + (_, _, params_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} + (addIdReps params) maybe_offset = assocMaybe params_w_offsets (NonVoid selectee) Just the_offset = maybe_offset offset_into_int = the_offset - fixedHdrSize hunk ./compiler/codeGen/StgCmmBind.hs 279 ; c_srt <- getSRTInfo srt ; let name = idName bndr descr = closureDescription mod_name name + fv_infos = addIdReps (map stripNV reduced_fvs) fv_details :: [(NonVoid Id, VirtualHpOffset)] (tot_wds, ptr_wds, fv_details) hunk ./compiler/codeGen/StgCmmBind.hs 282 - = mkVirtHeapOffsets (isLFThunk lf_info) - (addIdReps (map stripNV reduced_fvs)) + = mkVirtHeapOffsets (isLFThunk lf_info) fv_infos closure_info = mkClosureInfo False -- Not static hunk ./compiler/codeGen/StgCmmBind.hs 284 + any_unlifted bndr lf_info tot_wds ptr_wds c_srt descr hunk ./compiler/codeGen/StgCmmBind.hs 288 + any_unlifted = any unlifted fv_infos + where unlifted (rep,id) = rep == PtrRep + && isUnLiftedType (idType id) + -- BUILD ITS INFO TABLE AND CODE ; forkClosureBody $ -- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere hunk ./compiler/codeGen/StgCmmBind.hs 333 descr = closureDescription mod_name (idName bndr) closure_info = mkClosureInfo False -- Not static + False -- no unlifted/pointed fields bndr lf_info tot_wds ptr_wds NoC_SRT -- No SRT for a std-form closure descr hunk ./compiler/codeGen/StgCmmClosure.hs 369 chooseSMRep :: Bool -- True <=> static closure + -> Bool -- Has one or more unlifted pointer fields -> LambdaFormInfo -> WordOff -> WordOff -- Tot wds, ptr wds -> SMRep hunk ./compiler/codeGen/StgCmmClosure.hs 374 -chooseSMRep is_static lf_info tot_wds ptr_wds +chooseSMRep is_static unlifted_fields lf_info tot_wds ptr_wds = let nonptr_wds = tot_wds - ptr_wds closure_type = getClosureType is_static ptr_wds lf_info hunk ./compiler/codeGen/StgCmmClosure.hs 379 in - GenericRep is_static ptr_wds nonptr_wds closure_type + GenericRep is_static unlifted_fields ptr_wds nonptr_wds closure_type -- We *do* get non-updatable top-level thunks sometimes. eg. f = g -- gets compiled to a jump to g (if g has non-zero arity), instead of hunk ./compiler/codeGen/StgCmmClosure.hs 715 -------------------------------------- mkClosureInfo :: Bool -- Is static + -> Bool -- Has one or more unlifted pointer fields -> Id -> LambdaFormInfo -> Int -> Int -- Total and pointer words hunk ./compiler/codeGen/StgCmmClosure.hs 722 -> C_SRT -> String -- String descriptor -> ClosureInfo -mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr +mkClosureInfo is_static unlifted_fields id lf_info tot_wds ptr_wds srt_info descr = ClosureInfo { closureName = name, closureLFInfo = lf_info, closureSMRep = sm_rep, hunk ./compiler/codeGen/StgCmmClosure.hs 732 closureCafs = idCafInfo id } where name = idName id - sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds + sm_rep = chooseSMRep is_static unlifted_fields lf_info tot_wds ptr_wds mkConInfo :: Bool -- Is static hunk ./compiler/codeGen/StgCmmClosure.hs 735 + -> Bool -- Has one or more unlifted pointer fields -> DataCon -> Int -> Int -- Total and pointer words -> ClosureInfo hunk ./compiler/codeGen/StgCmmClosure.hs 739 -mkConInfo is_static data_con tot_wds ptr_wds +mkConInfo is_static unlifted_fields data_con tot_wds ptr_wds = ConInfo { closureSMRep = sm_rep, closureCon = data_con } where hunk ./compiler/codeGen/StgCmmClosure.hs 743 - sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds - + sm_rep = chooseSMRep is_static unlifted_fields (mkConLFInfo data_con) tot_wds ptr_wds -- We need a black-hole closure info to pass to @allocDynClosure@ when we -- want to allocate the black hole on entry to a CAF. These are the only hunk ./compiler/codeGen/StgCmmClosure.hs 843 -- not exported: sizes_from_SMRep :: SMRep -> (WordOff,WordOff) -sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs) +sizes_from_SMRep (GenericRep _ _ ptrs nonptrs _) = (ptrs, nonptrs) sizes_from_SMRep BlackHoleRep = (0, 0) -- Computing slop size. WARNING: this looks dodgy --- it has deep hunk ./compiler/codeGen/StgCmmClosure.hs 876 minPayloadSize :: SMRep -> Bool -> WordOff minPayloadSize smrep updatable = case smrep of - BlackHoleRep -> min_upd_size - GenericRep _ _ _ _ | updatable -> min_upd_size - GenericRep True _ _ _ -> 0 -- static - GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE + BlackHoleRep -> min_upd_size + GenericRep _ _ _ _ _ | updatable -> min_upd_size + GenericRep True _ _ _ _ -> 0 -- static + GenericRep False _ _ _ _ -> mIN_PAYLOAD_SIZE -- ^^^^^___ dynamic where min_upd_size = hunk ./compiler/codeGen/StgCmmClosure.hs 932 where not_nocaf_constr = case sm_rep of - GenericRep _ _ _ ConstrNoCaf -> False - _other -> True + GenericRep _ _ _ _ ConstrNoCaf -> False + _other -> True isStaticClosure :: ClosureInfo -> Bool isStaticClosure cl_info = isStaticRep (closureSMRep cl_info) hunk ./compiler/codeGen/StgCmmCon.hs 37 import Constants import DataCon import FastString +import Type import Id import Literal import PrelInfo hunk ./compiler/codeGen/StgCmmCon.hs 75 closure_label = mkClosureLabel name $ idCafInfo id caffy = any stgArgHasCafRefs args (closure_info, nv_args_w_offsets) - = layOutStaticConstr con (addArgReps args) + = layOutStaticConstr False con (addArgReps args) get_lit (arg, _offset) = do { CmmLit lit <- getArgAmode arg ; return lit } hunk ./compiler/codeGen/StgCmmCon.hs 194 -------- buildDynCon: the general case ----------- buildDynCon binder ccs con args - = do { let (cl_info, args_w_offsets) = layOutDynConstr con (addArgReps args) + = do { let (cl_info, args_w_offsets) = + layOutDynConstr unlifted_fields con (addArgReps args) -- No void args in args_w_offsets ; (tmp, init) <- allocDynClosure cl_info use_cc blame_cc args_w_offsets ; return (regIdInfo binder lf_info tmp, init) } hunk ./compiler/codeGen/StgCmmCon.hs 208 blame_cc = use_cc -- cost-centre on which to blame the alloc (same) + unlifted_fields = dataConHasUnliftedPtrFields con + + +dataConHasUnliftedPtrFields :: DataCon -> Bool +dataConHasUnliftedPtrFields con = any unlifted (dataConRepArgTys con) + where unlifted ty = isUnLiftedType ty && typePrimRep ty == PtrRep --------------------------------------------------------------- -- Binding constructor arguments hunk ./compiler/codeGen/StgCmmCon.hs 228 = ASSERT(not (isUnboxedTupleCon con)) mapM bind_arg args_w_offsets where - (_, args_w_offsets) = layOutDynConstr con (addIdReps args) + (_, _, args_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} + (addIdReps args) tag = tagForCon con hunk ./compiler/codeGen/StgCmmHeap.hs 53 ----------------------------------------------------------- layOutDynConstr, layOutStaticConstr - :: DataCon -> [(PrimRep, a)] + :: Bool -> DataCon -> [(PrimRep, a)] -> (ClosureInfo, [(NonVoid a, VirtualHpOffset)]) -- No Void arguments in result hunk ./compiler/codeGen/StgCmmHeap.hs 60 layOutDynConstr = layOutConstr False layOutStaticConstr = layOutConstr True -layOutConstr :: Bool -> DataCon -> [(PrimRep, a)] +layOutConstr :: Bool -> Bool -> DataCon -> [(PrimRep, a)] -> (ClosureInfo, [(NonVoid a, VirtualHpOffset)]) hunk ./compiler/codeGen/StgCmmHeap.hs 62 -layOutConstr is_static data_con args - = (mkConInfo is_static data_con tot_wds ptr_wds, +layOutConstr is_static unlifted_fields data_con args + = (mkConInfo is_static unlifted_fields data_con tot_wds ptr_wds, things_w_offsets) where (tot_wds, -- #ptr_wds + #nonptr_wds hunk ./compiler/codeGen/StgCmmUtils.hs 61 import ForeignCall import IdInfo import Type +import DataCon import TyCon import Constants import SMRep hunk ./compiler/codeGen/StgCmmUtils.hs 284 where closure_tbl = CmmLit (CmmLabel lbl) lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs +------------------------------------------------------------------------- +-- +-- Does a DataCon have any fields that point to unlifted (primitive) +-- types. This property is important for local GC. +-- +------------------------------------------------------------------------- + +dataConHasUnliftedPtrFields :: DataCon -> Bool +dataConHasUnliftedPtrFields con = any unlifted (dataConRepArgTys con) + where unlifted ty = isUnLiftedType ty && typePrimRep ty == PtrRep + ------------------------------------------------------------------------- -- -- Conditionals and rts calls hunk ./includes/Cmm.h 112 #define UNTAG(p) (p & ~TAG_MASK) #define GETTAG(p) (p & TAG_MASK) +#define IS_FORWARDING_PTR(p) (((p) & 1) != 0) +#define UN_FORWARDING_PTR(p) ((p) - 1) + #if SIZEOF_INT == 4 #define CInt bits32 #elif SIZEOF_INT == 8 hunk ./includes/Cmm.h 302 case \ IND, \ IND_PERM, \ - IND_STATIC: \ + IND_STATIC: \ { \ P1 = StgInd_indirectee(P1); \ goto again; \ hunk ./includes/Cmm.h 389 // allocate() - this includes many of the primops. #define MAYBE_GC(liveness,reentry) \ if (bdescr_link(CurrentNursery) == NULL || \ - generation_n_new_large_blocks(W_[g0]) >= CInt[alloc_blocks_lim]) { \ + generation_n_new_large_blocks(StgRegTable_rG0(BaseReg)) >= CInt[alloc_blocks_lim]) { \ R9 = liveness; \ R10 = reentry; \ HpAlloc = 0; \ hunk ./includes/Cmm.h 437 /* Debugging macros */ #define LOOKS_LIKE_INFO_PTR(p) \ ((p) != NULL && \ - LOOKS_LIKE_INFO_PTR_NOT_NULL(p)) + (IS_FORWARDING_PTR(p) || \ + LOOKS_LIKE_INFO_PTR_NOT_NULL(p))) #define LOOKS_LIKE_INFO_PTR_NOT_NULL(p) \ ( (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \ hunk ./includes/mkDerivedConstants.c 228 field_offset(StgRegTable, rHpAlloc); struct_field(StgRegTable, rRet); struct_field(StgRegTable, rNursery); + struct_field(StgRegTable, rG0); def_offset("stgEagerBlackholeInfo", FUN_OFFSET(stgEagerBlackholeInfo)); def_offset("stgGCEnter1", FUN_OFFSET(stgGCEnter1)); hunk ./includes/mkDerivedConstants.c 236 field_offset(Capability, r); field_offset(Capability, lock); + struct_field(Capability, no); struct_field(Capability, mut_lists); struct_field(Capability, context_switch); struct_field(Capability, sparks); hunk ./includes/mkDerivedConstants.c 245 struct_field(bdescr, free); struct_field(bdescr, blocks); struct_field(bdescr, gen_no); + struct_field(bdescr, gen_ix); struct_field(bdescr, link); struct_size(generation); hunk ./includes/mkDerivedConstants.c 249 - struct_field(generation, mut_list); struct_field(generation, n_new_large_blocks); hunk ./includes/mkDerivedConstants.c 250 + struct_field(generation, weak_ptrs); struct_size(CostCentreStack); struct_field(CostCentreStack, ccsID); hunk ./includes/rts/Constants.h 286 */ #define TSO_SQUEEZED 128 +/* + * Used temporarily in rts/sm/Globalise.c + */ +#define TSO_GLOBALISE 256 + /* ----------------------------------------------------------------------------- RET_DYN stack frames -------------------------------------------------------------------------- */ hunk ./includes/rts/Flags.h 75 rtsBool squeeze; /* 'z' stack squeezing & lazy blackholing */ rtsBool hpc; /* 'c' coverage */ rtsBool sparks; /* 'r' */ + rtsBool mallocleaks; /* 'k' */ }; struct COST_CENTRE_FLAGS { hunk ./includes/rts/storage/Block.h 61 } u; struct generation_ *gen; /* generation */ - struct generation_ *dest; /* destination gen */ hunk ./includes/rts/storage/Block.h 62 - StgWord32 blocks; /* no. of blocks (if grp head, 0 otherwise) */ + StgWord16 gen_no; // gen->no, cached + StgWord16 gen_ix; // gen->ix, cached + StgWord16 dest_ix; // ix of destination generation hunk ./includes/rts/storage/Block.h 66 - StgWord16 gen_no; StgWord16 flags; /* block flags, see below */ hunk ./includes/rts/storage/Block.h 67 + + StgWord32 blocks; /* no. of blocks (if grp head, 0 otherwise) */ + #if SIZEOF_VOID_P == 8 StgWord32 _padding[2]; #else hunk ./includes/rts/storage/ClosureMacros.h 333 return pap_sizeW((StgPAP *)p); case IND: case IND_PERM: + case IND_LOCAL: return sizeofW(StgInd); case ARR_WORDS: return arr_words_sizeW((StgArrWords *)p); hunk ./includes/rts/storage/ClosureTypes.h 29 #define CONSTR_1_1 5 #define CONSTR_0_2 6 #define CONSTR_STATIC 7 -#define CONSTR_NOCAF_STATIC 8 +#define CONSTR_NOCAF_STATIC 8 #define FUN 9 #define FUN_1_0 10 #define FUN_0_1 11 hunk ./includes/rts/storage/ClosureTypes.h 52 #define IND 28 #define IND_PERM 29 #define IND_STATIC 30 -#define RET_BCO 31 -#define RET_SMALL 32 -#define RET_BIG 33 -#define RET_DYN 34 -#define RET_FUN 35 -#define UPDATE_FRAME 36 -#define CATCH_FRAME 37 -#define STOP_FRAME 38 -#define BLOCKING_QUEUE 39 -#define BLACKHOLE 40 -#define MVAR_CLEAN 41 -#define MVAR_DIRTY 42 -#define ARR_WORDS 43 -#define MUT_ARR_PTRS_CLEAN 44 -#define MUT_ARR_PTRS_DIRTY 45 -#define MUT_ARR_PTRS_FROZEN0 46 -#define MUT_ARR_PTRS_FROZEN 47 -#define MUT_VAR_CLEAN 48 -#define MUT_VAR_DIRTY 49 -#define WEAK 50 -#define PRIM 51 -#define MUT_PRIM 52 -#define TSO 53 -#define TREC_CHUNK 54 -#define ATOMICALLY_FRAME 55 -#define CATCH_RETRY_FRAME 56 -#define CATCH_STM_FRAME 57 -#define WHITEHOLE 58 -#define N_CLOSURE_TYPES 59 +#define IND_LOCAL 31 +#define RET_BCO 32 +#define RET_SMALL 33 +#define RET_BIG 34 +#define RET_DYN 35 +#define RET_FUN 36 +#define UPDATE_FRAME 37 +#define CATCH_FRAME 38 +#define STOP_FRAME 39 +#define BLOCKING_QUEUE 40 +#define BLACKHOLE 41 +#define MVAR_CLEAN 42 +#define MVAR_DIRTY 43 +#define ARR_WORDS 44 +#define MUT_ARR_PTRS_CLEAN 45 +#define MUT_ARR_PTRS_DIRTY 46 +#define MUT_ARR_PTRS_FROZEN0 47 +#define MUT_ARR_PTRS_FROZEN 48 +#define MUT_VAR_CLEAN 49 +#define MUT_VAR_DIRTY 50 +#define WEAK 51 +#define PRIM 52 +#define MUT_PRIM 53 +#define TSO 54 +#define TREC_CHUNK 55 +#define ATOMICALLY_FRAME 56 +#define CATCH_RETRY_FRAME 57 +#define CATCH_STM_FRAME 58 +#define WHITEHOLE 59 +#define N_CLOSURE_TYPES 60 + +/* Closure flags, for the info->flags field. */ +#define HAS_UNLIFTED_FIELDS 1 #endif /* RTS_STORAGE_CLOSURETYPES_H */ hunk ./includes/rts/storage/Closures.h 127 StgHeader header; StgClosure *indirectee; StgClosure *static_link; - StgInfoTable *saved_info; + const StgInfoTable *saved_info; } StgIndStatic; typedef struct StgBlockingQueue_ { hunk ./includes/rts/storage/Closures.h 238 #define BCO_BITMAP_SIZEW(bco) ((BCO_BITMAP_SIZE(bco) + BITS_IN(StgWord) - 1) \ / BITS_IN(StgWord)) + /* ----------------------------------------------------------------------------- Dynamic stack frames for generic heap checks. hunk ./includes/rts/storage/Closures.h 387 typedef struct StgTRecChunk_ { StgHeader header; struct StgTRecChunk_ *prev_chunk; - StgWord next_entry_idx; + StgHalfWord next_entry_idx; + StgHalfWord cap_no; // which Capability this TRec belongs to TRecEntry entries[TREC_CHUNK_NUM_ENTRIES]; } StgTRecChunk; hunk ./includes/rts/storage/Closures.h 412 struct StgTRecHeader_ *enclosing_trec; StgTRecChunk *current_chunk; StgInvariantCheckQueue *invariants_to_check; - TRecState state; + StgHalfWord state; + StgHalfWord cap_no; // which Capability this TRec belongs to }; typedef struct { hunk ./includes/rts/storage/GC.h 63 typedef struct generation_ { unsigned int no; // generation number + unsigned int ix; // generation "index", i.e the + // offset in the all_generations array + + unsigned int is_local; // true if a local generation + unsigned int cap; // capability this gen is local to bdescr * blocks; // blocks in this gen unsigned int n_blocks; // number of blocks hunk ./includes/rts/storage/GC.h 78 unsigned int n_new_large_blocks; // count freshly allocated large objects unsigned int max_blocks; // max blocks - bdescr *mut_list; // mut objects in this gen (not G0) StgTSO * threads; // threads in this gen // linked via global_link hunk ./includes/rts/storage/GC.h 81 + + StgWeak * weak_ptrs; // WEAK objects in this gen + // linked via w->link + struct generation_ *to; // destination gen for live objects // stats information hunk ./includes/rts/storage/GC.h 96 // Fields below are used during GC only #if defined(THREADED_RTS) - char pad[128]; // make sure the following is + StgWord8 pad[128]; // make sure the following is // on a separate cache line. hunk ./includes/rts/storage/GC.h 98 - SpinLock sync_large_objects; // lock for large_objects - // and scavenged_large_objects + SpinLock sync; // lock for large_objects, + // blocks, n_blocks, n_words, + // scavenged_large_objects #endif int mark; // mark (not copy)? (old gen only) hunk ./includes/rts/storage/GC.h 113 unsigned int n_old_blocks; // number of blocks in from-space unsigned int live_estimate; // for sweeping: estimate of live data - bdescr * saved_mut_list; - - bdescr * part_blocks; // partially-full scanned blocks - unsigned int n_part_blocks; // count of above - bdescr * scavenged_large_objects; // live large objs after GC (d-link) unsigned int n_scavenged_large_blocks; // size (not count) of above hunk ./includes/rts/storage/GC.h 121 StgTSO * old_threads; } generation; -extern generation * generations; -extern generation * g0; -extern generation * oldest_gen; +extern nat total_generations; // size of all_generations +extern generation * all_generations; // indexed by gen->ix +extern generation * old_generations; // indexed by gen->no +extern generation * g0; // == &old_generations[0] +extern generation * oldest_gen; // == &old_generations[G-1] +extern generation * global_gen; // == &all_generations[global_gen_ix] +extern nat global_gen_ix; // ix of first non-local gen +extern nat global_gen_no; // no of first non-local gen /* ----------------------------------------------------------------------------- Generic allocation hunk ./includes/rts/storage/GC.h 179 The CAF table - used to let us revert CAFs in GHCi -------------------------------------------------------------------------- */ -void newCAF (StgRegTable *reg, StgClosure *); -void newDynCAF (StgRegTable *reg, StgClosure *); +void newCAF (StgRegTable *reg, StgClosure *, StgClosure *); +void newDynCAF (StgRegTable *reg, StgClosure *, StgClosure *); void revertCAFs (void); // Request that all CAFs are retained indefinitely. hunk ./includes/rts/storage/GC.h 202 void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p); +// similarly, the barrier for arrays: +void dirty_MUT_ARR (StgRegTable *reg, StgMutArrPtrs *arr, nat ix); + +// and for MVars: +void dirty_MVAR(StgRegTable *reg, StgClosure *p); + +/* --------------------------------------------------------------------------- + Globalisation + ------------------------------------------------------------------------- */ + +INLINE_HEADER rtsBool +isLocal (StgClosure *p) +{ + return Bdescr((P_)p)->gen->is_local; +} + +/* --------------------------------------------------------------------------- + Misc + ------------------------------------------------------------------------- */ + /* set to disable CAF garbage collection in GHCi. */ /* (needed when dynamic libraries are used). */ extern rtsBool keepCAFs; hunk ./includes/rts/storage/GC.h 228 INLINE_HEADER void initBdescr(bdescr *bd, generation *gen, generation *dest) { - bd->gen = gen; - bd->gen_no = gen->no; - bd->dest = dest; + bd->gen = gen; + bd->gen_no = gen->no; + bd->gen_ix = gen->ix; + bd->dest_ix = dest->ix; } #endif /* RTS_STORAGE_GC_H */ hunk ./includes/rts/storage/InfoTables.h 233 StgClosureInfo layout; /* closure layout info (one word) */ - StgHalfWord type; /* closure type */ + StgQtrWord type; /* closure type */ + StgQtrWord flags; StgHalfWord srt_bitmap; /* In a CONSTR: - the constructor tag hunk ./includes/rts/storage/TSO.h 172 // might be ThreadRelocated or not (basically, that's most of the time // unless the TSO is the current TSO). // -INLINE_HEADER StgTSO * deRefTSO(StgTSO *tso) +EXTERN_INLINE StgTSO * deRefTSO(StgTSO *tso); +EXTERN_INLINE StgTSO * deRefTSO(StgTSO *tso) { while (tso->what_next == ThreadRelocated) { tso = tso->_link; hunk ./includes/stg/MiscClosures.h 84 RTS_RET(stg_apply_interp); +#if IN_STG_CODE +extern W_ stg_IND_LOCAL_tbl[]; +#else +extern DLL_IMPORT_RTS const StgInfoTable *stg_IND_LOCAL_tbl[]; +#endif + +RTS_ENTRY(stg_IND_LOCAL0); +RTS_ENTRY(stg_IND_LOCAL1); +RTS_ENTRY(stg_IND_LOCAL2); +RTS_ENTRY(stg_IND_LOCAL3); +RTS_ENTRY(stg_IND_LOCAL4); +RTS_ENTRY(stg_IND_LOCAL5); +RTS_ENTRY(stg_IND_LOCAL6); +RTS_ENTRY(stg_IND_LOCAL7); +RTS_ENTRY(stg_IND_LOCAL8); +RTS_ENTRY(stg_IND_LOCAL9); +RTS_ENTRY(stg_IND_LOCAL10); +RTS_ENTRY(stg_IND_LOCAL11); +RTS_ENTRY(stg_IND_LOCAL12); +RTS_ENTRY(stg_IND_LOCAL13); +RTS_ENTRY(stg_IND_LOCAL14); +RTS_ENTRY(stg_IND_LOCAL15); +RTS_ENTRY(stg_IND_LOCAL16); +RTS_ENTRY(stg_IND_LOCAL17); +RTS_ENTRY(stg_IND_LOCAL18); +RTS_ENTRY(stg_IND_LOCAL19); +RTS_ENTRY(stg_IND_LOCAL20); +RTS_ENTRY(stg_IND_LOCAL21); +RTS_ENTRY(stg_IND_LOCAL22); +RTS_ENTRY(stg_IND_LOCAL23); +RTS_ENTRY(stg_IND_LOCAL24); +RTS_ENTRY(stg_IND_LOCAL25); +RTS_ENTRY(stg_IND_LOCAL26); +RTS_ENTRY(stg_IND_LOCAL27); +RTS_ENTRY(stg_IND_LOCAL28); +RTS_ENTRY(stg_IND_LOCAL29); +RTS_ENTRY(stg_IND_LOCAL30); +RTS_ENTRY(stg_IND_LOCAL31); +RTS_ENTRY(stg_IND_LOCAL32); +RTS_ENTRY(stg_IND_LOCAL33); +RTS_ENTRY(stg_IND_LOCAL34); +RTS_ENTRY(stg_IND_LOCAL35); +RTS_ENTRY(stg_IND_LOCAL36); +RTS_ENTRY(stg_IND_LOCAL37); +RTS_ENTRY(stg_IND_LOCAL38); +RTS_ENTRY(stg_IND_LOCAL39); +RTS_ENTRY(stg_IND_LOCAL40); +RTS_ENTRY(stg_IND_LOCAL41); +RTS_ENTRY(stg_IND_LOCAL42); +RTS_ENTRY(stg_IND_LOCAL43); +RTS_ENTRY(stg_IND_LOCAL44); +RTS_ENTRY(stg_IND_LOCAL45); +RTS_ENTRY(stg_IND_LOCAL46); +RTS_ENTRY(stg_IND_LOCAL47); +RTS_ENTRY(stg_IND_LOCAL48); +RTS_ENTRY(stg_IND_LOCAL49); +RTS_ENTRY(stg_IND_LOCAL50); +RTS_ENTRY(stg_IND_LOCAL51); +RTS_ENTRY(stg_IND_LOCAL52); +RTS_ENTRY(stg_IND_LOCAL53); +RTS_ENTRY(stg_IND_LOCAL54); +RTS_ENTRY(stg_IND_LOCAL55); +RTS_ENTRY(stg_IND_LOCAL56); +RTS_ENTRY(stg_IND_LOCAL57); +RTS_ENTRY(stg_IND_LOCAL58); +RTS_ENTRY(stg_IND_LOCAL59); +RTS_ENTRY(stg_IND_LOCAL60); +RTS_ENTRY(stg_IND_LOCAL61); +RTS_ENTRY(stg_IND_LOCAL62); +RTS_ENTRY(stg_IND_LOCAL63); +RTS_ENTRY(stg_IND_LOCAL64); + RTS_ENTRY(stg_IND); RTS_ENTRY(stg_IND_direct); hunk ./includes/stg/MiscClosures.h 158 +RTS_ENTRY(stg_IND_noenter); RTS_ENTRY(stg_IND_STATIC); RTS_ENTRY(stg_IND_PERM); RTS_ENTRY(stg_BLACKHOLE); hunk ./includes/stg/MiscClosures.h 167 RTS_ENTRY(stg_WHITEHOLE); RTS_ENTRY(stg_BLOCKING_QUEUE_CLEAN); RTS_ENTRY(stg_BLOCKING_QUEUE_DIRTY); +RTS_ENTRY(stg_STUB_BLOCKING_QUEUE); RTS_FUN(stg_BCO); RTS_ENTRY(stg_EVACUATED); hunk ./includes/stg/MiscClosures.h 189 RTS_ENTRY(stg_MSG_TRY_WAKEUP); RTS_ENTRY(stg_MSG_THROWTO); RTS_ENTRY(stg_MSG_BLACKHOLE); +RTS_ENTRY(stg_STUB_MSG_BLACKHOLE); RTS_ENTRY(stg_MSG_NULL); RTS_ENTRY(stg_MVAR_TSO_QUEUE); RTS_ENTRY(stg_catch); hunk ./includes/stg/Regs.h 84 StgPtr rHp; StgPtr rHpLim; struct StgTSO_ * rCurrentTSO; - struct nursery_ * rNursery; + struct nursery_ * rNursery; /* nursery struct */ + struct generation_ * rG0; /* the local generation 0 */ struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */ struct bdescr_ * rCurrentAlloc; /* for allocation using allocate() */ StgWord rHpAlloc; /* number of *bytes* being allocated in heap */ hunk ./includes/stg/Types.h 96 typedef StgWord64 StgWord; typedef StgInt32 StgHalfInt; typedef StgWord32 StgHalfWord; +typedef StgInt16 StgQtrInt; +typedef StgWord16 StgQtrWord; #else #if SIZEOF_VOID_P == 4 typedef StgInt32 StgInt; hunk ./includes/stg/Types.h 104 typedef StgWord32 StgWord; typedef StgInt16 StgHalfInt; typedef StgWord16 StgHalfWord; +typedef StgInt8 StgQtrInt; +typedef StgWord8 StgQtrWord; #else #error GHC untested on this architecture: sizeof(void *) != 4 or 8 #endif hunk ./rts/Arena.c 88 arena_blocks += req_blocks; bd->gen_no = 0; + bd->gen_ix = 0; bd->gen = NULL; hunk ./rts/Arena.c 90 - bd->dest = NULL; + bd->dest_ix = 0; bd->flags = 0; bd->free = bd->start; bd->link = arena->current; hunk ./rts/Capability.c 246 cap->mut_lists[g] = NULL; } - cap->free_tvar_watch_queues = END_STM_WATCH_QUEUE; cap->free_invariant_check_queues = END_INVARIANT_CHECK_QUEUE; cap->free_trec_chunks = END_STM_CHUNK_LIST; cap->free_trec_headers = NO_TREC; hunk ./rts/Capability.c 383 return; } - if (waiting_for_gc == PENDING_GC_SEQ) { + if (waiting_for_gc == GC_SEQ) { last_free_capability = cap; // needed? debugTrace(DEBUG_sched, "GC pending, set capability %d free", cap->no); return; hunk ./rts/Capability.c 578 { Capability *cap = *pCap; - if (waiting_for_gc == PENDING_GC_PAR) { + if (waiting_for_gc == GC_PAR) { traceEventGcStart(cap); gcWorkerThread(cap); traceEventGcEnd(cap); hunk ./rts/Capability.c 820 ------------------------------------------------------------------------ */ void -markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta, - rtsBool no_mark_sparks USED_IF_THREADS) +markCapability (evac_fn evac, void *user, Capability *cap, + rtsBool no_mark_sparks USED_IF_THREADS) { hunk ./rts/Capability.c 823 - nat i; - Capability *cap; InCall *incall; // Each GC thread is responsible for following roots from the hunk ./rts/Capability.c 830 // or fewer Capabilities as GC threads, but just in case there // are more, we mark every Capability whose number is the GC // thread's index plus a multiple of the number of GC threads. - for (i = i0; i < n_capabilities; i += delta) { - cap = &capabilities[i]; - evac(user, (StgClosure **)(void *)&cap->run_queue_hd); - evac(user, (StgClosure **)(void *)&cap->run_queue_tl); + evac(user, (StgClosure **)(void *)&cap->run_queue_hd); + evac(user, (StgClosure **)(void *)&cap->run_queue_tl); #if defined(THREADED_RTS) hunk ./rts/Capability.c 833 - evac(user, (StgClosure **)(void *)&cap->inbox); + evac(user, (StgClosure **)(void *)&cap->inbox); #endif hunk ./rts/Capability.c 835 - for (incall = cap->suspended_ccalls; incall != NULL; - incall=incall->next) { - evac(user, (StgClosure **)(void *)&incall->suspended_tso); - } + + for (incall = cap->suspended_ccalls; incall != NULL; + incall = incall->next) { + evac(user, (StgClosure **)(void *)&incall->suspended_tso); + } #if defined(THREADED_RTS) hunk ./rts/Capability.c 842 - if (!no_mark_sparks) { - traverseSparkQueue (evac, user, cap); - } -#endif + if (!no_mark_sparks) { + traverseSparkQueue (evac, user, cap); } hunk ./rts/Capability.c 845 +#endif hunk ./rts/Capability.c 847 -#if !defined(THREADED_RTS) - evac(user, (StgClosure **)(void *)&blocked_queue_hd); - evac(user, (StgClosure **)(void *)&blocked_queue_tl); - evac(user, (StgClosure **)(void *)&sleeping_queue); -#endif + // Free STM structures for this Capability + stmPreGCHook(cap); } void hunk ./rts/Capability.c 854 markCapabilities (evac_fn evac, void *user) { - markSomeCapabilities(evac, user, 0, 1, rtsFalse); + nat n; + for (n = 0; n < n_capabilities; n++) { + markCapability(evac, user, &capabilities[n], rtsFalse); + } } hunk ./rts/Capability.c 859 - -/* ----------------------------------------------------------------------------- - Messages - -------------------------------------------------------------------------- */ - hunk ./rts/Capability.h 190 extern Capability *last_free_capability; // GC indicator, in scope for the scheduler -#define PENDING_GC_SEQ 1 -#define PENDING_GC_PAR 2 +#define GC_SEQ 1 +#define GC_PAR 2 +#define GC_LOCAL 3 extern volatile StgWord waiting_for_gc; // Acquires a capability at a return point. If *cap is non-NULL, then hunk ./rts/Capability.h 206 // void waitForReturnCapability (Capability **cap/*in/out*/, Task *task); -EXTERN_INLINE void recordMutableCap (StgClosure *p, Capability *cap, nat gen); +EXTERN_INLINE void recordMutableCap (Capability *cap, StgClosure *p, nat gen); EXTERN_INLINE void recordClosureMutated (Capability *cap, StgClosure *p); hunk ./rts/Capability.h 209 +EXTERN_INLINE void recordClosureMutated_ (Capability *cap, StgClosure *p); #if defined(THREADED_RTS) hunk ./rts/Capability.h 280 void freeCapabilities (void); // For the GC: -void markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta, - rtsBool no_mark_sparks); +void markCapability (evac_fn evac, void *user, Capability *cap, + rtsBool no_mark_sparks USED_IF_THREADS); + void markCapabilities (evac_fn evac, void *user); hunk ./rts/Capability.h 284 + void traverseSparkQueues (evac_fn evac, void *user); /* ----------------------------------------------------------------------------- hunk ./rts/Capability.h 302 * -------------------------------------------------------------------------- */ EXTERN_INLINE void -recordMutableCap (StgClosure *p, Capability *cap, nat gen) +recordMutableCap (Capability *cap, StgClosure *p, nat gen_no) { bdescr *bd; hunk ./rts/Capability.h 309 // We must own this Capability in order to modify its mutable list. // ASSERT(cap->running_task == myTask()); // NO: assertion is violated by performPendingThrowTos() - bd = cap->mut_lists[gen]; + bd = cap->mut_lists[gen_no]; if (bd->free >= bd->start + BLOCK_SIZE_W) { bdescr *new_bd; new_bd = allocBlock_lock(); hunk ./rts/Capability.h 315 new_bd->link = bd; bd = new_bd; - cap->mut_lists[gen] = bd; + cap->mut_lists[gen_no] = bd; } *bd->free++ = (StgWord)p; } hunk ./rts/Capability.h 325 { bdescr *bd; bd = Bdescr((StgPtr)p); - if (bd->gen_no != 0) recordMutableCap(p,cap,bd->gen_no); + if (bd->gen_no != 0) { + barf("recordClosureMutated", cap); // recordMutableCap(cap,p,bd->gen_no); + } } hunk ./rts/Capability.h 330 +// For TSO and IND_LOCALs: +EXTERN_INLINE void +recordClosureMutated_ (Capability *cap, StgClosure *p) +{ + bdescr *bd; + bd = Bdescr((StgPtr)p); + if (bd->gen_no != 0) { + recordMutableCap(cap,p,bd->gen_no); + } +} #if defined(THREADED_RTS) INLINE_HEADER rtsBool hunk ./rts/ClosureFlags.c 55 [IND] = ( _NS| _IND ), [IND_PERM] = ( _NS| _IND ), [IND_STATIC] = ( _NS|_STA| _IND ), + [IND_LOCAL] = ( _IND ), [RET_BCO] = ( _BTM ), [RET_SMALL] = ( _BTM| _SRT ), [RET_BIG] = ( _SRT ), hunk ./rts/ClosureFlags.c 86 [WHITEHOLE] = ( 0 ) }; -#if N_CLOSURE_TYPES != 59 +#if N_CLOSURE_TYPES != 60 #error Closure types changed: update ClosureFlags.c! #endif hunk ./rts/Exception.cmm 316 W_ msg; out = Sp - WDS(1); /* ok to re-use stack space here */ + SAVE_THREAD_STATE(); (msg) = foreign "C" throwTo(MyCapability() "ptr", CurrentTSO "ptr", target "ptr", hunk ./rts/Exception.cmm 321 exception "ptr") [R1,R2]; + + // throwTo may have to globalise the current TSO and the + // target, so make sure we get the new ones: + loop2: + if (TO_W_(StgTSO_what_next(CurrentTSO)) == ThreadRelocated) { + CurrentTSO = StgTSO__link(CurrentTSO); + goto loop2; + } + LOAD_THREAD_STATE(); if (msg == NULL) { jump %ENTRY_CODE(Sp(0)); hunk ./rts/HeapStackCheck.cmm 392 { R1 = Sp(2); Sp_adj(3); + + W_ info; + info = %INFO_PTR(UNTAG(R1)); + if (IS_FORWARDING_PTR(info)) { + R1 = UN_FORWARDING_PTR(info) + GETTAG(R1); + info = %INFO_PTR(UNTAG(R1)); + } + #ifdef NO_ARG_REGS // Minor optimisation: there are no argument registers to load up, // so we can just jump straight to the function's entry point. hunk ./rts/HeapStackCheck.cmm 403 - jump %GET_ENTRY(UNTAG(R1)); + jump %ENTRY_CODE(info); #else W_ info; W_ type; hunk ./rts/HeapStackCheck.cmm 408 - info = %GET_FUN_INFO(UNTAG(R1)); + info = %FUN_INFO(info); type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN || type == ARG_GEN_BIG) { jump StgFunInfoExtra_slow_apply(info); hunk ./rts/Interpreter.c 280 case IND: case IND_PERM: + case IND_LOCAL: case IND_STATIC: { tagged_obj = ((StgInd*)obj)->indirectee; hunk ./rts/LdvProfile.c 155 // because they will perish before the next census at any // rate. case IND: + case IND_LOCAL: // Found a dead closure: record its size LDV_recordDead(c, size); return size; hunk ./rts/LdvProfile.c 271 } else { processNurseryForDead(); for (g = 0; g <= N; g++) { - processHeapForDead(generations[g].old_blocks); - processChainForDead(generations[g].large_objects); + processHeapForDead(old_generations[g].old_blocks); + processChainForDead(old_generations[g].large_objects); } } } hunk ./rts/Linker.c 975 SymI_NeedsProto(stg_interp_constr_entry) \ SymI_HasProto(stg_arg_bitmaps) \ SymI_HasProto(alloc_blocks_lim) \ - SymI_HasProto(g0) \ SymI_HasProto(allocate) \ SymI_HasProto(allocateExec) \ SymI_HasProto(freeExec) \ hunk ./rts/Messages.c 17 #include "Threads.h" #include "RaiseAsync.h" #include "sm/Storage.h" +#include "sm/Globalise.h" /* ---------------------------------------------------------------------------- Send a message to another Capability hunk ./rts/Messages.c 25 #ifdef THREADED_RTS -void sendMessage(Capability *from_cap, Capability *to_cap, Message *msg) +void sendMessage(Capability *from_cap STG_UNUSED, Capability *to_cap, Message *msg) { ACQUIRE_LOCK(&to_cap->lock); hunk ./rts/Messages.c 35 if (i != &stg_MSG_THROWTO_info && i != &stg_MSG_BLACKHOLE_info && i != &stg_MSG_TRY_WAKEUP_info && - i != &stg_IND_info && // can happen if a MSG_BLACKHOLE is revoked + i != &stg_STUB_MSG_BLACKHOLE_info && // can happen if a MSG_BLACKHOLE is revoked i != &stg_WHITEHOLE_info) { barf("sendMessage: %p", i); } hunk ./rts/Messages.c 40 } + #endif msg->link = to_cap->inbox; hunk ./rts/Messages.c 46 to_cap->inbox = msg; - recordClosureMutated(from_cap,(StgClosure*)msg); + // not necessary, since all messages are in the global heap: + // recordClosureMutated(from_cap,(StgClosure*)msg); + ASSERT(!(Bdescr((StgPtr)msg)->gen->is_local)); if (to_cap->running_task == NULL) { to_cap->running_task = myTask(); hunk ./rts/Messages.c 127 } return; } - else if (i == &stg_IND_info || i == &stg_MSG_NULL_info) + else if (i == &stg_STUB_MSG_BLACKHOLE_info || i == &stg_MSG_NULL_info) { // message was revoked return; hunk ./rts/Messages.c 199 p = UNTAG_CLOSURE((StgClosure*)VOLATILE_LOAD(&((StgInd*)bh)->indirectee)); info = p->header.info; - if (info == &stg_IND_info) + if (info == &stg_STUB_BLOCKING_QUEUE_info) { // This could happen, if e.g. we got a BLOCKING_QUEUE that has // just been replaced with an IND by another thread in hunk ./rts/Messages.c 214 #ifdef THREADED_RTS if (owner->cap != cap) { + // if we got here, then the msg should already be global, + // because for the BH to be owned by another cap it must + // be global, so stg_BLACKHOLE_info would have allocated + // the msg in global memory. sendMessage() will assert. sendMessage(cap, owner->cap, (Message*)msg); debugTraceCap(DEBUG_sched, cap, "forwarding message to cap %d", owner->cap->no); return 1; hunk ./rts/Messages.c 227 // Capability. msg->tso is the first thread to block on this // BLACKHOLE, so we first create a BLOCKING_QUEUE object. - bq = (StgBlockingQueue*)allocate(cap, sizeofW(StgBlockingQueue)); + if (Bdescr((P_)bh)->gen_ix >= global_gen_ix) { + // allocate the BQ in global memory iff the BH is global + bq = (StgBlockingQueue*)allocateInGen(cap, global_gen_ix, + sizeofW(StgBlockingQueue)); + } else { + bq = (StgBlockingQueue*)allocate(cap, sizeofW(StgBlockingQueue)); + } // initialise the BLOCKING_QUEUE object SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM); hunk ./rts/Messages.c 268 // point to the BLOCKING_QUEUE from the BLACKHOLE write_barrier(); // make the BQ visible ((StgInd*)bh)->indirectee = (StgClosure *)bq; - recordClosureMutated(cap,bh); // bh was mutated + // not necessary: if the bh was global, we allocated the BQ globally + // recordClosureMutated(cap,bh); debugTraceCap(DEBUG_sched, cap, "thread %d blocked on thread %d", (lnat)msg->tso->id, (lnat)owner->id); hunk ./rts/Messages.c 289 #ifdef THREADED_RTS if (owner->cap != cap) { + // if we got here, then the msg should already be global, + // because for the BH to be owned by another cap it must + // be global, so stg_BLACKHOLE_info would have allocated + // the msg in global memory. sendMessage() will assert. sendMessage(cap, owner->cap, (Message*)msg); debugTraceCap(DEBUG_sched, cap, "forwarding message to cap %d", owner->cap->no); return 1; hunk ./rts/Messages.c 301 msg->link = bq->queue; bq->queue = msg; - recordClosureMutated(cap,(StgClosure*)msg); + // no need to do this, the msg will be global if the bq is + // recordClosureMutated(cap,(StgClosure*)msg); if (info == &stg_BLOCKING_QUEUE_CLEAN_info) { bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info; hunk ./rts/Messages.c 306 - recordClosureMutated(cap,(StgClosure*)bq); + // no need to do this, the bq will be global if the msg is + // recordClosureMutated(cap,(StgClosure*)bq); } debugTraceCap(DEBUG_sched, cap, "thread %d blocked on thread %d", hunk ./rts/PrimOps.cmm 308 StgMutVar_var(mv) = y; #endif - if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { +// if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") []; hunk ./rts/PrimOps.cmm 310 - } +// } RET_P(r); } hunk ./rts/PrimOps.cmm 348 StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure; ACQUIRE_LOCK(sm_mutex); - StgWeak_link(w) = W_[weak_ptr_list]; - W_[weak_ptr_list] = w; + StgWeak_link(w) = generation_weak_ptrs(StgRegTable_rG0(BaseReg)); + generation_weak_ptrs(StgRegTable_rG0(BaseReg)) = w; RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); hunk ./rts/PrimOps.cmm 404 StgWeak_cfinalizer(w) = p; ACQUIRE_LOCK(sm_mutex); - StgWeak_link(w) = W_[weak_ptr_list]; - W_[weak_ptr_list] = w; + StgWeak_link(w) = W_[generation_weak_ptrs(StgRegTable_rG0(BaseReg))]; + W_[generation_weak_ptrs(StgRegTable_rG0(BaseReg))] = w; RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); hunk ./rts/PrimOps.cmm 1141 } -#define PerformTake(tso, value) \ - W_[StgTSO_sp(tso) + WDS(1)] = value; \ +#define PerformTake(tso, value) \ + W_[StgTSO_sp(tso) + WDS(1)] = value; \ W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info; hunk ./rts/PrimOps.cmm 1144 +// no need to globalise_wrt: the if the mvar is global, then the value and tso +// will be too. #define PerformPut(tso,lval) \ StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3); \ hunk ./rts/PrimOps.cmm 1153 stg_takeMVarzh { - W_ mvar, val, info, tso, q; + W_ mvar, val, info, tso, q, bd, new_tso; /* args: R1 = MVar closure */ mvar = R1; hunk ./rts/PrimOps.cmm 1164 info = GET_INFO(mvar); #endif - if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; - } + // Not necessary until we have multiple local generations: + // if (info == stg_MVAR_CLEAN_info) { + // foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; + // } /* If the MVar is empty, put ourselves on its blocking queue, * and wait until we're woken up. hunk ./rts/PrimOps.cmm 1174 */ if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { - // Note [mvar-heap-check] We want to do the heap check in the - // branch here, to avoid the conditional in the common case. - // However, we've already locked the MVar above, so we better - // be careful to unlock it again if the the heap check fails. - // Unfortunately we don't have an easy way to inject any code - // into the heap check generated by the code generator, so we - // have to do it in stg_gc_gen (see HeapStackCheck.cmm). - HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh); + bd = Bdescr(mvar); + if (TO_W_(bdescr_gen_ix(bd)) >= TO_W_(CInt[global_gen_ix])) + { + ("ptr" q) = foreign "C" allocateInGen(MyCapability(), + CInt[global_gen_ix], + BYTES_TO_WDS(SIZEOF_StgMVarTSOQueue)); hunk ./rts/PrimOps.cmm 1181 - q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); + SAVE_THREAD_STATE(); + ("ptr" new_tso) = foreign "C" globalise_(MyCapability(), + CurrentTSO); + CurrentTSO = new_tso; + LOAD_THREAD_STATE(); + } + else + { + // Note [mvar-heap-check] We want to do the heap check in the + // branch here, to avoid the conditional in the common case. + // However, we've already locked the MVar above, so we better + // be careful to unlock it again if the the heap check fails. + // Unfortunately we don't have an easy way to inject any code + // into the heap check generated by the code generator, so we + // have to do it in stg_gc_gen (see HeapStackCheck.cmm). + HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh); + + q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); + } SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); StgMVarTSOQueue_link(q) = END_TSO_QUEUE; hunk ./rts/PrimOps.cmm 1209 StgMVar_head(mvar) = q; } else { StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q; - foreign "C" recordClosureMutated(MyCapability() "ptr", - StgMVar_tail(mvar)) []; + // No need to do this, at least until we have multiple + // local generations: + // foreign "C" recordClosureMutated(MyCapability() "ptr", + // StgMVar_tail(mvar)) []; } StgTSO__link(CurrentTSO) = q; StgTSO_block_info(CurrentTSO) = mvar; hunk ./rts/PrimOps.cmm 1298 RET_NP(0, stg_NO_FINALIZER_closure); } - if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; - } + // Not necessary until we have multiple local generations: + // if (info == stg_MVAR_CLEAN_info) { + // foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; + // } /* we got the value... */ val = StgMVar_value(mvar); hunk ./rts/PrimOps.cmm 1354 stg_putMVarzh { - W_ mvar, val, info, tso, q; + W_ mvar, val, info, tso, q, bd, new_tso; /* args: R1 = MVar, R2 = value */ mvar = R1; hunk ./rts/PrimOps.cmm 1366 info = GET_INFO(mvar); #endif - if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); - } + // Not necessary until we have multiple local generations: + // if (info == stg_MVAR_CLEAN_info) { + // foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; + // } if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { hunk ./rts/PrimOps.cmm 1373 - // see Note [mvar-heap-check] above - HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh); + bd = Bdescr(mvar); + if (TO_W_(bdescr_gen_ix(bd)) >= TO_W_(CInt[global_gen_ix])) + { + ("ptr" q) = foreign "C" allocateInGen(MyCapability(), + CInt[global_gen_ix], + BYTES_TO_WDS(SIZEOF_StgMVarTSOQueue)); hunk ./rts/PrimOps.cmm 1380 - q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); + SAVE_THREAD_STATE(); + ("ptr" new_tso) = foreign "C" globalise_(MyCapability(), + CurrentTSO); + CurrentTSO = new_tso; + LOAD_THREAD_STATE(); + } + else + { + // see Note [mvar-heap-check] above + HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh); + + q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); + } SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); StgMVarTSOQueue_link(q) = END_TSO_QUEUE; hunk ./rts/PrimOps.cmm 1402 StgMVar_head(mvar) = q; } else { StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q; - foreign "C" recordClosureMutated(MyCapability() "ptr", - StgMVar_tail(mvar)) []; + // No need to do this, at least until we have multiple + // local generations: + // foreign "C" recordClosureMutated(MyCapability() "ptr", + // StgMVar_tail(mvar)) []; } StgTSO__link(CurrentTSO) = q; StgTSO_block_info(CurrentTSO) = mvar; hunk ./rts/PrimOps.cmm 1422 if (q == stg_END_TSO_QUEUE_closure) { /* No further takes, the MVar is now full. */ StgMVar_value(mvar) = val; + foreign "C" globalise_wrt(MyCapability(), mvar, + mvar+SIZEOF_StgHeader+OFFSET_StgMVar_value); + unlockClosure(mvar, stg_MVAR_DIRTY_info); jump %ENTRY_CODE(Sp(0)); } hunk ./rts/PrimOps.cmm 1482 info = GET_INFO(mvar); #endif - if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); - } + // Not necessary until we have multiple local generations: + // if (info == stg_MVAR_CLEAN_info) { + // foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; + // } if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { #if defined(THREADED_RTS) hunk ./rts/PrimOps.cmm 1499 if (q == stg_END_TSO_QUEUE_closure) { /* No further takes, the MVar is now full. */ StgMVar_value(mvar) = val; + foreign "C" globalise_wrt(MyCapability(), mvar, + mvar+SIZEOF_StgHeader+OFFSET_StgMVar_value); unlockClosure(mvar, stg_MVAR_DIRTY_info); RET_N(1); } hunk ./rts/PrimOps.cmm 1643 // This function is *only* used to wrap zero-arity BCOs in an // updatable wrapper (see ByteCodeLink.lhs). An AP thunk is always // saturated and always points directly to a FUN or BCO. - ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) && - StgBCO_arity(R1) == HALF_W_(0)); + ASSERT(TO_W_(%INFO_TYPE(%GET_STD_INFO(R1))) == BCO && + TO_W_(StgBCO_arity(R1)) == 0); HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh); TICK_ALLOC_UP_THK(0, 0); hunk ./rts/PrimOps.cmm 1998 stg_noDuplicatezh { + W_ new_tso; STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh ); // leave noDuplicate frame in case the current // computation is suspended and restarted (see above). hunk ./rts/PrimOps.cmm 2007 SAVE_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); - foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") []; + ("ptr" new_tso) = foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO) []; + CurrentTSO = new_tso; if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) { jump stg_threadFinished; hunk ./rts/Printer.c 231 debugBelch(")\n"); break; + case IND_LOCAL: + debugBelch("IND_LOCAL("); + printPtr((StgPtr)((StgInd*)obj)->indirectee); + debugBelch(")\n"); + break; + case IND_PERM: debugBelch("IND("); printPtr((StgPtr)((StgInd*)obj)->indirectee); hunk ./rts/Printer.c 950 int i = 0; searched = 0; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - bd = generations[g].blocks; + for (g = 0; g < total_generations; g++) { + bd = all_generations[g].blocks; i = findPtrBlocks(p,bd,arr,arr_size,i); hunk ./rts/Printer.c 953 - bd = generations[g].large_objects; + bd = all_generations[g].large_objects; i = findPtrBlocks(p,bd,arr,arr_size,i); if (i >= arr_size) return; } hunk ./rts/Printer.c 990 while (type == IND || type == IND_STATIC || + type == IND_LOCAL || type == IND_PERM) { obj = ((StgInd *)obj)->indirectee; hunk ./rts/Printer.c 1104 [AP_STACK] = "AP_STACK", [IND] = "IND", [IND_PERM] = "IND_PERM", + [IND_LOCAL] = "IND_LOCAL", [IND_STATIC] = "IND_STATIC", [RET_BCO] = "RET_BCO", [RET_SMALL] = "RET_SMALL", hunk ./rts/ProfHeap.c 879 case CONSTR: case FUN: case IND_PERM: + case IND_LOCAL: case BLACKHOLE: case BLOCKING_QUEUE: case FUN_1_0: hunk ./rts/ProfHeap.c 1063 #endif // Traverse the heap, collecting the census info - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - heapCensusChain( census, generations[g].blocks ); + for (g = 0; g < total_generations; g++) { + heapCensusChain( census, all_generations[g].blocks ); // Are we interested in large objects? might be // confusing to include the stack in a heap profile. hunk ./rts/ProfHeap.c 1067 - heapCensusChain( census, generations[g].large_objects ); + heapCensusChain( census, all_generations[g].large_objects ); } // dump out the census info hunk ./rts/RaiseAsync.c 160 { MessageThrowTo *msg; - msg = (MessageThrowTo *) allocate(cap, sizeofW(MessageThrowTo)); + // globalise everything up front. The alternative, to globalise + // later (in throwToSendMsg() and blockedThrowTo()) doesn't work + // because we would be globalising a WHITEHOLE closure. + globalise(cap, (StgClosure**)&source); + globalise(cap, (StgClosure**)&target); + globalise(cap, (StgClosure**)&exception); + + msg = (MessageThrowTo *) allocateInGen(cap, global_gen_ix, + sizeofW(MessageThrowTo)); // message starts locked; the caller has to unlock it when it is // ready. SET_HDR(msg, &stg_WHITEHOLE_info, CCS_SYSTEM); hunk ./rts/RaiseAsync.c 179 switch (throwToMsg(cap, msg)) { case THROWTO_SUCCESS: +#ifdef DEBUG + // so that sanity checking can understand this closure. + SET_INFO(msg, &stg_MSG_THROWTO_info); +#endif return NULL; case THROWTO_BLOCKED: default: hunk ./rts/RaiseAsync.c 374 blockedThrowTo(cap,target,msg); return THROWTO_BLOCKED; } else { - // Revoke the message by replacing it with IND. We're not - // locking anything here, so we might still get a TRY_WAKEUP - // message from the owner of the blackhole some time in the - // future, but that doesn't matter. + // Revoke the message by replacing it with + // STUB_MSG_BLACKHOLE. We're not locking anything here, so we + // might still get a TRY_WAKEUP message from the owner of the + // blackhole some time in the future, but that doesn't matter. ASSERT(target->block_info.bh->header.info == &stg_MSG_BLACKHOLE_info); hunk ./rts/RaiseAsync.c 379 - OVERWRITE_INFO(target->block_info.bh, &stg_IND_info); + OVERWRITE_INFO(target->block_info.bh, &stg_STUB_MSG_BLACKHOLE_info); raiseAsync(cap, target, msg->exception, rtsFalse, NULL); return THROWTO_SUCCESS; } hunk ./rts/RaiseAsync.c 616 else { q->header.info = &stg_IND_info; } +#ifdef DEBUG + ZERO_SLOP((StgPtr)q + sizeofW(StgInd), + sizeofW(StgMVarTSOQueue) - sizeofW(StgInd)); +#endif // revoke the MVar operation tso->_link = END_TSO_QUEUE; hunk ./rts/RetainerProfile.c 469 *first_child = ((StgSelector *)c)->selectee; return; case IND_PERM: + case IND_LOCAL: case BLACKHOLE: *first_child = ((StgInd *)c)->indirectee; return; hunk ./rts/RetainerProfile.c 1062 // as isAlive doesn't look through IND_STATIC as it ignores static // closures. See trac #3956 for a program that hit this error. case IND_STATIC: + case IND_LOCAL: case BLACKHOLE: // static objects case CONSTR_STATIC: hunk ./rts/RetainerProfile.c 1778 // // The following code assumes that WEAK objects are considered to be roots // for retainer profilng. - for (weak = weak_ptr_list; weak != NULL; weak = weak->link) - // retainRoot((StgClosure *)weak); - retainRoot(NULL, (StgClosure **)&weak); + for (g = 0; g < total_generations; g++) { + for (weak = &all_generations[g].weak_ptrs; weak != NULL; + weak = weak->link) { + // retainRoot((StgClosure *)weak); + retainRoot(NULL, (StgClosure **)&weak); + } + } // Consider roots from the stable ptr table. markStablePtrTable(retainRoot, NULL); hunk ./rts/RetainerProfile.c 1792 // The following code resets the rs field of each unvisited mutable // object (computing sumOfNewCostExtra and updating costArray[] when // debugging retainer profiler). - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (g = 0; g < total_generations; g++) { // NOT TRUE: even G0 has a block on its mutable list // ASSERT(g != 0 || (generations[g].mut_list == NULL)); hunk ./rts/RetainerProfile.c 1799 // Traversing through mut_list is necessary // because we can find MUT_VAR objects which have not been // visited during retainer profiling. - for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { +#ifdef THREADED_RTS + if (RtsFlags.ParFlags.nNodes != 0) + barf("RetainerProfile.c: this code needs fixing for -N > 0"); +#endif + for (bd = capabilities[0].mut_lists[g]; bd != NULL; bd = bd->link) { for (ml = bd->start; ml < bd->free; ml++) { maybeInitRetainerSet((StgClosure *)*ml); hunk ./rts/RtsFlags.c 118 RtsFlags.DebugFlags.squeeze = rtsFalse; RtsFlags.DebugFlags.hpc = rtsFalse; RtsFlags.DebugFlags.sparks = rtsFalse; + RtsFlags.DebugFlags.mallocleaks = rtsFalse; #endif #if defined(PROFILING) hunk ./rts/RtsFlags.c 309 " -Dz DEBUG: stack squezing", " -Dc DEBUG: program coverage", " -Dr DEBUG: sparks", +" -Dk DEBUG: malloc leaks", "", " NOTE: DEBUG events are sent to stderr by default; add -l to create a", " binary event log file instead.", hunk ./rts/RtsFlags.c 686 case 'r': RtsFlags.DebugFlags.sparks = rtsTrue; break; + case 'k': + RtsFlags.DebugFlags.mallocleaks = rtsTrue; + break; default: bad_option( rts_argv[arg] ); } hunk ./rts/RtsStartup.c 136 setProgArgv(*argc,*argv); } - /* Initialise the stats department, phase 1 */ - initStats1(); - #ifdef USE_PAPI papi_init(); #endif hunk ./rts/RtsStartup.c 156 /* initialize the storage manager */ initStorage(); + /* Initialise the stats department, phase 1 */ + initStats1(); + /* initialise the stable pointer table */ initStablePtrTable(); hunk ./rts/RtsStartup.c 359 exitScheduler(wait_foreign); /* run C finalizers for all active weak pointers */ - runAllCFinalizers(weak_ptr_list); + runAllCFinalizers(); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { hunk ./rts/STM.c 94 #include "STM.h" #include "Trace.h" #include "Threads.h" +#include "sm/Globalise.h" #include hunk ./rts/STM.c 422 return result; } -static StgTVarWatchQueue *new_stg_tvar_watch_queue(Capability *cap, - StgClosure *closure) { - StgTVarWatchQueue *result; - result = (StgTVarWatchQueue *)allocate(cap, sizeofW(StgTVarWatchQueue)); - SET_HDR (result, &stg_TVAR_WATCH_QUEUE_info, CCS_SYSTEM); - result -> closure = closure; - return result; -} - static StgTRecChunk *new_stg_trec_chunk(Capability *cap) { StgTRecChunk *result; result = (StgTRecChunk *)allocate(cap, sizeofW(StgTRecChunk)); hunk ./rts/STM.c 428 SET_HDR (result, &stg_TREC_CHUNK_info, CCS_SYSTEM); result -> prev_chunk = END_STM_CHUNK_LIST; result -> next_entry_idx = 0; + result -> cap_no = cap -> no; return result; } hunk ./rts/STM.c 432 -static StgTRecHeader *new_stg_trec_header(Capability *cap, - StgTRecHeader *enclosing_trec) { +static StgTRecHeader *new_stg_trec_header(Capability *cap) { StgTRecHeader *result; hunk ./rts/STM.c 434 - result = (StgTRecHeader *) allocate(cap, sizeofW(StgTRecHeader)); - SET_HDR (result, &stg_TREC_HEADER_info, CCS_SYSTEM); hunk ./rts/STM.c 435 - result -> enclosing_trec = enclosing_trec; + result = (StgTRecHeader *) allocateInGen(cap, global_gen_ix, + sizeofW(StgTRecHeader)); + + SET_HDR (result, &stg_TREC_HEADER_info, CCS_SYSTEM); result -> current_chunk = new_stg_trec_chunk(cap); hunk ./rts/STM.c 440 - result -> invariants_to_check = END_INVARIANT_CHECK_QUEUE; hunk ./rts/STM.c 441 - if (enclosing_trec == NO_TREC) { - result -> state = TREC_ACTIVE; - } else { - ASSERT(enclosing_trec -> state == TREC_ACTIVE || - enclosing_trec -> state == TREC_CONDEMNED); - result -> state = enclosing_trec -> state; - } + recordMutableCap (cap, (StgClosure*)result, global_gen_no); return result; } hunk ./rts/STM.c 466 } static StgTVarWatchQueue *alloc_stg_tvar_watch_queue(Capability *cap, - StgClosure *closure) { + StgClosure *closure, + rtsBool is_local) { StgTVarWatchQueue *result = NULL; hunk ./rts/STM.c 469 - if (cap -> free_tvar_watch_queues == END_STM_WATCH_QUEUE) { - result = new_stg_tvar_watch_queue(cap, closure); + if (is_local) { + result = (StgTVarWatchQueue *)allocate(cap, sizeofW(StgTVarWatchQueue)); } else { hunk ./rts/STM.c 472 - result = cap -> free_tvar_watch_queues; - result -> closure = closure; - cap -> free_tvar_watch_queues = result -> next_queue_entry; + result = (StgTVarWatchQueue *)allocateInGen(cap, global_gen_ix, + sizeofW(StgTVarWatchQueue)); } hunk ./rts/STM.c 475 + SET_HDR (result, &stg_TVAR_WATCH_QUEUE_info, CCS_SYSTEM); + result -> closure = closure; return result; } hunk ./rts/STM.c 497 cap -> free_trec_chunks = result -> prev_chunk; result -> prev_chunk = END_STM_CHUNK_LIST; result -> next_entry_idx = 0; + ASSERT(result -> cap_no == cap -> no); } return result; } hunk ./rts/STM.c 514 StgTRecHeader *enclosing_trec) { StgTRecHeader *result = NULL; if (cap -> free_trec_headers == NO_TREC) { - result = new_stg_trec_header(cap, enclosing_trec); + result = new_stg_trec_header(cap); } else { result = cap -> free_trec_headers; cap -> free_trec_headers = result -> enclosing_trec; hunk ./rts/STM.c 518 - result -> enclosing_trec = enclosing_trec; result -> current_chunk -> next_entry_idx = 0; hunk ./rts/STM.c 519 - result -> invariants_to_check = END_INVARIANT_CHECK_QUEUE; - if (enclosing_trec == NO_TREC) { + } + + result -> enclosing_trec = enclosing_trec; + result -> invariants_to_check = END_INVARIANT_CHECK_QUEUE; + result -> cap_no = cap -> no; + + if (enclosing_trec == NO_TREC) { result -> state = TREC_ACTIVE; hunk ./rts/STM.c 527 - } else { + } else { ASSERT(enclosing_trec -> state == TREC_ACTIVE || enclosing_trec -> state == TREC_CONDEMNED); result -> state = enclosing_trec -> state; hunk ./rts/STM.c 531 - } } return result; } hunk ./rts/STM.c 572 ACQ_ASSERT(s -> current_value == (StgClosure *)trec); NACQ_ASSERT(s -> current_value == e -> expected_value); fq = s -> first_watch_queue_entry; - q = alloc_stg_tvar_watch_queue(cap, (StgClosure*) tso); + q = alloc_stg_tvar_watch_queue(cap, (StgClosure*) tso, + isLocal((StgClosure*)s)); q -> next_queue_entry = fq; q -> prev_queue_entry = END_STM_WATCH_QUEUE; if (fq != END_STM_WATCH_QUEUE) { hunk ./rts/STM.c 875 /************************************************************************/ -void stmPreGCHook() { - nat i; - +void stmPreGCHook (Capability *cap) { lock_stm(NO_TREC); TRACE("stmPreGCHook"); hunk ./rts/STM.c 878 - for (i = 0; i < n_capabilities; i ++) { - Capability *cap = &capabilities[i]; - cap -> free_tvar_watch_queues = END_STM_WATCH_QUEUE; - cap -> free_trec_chunks = END_STM_CHUNK_LIST; - cap -> free_trec_headers = NO_TREC; - } + cap->free_tvar_watch_queues = END_STM_WATCH_QUEUE; + cap->free_trec_chunks = END_STM_CHUNK_LIST; + cap->free_trec_headers = NO_TREC; unlock_stm(NO_TREC); } hunk ./rts/STM.c 1124 FOR_EACH_ENTRY(my_execution, e, { StgTVar *s = e -> tvar; - StgTVarWatchQueue *q = alloc_stg_tvar_watch_queue(cap, (StgClosure*)inv); + StgTVarWatchQueue *q = alloc_stg_tvar_watch_queue(cap, (StgClosure*)inv, + isLocal((StgClosure*)s)); StgTVarWatchQueue *fq = s -> first_watch_queue_entry; // We leave "last_execution" holding the values that will be hunk ./rts/STM.c 1394 IF_STM_FG_LOCKS({ s -> num_updates ++; }); + // we're about to write e->new_value into s, better ensure + // the global heap invariant is maintained: + globalise_wrt(cap, (StgClosure*)s, &e->new_value); unlock_tvar(trec, s, e -> new_value, TRUE); } ACQ_ASSERT(!tvar_is_locked(s, trec)); hunk ./rts/STM.c 1484 // (Otherwise the transaction was not valid and the thread will have to // retry it). + // Better globalise the TSO. We don't know for sure that this is + // necessary, but checking would be tedious: if any TVar is global + // then the TSO needs to be. + tso = globalise_(cap, tso); + // Put ourselves to sleep. We retain locks on all the TVars involved // until we are sound asleep : (a) on the wait queues, (b) BlockedOnSTM // in the TSO, (c) TREC_WAITING in the Trec. hunk ./rts/STM.h 51 -------------- */ -void stmPreGCHook(void); +void stmPreGCHook(Capability *cap); /*---------------------------------------------------------------------- hunk ./rts/Schedule.c 480 // happened. So find the new location: t = cap->r.rCurrentTSO; + ASSERT(t->what_next != ThreadRelocated); + // And save the current errno in this thread. // XXX: possibly bogus for SMP because this thread might already // be running again, see code below. hunk ./rts/Schedule.c 558 ASSERT(cap->run_queue_hd == tso); cap->run_queue_hd = tso->_link; } else { + tso->block_info.prev = deRefTSO(tso->block_info.prev); setTSOLink(cap, tso->block_info.prev, tso->_link); } if (tso->_link == END_TSO_QUEUE) { hunk ./rts/Schedule.c 562 - ASSERT(cap->run_queue_tl == tso); + ASSERT(deRefTSO(cap->run_queue_tl) == tso); cap->run_queue_tl = tso->block_info.prev; } else { setTSOPrev(cap, tso->_link, tso->block_info.prev); hunk ./rts/Schedule.c 723 "excess threads on run queue":"sparks to share (>=2)", n_free_caps); +#if THREAD_MIGRATION_DISABLED_NEED_TO_GLOBALISE_THREADS i = 0; pushed_to_all = rtsFalse; hunk ./rts/Schedule.c 761 IF_DEBUG(sanity, checkRunQueue(cap)); } +#endif #ifdef SPARK_PUSHING /* JB I left this code in place, it would work but is not necessary */ hunk ./rts/Schedule.c 1040 cap->r.rNursery->n_blocks == 1) { // paranoia to prevent infinite loop // if the nursery has only one block. - ACQUIRE_SM_LOCK - bd = allocGroup( blocks ); - RELEASE_SM_LOCK + bd = allocGroup_lock( blocks ); cap->r.rNursery->n_blocks += blocks; // link the new group into the list hunk ./rts/Schedule.c 1239 #endif } - ASSERT(task->incall->tso == t); + ASSERT(deRefTSO(task->incall->tso) == t); if (t->what_next == ThreadComplete) { if (task->incall->ret) { hunk ./rts/Schedule.c 1307 scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major) { rtsBool heap_census; + nat gc_type, N; #ifdef THREADED_RTS /* extern static volatile StgWord waiting_for_gc; lives inside capability.c */ hunk ./rts/Schedule.c 1311 - rtsBool gc_type, prev_pending_gc; + rtsBool prev_pending_gc; nat i; #endif hunk ./rts/Schedule.c 1322 return cap; } + // The final shutdown GC is always single-threaded, because it's + // possible that some of the Capabilities have no worker threads. + + heap_census = scheduleNeedHeapProfile(rtsTrue); + + if (heap_census || sched_state >= SCHED_INTERRUPTING) { + gc_type = GC_SEQ; #ifdef THREADED_RTS hunk ./rts/Schedule.c 1330 - if (sched_state < SCHED_INTERRUPTING - && RtsFlags.ParFlags.parGcEnabled - && N >= RtsFlags.ParFlags.parGcGen - && ! oldest_gen->mark) - { - gc_type = PENDING_GC_PAR; + } else if (next_gc_gen == 0 && !force_major) { + gc_type = GC_LOCAL; + } else if (RtsFlags.ParFlags.parGcEnabled + && n_capabilities > 1 + && next_gc_gen >= RtsFlags.ParFlags.parGcGen + && ! oldest_gen->mark) { + gc_type = GC_PAR; +#endif + } else { + gc_type = GC_SEQ; + } + + if (heap_census || force_major) { + N = RtsFlags.GcFlags.generations - 1; } else { hunk ./rts/Schedule.c 1345 - gc_type = PENDING_GC_SEQ; + N = next_gc_gen; } // In order to GC, there must be no threads running Haskell code. hunk ./rts/Schedule.c 1363 threads if waiting_for_gc is set. Tested inside yieldCapability() and releaseCapability() in Capability.c */ - prev_pending_gc = cas(&waiting_for_gc, 0, gc_type); - if (prev_pending_gc) { - do { - debugTrace(DEBUG_sched, "someone else is trying to GC (%d)...", - prev_pending_gc); - ASSERT(cap); - yieldCapability(&cap,task); - } while (waiting_for_gc); - return cap; // NOTE: task->cap might have changed here +#ifdef THREADED_RTS + if (gc_type != GC_LOCAL) { + prev_pending_gc = cas(&waiting_for_gc, 0, gc_type); + if (prev_pending_gc) { + do { + debugTrace(DEBUG_sched, "someone else is trying to GC (%d)...", + prev_pending_gc); + ASSERT(cap); + yieldCapability(&cap,task); + } while (waiting_for_gc); + return cap; // NOTE: task->cap might have changed here + } } hunk ./rts/Schedule.c 1376 +#endif hunk ./rts/Schedule.c 1378 - setContextSwitches(); - - // The final shutdown GC is always single-threaded, because it's - // possible that some of the Capabilities have no worker threads. - - if (gc_type == PENDING_GC_SEQ) - { - traceEventRequestSeqGc(cap); - } - else - { + switch (gc_type) { + case GC_PAR: + setContextSwitches(); traceEventRequestParGc(cap); hunk ./rts/Schedule.c 1382 - debugTrace(DEBUG_sched, "ready_to_gc, grabbing GC threads"); + break; + case GC_SEQ: + setContextSwitches(); + traceEventRequestSeqGc(cap); + break; } hunk ./rts/Schedule.c 1389 - if (gc_type == PENDING_GC_SEQ) +#ifdef THREADED_RTS + if (gc_type == GC_SEQ) { // single-threaded GC: grab all the capabilities for (i=0; i < n_capabilities; i++) { hunk ./rts/Schedule.c 1394 - debugTrace(DEBUG_sched, "ready_to_gc, grabbing all the capabilies (%d/%d)", i, n_capabilities); + debugTrace(DEBUG_sched, "scheduleDoGC: grabbing all the capabilies (%d/%d)", i, n_capabilities); if (cap != &capabilities[i]) { Capability *pcap = &capabilities[i]; // we better hope this task doesn't get migrated to hunk ./rts/Schedule.c 1410 } } } - else + else if (gc_type == GC_PAR) { hunk ./rts/Schedule.c 1412 + debugTrace(DEBUG_sched, "scheduleDoGC: grabbing GC threads"); // multi-threaded GC: make sure all the Capabilities donate one // GC thread each. waitForGcThreads(cap); hunk ./rts/Schedule.c 1417 } - #endif IF_DEBUG(scheduler, printAllThreads()); hunk ./rts/Schedule.c 1432 sched_state = SCHED_SHUTTING_DOWN; } - heap_census = scheduleNeedHeapProfile(rtsTrue); - - traceEventGcStart(cap); #if defined(THREADED_RTS) // reset waiting_for_gc *before* GC, so that when the GC threads // emerge they don't immediately re-enter the GC. hunk ./rts/Schedule.c 1436 waiting_for_gc = 0; - GarbageCollect(force_major || heap_census, gc_type, cap); -#else - GarbageCollect(force_major || heap_census, 0, cap); #endif hunk ./rts/Schedule.c 1437 + + traceEventGcStart(cap); + GarbageCollect(N, gc_type, cap); traceEventGcEnd(cap); if (recent_activity == ACTIVITY_INACTIVE && force_major) hunk ./rts/Schedule.c 1460 } #if defined(THREADED_RTS) - if (gc_type == PENDING_GC_PAR) + if (gc_type == GC_PAR) { releaseGCThreads(cap); } hunk ./rts/Schedule.c 1498 #endif #if defined(THREADED_RTS) - if (gc_type == PENDING_GC_SEQ) { + if (gc_type == GC_SEQ) { // release our stash of capabilities. for (i = 0; i < n_capabilities; i++) { if (cap != &capabilities[i]) { hunk ./rts/Schedule.c 1558 pid = fork(); + if (pid) { // parent startTimer(); // #4074 hunk ./rts/Schedule.c 1585 // all Tasks, because they correspond to OS threads that are // now gone. - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) { - if (t->what_next == ThreadRelocated) { - next = t->_link; - } else { + for (g = 0; g < total_generations; g++) { + for (t = all_generations[g].threads; t != END_TSO_QUEUE; t = next) { + next = t->global_link; hunk ./rts/Schedule.c 1589 + if (t->what_next == ThreadRelocated) continue; + // don't allow threads to catch the ThreadKilled // exception, but we do want to raiseAsync() because these // threads may be evaluating thunks that we need later. hunk ./rts/Schedule.c 1602 // won't get a chance to exit in the usual way (see // also scheduleHandleThreadFinished). t->bound = NULL; - } } } hunk ./rts/Schedule.c 1618 // Empty the threads lists. Otherwise, the garbage // collector may attempt to resurrect some of these threads. - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - generations[g].threads = END_TSO_QUEUE; + for (g = 0; g < total_generations; g++) { + all_generations[g].threads = END_TSO_QUEUE; } discardTasksExcept(cap->running_task); hunk ./rts/Schedule.c 1651 #else /* !FORKPROCESS_PRIMOP_SUPPORTED */ barf("forkProcess#: primop not supported on this platform, sorry!\n"); #endif + } /* --------------------------------------------------------------------------- hunk ./rts/Schedule.c 1667 nat g; debugTrace(DEBUG_sched,"deleting all threads"); - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) { - if (t->what_next == ThreadRelocated) { - next = t->_link; - } else { - next = t->global_link; + for (g = 0; g < total_generations; g++) { + for (t = all_generations[g].threads; t != END_TSO_QUEUE; t = next) { + next = t->global_link; + if (t->what_next != ThreadRelocated) { deleteThread(cap,t); } } hunk ./rts/Schedule.c 1830 // Remove the thread from the suspended list recoverSuspendedTask(cap,task); - tso = incall->suspended_tso; + tso = deRefTSO(incall->suspended_tso); incall->suspended_tso = NULL; incall->suspended_cap = NULL; hunk ./rts/Schedule.c 1833 + tso->_link = END_TSO_QUEUE; // no write barrier reqd traceEventRunThread(cap, tso); hunk ./rts/Schedule.c 2079 #endif } +void markScheduler (evac_fn evac USED_IF_NOT_THREADS, + void *user USED_IF_NOT_THREADS) +{ +#if !defined(THREADED_RTS) + evac(user, (StgClosure **)(void *)&blocked_queue_hd); + evac(user, (StgClosure **)(void *)&blocked_queue_tl); + evac(user, (StgClosure **)(void *)&sleeping_queue); +#endif +} + /* ----------------------------------------------------------------------------- performGC hunk ./rts/Schedule.c 2227 dest->sp = new_sp; dest->stack_size = new_stack_size; + // put the new TSO on the threads list for G0 + dest->global_link = cap->r.rG0->threads; + cap->r.rG0->threads = dest; + /* Mark the old TSO as relocated. We have to check for relocated * TSOs in the garbage collector and any primops that deal with TSOs. * hunk ./rts/Schedule.c 2294 new_tso->flags &= ~TSO_LINK_DIRTY; dirty_TSO(cap, new_tso); + // put the new TSO on the threads list for its generation + ACQUIRE_SM_LOCK + new_tso->global_link = bd->gen->threads; + bd->gen->threads = new_tso; + RELEASE_SM_LOCK + debugTrace(DEBUG_sched, "thread %ld: reducing TSO size from %lu words to %lu", (long)tso->id, tso_size_w, tso_sizeW(new_tso)); hunk ./rts/Schedule.h 26 void initScheduler (void); void exitScheduler (rtsBool wait_foreign); void freeScheduler (void); +void markScheduler (evac_fn evac, void *user); // Place a new thread on the run queue of the current Capability void scheduleThread (Capability *cap, StgTSO *tso); hunk ./rts/Schedule.h 124 cap->run_queue_hd = tso; tso->block_info.prev = END_TSO_QUEUE; } else { + cap->run_queue_tl = deRefTSO(cap->run_queue_tl); setTSOLink(cap, cap->run_queue_tl, tso); setTSOPrev(cap, tso, cap->run_queue_tl); } hunk ./rts/Schedule.h 144 setTSOLink(cap, tso, cap->run_queue_hd); tso->block_info.prev = END_TSO_QUEUE; if (cap->run_queue_hd != END_TSO_QUEUE) { + cap->run_queue_hd = deRefTSO(cap->run_queue_hd); setTSOPrev(cap, cap->run_queue_hd, tso); } cap->run_queue_hd = tso; hunk ./rts/Schedule.h 159 popRunQueue (Capability *cap) { StgTSO *t = cap->run_queue_hd; + + while (t->what_next == ThreadRelocated) { + t = t->_link; + } + ASSERT(t != END_TSO_QUEUE); cap->run_queue_hd = t->_link; if (t->_link != END_TSO_QUEUE) { hunk ./rts/Sparks.c 13 #include "Rts.h" #include "Schedule.h" +#include "sm/Globalise.h" #include "RtsUtils.h" #include "Trace.h" #include "Prelude.h" hunk ./rts/Sparks.c 74 p = UNTAG_CLOSURE(p); if (closure_SHOULD_SPARK(p)) { - pushWSDeque(pool,p); + pushWSDeque(pool,publish(cap,p)); } cap->sparks_created++; hunk ./rts/Stable.c 17 #include "RtsUtils.h" #include "Trace.h" #include "Stable.h" +#include "sm/Globalise.h" /* Comment from ADR's implementation in old RTS: hunk ./rts/Stable.c 192 while (get_itbl(q)->type == IND || get_itbl(q)->type == IND_STATIC || + get_itbl(q)->type == IND_LOCAL || get_itbl(q)->type == IND_PERM) { q = ((StgInd *)q)->indirectee; tag = GET_CLOSURE_TAG(q); hunk ./rts/Stable.c 207 { StgWord sn; void* sn_tmp; + Task *task; if (stable_ptr_free == NULL) { enlargeStablePtrTable(); hunk ./rts/Stable.c 218 */ p = (StgPtr)removeIndirections((StgClosure*)p); + // Globalise the pointer, since the stable pointer table is a global + // set of roots. + task = myTask(); // sometimes called for static closures without a Task + if (task != NULL) { + globalise(task->cap, &p); + } + // register the untagged pointer. This just makes things simpler. p = (StgPtr)UNTAG_CLOSURE((StgClosure*)p); hunk ./rts/Stats.c 19 #include "GetTime.h" #include "sm/Storage.h" #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin +#include "sm/GCThread.h" +#include "sm/BlockAlloc.h" #if USE_PAPI #include "Papi.h" hunk ./rts/Stats.c 50 static StgWord64 GC_par_max_copied = 0; static StgWord64 GC_par_avg_copied = 0; -static Ticks GC_start_time = 0, GC_tot_time = 0; /* User GC Time */ -static Ticks GCe_start_time = 0, GCe_tot_time = 0; /* Elapsed GC time */ +// GC start times are in the gc_thread struct +static Ticks GC_tot_time = 0; /* User GC Time */ +static Ticks GCe_tot_time = 0; /* Elapsed GC time */ #ifdef PROFILING static Ticks RP_start_time = 0, RP_tot_time = 0; /* retainer prof user time */ hunk ./rts/Stats.c 73 static lnat ResidencySamples = 0; // for stats only static lnat MaxSlop = 0; -static lnat GC_start_faults = 0, GC_end_faults = 0; +static lnat GC_end_faults = 0; static Ticks *GC_coll_times = NULL; static Ticks *GC_coll_etimes = NULL; hunk ./rts/Stats.c 91 return getProcessElapsedTime() - ElapsedTimeStart; } -/* mut_user_time_during_GC() and mut_user_time() - * - * The former function can be used to get the current mutator time - * *during* a GC, i.e. between stat_startGC and stat_endGC. This is - * used in the heap profiler for accurately time stamping the heap - * sample. - * - * ATTENTION: mut_user_time_during_GC() relies on GC_start_time being - * defined in stat_startGC() - to minimise system calls, - * GC_start_time is, however, only defined when really needed (check - * stat_startGC() for details) - */ -double -mut_user_time_during_GC( void ) -{ - return TICK_TO_DBL(GC_start_time - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time)); -} - double mut_user_time( void ) { hunk ./rts/Stats.c 101 #ifdef PROFILING /* - mut_user_time_during_RP() is similar to mut_user_time_during_GC(); - it returns the MUT time during retainer profiling. + mut_user_time_during_RP() returns the MUT time during retainer profiling. The same is for mut_user_time_during_HC(); */ double hunk ./rts/Stats.c 138 GC_tot_copied = 0; GC_par_max_copied = 0; GC_par_avg_copied = 0; - GC_start_time = 0; GC_tot_time = 0; hunk ./rts/Stats.c 139 - GCe_start_time = 0; GCe_tot_time = 0; #ifdef PROFILING hunk ./rts/Stats.c 158 ResidencySamples = 0; MaxSlop = 0; - GC_start_faults = 0; GC_end_faults = 0; } hunk ./rts/Stats.c 173 } GC_coll_times = (Ticks *)stgMallocBytes( - sizeof(Ticks)*RtsFlags.GcFlags.generations, + sizeof(Ticks)*total_generations, "initStats"); GC_coll_etimes = (Ticks *)stgMallocBytes( hunk ./rts/Stats.c 177 - sizeof(Ticks)*RtsFlags.GcFlags.generations, + sizeof(Ticks)*total_generations, "initStats"); hunk ./rts/Stats.c 179 - for (i = 0; i < RtsFlags.GcFlags.generations; i++) { + for (i = 0; i < total_generations; i++) { GC_coll_times[i] = 0; GC_coll_etimes[i] = 0; } hunk ./rts/Stats.c 281 static nat rub_bell = 0; /* initialise global variables needed during GC - * - * * GC_start_time is read in mut_user_time_during_GC(), which in turn is - * needed if either PROFILING or DEBUGing is enabled */ void hunk ./rts/Stats.c 283 -stat_startGC(void) +stat_startGC(gc_thread *gct) { nat bell = RtsFlags.GcFlags.ringBell; hunk ./rts/Stats.c 300 || RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time { - getProcessTimes(&GC_start_time, &GCe_start_time); - if (RtsFlags.GcFlags.giveStats) { - GC_start_faults = getPageFaults(); + gct->gc_start_time = getProcessCPUTime(); + gct->gce_start_time = getProcessElapsedTime(); + if (RtsFlags.GcFlags.giveStats) { + gct->gc_start_faults = getPageFaults(); } } hunk ./rts/Stats.c 322 -------------------------------------------------------------------------- */ void -stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen, +stat_endGC (gc_thread *gct, + lnat alloc, lnat live, lnat copied, nat gen, lnat max_copied, lnat avg_copied, lnat slop) { if (RtsFlags.GcFlags.giveStats != NO_GC_STATS || hunk ./rts/Stats.c 333 Ticks time, etime, gc_time, gc_etime; getProcessTimes(&time, &etime); - gc_time = time - GC_start_time; - gc_etime = etime - GCe_start_time; + gc_time = time - gct->gc_start_time; + gc_etime = etime - gct->gce_start_time; if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) { nat faults = getPageFaults(); hunk ./rts/Stats.c 342 statsPrintf("%9ld %9ld %9ld", alloc*sizeof(W_), copied*sizeof(W_), live*sizeof(W_)); - statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2ld)\n", + statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld ", TICK_TO_DBL(gc_time), TICK_TO_DBL(gc_etime), TICK_TO_DBL(time), hunk ./rts/Stats.c 347 TICK_TO_DBL(etime - ElapsedTimeStart), - faults - GC_start_faults, - GC_start_faults - GC_end_faults, - gen); + faults - gct->gc_start_faults, + gct->gc_start_faults - GC_end_faults); hunk ./rts/Stats.c 350 + switch (gct->gc_type) { + case GC_LOCAL: + statsPrintf("(G0.%d, loc)", gct->index); + break; + case GC_PAR: + statsPrintf("(G%d, par)", gct->collect_gen); + break; + case GC_SEQ: + statsPrintf("(G%d)", gct->collect_gen); + break; + } + statsPrintf("\n"); GC_end_faults = faults; statsFlush(); } hunk ./rts/Stats.c 533 statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \ SLOW_CALLS_##arity * 100.0/TOTAL_CALLS) -extern lnat hw_alloc_blocks; - void stat_exit(int alloc) { hunk ./rts/Stats.c 536 + generation *gen; + if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { char temp[BIG_STRING_LEN]; hunk ./rts/Stats.c 551 GC_tot_alloc += alloc; /* Count total garbage collections */ - for (g = 0; g < RtsFlags.GcFlags.generations; g++) - total_collections += generations[g].collections; + for (g = 0; g < total_generations; g++) + total_collections += all_generations[g].collections; /* avoid divide by zero if time is measured as 0.00 seconds -- SDM */ if (time == 0.0) time = 1; hunk ./rts/Stats.c 587 (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_))); /* Print garbage collections in each gen */ - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - statsPrintf(" Generation %d: %5d collections, %5d parallel, %5.2fs, %5.2fs elapsed\n", - g, generations[g].collections, - generations[g].par_collections, - TICK_TO_DBL(GC_coll_times[g]), - TICK_TO_DBL(GC_coll_etimes[g])); + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; + if (gen->is_local) { + statsPrintf(" Generation %d.%-2d: %5d collections, %5d parallel, %5.2fs, %5.2fs elapsed\n", + gen->no, gen->cap, + gen->collections, + gen->par_collections, + TICK_TO_DBL(GC_coll_times[g]), + TICK_TO_DBL(GC_coll_etimes[g])); + } else { + statsPrintf(" Generation %d : %5d collections, %5d parallel, %5.2fs, %5.2fs elapsed\n", + gen->no, + gen->collections, + gen->par_collections, + TICK_TO_DBL(GC_coll_times[g]), + TICK_TO_DBL(GC_coll_etimes[g])); + } } #if defined(THREADED_RTS) hunk ./rts/Stats.c 710 statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin); statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin); - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - statsPrintf("gen[%d].sync_large_objects: %"FMT_Word64"\n", g, generations[g].sync_large_objects.spin); + for (g = 0; g < total_generations; g++) { + statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, all_generations[g].sync.spin); } } #endif hunk ./rts/Stats.c 772 void statDescribeGens(void) { - nat g, mut, lge; - lnat live, slop; - lnat tot_live, tot_slop; + nat g, n, i, lge; + nat cap_blocks, gen_blocks; + nat cap_mut, gen_mut; + lnat cap_live, gen_live; + lnat slop, tot_live, tot_slop; bdescr *bd; generation *gen; hunk ./rts/Stats.c 781 debugBelch( -"----------------------------------------------------------\n" -" Gen Max Mut-list Blocks Large Live Slop\n" -" Blocks Bytes Objects \n" -"----------------------------------------------------------\n"); +"---------------------------------------------------------------\n" +" Gen Max Large Cap Mut-list Blocks Live Slop\n" +" Blocks Objects Bytes \n" +"---------------------------------------------------------------\n"); tot_live = 0; tot_slop = 0; hunk ./rts/Stats.c 788 - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - mut = 0; - for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { - mut += (bd->free - bd->start) * sizeof(W_); - } - - gen = &generations[g]; - - debugBelch("%5d %7d %9d", g, gen->max_blocks, mut); + for (n = 0; n < total_generations; n++) { + gen = &all_generations[n]; + g = gen->no; for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) { lge++; hunk ./rts/Stats.c 795 } - live = gen->n_words + countOccupied(gen->large_objects); - slop = (gen->n_blocks + gen->n_large_blocks) * BLOCK_SIZE_W - live; - debugBelch("%8d %8d %8ld %8ld\n", gen->n_blocks, lge, - live*sizeof(W_), slop*sizeof(W_)); - tot_live += live; + gen_live = gen->n_words + countOccupied(gen->large_objects); + gen_blocks = gen->n_blocks + gen->n_large_blocks; + + slop = gen_blocks * BLOCK_SIZE_W - gen_live; + + debugBelch("%5d %7d %8d %4s %8s %8d %8ld %8ld\n", + g, gen->max_blocks, lge, "", "", gen_blocks, + gen_live*sizeof(W_), slop*sizeof(W_)); + + for (i = 0; i < n_capabilities; i++) { + cap_mut = countOccupied(capabilities[i].mut_lists[g]); + + cap_live = countOccupied(gc_threads[i]->gens[n].todo_bd); + cap_live += countOccupied(gc_threads[i]->gens[n].part_list); + cap_live += countOccupied(gc_threads[i]->gens[n].scavd_list); + + cap_blocks = countBlocks(gc_threads[i]->gens[n].todo_bd); + cap_blocks += gc_threads[i]->gens[n].n_part_blocks; + cap_blocks += gc_threads[i]->gens[n].n_scavd_blocks; + + slop = cap_blocks * BLOCK_SIZE_W - cap_live; + + debugBelch("%5s %7s %8s %4d %8d %8d %8ld %8ld\n", + "", "", "", i, cap_mut*sizeof(W_), cap_blocks, + cap_live*sizeof(W_), slop*sizeof(W_)); + + gen_mut += cap_mut; + gen_live += cap_live; + gen_blocks += cap_blocks; + } + + debugBelch("%46s-----------------\n",""); + debugBelch("%5s %7s %8s %4s %8s %8s %8ld %8ld\n\n", + "", "", "", "", "", "", + gen_live*sizeof(W_), slop*sizeof(W_)); + + slop = gen_blocks * BLOCK_SIZE_W - gen_live; + + tot_live += gen_live; tot_slop += slop; } hunk ./rts/Stats.c 836 - debugBelch("----------------------------------------------------------\n"); - debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); - debugBelch("----------------------------------------------------------\n"); + debugBelch("---------------------------------------------------------------\n"); + debugBelch("%46s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); + debugBelch("---------------------------------------------------------------\n"); debugBelch("\n"); } hunk ./rts/Stats.h 16 #include "BeginPrivate.h" +struct gc_thread_; + void stat_startInit(void); void stat_endInit(void); hunk ./rts/Stats.h 21 -void stat_startGC(void); -void stat_endGC (lnat alloc, lnat live, - lnat copied, lnat gen, - lnat max_copied, lnat avg_copied, lnat slop); +void stat_startGC(struct gc_thread_ *gct); +void stat_endGC (struct gc_thread_ *gct, lnat alloc, lnat live, + lnat copied, nat gen, + lnat max_copied, lnat avg_copied, lnat slop); #ifdef PROFILING void stat_startRP(void); hunk ./rts/Stats.h 49 void initStats0(void); void initStats1(void); -double mut_user_time_during_GC(void); double mut_user_time(void); #ifdef PROFILING hunk ./rts/StgMiscClosures.cmm 238 jump %GET_ENTRY(R1); } +STRING(ind_local_msg,"IND_LOCAL: not mine") + +#define DEF_IND_LOCAL(n) \ + INFO_TABLE(stg_IND_LOCAL##n,1,0,IND_LOCAL,"IND_LOCAL","IND_LOCAL") \ + { \ + if (TO_W_(Capability_no(MyCapability())) != n) { \ + foreign "C" barf(ind_local_msg "ptr") never returns; \ + } else { \ + R1 = StgInd_indirectee(R1); \ + ENTER(); \ + } \ + } + +DEF_IND_LOCAL(0) +DEF_IND_LOCAL(1) +DEF_IND_LOCAL(2) +DEF_IND_LOCAL(3) +DEF_IND_LOCAL(4) +DEF_IND_LOCAL(5) +DEF_IND_LOCAL(6) +DEF_IND_LOCAL(7) +DEF_IND_LOCAL(8) +DEF_IND_LOCAL(9) +DEF_IND_LOCAL(10) +DEF_IND_LOCAL(11) +DEF_IND_LOCAL(12) +DEF_IND_LOCAL(13) +DEF_IND_LOCAL(14) +DEF_IND_LOCAL(15) +DEF_IND_LOCAL(16) +DEF_IND_LOCAL(17) +DEF_IND_LOCAL(18) +DEF_IND_LOCAL(19) +DEF_IND_LOCAL(20) +DEF_IND_LOCAL(21) +DEF_IND_LOCAL(22) +DEF_IND_LOCAL(23) +DEF_IND_LOCAL(24) +DEF_IND_LOCAL(25) +DEF_IND_LOCAL(26) +DEF_IND_LOCAL(27) +DEF_IND_LOCAL(28) +DEF_IND_LOCAL(29) +DEF_IND_LOCAL(30) +DEF_IND_LOCAL(31) +DEF_IND_LOCAL(32) +DEF_IND_LOCAL(33) +DEF_IND_LOCAL(34) +DEF_IND_LOCAL(35) +DEF_IND_LOCAL(36) +DEF_IND_LOCAL(37) +DEF_IND_LOCAL(38) +DEF_IND_LOCAL(39) +DEF_IND_LOCAL(40) +DEF_IND_LOCAL(41) +DEF_IND_LOCAL(42) +DEF_IND_LOCAL(43) +DEF_IND_LOCAL(44) +DEF_IND_LOCAL(45) +DEF_IND_LOCAL(46) +DEF_IND_LOCAL(47) +DEF_IND_LOCAL(48) +DEF_IND_LOCAL(49) +DEF_IND_LOCAL(50) +DEF_IND_LOCAL(51) +DEF_IND_LOCAL(52) +DEF_IND_LOCAL(53) +DEF_IND_LOCAL(54) +DEF_IND_LOCAL(55) +DEF_IND_LOCAL(56) +DEF_IND_LOCAL(57) +DEF_IND_LOCAL(58) +DEF_IND_LOCAL(59) +DEF_IND_LOCAL(60) +DEF_IND_LOCAL(61) +DEF_IND_LOCAL(62) +DEF_IND_LOCAL(63) +DEF_IND_LOCAL(64) + +section "rodata" { + stg_IND_LOCAL_tbl: + W_ stg_IND_LOCAL0_info; + W_ stg_IND_LOCAL1_info; + W_ stg_IND_LOCAL2_info; + W_ stg_IND_LOCAL3_info; + W_ stg_IND_LOCAL4_info; + W_ stg_IND_LOCAL5_info; + W_ stg_IND_LOCAL6_info; + W_ stg_IND_LOCAL7_info; + W_ stg_IND_LOCAL8_info; + W_ stg_IND_LOCAL9_info; + W_ stg_IND_LOCAL10_info; + W_ stg_IND_LOCAL11_info; + W_ stg_IND_LOCAL12_info; + W_ stg_IND_LOCAL13_info; + W_ stg_IND_LOCAL14_info; + W_ stg_IND_LOCAL15_info; + W_ stg_IND_LOCAL16_info; + W_ stg_IND_LOCAL17_info; + W_ stg_IND_LOCAL18_info; + W_ stg_IND_LOCAL19_info; + W_ stg_IND_LOCAL20_info; + W_ stg_IND_LOCAL21_info; + W_ stg_IND_LOCAL22_info; + W_ stg_IND_LOCAL23_info; + W_ stg_IND_LOCAL24_info; + W_ stg_IND_LOCAL25_info; + W_ stg_IND_LOCAL26_info; + W_ stg_IND_LOCAL27_info; + W_ stg_IND_LOCAL28_info; + W_ stg_IND_LOCAL29_info; + W_ stg_IND_LOCAL30_info; + W_ stg_IND_LOCAL31_info; + W_ stg_IND_LOCAL32_info; + W_ stg_IND_LOCAL33_info; + W_ stg_IND_LOCAL34_info; + W_ stg_IND_LOCAL35_info; + W_ stg_IND_LOCAL36_info; + W_ stg_IND_LOCAL37_info; + W_ stg_IND_LOCAL38_info; + W_ stg_IND_LOCAL39_info; + W_ stg_IND_LOCAL40_info; + W_ stg_IND_LOCAL41_info; + W_ stg_IND_LOCAL42_info; + W_ stg_IND_LOCAL43_info; + W_ stg_IND_LOCAL44_info; + W_ stg_IND_LOCAL45_info; + W_ stg_IND_LOCAL46_info; + W_ stg_IND_LOCAL47_info; + W_ stg_IND_LOCAL48_info; + W_ stg_IND_LOCAL49_info; + W_ stg_IND_LOCAL50_info; + W_ stg_IND_LOCAL51_info; + W_ stg_IND_LOCAL52_info; + W_ stg_IND_LOCAL53_info; + W_ stg_IND_LOCAL54_info; + W_ stg_IND_LOCAL55_info; + W_ stg_IND_LOCAL56_info; + W_ stg_IND_LOCAL57_info; + W_ stg_IND_LOCAL58_info; + W_ stg_IND_LOCAL59_info; + W_ stg_IND_LOCAL60_info; + W_ stg_IND_LOCAL61_info; + W_ stg_IND_LOCAL62_info; + W_ stg_IND_LOCAL63_info; + W_ stg_IND_LOCAL64_info; +} + /* ---------------------------------------------------------------------------- Black holes. hunk ./rts/StgMiscClosures.cmm 397 INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE") { - W_ r, p, info, bq, msg, owner, bd; + W_ r, p, info, bq, msg, owner, bd, new_tso; TICK_ENT_DYN_IND(); /* tick */ hunk ./rts/StgMiscClosures.cmm 409 } info = StgHeader_info(p); - if (info == stg_IND_info) { + if (info == stg_STUB_BLOCKING_QUEUE_info) { // This could happen, if e.g. we got a BLOCKING_QUEUE that has // just been replaced with an IND by another thread in // wakeBlockingQueue(). hunk ./rts/StgMiscClosures.cmm 420 info == stg_BLOCKING_QUEUE_CLEAN_info || info == stg_BLOCKING_QUEUE_DIRTY_info) { - ("ptr" msg) = foreign "C" allocate(MyCapability() "ptr", - BYTES_TO_WDS(SIZEOF_MessageBlackHole)) [R1]; + + // msg is allocated in global memory iff the BLAKCHOLE is global + + bd = Bdescr(R1); + if (TO_W_(bdescr_gen_ix(bd)) >= TO_W_(CInt[global_gen_ix])) + { + SAVE_THREAD_STATE(); + ("ptr" new_tso) = foreign "C" globalise_(MyCapability(), + CurrentTSO); + CurrentTSO = new_tso; + LOAD_THREAD_STATE(); + + ("ptr" msg) = foreign "C" allocateInGen(MyCapability() "ptr", + CInt[global_gen_ix], + BYTES_TO_WDS(SIZEOF_MessageBlackHole)) [R1]; + } + else + { + ("ptr" msg) = foreign "C" allocate(MyCapability() "ptr", + BYTES_TO_WDS(SIZEOF_MessageBlackHole)) [R1]; + } SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM); MessageBlackHole_tso(msg) = CurrentTSO; hunk ./rts/StgMiscClosures.cmm 486 INFO_TABLE(stg_BLOCKING_QUEUE_DIRTY,4,0,BLOCKING_QUEUE,"BLOCKING_QUEUE","BLOCKING_QUEUE") { foreign "C" barf("BLOCKING_QUEUE_DIRTY object entered!") never returns; } +/* + * A kind of indirection that is never intended to be entered. This + * is used for stubbing out BLOCKING_QUEUEs in wakeBlockingQueue(). + */ +INFO_TABLE(stg_STUB_BLOCKING_QUEUE,1,0,IND,"STUB_BLOCKING_QUEUE","STUB_BLOCKING_QUEUE") +{ foreign "C" barf("STUB_BLOCKING_QUEUE object entered!") never returns; } /* ---------------------------------------------------------------------------- Whiteholes are used for the "locked" state of a closure (see lockClosure()) hunk ./rts/StgMiscClosures.cmm 638 INFO_TABLE_CONSTR(stg_MSG_BLACKHOLE,3,0,0,PRIM,"MSG_BLACKHOLE","MSG_BLACKHOLE") { foreign "C" barf("MSG_BLACKHOLE object entered!") never returns; } +// used to overwrite a MSG_BLACKHOLE when the message has been used/revoked +// Closure type is IND so these get automatically eliminated by the GC. +INFO_TABLE(stg_STUB_MSG_BLACKHOLE,1,0,IND,"STUB_MSG_BLACKHOLE","STUB_MSG_BLACKHOLE") +{ foreign "C" barf("STUB_MSG_BLACKHOLE object entered!") never returns; } + // used to overwrite a MSG_THROWTO when the message has been used/revoked INFO_TABLE_CONSTR(stg_MSG_NULL,1,0,0,PRIM,"MSG_NULL","MSG_NULL") { foreign "C" barf("MSG_NULL object entered!") never returns; } hunk ./rts/StgStartup.cmm 99 stg_returnToSched { + W_ new_tso; SAVE_THREAD_STATE(); hunk ./rts/StgStartup.cmm 101 - foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO); + ("ptr" new_tso) = foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO); + CurrentTSO = new_tso; jump StgReturn; } hunk ./rts/StgStartup.cmm 122 // the MVar on which we are about to block in SMP mode. stg_returnToSchedButFirst { + W_ new_tso; SAVE_THREAD_STATE(); hunk ./rts/StgStartup.cmm 124 - foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO); + ("ptr" new_tso) = foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO); + CurrentTSO = new_tso; jump R2; } hunk ./rts/ThreadPaused.c 178 * here. We also take the opportunity to do stack squeezing if it's * turned on. * -------------------------------------------------------------------------- */ -void +StgTSO * // returns the new TSO, since it might have moved threadPaused(Capability *cap, StgTSO *tso) { StgClosure *frame; hunk ./rts/ThreadPaused.c 191 nat weight = 0; nat weight_pending = 0; rtsBool prev_was_update_frame = rtsFalse; + nat tso_gen_no; // Check to see whether we have threads waiting to raise // exceptions, and we're not blocking exceptions, or are blocked hunk ./rts/ThreadPaused.c 199 // TSO_BLOCKEX and becomes blocked interruptibly, this is the only // place we ensure that the blocked_exceptions get a chance. maybePerformBlockedException (cap, tso); - if (tso->what_next == ThreadKilled) { return; } + if (tso->what_next == ThreadKilled) { return tso; } // NB. Blackholing is *compulsory*, we must either do lazy // blackholing, or eager blackholing consistently. See Note hunk ./rts/ThreadPaused.c 209 frame = (StgClosure *)tso->sp; + // used to decide whether we need to globalise the TSO + tso_gen_no = Bdescr((P_)tso)->gen_no; + while (1) { // If we've already marked this frame, then stop here. if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) { hunk ./rts/ThreadPaused.c 288 } #endif + // .. we're about to write a TSO pointer into the BLACKHOLE, + // so we better not violate the global heap invariant: + if (Bdescr((P_)bh)->gen_no > 0 && tso_gen_no < global_gen_no) { + globalise(cap,(StgClosure**)&tso); + tso_gen_no = Bdescr((P_)tso)->gen_no; + } + // The payload of the BLACKHOLE points to the TSO ((StgInd *)bh)->indirectee = (StgClosure *)tso; write_barrier(); hunk ./rts/ThreadPaused.c 300 SET_INFO(bh,&stg_BLACKHOLE_info); - // .. and we need a write barrier, since we just mutated the closure: - recordClosureMutated(cap,bh); - // We pretend that bh has just been created. LDV_RECORD_CREATE(bh); hunk ./rts/ThreadPaused.c 345 } else { tso->flags &= ~TSO_SQUEEZED; } + + return tso; } hunk ./rts/ThreadPaused.h 12 #ifndef THREADPAUSED_H #define THREADPAUSED_H -RTS_PRIVATE void threadPaused ( Capability *cap, StgTSO * ); +RTS_PRIVATE StgTSO * threadPaused ( Capability *cap, StgTSO * ); #endif /* THREADPAUSED_H */ hunk ./rts/Threads.c 110 /* Link the new thread on the global thread list. */ - ACQUIRE_LOCK(&sched_mutex); - tso->id = next_thread_id++; // while we have the mutex - tso->global_link = g0->threads; - g0->threads = tso; - RELEASE_LOCK(&sched_mutex); + tso->id = atomic_inc(&next_thread_id) - 1; + tso->global_link = cap->r.rG0->threads; + cap->r.rG0->threads = tso; // ToDo: report the stack size in the event? traceEventCreateThread(cap, tso); hunk ./rts/Threads.c 161 prev = NULL; for (t = *queue; t != END_TSO_QUEUE; prev = t, t = t->_link) { + t = deRefTSO(t); if (t == tso) { if (prev) { setTSOLink(cap,prev,t->_link); hunk ./rts/Threads.c 231 if (tso->cap != cap) { MessageWakeup *msg; - msg = (MessageWakeup *)allocate(cap,sizeofW(MessageWakeup)); + msg = (MessageWakeup *)allocateInGen(cap,global_gen_ix, + sizeofW(MessageWakeup)); SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM); msg->tso = tso; sendMessage(cap, tso->cap, (Message*)msg); hunk ./rts/Threads.c 314 // the run queue when it receives the MSG_TRY_WAKEUP. tso->why_blocked = ThreadMigrating; tso->cap = to; + tso = globalise_TSO(from, tso); // after setting tso->cap tryWakeupThread(from, tso); } hunk ./rts/Threads.c 336 for (msg = bq->queue; msg != (MessageBlackHole*)END_TSO_QUEUE; msg = msg->link) { i = msg->header.info; - if (i != &stg_IND_info) { + if (i != &stg_STUB_MSG_BLACKHOLE_info) { ASSERT(i == &stg_MSG_BLACKHOLE_info); tryWakeupThread(cap,msg->tso); } hunk ./rts/Threads.c 350 // checking the owner field at the same time. bq->bh = 0; bq->queue = 0; bq->owner = 0; #endif - OVERWRITE_INFO(bq, &stg_IND_info); + OVERWRITE_INFO(bq, &stg_STUB_BLOCKING_QUEUE_info); } // If we update a closure that we know we BLACKHOLE'd, and the closure hunk ./rts/Threads.c 371 for (bq = tso->bq; bq != (StgBlockingQueue*)END_TSO_QUEUE; bq = next) { next = bq->link; - if (bq->header.info == &stg_IND_info) { + if (bq->header.info == &stg_STUB_BLOCKING_QUEUE_info) { // ToDo: could short it out right here, to avoid hunk ./rts/Threads.c 373 - // traversing this IND multiple times. + // traversing this STUB_BLOCKING_QUEUE multiple times. continue; } hunk ./rts/Threads.c 570 } debugBelch("other threads:\n"); - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) { + for (g = 0; g < total_generations; g++) { + for (t = all_generations[g].threads; t != END_TSO_QUEUE; t = next) { if (t->why_blocked != NotBlocked) { printThreadStatus(t); } hunk ./rts/Threads.c 575 - if (t->what_next == ThreadRelocated) { - next = t->_link; - } else { - next = t->global_link; - } + next = t->global_link; } } } hunk ./rts/Updates.cmm 51 /* ToDo: it might be a PAP, so we should check... */ TICK_UPD_CON_IN_NEW(sizeW_fromITBL(%GET_STD_INFO(updatee))); - updateWithIndirection(updatee, - R1, - jump %ENTRY_CODE(Sp(0))); + updateWithIndirectionToValue(updatee, + R1, + jump %ENTRY_CODE(Sp(0))); } hunk ./rts/Updates.cmm 80 // common case: it is still our BLACKHOLE if (v == CurrentTSO) { - updateWithIndirection(updatee, - R1, - jump %ENTRY_CODE(Sp(0))); + updateWithIndirectionToValue(updatee, + R1, + jump %ENTRY_CODE(Sp(0))); } // The other cases are all handled by the generic code hunk ./rts/Updates.h 12 #ifndef UPDATES_H #define UPDATES_H +#ifndef CMINUSMINUS +#include "sm/Globalise.h" +#endif + #ifndef CMINUSMINUS #include "BeginPrivate.h" #endif hunk ./rts/Updates.h 30 * * Note: * After all, we do *NOT* need to call LDV_RECORD_CREATE() for IND - * closures because they are inherently used. But, it corrupts - * the invariants that every closure keeps its creation time in the profiling - * field. So, we call LDV_RECORD_CREATE(). + * closures because they are inherently used. But, it corrupts the + * invariants that every closure keeps its creation time in the + * profiling field. So, we call LDV_RECORD_CREATE(). */ /* In the DEBUG case, we also zero out the slop of the old closure, hunk ./rts/Updates.h 50 */ #ifdef CMINUSMINUS -#define FILL_SLOP(p) \ +#define ZERO_SLOP(p,words) \ + W_ i; \ + i = 0; \ + for: \ + if (i < words) { \ + W_[p + WDS(i)] = 0; \ + i = i + 1; \ + goto for; \ + } + +#define FILL_THUNK_SLOP(p) \ W_ inf; \ W_ sz; \ hunk ./rts/Updates.h 63 - W_ i; \ inf = %GET_STD_INFO(p); \ hunk ./rts/Updates.h 64 - if (%INFO_TYPE(inf) != HALF_W_(BLACKHOLE)) { \ - if (%INFO_TYPE(inf) == HALF_W_(THUNK_SELECTOR)) { \ + if (TO_W_(%INFO_TYPE(inf)) != BLACKHOLE) { \ + if (TO_W_(%INFO_TYPE(inf)) == THUNK_SELECTOR) { \ sz = BYTES_TO_WDS(SIZEOF_StgSelector_NoThunkHdr); \ } else { \ hunk ./rts/Updates.h 68 - if (%INFO_TYPE(inf) == HALF_W_(AP_STACK)) { \ + if (TO_W_(%INFO_TYPE(inf)) == AP_STACK) { \ sz = StgAP_STACK_size(p) + BYTES_TO_WDS(SIZEOF_StgAP_STACK_NoThunkHdr); \ } else { \ hunk ./rts/Updates.h 71 - if (%INFO_TYPE(inf) == HALF_W_(AP)) { \ - sz = TO_W_(StgAP_n_args(p)) + BYTES_TO_WDS(SIZEOF_StgAP_NoThunkHdr); \ + if (TO_W_(%INFO_TYPE(inf)) == AP) { \ + sz = TO_W_(StgAP_n_args(p)) + BYTES_TO_WDS(SIZEOF_StgAP_NoThunkHdr); \ } else { \ sz = TO_W_(%INFO_PTRS(inf)) + TO_W_(%INFO_NPTRS(inf)); \ } \ hunk ./rts/Updates.h 78 } \ } \ - i = 0; \ - for: \ - if (i < sz) { \ - StgThunk_payload(p,i) = 0; \ - i = i + 1; \ - goto for; \ - } \ + ZERO_SLOP(p + SIZEOF_StgThunk, sz); \ } #else /* !CMINUSMINUS */ hunk ./rts/Updates.h 83 +#define ZERO_SLOP(p,words) \ + { \ + nat i; \ + for (i = 0; i < words; i++) { \ + *((StgPtr)p + i) = 0; \ + } \ + } + INLINE_HEADER void hunk ./rts/Updates.h 92 -FILL_SLOP(StgClosure *p) +FILL_THUNK_SLOP(StgClosure *p) { StgInfoTable *inf = get_itbl(p); hunk ./rts/Updates.h 95 - nat i, sz; + nat sz; switch (inf->type) { case BLACKHOLE: hunk ./rts/Updates.h 116 sz = inf->layout.payload.ptrs + inf->layout.payload.nptrs; break; } - for (i = 0; i < sz; i++) { - ((StgThunk *)p)->payload[i] = 0; - } + ZERO_SLOP((StgPtr)p + sizeofW(StgThunk), sz); no_slop: ; } hunk ./rts/Updates.h 126 #if !defined(DEBUG) || defined(THREADED_RTS) #define DEBUG_FILL_SLOP(p) /* do nothing */ #else -#define DEBUG_FILL_SLOP(p) FILL_SLOP(p) +#define DEBUG_FILL_SLOP(p) FILL_THUNK_SLOP(p) #endif /* We have two versions of this macro (sadly), one for use in C-- code, hunk ./rts/Updates.h 141 */ #ifdef CMINUSMINUS -#define updateWithIndirection(p1, p2, and_then) \ - W_ bd; \ +// NB. for young-generation updates we use stg_IND_direct_info as an +// optimisation - this kind of indirection assumes that the indirectee +// is a value, so can just return its payload rather than entering it. +// Hence the argument p2 here is required to be a value, not another +// indirection, BLACKHOLE, or thunk. +#define updateWithIndirectionToValue(p1, p2, and_then) \ + W_ bd, p3; \ \ DEBUG_FILL_SLOP(p1); \ LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1); \ hunk ./rts/Updates.h 151 - StgInd_indirectee(p1) = p2; \ - prim %write_barrier() []; \ - SET_INFO(p1, stg_BLACKHOLE_info); \ - LDV_RECORD_CREATE(p1); \ bd = Bdescr(p1); \ if (bdescr_gen_no(bd) != 0 :: bits16) { \ hunk ./rts/Updates.h 153 - recordMutableCap(p1, TO_W_(bdescr_gen_no(bd)), R1); \ + ("ptr" p3) = foreign "C" publish(MyCapability(), p2 "ptr"); \ + StgInd_indirectee(p1) = p3; \ + prim %write_barrier() []; \ + SET_INFO(p1, stg_BLACKHOLE_info); \ + LDV_RECORD_CREATE(p1); \ TICK_UPD_OLD_IND(); \ and_then; \ } else { \ hunk ./rts/Updates.h 161 + StgInd_indirectee(p1) = p2; \ + prim %write_barrier() []; \ + SET_INFO(p1, stg_IND_direct_info); \ + LDV_RECORD_CREATE(p1); \ TICK_UPD_NEW_IND(); \ and_then; \ } hunk ./rts/Updates.h 171 #else /* !CMINUSMINUS */ +// NB. unlike updateWithIndirectionToValue above, p2 is not required +// to be a value, and we use the more generic stg_IND_info. INLINE_HEADER void updateWithIndirection (Capability *cap, StgClosure *p1, StgClosure *p2) hunk ./rts/Updates.h 178 { bdescr *bd; + StgClosure *p3; ASSERT( (P_)p1 != (P_)p2 ); /* not necessarily true: ASSERT( !closure_IND(p1) ); */ hunk ./rts/Updates.h 185 /* occurs in RaiseAsync.c:raiseAsync() */ DEBUG_FILL_SLOP(p1); LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1); - ((StgInd *)p1)->indirectee = p2; - write_barrier(); - SET_INFO(p1, &stg_BLACKHOLE_info); - LDV_RECORD_CREATE(p1); bd = Bdescr((StgPtr)p1); if (bd->gen_no != 0) { hunk ./rts/Updates.h 187 - recordMutableCap(p1, cap, bd->gen_no); + p3 = publish(cap, p2); + ((StgInd *)p1)->indirectee = p3; + write_barrier(); + SET_INFO(p1, &stg_BLACKHOLE_info); + LDV_RECORD_CREATE(p1); TICK_UPD_OLD_IND(); } else { hunk ./rts/Updates.h 194 + ((StgInd *)p1)->indirectee = p2; + write_barrier(); + SET_INFO(p1, &stg_IND_info); + LDV_RECORD_CREATE(p1); TICK_UPD_NEW_IND(); } } hunk ./rts/Weak.c 34 } void -runAllCFinalizers(StgWeak *list) +runAllCFinalizers(void) { StgWeak *w; Task *task; hunk ./rts/Weak.c 38 + nat g; + generation *gen; task = myTask(); if (task != NULL) { hunk ./rts/Weak.c 46 task->running_finalizers = rtsTrue; } - for (w = list; w; w = w->link) { - StgArrWords *farr; - - farr = (StgArrWords *)UNTAG_CLOSURE(w->cfinalizer); - - if ((StgClosure *)farr != &stg_NO_FINALIZER_closure) - runCFinalizer((void *)farr->payload[0], - (void *)farr->payload[1], - (void *)farr->payload[2], - farr->payload[3]); + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; + + for (w = gen->weak_ptrs; w; w = w->link) { + StgArrWords *farr; + + farr = (StgArrWords *)UNTAG_CLOSURE(w->cfinalizer); + + if ((StgClosure *)farr != &stg_NO_FINALIZER_closure) + runCFinalizer((void *)farr->payload[0], + (void *)farr->payload[1], + (void *)farr->payload[2], + farr->payload[3]); + } } if (task != NULL) { hunk ./rts/Weak.h 20 extern StgWeak * weak_ptr_list; void runCFinalizer(void *fn, void *ptr, void *env, StgWord flag); -void runAllCFinalizers(StgWeak *w); +void runAllCFinalizers(void); void scheduleFinalizers(Capability *cap, StgWeak *w); void markWeakList(void); hunk ./rts/ghc.mk 69 cp $< $@ rts/dist/build/sm/Scav_thr.c : rts/sm/Scav.c | $$(dir $$@)/. cp $< $@ +rts/dist/build/sm/Evac_loc.c : rts/sm/Evac.c | $$(dir $$@)/. + cp $< $@ +rts/dist/build/sm/Scav_loc.c : rts/sm/Scav.c | $$(dir $$@)/. + cp $< $@ rts_H_FILES = $(wildcard includes/*.h) $(wildcard rts/*.h) hunk ./rts/ghc.mk 146 endif ifneq "$$(findstring thr, $1)" "" -rts_$1_EXTRA_C_SRCS = rts/dist/build/sm/Evac_thr.c rts/dist/build/sm/Scav_thr.c +rts_$1_EXTRA_C_SRCS = rts/dist/build/sm/Evac_thr.c \ + rts/dist/build/sm/Scav_thr.c \ + rts/dist/build/sm/Evac_loc.c \ + rts/dist/build/sm/Scav_loc.c endif $(call distdir-way-opts,rts,dist,$1) hunk ./rts/ghc.mk 382 # -O3 helps unroll some loops (especially in copy() with a constant argument). rts/sm/Evac_CC_OPTS += -funroll-loops rts/dist/build/sm/Evac_thr_HC_OPTS += -optc-funroll-loops +rts/dist/build/sm/Evac_loc_HC_OPTS += -optc-funroll-loops + +rts/sm/Globalise_CC_OPTS += -funroll-loops # These files are just copies of sm/Evac.c and sm/Scav.c respectively, # but compiled with -DPARALLEL_GC. hunk ./rts/ghc.mk 390 rts/dist/build/sm/Evac_thr_CC_OPTS += -DPARALLEL_GC -Irts/sm rts/dist/build/sm/Scav_thr_CC_OPTS += -DPARALLEL_GC -Irts/sm +rts/dist/build/sm/Evac_loc_CC_OPTS += -DLOCAL_GC -Irts/sm +rts/dist/build/sm/Scav_loc_CC_OPTS += -DLOCAL_GC -Irts/sm #----------------------------------------------------------------------------- # Add PAPI library if needed hunk ./rts/sm/BlockAlloc.c 361 bd = alloc_mega_group(1); bd->blocks = n; initGroup(bd); // we know the group will fit + IF_DEBUG(sanity,memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE)); rem = bd + n; rem->blocks = BLOCKS_PER_MBLOCK-n; initGroup(rem); // init the slop hunk ./rts/sm/Compact.c 21 #include "RtsUtils.h" #include "BlockAlloc.h" #include "GC.h" +#include "GCThread.h" #include "Compact.h" #include "Schedule.h" #include "Apply.h" hunk ./rts/sm/Compact.c 669 case IND: case IND_PERM: + case IND_LOCAL: thread(&((StgInd *)p)->indirectee); return p + sizeofW(StgInd); hunk ./rts/sm/Compact.c 927 } void -compact(StgClosure *static_objects) +compact(gc_thread *gct) { nat g, blocks; generation *gen; hunk ./rts/sm/Compact.c 935 // 1. thread the roots markCapabilities((evac_fn)thread_root, NULL); + markScheduler((evac_fn)thread_root, NULL); + // the weak pointer lists... hunk ./rts/sm/Compact.c 938 - if (weak_ptr_list != NULL) { - thread((void *)&weak_ptr_list); + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; + if (gen->weak_ptrs != NULL) { + thread((void *)&gen->weak_ptrs); + } } hunk ./rts/sm/Compact.c 944 - if (old_weak_ptr_list != NULL) { - thread((void *)&old_weak_ptr_list); // tmp + if (gct->old_weak_ptrs != NULL) { + thread((void *)&gct->old_weak_ptrs); } // mutable lists hunk ./rts/sm/Compact.c 953 bdescr *bd; StgPtr p; nat n; - for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { - for (p = bd->start; p < bd->free; p++) { - thread((StgClosure **)p); - } - } for (n = 0; n < n_capabilities; n++) { for (bd = capabilities[n].mut_lists[g]; bd != NULL; bd = bd->link) { hunk ./rts/sm/Compact.c 964 } // the global thread list - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - thread((void *)&generations[g].threads); + for (g = 0; g < total_generations; g++) { + thread((void *)&all_generations[g].threads); } // any threads resurrected during this GC hunk ./rts/sm/Compact.c 969 - thread((void *)&resurrected_threads); + thread((void *)&gct->resurrected_threads); + + // any threads resurrected during this GC + thread((void *)&gct->exception_threads); // the task list { hunk ./rts/sm/Compact.c 989 } // the static objects - thread_static(static_objects /* ToDo: ok? */); + thread_static(gct->scavenged_static_objects); // the stable pointer table threadStablePtrTable((evac_fn)thread_root, NULL); hunk ./rts/sm/Compact.c 998 markCAFs((evac_fn)thread_root, NULL); // 2. update forward ptrs - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen = &generations[g]; + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; debugTrace(DEBUG_gc, "update_fwd: %d", g); update_fwd(gen->blocks); hunk ./rts/sm/Compact.c 1004 update_fwd_large(gen->scavenged_large_objects); - if (g == RtsFlags.GcFlags.generations-1 && gen->old_blocks != NULL) { + if (g == total_generations-1 && gen->old_blocks != NULL) { debugTrace(DEBUG_gc, "update_fwd: %d (compact)", g); update_fwd_compact(gen->old_blocks); } hunk ./rts/sm/Compact.h 18 #define SM_COMPACT_H #include "BeginPrivate.h" +#include "GCThread.h" INLINE_HEADER void mark(StgPtr p, bdescr *bd) hunk ./rts/sm/Compact.h 50 return (*bitmap_word & bit_mask); } -void compact (StgClosure *static_objects); +void compact (gc_thread *); #include "EndPrivate.h" hunk ./rts/sm/Evac.c 21 #include "Storage.h" #include "GC.h" #include "GCThread.h" +#include "GCTDecl.h" #include "GCUtils.h" #include "Compact.h" #include "MarkStack.h" hunk ./rts/sm/Evac.c 33 StgWord64 whitehole_spin = 0; #endif -#if defined(THREADED_RTS) && !defined(PARALLEL_GC) -#define evacuate(p) evacuate1(p) -#define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p) +#if defined(THREADED_RTS) +#if defined(LOCAL_GC) +#define evacuate(p) evacuate_local(p) +#elif !defined(PARALLEL_GC) +#define evacuate(p) evacuate_seq(p) +// evacuate() is the safe one; evacuate1() can only be used for +// sequential GC. +#endif #endif #if !defined(PARALLEL_GC) hunk ./rts/sm/Evac.c 44 +#define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p) + #define copy_tag_nolock(p, info, src, size, stp, tag) \ copy_tag(p, info, src, size, stp, tag) #endif hunk ./rts/sm/Evac.c 62 -------------------------------------------------------------------------- */ STATIC_INLINE StgPtr -alloc_for_copy (nat size, generation *gen) +alloc_for_copy (nat size, nat gen_ix) { StgPtr to; gen_workspace *ws; hunk ./rts/sm/Evac.c 72 * evacuate to an older generation, adjust it here (see comment * by evacuate()). */ - if (gen < gct->evac_gen) { + if (gen_ix < gct->evac_gen_ix) { if (gct->eager_promotion) { hunk ./rts/sm/Evac.c 74 - gen = gct->evac_gen; + gen_ix = gct->evac_gen_ix; } else { gct->failed_to_evac = rtsTrue; } hunk ./rts/sm/Evac.c 80 } - ws = &gct->gens[gen->no]; - // this compiles to a single mem access to gen->abs_no only + ws = &gct->gens[gen_ix]; /* chain a new block onto the to-space for the destination gen if * necessary. hunk ./rts/sm/Evac.c 100 -------------------------------------------------------------------------- */ STATIC_INLINE GNUC_ATTR_HOT void -copy_tag(StgClosure **p, const StgInfoTable *info, - StgClosure *src, nat size, generation *gen, StgWord tag) +copy_closure(StgPtr from, StgPtr to, const StgInfoTable *info, nat size) { hunk ./rts/sm/Evac.c 102 - StgPtr to, from; nat i; hunk ./rts/sm/Evac.c 104 - to = alloc_for_copy(size,gen); - - from = (StgPtr)src; to[0] = (W_)info; hunk ./rts/sm/Evac.c 105 - for (i = 1; i < size; i++) { // unroll for small i + for (i = 1; i < size; i++) { // unroll for small size to[i] = from[i]; } hunk ./rts/sm/Evac.c 108 +} hunk ./rts/sm/Evac.c 110 -// if (to+size+2 < bd->start + BLOCK_SIZE_W) { -// __builtin_prefetch(to + size + 2, 1); -// } +STATIC_INLINE GNUC_ATTR_HOT void +copy_tag(StgClosure **p, const StgInfoTable *info, + StgClosure *src, nat size, nat gen_ix, StgWord tag) +{ + StgPtr to; + + to = alloc_for_copy(size,gen_ix); + copy_closure((StgPtr)src,to,info,size); #if defined(PARALLEL_GC) { hunk ./rts/sm/Evac.c 137 #ifdef PROFILING // We store the size of the just evacuated object in the LDV word so that // the profiler can guess the position of the next object later. - SET_EVACUAEE_FOR_LDV(from, size); + SET_EVACUAEE_FOR_LDV(src, size); #endif } hunk ./rts/sm/Evac.c 144 #if defined(PARALLEL_GC) STATIC_INLINE void copy_tag_nolock(StgClosure **p, const StgInfoTable *info, - StgClosure *src, nat size, generation *gen, StgWord tag) + StgClosure *src, nat size, nat gen_ix, StgWord tag) { hunk ./rts/sm/Evac.c 146 - StgPtr to, from; - nat i; - - to = alloc_for_copy(size,gen); + StgPtr to; hunk ./rts/sm/Evac.c 148 - from = (StgPtr)src; - to[0] = (W_)info; - for (i = 1; i < size; i++) { // unroll for small i - to[i] = from[i]; - } + to = alloc_for_copy(size,gen_ix); + copy_closure((StgPtr)src,to,info,size); // if somebody else reads the forwarding pointer, we better make // sure there's a closure at the end of it. hunk ./rts/sm/Evac.c 164 #ifdef PROFILING // We store the size of the just evacuated object in the LDV word so that // the profiler can guess the position of the next object later. - SET_EVACUAEE_FOR_LDV(from, size); + SET_EVACUAEE_FOR_LDV(src, size); #endif } #endif hunk ./rts/sm/Evac.c 175 */ static rtsBool copyPart(StgClosure **p, StgClosure *src, nat size_to_reserve, - nat size_to_copy, generation *gen) + nat size_to_copy, nat gen_ix) { hunk ./rts/sm/Evac.c 177 - StgPtr to, from; - nat i; + StgPtr to; StgWord info; #if defined(PARALLEL_GC) hunk ./rts/sm/Evac.c 198 info = (W_)src->header.info; #endif - to = alloc_for_copy(size_to_reserve, gen); + to = alloc_for_copy(size_to_reserve, gen_ix); hunk ./rts/sm/Evac.c 200 - from = (StgPtr)src; - to[0] = info; - for (i = 1; i < size_to_copy; i++) { // unroll for small i - to[i] = from[i]; - } + copy_closure((StgPtr)src,to,(const StgInfoTable*)info,size_to_copy); write_barrier(); src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to); hunk ./rts/sm/Evac.c 209 #ifdef PROFILING // We store the size of the just evacuated object in the LDV word so that // the profiler can guess the position of the next object later. - SET_EVACUAEE_FOR_LDV(from, size_to_reserve); + SET_EVACUAEE_FOR_LDV(src, size_to_reserve); // fill the slop if (size_to_reserve - size_to_copy > 0) LDV_FILL_SLOP(to + size_to_copy, (int)(size_to_reserve - size_to_copy)); hunk ./rts/sm/Evac.c 222 /* Copy wrappers that don't tag the closure after copying */ STATIC_INLINE GNUC_ATTR_HOT void copy(StgClosure **p, const StgInfoTable *info, - StgClosure *src, nat size, generation *gen) + StgClosure *src, nat size, nat gen_ix) { hunk ./rts/sm/Evac.c 224 - copy_tag(p,info,src,size,gen,0); + copy_tag(p,info,src,size,gen_ix,0); } /* ----------------------------------------------------------------------------- hunk ./rts/sm/Evac.c 243 { bdescr *bd; generation *gen, *new_gen; + nat gen_ix, new_gen_ix; gen_workspace *ws; bd = Bdescr(p); hunk ./rts/sm/Evac.c 247 + gen_ix = bd->gen_ix; gen = bd->gen; hunk ./rts/sm/Evac.c 249 - ACQUIRE_SPIN_LOCK(&gen->sync_large_objects); + ACQUIRE_SPIN_LOCK(&gen->sync); // already evacuated? if (bd->flags & BF_EVACUATED) { hunk ./rts/sm/Evac.c 256 /* Don't forget to set the gct->failed_to_evac flag if we didn't get * the desired destination (see comments in evacuate()). */ - if (gen < gct->evac_gen) { + if (gen_ix < gct->evac_gen_ix) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } hunk ./rts/sm/Evac.c 260 - RELEASE_SPIN_LOCK(&gen->sync_large_objects); + RELEASE_SPIN_LOCK(&gen->sync); return; } hunk ./rts/sm/Evac.c 276 /* link it on to the evacuated large object list of the destination gen */ - new_gen = bd->dest; - if (new_gen < gct->evac_gen) { + new_gen_ix = bd->dest_ix; + + // large mutable arrays get eagerly promoted, for now. + switch (get_itbl((StgClosure*)p)->type) { + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + new_gen_ix = stg_max(new_gen_ix,global_gen_ix); + break; + } + + if (new_gen_ix < gct->evac_gen_ix) { if (gct->eager_promotion) { hunk ./rts/sm/Evac.c 290 - new_gen = gct->evac_gen; + new_gen_ix = gct->evac_gen_ix; } else { gct->failed_to_evac = rtsTrue; } hunk ./rts/sm/Evac.c 296 } - ws = &gct->gens[new_gen->no]; + ws = &gct->gens[new_gen_ix]; + new_gen = &all_generations[new_gen_ix]; bd->flags |= BF_EVACUATED; initBdescr(bd, new_gen, new_gen->to); hunk ./rts/sm/Evac.c 308 // them straight on the scavenged_large_objects list. if (bd->flags & BF_PINNED) { ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS); - if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync_large_objects); } + if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); } dbl_link_onto(bd, &new_gen->scavenged_large_objects); new_gen->n_scavenged_large_blocks += bd->blocks; hunk ./rts/sm/Evac.c 311 - if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync_large_objects); } + if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync); } } else { bd->link = ws->todo_large_objects; ws->todo_large_objects = bd; hunk ./rts/sm/Evac.c 317 } - RELEASE_SPIN_LOCK(&gen->sync_large_objects); + RELEASE_SPIN_LOCK(&gen->sync); } /* ---------------------------------------------------------------------------- hunk ./rts/sm/Evac.c 352 as good as it's going to get. We pass the argument to evacuate() in a register using the 'regparm' attribute (see the prototype for evacuate() near the top of this file). - - Changing evacuate() to take an (StgClosure **) rather than - returning the new pointer seems attractive, because we can avoid - writing back the pointer when it hasn't changed (eg. for a static - object, or an object in a generation > N). However, I tried it and - it doesn't help. One reason is that the (StgClosure **) pointer - gets spilled to the stack inside evacuate(), resulting in far more - extra reads/writes than we save. ------------------------------------------------------------------------- */ REGPARM1 GNUC_ATTR_HOT void hunk ./rts/sm/Evac.c 358 evacuate(StgClosure **p) { bdescr *bd = NULL; - generation *gen; + nat gen_ix; StgClosure *q; const StgInfoTable *info; StgWord tag; hunk ./rts/sm/Evac.c 370 tag = GET_CLOSURE_TAG(q); q = UNTAG_CLOSURE(q); +loop2: + ASSERTM(LOOKS_LIKE_CLOSURE_PTR(q), "invalid closure, info=%p", q->header.info); if (!HEAP_ALLOCED_GC(q)) { hunk ./rts/sm/Evac.c 376 +#ifdef LOCAL_GC + return; +#else if (!major_gc) return; info = get_itbl(q); hunk ./rts/sm/Evac.c 470 default: barf("evacuate(static): strange closure type %d", (int)(info->type)); } +#endif /* !LOCAL_GC */ } bd = Bdescr((P_)q); hunk ./rts/sm/Evac.c 475 +#if defined(LOCAL_GC) + // make sure this isn't a pointer into the wrong local heap + ASSERT(bd->gen_no > gct->collect_gen || bd->gen_ix == gct->index); +#endif + if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED)) != 0) { // pointer into to-space: just return it. It might be a pointer hunk ./rts/sm/Evac.c 492 // We aren't copying this object, so we have to check // whether it is already in the target generation. (this is // the write barrier). - if (bd->gen < gct->evac_gen) { + if (bd->gen_ix < gct->evac_gen_ix) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } hunk ./rts/sm/Evac.c 523 return; } - gen = bd->dest; - info = q->header.info; if (IS_FORWARDING_PTR(info)) { hunk ./rts/sm/Evac.c 526 - /* Already evacuated, just return the forwarding address. - * HOWEVER: if the requested destination generation (gct->evac_gen) is - * older than the actual generation (because the object was - * already evacuated to a younger generation) then we have to - * set the gct->failed_to_evac flag to indicate that we couldn't - * manage to promote the object to the desired generation. - */ - /* - * Optimisation: the check is fairly expensive, but we can often - * shortcut it if either the required generation is 0, or the - * current object (the EVACUATED) is in a high enough generation. - * We know that an EVACUATED always points to an object in the - * same or an older generation. gen is the lowest generation that the - * current object would be evacuated to, so we only do the full - * check if gen is too low. - */ - StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info); - *p = TAG_CLOSURE(tag,e); - if (gen < gct->evac_gen) { // optimisation - if (Bdescr((P_)e)->gen < gct->evac_gen) { - gct->failed_to_evac = rtsTrue; - TICK_GC_FAILED_PROMOTION(); - } - } - return; + q = (StgClosure*)UN_FORWARDING_PTR(info); + *p = TAG_CLOSURE(tag,q); + // we must loop here: the forwarding pointer might be into + // from-space if this is a globalised closure. + goto loop2; // loop2 not loop: keep the previous tag } hunk ./rts/sm/Evac.c 533 + gen_ix = bd->dest_ix; + switch (INFO_PTR_TO_STRUCT(info)->type) { case WHITEHOLE: hunk ./rts/sm/Evac.c 540 goto loop; - case MUT_VAR_CLEAN: - case MUT_VAR_DIRTY: - case MVAR_CLEAN: - case MVAR_DIRTY: - copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen); - return; - // For ints and chars of low value, save space by replacing references to // these with closures with references to common, shared ones in the RTS. // hunk ./rts/sm/Evac.c 549 case CONSTR_0_1: { #if defined(__PIC__) && defined(mingw32_HOST_OS) - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_ix,tag); #else StgWord w = (StgWord)q->payload[0]; if (info == Czh_con_info && hunk ./rts/sm/Evac.c 566 ); } else { - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_ix,tag); } #endif return; hunk ./rts/sm/Evac.c 575 case FUN_0_1: case FUN_1_0: case CONSTR_1_0: - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_ix,tag); return; case THUNK_1_0: hunk ./rts/sm/Evac.c 580 case THUNK_0_1: - copy(p,info,q,sizeofW(StgThunk)+1,gen); + copy(p,info,q,sizeofW(StgThunk)+1,gen_ix); return; case THUNK_1_1: hunk ./rts/sm/Evac.c 586 case THUNK_2_0: case THUNK_0_2: -#ifdef NO_PROMOTE_THUNKS -#error bitrotted -#endif - copy(p,info,q,sizeofW(StgThunk)+2,gen); - return; + copy(p,info,q,sizeofW(StgThunk)+2,gen_ix); + return; case FUN_1_1: case FUN_2_0: hunk ./rts/sm/Evac.c 594 case FUN_0_2: case CONSTR_1_1: case CONSTR_2_0: - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen,tag); - return; - case CONSTR_0_2: hunk ./rts/sm/Evac.c 595 - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen_ix,tag); return; case THUNK: hunk ./rts/sm/Evac.c 599 - copy(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen); + copy(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_ix); return; case FUN: hunk ./rts/sm/Evac.c 604 case IND_PERM: + case IND_LOCAL: case CONSTR: hunk ./rts/sm/Evac.c 606 - copy_tag_nolock(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen,tag); + copy_tag_nolock(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_ix,tag); return; case BLACKHOLE: hunk ./rts/sm/Evac.c 624 || i == &stg_WHITEHOLE_info || i == &stg_BLOCKING_QUEUE_CLEAN_info || i == &stg_BLOCKING_QUEUE_DIRTY_info) { - copy(p,info,q,sizeofW(StgInd),gen); + copy(p,info,q,sizeofW(StgInd),gen_ix); return; } hunk ./rts/sm/Evac.c 627 - ASSERT(i != &stg_IND_info); + ASSERT(i != &stg_STUB_BLOCKING_QUEUE_info); } q = r; *p = r; hunk ./rts/sm/Evac.c 638 case WEAK: case PRIM: case MUT_PRIM: - copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen); + copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),global_gen_ix); return; case BCO: hunk ./rts/sm/Evac.c 642 - copy(p,info,q,bco_sizeW((StgBCO *)q),gen); + copy(p,info,q,bco_sizeW((StgBCO *)q),global_gen_ix); return; case THUNK_SELECTOR: hunk ./rts/sm/Evac.c 650 return; case IND: - // follow chains of indirections, don't evacuate them - q = ((StgInd*)q)->indirectee; - *p = q; - goto loop; - - case RET_BCO: - case RET_SMALL: - case RET_BIG: - case RET_DYN: - case UPDATE_FRAME: - case STOP_FRAME: - case CATCH_FRAME: - case CATCH_STM_FRAME: - case CATCH_RETRY_FRAME: - case ATOMICALLY_FRAME: - // shouldn't see these - barf("evacuate: stack frame at %p\n", q); + // follow chains of indirections, don't evacuate them + q = ((StgInd*)q)->indirectee; + *p = q; + goto loop; case PAP: hunk ./rts/sm/Evac.c 656 - copy(p,info,q,pap_sizeW((StgPAP*)q),gen); + copy(p,info,q,pap_sizeW((StgPAP*)q),gen_ix); return; case AP: hunk ./rts/sm/Evac.c 660 - copy(p,info,q,ap_sizeW((StgAP*)q),gen); + copy(p,info,q,ap_sizeW((StgAP*)q),gen_ix); return; case AP_STACK: hunk ./rts/sm/Evac.c 664 - copy(p,info,q,ap_stack_sizeW((StgAP_STACK*)q),gen); + copy(p,info,q,ap_stack_sizeW((StgAP_STACK*)q),gen_ix); return; case ARR_WORDS: hunk ./rts/sm/Evac.c 669 // just copy the block - copy(p,info,q,arr_words_sizeW((StgArrWords *)q),gen); + copy(p,info,q,arr_words_sizeW((StgArrWords *)q),global_gen_ix); + return; + + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + case MVAR_CLEAN: + case MVAR_DIRTY: + copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),global_gen_ix); return; case MUT_ARR_PTRS_CLEAN: hunk ./rts/sm/Evac.c 684 case MUT_ARR_PTRS_FROZEN: case MUT_ARR_PTRS_FROZEN0: // just copy the block - copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),gen); + copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),global_gen_ix); return; case TSO: hunk ./rts/sm/Evac.c 707 rtsBool mine; mine = copyPart(p,(StgClosure *)tso, tso_sizeW(tso), - sizeofW(StgTSO), gen); + sizeofW(StgTSO), gen_ix); if (mine) { new_tso = (StgTSO *)*p; move_TSO(tso, new_tso); hunk ./rts/sm/Evac.c 721 } case TREC_CHUNK: - copy(p,info,q,sizeofW(StgTRecChunk),gen); + copy(p,info,q,sizeofW(StgTRecChunk),global_gen_ix); return; default: hunk ./rts/sm/Evac.c 825 unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p); *q = (StgClosure *)p; // shortcut, behave as for: if (evac) evacuate(q); - if (evac && bd->gen < gct->evac_gen) { + if (evac && bd->gen_ix < gct->evac_gen_ix) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } hunk ./rts/sm/Evac.c 947 info = INFO_PTR_TO_STRUCT(info_ptr); switch (info->type) { case IND: + case IND_LOCAL: case IND_PERM: case IND_STATIC: val = ((StgInd *)val)->indirectee; hunk ./rts/sm/Evac.c 981 } case IND: + case IND_LOCAL: case IND_PERM: case IND_STATIC: // Again, we might need to untag a constructor. hunk ./rts/sm/Evac.c 1008 || i == &stg_BLOCKING_QUEUE_DIRTY_info) { goto bale_out; } - ASSERT(i != &stg_IND_info); + ASSERT(i != &stg_STUB_BLOCKING_QUEUE_info); } selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee ); hunk ./rts/sm/Evac.c 1066 // check whether it was updated in the meantime. *q = (StgClosure *)p; if (evac) { - copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest); + copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest_ix); } unchain_thunk_selectors(prev_thunk_selector, *q); return; hunk ./rts/sm/Evac.h 35 #define REGPARM1 #endif -REGPARM1 void evacuate (StgClosure **p); -REGPARM1 void evacuate1 (StgClosure **p); +REGPARM1 void evacuate (StgClosure **p); +REGPARM1 void evacuate_seq (StgClosure **p); +REGPARM1 void evacuate_local (StgClosure **p); + +REGPARM1 void evacuate_local_only (StgClosure **p); extern lnat thunk_selector_depth; hunk ./rts/sm/GC.c 43 #include "GC.h" #include "GCThread.h" +#include "GCTDecl.h" #include "Compact.h" #include "Evac.h" #include "Scav.h" hunk ./rts/sm/GC.c 52 #include "MarkWeak.h" #include "Sparks.h" #include "Sweep.h" +#include "Globalise.h" #include // for memset() #include hunk ./rts/sm/GC.c 101 * flag) is when we're collecting all generations. We only attempt to * deal with static objects and GC CAFs when doing a major GC. */ -nat N; rtsBool major_gc; hunk ./rts/sm/GC.c 103 +rtsBool work_stealing; + +nat next_gc_gen; + +// Number of threads running in *this* GC +nat n_gc_threads; + /* Data used for allocation area sizing. */ static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC hunk ./rts/sm/GC.c 130 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]; #endif -// Number of threads running in *this* GC. Affects how many -// step->todos[] lists we have to look in to find work. -nat n_gc_threads; - -// For stats: -long copied; // *words* copied & scavenged during this GC +// The number of currently active GC threads; use inc_running()/dec_running() +static volatile StgWord gc_running_threads; hunk ./rts/sm/GC.c 133 -rtsBool work_stealing; +#if defined(THREADED_RTS) +static Mutex gc_local_mutex; +#endif DECLARE_GCT hunk ./rts/sm/GC.c 143 Static function declarations -------------------------------------------------------------------------- */ -static void mark_root (void *user, StgClosure **root); -static void zero_static_object_list (StgClosure* first_static); -static nat initialise_N (rtsBool force_major_gc); -static void init_collected_gen (nat g, nat threads); -static void init_uncollected_gen (nat g, nat threads); -static void init_gc_thread (gc_thread *t); -static void resize_generations (void); -static void resize_nursery (void); -static void start_gc_threads (void); -static void scavenge_until_all_done (void); -static StgWord inc_running (void); -static StgWord dec_running (void); -static void wakeup_gc_threads (nat n_threads, nat me); -static void shutdown_gc_threads (nat n_threads, nat me); +static void mark_root (void *user, StgClosure **root); +static void zero_static_object_list (StgClosure* first_static); +static nat determine_collect_gen (void); +static void stash_mut_list (Capability *cap, nat gen_no); +static void prepare_collected_gen (generation *gen); +static void prepare_uncollected_gen (nat gc_type, generation *gen); +static void init_gc_thread (gc_thread *); +static void prepare_gc_thread (void); +static void resize_generations (void); +static void resize_nursery (lnat copied, nat N); +static void scavenge_until_all_done (void); +static StgWord inc_running (void); +static StgWord dec_running (void); +static void wakeup_gc_threads (nat me, nat N, nat gc_type); +static void shutdown_gc_threads (nat me); +static void collect_gct_blocks (void); +static lnat count_part_lists (nat start_gen); #if 0 && defined(DEBUG) static void gcCAFs (void); hunk ./rts/sm/GC.c 180 -------------------------------------------------------------------------- */ void -GarbageCollect (rtsBool force_major_gc, +GarbageCollect (nat N, // generation to collect nat gc_type USED_IF_THREADS, Capability *cap) { hunk ./rts/sm/GC.c 186 bdescr *bd; generation *gen; - lnat live, allocated, max_copied, avg_copied, slop; + lnat live, allocated, copied, max_copied, avg_copied, slop; gc_thread *saved_gct; hunk ./rts/sm/GC.c 188 - nat g, t, n; + nat g, n; // necessary if we stole a callee-saves register for gct: saved_gct = gct; hunk ./rts/sm/GC.c 197 CostCentreStack *prev_CCS; #endif - ACQUIRE_SM_LOCK; + if (gc_type != GC_LOCAL) { ACQUIRE_SM_LOCK; } #if defined(RTS_USER_SIGNALS) hunk ./rts/sm/GC.c 200 - if (RtsFlags.MiscFlags.install_signal_handlers) { + if (RtsFlags.MiscFlags.install_signal_handlers && + gc_type != GC_LOCAL) { // block signals blockUserSignals(); } hunk ./rts/sm/GC.c 210 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord)); // otherwise adjust the padding in gen_workspace. - // tell the stats department that we've started a GC - stat_startGC(); + SET_GCT(gc_threads[cap->no]); hunk ./rts/sm/GC.c 212 - // tell the STM to discard any cached closures it's hoping to re-use - stmPreGCHook(); + // tell the stats department that we've started a GC + stat_startGC(gct); // lock the StablePtr table hunk ./rts/sm/GC.c 216 - stablePtrPreGC(); + if (gc_type != GC_LOCAL) stablePtrPreGC(); #ifdef DEBUG hunk ./rts/sm/GC.c 219 + // these are stats only; we could make them thread-local but it + // doesn't matter if they aren't accurate. mutlist_MUTVARS = 0; mutlist_MUTARRS = 0; hunk ./rts/sm/GC.c 223 - mutlist_OTHERS = 0; + mutlist_OTHERS = 0; #endif // attribute any costs to CCS_GC hunk ./rts/sm/GC.c 232 CCCS = CCS_GC; #endif - /* Approximate how much we allocated. - * Todo: only when generating stats? - */ - allocated = calcAllocated(); + // Approximate how much we allocated since the last GC. + if (gc_type == GC_LOCAL) { + allocated = calcAllocatedCap(cap); + } else { + allocated = calcAllocated(); + } /* Figure out which generation to collect */ hunk ./rts/sm/GC.c 241 - n = initialise_N(force_major_gc); + if (gc_type == GC_LOCAL) { + ASSERT(N == 0); + } + gct->collect_gen = N; + + major_gc = (N == RtsFlags.GcFlags.generations-1); + gct->gc_type = gc_type; #if defined(THREADED_RTS) hunk ./rts/sm/GC.c 250 - work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled && - N >= RtsFlags.ParFlags.parGcLoadBalancingGen; + if (gc_type == GC_LOCAL) { + work_stealing = rtsFalse; + } else { + work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled && + N >= RtsFlags.ParFlags.parGcLoadBalancingGen; + } // It's not always a good idea to do load balancing in parallel // GC. In particular, for a parallel program we don't want to // lose locality by moving cached data into another CPU's cache hunk ./rts/sm/GC.c 267 // a flag. #endif - /* Start threads, so they can be spinning up while we finish initialisation. - */ - start_gc_threads(); + gc_running_threads = 0; #if defined(THREADED_RTS) hunk ./rts/sm/GC.c 270 - /* How many threads will be participating in this GC? - * We don't try to parallelise minor GCs (unless the user asks for - * it with +RTS -gn0), or mark/compact/sweep GC. - */ - if (gc_type == PENDING_GC_PAR) { + // How many threads will be participating in this GC? + if (gc_type == GC_PAR) { n_gc_threads = RtsFlags.ParFlags.nNodes; } else { n_gc_threads = 1; hunk ./rts/sm/GC.c 280 n_gc_threads = 1; #endif - debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)", - N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads); + debugTrace(DEBUG_gc, "GC (%s, gen %d): %ld MB in use, using %d thread(s)", + gc_type==GC_LOCAL ? "GC_LOCAL" : + gc_type==GC_PAR ? "GC_PAR" : "GC_SEQ", + N, mblocks_allocated, n_gc_threads); #ifdef RTS_GTK_FRONTPANEL if (RtsFlags.GcFlags.frontpanel) { hunk ./rts/sm/GC.c 293 #ifdef DEBUG // check for memory leaks if DEBUG is on - memInventory(DEBUG_gc); + if (gc_type != GC_LOCAL || n_capabilities == 1) { + memInventory(DEBUG_gc); + } #endif hunk ./rts/sm/GC.c 298 + // paranoia until I figure out how much of the following code can be + // run concurrently... + if (gc_type == GC_LOCAL) { + ACQUIRE_LOCK(&gc_local_mutex); + } + // check sanity *before* GC hunk ./rts/sm/GC.c 305 - IF_DEBUG(sanity, checkSanity(rtsTrue)); + IF_DEBUG(sanity, checkSanity (gc_type == GC_LOCAL && n_capabilities != 1, + rtsFalse /* before GC */, + major_gc, + cap->no)); hunk ./rts/sm/GC.c 310 - // Initialise all our gc_thread structures - for (t = 0; t < n_gc_threads; t++) { - init_gc_thread(gc_threads[t]); + // Prepare the young generations: + if (gc_type == GC_LOCAL) { + prepare_collected_gen(&all_generations[gct->index]); + } else { + for (n = 0; n < n_capabilities; n++) { + prepare_collected_gen(&all_generations[n]); + } } hunk ./rts/sm/GC.c 318 - - // Initialise all the generations/steps that we're collecting. - for (g = 0; g <= N; g++) { - init_collected_gen(g,n_gc_threads); + // Prepare the generatinos we're collecting, 1..N + for (g = 1; g <= N; g++) { + prepare_collected_gen(&old_generations[g]); } hunk ./rts/sm/GC.c 322 - - // Initialise all the generations/steps that we're *not* collecting. + // Prepare the generations/steps that we're *not* collecting. for (g = N+1; g < RtsFlags.GcFlags.generations; g++) { hunk ./rts/sm/GC.c 324 - init_uncollected_gen(g,n_gc_threads); + prepare_uncollected_gen(gc_type, &old_generations[g]); } hunk ./rts/sm/GC.c 327 + // Prepare the workspaces attached to this gc_thread + prepare_gc_thread(); + /* Allocate a mark stack if we're doing a major collection. */ if (major_gc && oldest_gen->mark) { hunk ./rts/sm/GC.c 344 mark_sp = NULL; } - // this is the main thread -#ifdef THREADED_RTS - if (n_gc_threads == 1) { - SET_GCT(gc_threads[0]); - } else { - SET_GCT(gc_threads[cap->no]); - } -#else -SET_GCT(gc_threads[0]); -#endif - /* ----------------------------------------------------------------------- * follow all the roots that we know about: */ hunk ./rts/sm/GC.c 353 // NB. do this after the mutable lists have been saved above, otherwise // the other GC threads will be writing into the old mutable lists. inc_running(); - wakeup_gc_threads(n_gc_threads, gct->thread_index); - - // Mutable lists from each generation > N - // we want to *scavenge* these roots, not evacuate them: they're not - // going to move in this GC. - // Also do them in reverse generation order, for the usual reason: - // namely to reduce the likelihood of spurious old->new pointers. - // - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { -#if defined(THREADED_RTS) - if (n_gc_threads > 1) { - scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]); - } else { - scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]); - } -#else - scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]); -#endif - freeChain_sync(generations[g].saved_mut_list); - generations[g].saved_mut_list = NULL; - - } - + wakeup_gc_threads(gct->index, N, gc_type); + // scavenge the capability-private mutable lists. This isn't part // of markSomeCapabilities() because markSomeCapabilities() can only // call back into the GC via mark_root() (due to the gct register hunk ./rts/sm/GC.c 359 // variable). - if (n_gc_threads == 1) { + switch (gc_type) { + case GC_SEQ: for (n = 0; n < n_capabilities; n++) { hunk ./rts/sm/GC.c 362 -#if defined(THREADED_RTS) - scavenge_capability_mut_Lists1(&capabilities[n]); -#else scavenge_capability_mut_lists(&capabilities[n]); hunk ./rts/sm/GC.c 363 -#endif } hunk ./rts/sm/GC.c 364 - } else { - scavenge_capability_mut_lists(&capabilities[gct->thread_index]); + break; +#ifdef THREADED_RTS + case GC_LOCAL: + scavenge_capability_mut_lists_local(gct->cap); + break; + case GC_PAR: + scavenge_capability_mut_lists_par(gct->cap); + break; +#endif } // follow roots from the CAF list (used by GHCi) hunk ./rts/sm/GC.c 376 - gct->evac_gen = 0; + gct->evac_gen_ix = 0; markCAFs(mark_root, gct); hunk ./rts/sm/GC.c 379 - // follow all the roots that the application knows about. - gct->evac_gen = 0; - markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads, - rtsTrue/*prune sparks*/); - #if defined(RTS_USER_SIGNALS) // mark the signal handlers (signals should be already blocked) markSignalHandlers(mark_root, gct); hunk ./rts/sm/GC.c 384 #endif + // follow all the roots that the application knows about. + gct->evac_gen_ix = 0; + if (gc_type == GC_SEQ) { + for (n = 0; n < n_capabilities; n++) { + markCapability(mark_root, gct, &capabilities[n], + rtsTrue/*prune sparks*/); + } + } else { + markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/); + } + + markScheduler(mark_root, gct); + // Mark the weak pointer list, and prepare to detect dead weak pointers. hunk ./rts/sm/GC.c 398 - markWeakPtrList(); initWeakForGC(); hunk ./rts/sm/GC.c 399 + markWeakPtrList(); // Mark the stable pointer table. hunk ./rts/sm/GC.c 402 - markStablePtrTable(mark_root, gct); + if (gc_type != GC_LOCAL) markStablePtrTable(mark_root, gct); /* ------------------------------------------------------------------------- * Repeatedly scavenge all the areas we know about until there's no hunk ./rts/sm/GC.c 425 break; } - shutdown_gc_threads(n_gc_threads, gct->thread_index); + if (gc_type == GC_PAR) { + shutdown_gc_threads(gct->index); + } // Now see which stable names are still alive. hunk ./rts/sm/GC.c 430 - gcStablePtrTable(); + if (gc_type != GC_LOCAL) gcStablePtrTable(); #ifdef THREADED_RTS if (n_gc_threads == 1) { hunk ./rts/sm/GC.c 438 pruneSparkQueue(&capabilities[n]); } } else { - pruneSparkQueue(&capabilities[gct->thread_index]); + pruneSparkQueue(gct->cap); } #endif hunk ./rts/sm/GC.c 462 } } - // For each workspace, in each thread, move the copied blocks to the step - { - gc_thread *thr; - gen_workspace *ws; - bdescr *prev, *next; - - for (t = 0; t < n_gc_threads; t++) { - thr = gc_threads[t]; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - ws = &thr->gens[g]; - - // Push the final block - if (ws->todo_bd) { - push_scanned_block(ws->todo_bd, ws); - } - - ASSERT(gct->scan_bd == NULL); - ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); - - prev = NULL; - for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { - ws->gen->n_words += bd->free - bd->start; - prev = bd; - } - if (prev != NULL) { - prev->link = ws->gen->blocks; - ws->gen->blocks = ws->scavd_list; - } - ws->gen->n_blocks += ws->n_scavd_blocks; - } - } - - // Add all the partial blocks *after* we've added all the full - // blocks. This is so that we can grab the partial blocks back - // again and try to fill them up in the next GC. - for (t = 0; t < n_gc_threads; t++) { - thr = gc_threads[t]; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - ws = &thr->gens[g]; - - prev = NULL; - for (bd = ws->part_list; bd != NULL; bd = next) { - next = bd->link; - if (bd->free == bd->start) { - if (prev == NULL) { - ws->part_list = next; - } else { - prev->link = next; - } - freeGroup(bd); - ws->n_part_blocks--; - } else { - ws->gen->n_words += bd->free - bd->start; - prev = bd; - } - } - if (prev != NULL) { - prev->link = ws->gen->blocks; - ws->gen->blocks = ws->part_list; - } - ws->gen->n_blocks += ws->n_part_blocks; - - ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks); - ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words); - } - } - } - // Finally: compact or sweep the oldest generation. if (major_gc && oldest_gen->mark) { if (oldest_gen->compact) hunk ./rts/sm/GC.c 465 - compact(gct->scavenged_static_objects); + compact(gct); else sweep(oldest_gen); } hunk ./rts/sm/GC.c 477 avg_copied = 0; { nat i; - for (i=0; i < n_gc_threads; i++) { - if (n_gc_threads > 1) { + if (n_gc_threads > 1) { + for (i=0; i < n_gc_threads; i++) { debugTrace(DEBUG_gc,"thread %d:", i); debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_)); debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_)); hunk ./rts/sm/GC.c 485 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work); debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work); debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work); + copied += gc_threads[i]->copied; + max_copied = stg_max(gc_threads[i]->copied, max_copied); } hunk ./rts/sm/GC.c 488 - copied += gc_threads[i]->copied; - max_copied = stg_max(gc_threads[i]->copied, max_copied); - } - if (n_gc_threads == 1) { + avg_copied = copied; + } else { + copied = gct->copied; max_copied = 0; avg_copied = 0; hunk ./rts/sm/GC.c 493 - } else { - avg_copied = copied; } } hunk ./rts/sm/GC.c 496 - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (n = 0; n < total_generations; n++) { + + gen = &all_generations[n]; + g = gen->no; + + // someone else's local generation? + if (gc_type == GC_LOCAL && isNonLocalGen(gen)) + continue; if (g == N) { hunk ./rts/sm/GC.c 506 - generations[g].collections++; // for stats - if (n_gc_threads > 1) generations[g].par_collections++; + gen->collections++; // for stats + if (n_gc_threads > 1) gen->par_collections++; } // Count the mutable list as bytes "copied" for the purposes of hunk ./rts/sm/GC.c 514 // stats. Every mutable list is copied during every GC. if (g > 0) { nat mut_list_size = 0; - for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { - mut_list_size += bd->free - bd->start; - } for (n = 0; n < n_capabilities; n++) { hunk ./rts/sm/GC.c 515 - for (bd = capabilities[n].mut_lists[g]; - bd != NULL; bd = bd->link) { - mut_list_size += bd->free - bd->start; - } + mut_list_size += countOccupied(capabilities[n].mut_lists[g]); } copied += mut_list_size; hunk ./rts/sm/GC.c 526 } bdescr *next, *prev; - gen = &generations[g]; // for generations we collected... if (g <= N) { hunk ./rts/sm/GC.c 582 ASSERT(countBlocks(gen->blocks) == gen->n_blocks); ASSERT(countOccupied(gen->blocks) == gen->n_words); } - else // not copacted + else // not compacted { freeChain(gen->old_blocks); } hunk ./rts/sm/GC.c 599 gen->large_objects = gen->scavenged_large_objects; gen->n_large_blocks = gen->n_scavenged_large_blocks; gen->n_new_large_blocks = 0; - ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); } else // for generations > N { hunk ./rts/sm/GC.c 613 // add the new blocks we promoted during this GC gen->n_large_blocks += gen->n_scavenged_large_blocks; - ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); } hunk ./rts/sm/GC.c 614 + + ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); + + gen->scavenged_large_objects = NULL; + gen->n_scavenged_large_blocks = 0; } // for all generations // update the max size of older generations after a major GC hunk ./rts/sm/GC.c 626 // Calculate the amount of live data for stats. live = calcLiveWords(); + // add in the partial blocks in the gen_workspaces, but ignore gen 0 + // if this is a local GC (we can't count another capability's part_list) + live += count_part_lists( (gc_type == GC_LOCAL) ? n_capabilities : 0 ); // Free the small objects allocated via allocate(), since this will // all have been copied into G0S1 now. hunk ./rts/sm/GC.c 635 alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; // Start a new pinned_object_block - for (n = 0; n < n_capabilities; n++) { - capabilities[n].pinned_object_block = NULL; - } - - // Free the mark stack. - if (mark_stack_top_bd != NULL) { - debugTrace(DEBUG_gc, "mark stack: %d blocks", - countBlocks(mark_stack_top_bd)); - freeChain(mark_stack_top_bd); + if (gc_type == GC_LOCAL) { + cap->pinned_object_block = NULL; + } else { + for (n = 0; n < n_capabilities; n++) { + capabilities[n].pinned_object_block = NULL; + } } hunk ./rts/sm/GC.c 643 - // Free any bitmaps. - for (g = 0; g <= N; g++) { - gen = &generations[g]; - if (gen->bitmap != NULL) { - freeGroup(gen->bitmap); - gen->bitmap = NULL; + if (gc_type != GC_LOCAL) { + // Free the mark stack. + if (mark_stack_top_bd != NULL) { + debugTrace(DEBUG_gc, "mark stack: %d blocks", + countBlocks(mark_stack_top_bd)); + freeChain(mark_stack_top_bd); + } + // Free any bitmaps. + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; + if (gen->bitmap != NULL) { + freeGroup(gen->bitmap); + gen->bitmap = NULL; + } } } hunk ./rts/sm/GC.c 660 - resize_nursery(); + // Resize the nursery if necessary + if (gc_type != GC_LOCAL) { + // for local GC we don't know how big the nursery was supposed + // to be, so just leave it as is. It might be a bit bigger than + // before due to adding new large blocks and/or new blocks in + // allocate(), but we'll resize at the next major GC. + resize_nursery(copied,N); + } hunk ./rts/sm/GC.c 669 - // mark the garbage collected CAFs as dead -#if 0 && defined(DEBUG) // doesn't work at the moment - if (major_gc) { gcCAFs(); } -#endif - #ifdef PROFILING // resetStaticObjectForRetainerProfiling() must be called before // zeroing below. hunk ./rts/sm/GC.c 692 } // Reset the nursery - resetNurseries(); - - // send exceptions to any threads which were about to die - RELEASE_SM_LOCK; - resurrectThreads(resurrected_threads); - ACQUIRE_SM_LOCK; + if (gc_type == GC_LOCAL) { + resetNursery(cap->no); + } else { + resetNurseries(); + } // Update the stable pointer hash table. hunk ./rts/sm/GC.c 699 - updateStablePtrTable(major_gc); + if (gc_type != GC_LOCAL) updateStablePtrTable(major_gc); + + // ok, GC over: tell the stats department what happened. + slop = calcLiveBlocks() * BLOCK_SIZE_W - live; + stat_endGC(gct, allocated, live, copied, N, max_copied, avg_copied, slop); + + // make sure our mut_lists don't point to anything local. This step + // also moves mut_list entries to the right mut_list if they ended + // up on the wrong one during a GC_PAR. + for (g = RtsFlags.GcFlags.generations-1; g > 0; g--) { + if (gc_type == GC_LOCAL) { + stash_mut_list (cap, g); + } else { + for (n = 0; n < n_capabilities; n++) { + stash_mut_list (&capabilities[n], g); + } + } + } + if (gc_type == GC_LOCAL) { + globalise_capability_mut_lists (cap); + } else { + RELEASE_SM_LOCK; // globalise acquires sm_mutex itself as needed + for (n = 0; n < n_capabilities; n++) { + globalise_capability_mut_lists (&capabilities[n]); + } + ACQUIRE_SM_LOCK; + } // unlock the StablePtr table. Must be before scheduleFinalizers(), // because a finalizer may call hs_free_fun_ptr() or hunk ./rts/sm/GC.c 730 // hs_free_stable_ptr(), both of which access the StablePtr table. - stablePtrPostGC(); + if (gc_type != GC_LOCAL) stablePtrPostGC(); // Start any pending finalizers. Must be after // updateStablePtrTable() and stablePtrPostGC() (see #4221). hunk ./rts/sm/GC.c 734 - RELEASE_SM_LOCK; - scheduleFinalizers(cap, old_weak_ptr_list); - ACQUIRE_SM_LOCK; + if (gc_type != GC_LOCAL) { RELEASE_SM_LOCK; } + scheduleFinalizers(cap, gct->old_weak_ptrs); + if (gc_type != GC_LOCAL) { ACQUIRE_SM_LOCK; } + + // send exceptions to any threads which were about to die + if (gc_type != GC_LOCAL) { + RELEASE_SM_LOCK; + resurrectThreads(gct->resurrected_threads); + ACQUIRE_SM_LOCK; + } else { + // we don't do this in local GC (yet) + ASSERT(gct->resurrected_threads == END_TSO_QUEUE); + } if (major_gc) { nat need, got; hunk ./rts/sm/GC.c 762 } // check sanity after GC - IF_DEBUG(sanity, checkSanity(rtsTrue)); + IF_DEBUG(sanity, checkSanity (gc_type == GC_LOCAL && n_capabilities != 1, + rtsTrue /* after GC */, + major_gc, + cap->no)); // extra GC trace info IF_DEBUG(gc, statDescribeGens()); hunk ./rts/sm/GC.c 782 #ifdef DEBUG // check for memory leaks if DEBUG is on - memInventory(DEBUG_gc); + if (gc_type != GC_LOCAL || n_capabilities == 1) { + // we can't account for blocks in another local heap, so only do this + // when doing a global GC. + memInventory(DEBUG_gc); + } #endif #ifdef RTS_GTK_FRONTPANEL hunk ./rts/sm/GC.c 795 } #endif - // ok, GC over: tell the stats department what happened. - slop = calcLiveBlocks() * BLOCK_SIZE_W - live; - stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop); - // Guess which generation we'll collect *next* time hunk ./rts/sm/GC.c 796 - initialise_N(force_major_gc); + next_gc_gen = determine_collect_gen(); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { hunk ./rts/sm/GC.c 805 } #endif - RELEASE_SM_LOCK; + if (gc_type == GC_LOCAL) { + RELEASE_LOCK(&gc_local_mutex); + } else { + RELEASE_SM_LOCK; + } SET_GCT(saved_gct); } hunk ./rts/sm/GC.c 814 +/* ----------------------------------------------------------------------------- + allocate memory in a generation using the GC's data structures + -------------------------------------------------------------------------- */ + +StgPtr allocateInGen (Capability *cap USED_IF_THREADS, nat gen_ix, nat size) +{ + StgPtr p; + gen_workspace *ws; + gc_thread *saved_gct; + bdescr *bd; + + saved_gct = gct; + SET_GCT(gc_threads[cap->no]); + gct->gc_type = GC_LOCAL; + + ws = &gct->gens[gen_ix]; + + // XXX todo: ensure this is never NULL + if (ws->todo_bd == NULL) { + alloc_todo_block(ws,0); + } + + p = ws->todo_free; + ws->todo_free += size; + if (ws->todo_free > ws->todo_lim) { + bd = ws->todo_bd; + if (ws->todo_lim + size <= bd->start + bd->blocks * BLOCK_SIZE_W) { + ws->todo_lim = bd->start + bd->blocks * BLOCK_SIZE_W; + } else { + bd->link = ws->scavd_list; + ws->scavd_list = bd; + ws->n_scavd_blocks += bd->blocks; + IF_DEBUG(sanity, + ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks)); + alloc_todo_block (ws, size); + p = ws->todo_free; + ws->todo_free += size; + } + } + ASSERT(ws->todo_free >= ws->todo_bd->free && ws->todo_free <= ws->todo_lim); + + SET_GCT(saved_gct); + + return p; +} + /* ----------------------------------------------------------------------------- Figure out which generation to collect, initialise N and major_gc. hunk ./rts/sm/GC.c 868 -------------------------------------------------------------------------- */ static nat -initialise_N (rtsBool force_major_gc) +determine_collect_gen (void) { hunk ./rts/sm/GC.c 870 - int g; - nat blocks, blocks_total; - - blocks = 0; - blocks_total = 0; - - if (force_major_gc) { - N = RtsFlags.GcFlags.generations - 1; - } else { - N = 0; - } + nat g, blocks; hunk ./rts/sm/GC.c 872 - for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) { + // we always collect at least gen 0 + for (g = RtsFlags.GcFlags.generations - 1; g > 0; g--) { hunk ./rts/sm/GC.c 875 - blocks = generations[g].n_words / BLOCK_SIZE_W - + generations[g].n_large_blocks; + blocks = old_generations[g].n_words / BLOCK_SIZE_W + + old_generations[g].n_large_blocks; hunk ./rts/sm/GC.c 878 - if (blocks >= generations[g].max_blocks) { - N = stg_max(N,g); - } - if ((nat)g <= N) { - blocks_total += blocks; - } + if (blocks >= old_generations[g].max_blocks) + break; } hunk ./rts/sm/GC.c 881 - - blocks_total += countNurseryBlocks(); - - major_gc = (N == RtsFlags.GcFlags.generations-1); - return blocks_total; + return g; } hunk ./rts/sm/GC.c 884 + /* ----------------------------------------------------------------------------- Initialise the gc_thread structures. -------------------------------------------------------------------------- */ hunk ./rts/sm/GC.c 894 #define GC_THREAD_RUNNING 2 #define GC_THREAD_WAITING_TO_CONTINUE 3 +static void +init_gc_thread (gc_thread *t) +{ + t->static_objects = END_OF_STATIC_LIST; + t->scavenged_static_objects = END_OF_STATIC_LIST; + t->scan_bd = NULL; + t->evac_gen_ix = 0; + t->failed_to_evac = rtsFalse; + t->eager_promotion = rtsTrue; + t->thunk_selector_depth = 0; + t->copied = 0; + t->scanned = 0; + t->any_work = 0; + t->no_work = 0; + t->scav_find_work = 0; + t->resurrected_threads = NULL; +} + static void new_gc_thread (nat n, gc_thread *t) { hunk ./rts/sm/GC.c 918 nat g; gen_workspace *ws; + t->cap = &capabilities[n]; + #ifdef THREADED_RTS hunk ./rts/sm/GC.c 921 - t->id = 0; initSpinLock(&t->gc_spin); initSpinLock(&t->mut_spin); ACQUIRE_SPIN_LOCK(&t->gc_spin); hunk ./rts/sm/GC.c 928 // thread to start up, see wakeup_gc_threads #endif - t->thread_index = n; + t->index = n; + t->localg0 = &all_generations[n]; t->free_blocks = NULL; t->gc_count = 0; hunk ./rts/sm/GC.c 932 + t->mut_lists = capabilities[t->index].mut_lists; init_gc_thread(t); hunk ./rts/sm/GC.c 935 - + #ifdef USE_PAPI t->papi_events = -1; #endif hunk ./rts/sm/GC.c 940 - for (g = 0; g < RtsFlags.GcFlags.generations; g++) + // we need a workspace for every generation, even the local + // generations of other Capabilities, in case we do a + // single-threaded GC. + for (g = 0; g < total_generations; g++) { ws = &t->gens[g]; hunk ./rts/sm/GC.c 946 - ws->gen = &generations[g]; - ASSERT(g == ws->gen->no); + ws->gen = &all_generations[g]; + ASSERT(g == ws->gen->ix); ws->my_gct = t; ws->todo_bd = NULL; hunk ./rts/sm/GC.c 954 ws->todo_q = newWSDeque(128); ws->todo_overflow = NULL; ws->n_todo_overflow = 0; + ws->todo_large_objects = NULL; ws->part_list = NULL; ws->n_part_blocks = 0; hunk ./rts/sm/GC.c 978 for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { gc_threads[i] = stgMallocBytes(sizeof(gc_thread) + - RtsFlags.GcFlags.generations * sizeof(gen_workspace), + total_generations * sizeof(gen_workspace), "alloc_gc_threads"); new_gc_thread(i, gc_threads[i]); hunk ./rts/sm/GC.c 983 } + + initMutex(&gc_local_mutex); #else gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads"); gc_threads[0] = gct; hunk ./rts/sm/GC.c 1019 } } +static lnat +count_part_lists (nat start_gen) +{ + nat n, g; + lnat words; + + words = 0; + for (n = 0; n < n_capabilities; n++) { + for (g = start_gen; g < total_generations; g++) { + words += countOccupied(gc_threads[n]->gens[g].part_list); + } + } + return words; +} + /* ---------------------------------------------------------------------------- Start GC threads ------------------------------------------------------------------------- */ hunk ./rts/sm/GC.c 1038 -static volatile StgWord gc_running_threads; - static StgWord inc_running (void) { hunk ./rts/sm/GC.c 1042 StgWord new; + if (gct->gc_type != GC_PAR) return 1; new = atomic_inc(&gc_running_threads); ASSERT(new <= n_gc_threads); return new; hunk ./rts/sm/GC.c 1051 static StgWord dec_running (void) { + if (gct->gc_type != GC_PAR) return 0; ASSERT(gc_running_threads != 0); return atomic_dec(&gc_running_threads); } hunk ./rts/sm/GC.c 1059 static rtsBool any_work (void) { - int g; + nat g; gen_workspace *ws; gct->any_work++; hunk ./rts/sm/GC.c 1071 return rtsTrue; } - // Check for global work in any step. We don't need to check for - // local work, because we have already exited scavenge_loop(), - // which means there is no local work for this thread. - for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) { + // Check for global work in any generation. We don't need to + // check for local work, because we have already exited + // scavenge_loop(), which means there is no local work for this + // thread. + for (g = 0; g < total_generations; g++) { ws = &gct->gens[g]; hunk ./rts/sm/GC.c 1077 + if (gct->gc_type != GC_SEQ && isNonLocalGen(ws->gen)) + continue; if (ws->todo_large_objects) return rtsTrue; if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue; if (ws->todo_overflow) return rtsTrue; hunk ./rts/sm/GC.c 1087 #if defined(THREADED_RTS) if (work_stealing) { nat n; + int i; // look for work to steal for (n = 0; n < n_gc_threads; n++) { hunk ./rts/sm/GC.c 1090 - if (n == gct->thread_index) continue; - for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) { - ws = &gc_threads[n]->gens[g]; + if (n == gct->index) continue; + for (i = total_generations-1; i >= 0; i--) { + // never steal from local heap: + ws = &gc_threads[n]->gens[i]; + if (ws->gen->is_local) continue; if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue; } } hunk ./rts/sm/GC.c 1116 loop: - traceEventGcWork(&capabilities[gct->thread_index]); + traceEventGcWork(gct->cap); #if defined(THREADED_RTS) hunk ./rts/sm/GC.c 1119 - if (n_gc_threads > 1) { - scavenge_loop(); + if (gct->gc_type == GC_LOCAL) { + scavenge_loop_local(); + } + else if (n_gc_threads > 1) { + scavenge_loop_par(); } else { hunk ./rts/sm/GC.c 1125 - scavenge_loop1(); + scavenge_loop(); } #else scavenge_loop(); hunk ./rts/sm/GC.c 1131 #endif + collect_gct_blocks(); + // scavenge_loop() only exits when there's no work to do r = dec_running(); hunk ./rts/sm/GC.c 1136 - traceEventGcIdle(&capabilities[gct->thread_index]); + traceEventGcIdle(gct->cap); debugTrace(DEBUG_gc, "%d GC threads still running", r); hunk ./rts/sm/GC.c 1152 // scavenge_loop() to perform any pending work. } - traceEventGcDone(&capabilities[gct->thread_index]); + traceEventGcDone(gct->cap); } #if defined(THREADED_RTS) hunk ./rts/sm/GC.c 1166 saved_gct = gct; gct = gc_threads[cap->no]; - gct->id = osThreadId(); // Wait until we're told to wake up RELEASE_SPIN_LOCK(&gct->mut_spin); hunk ./rts/sm/GC.c 1170 gct->wakeup = GC_THREAD_STANDING_BY; - debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index); + debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->index); ACQUIRE_SPIN_LOCK(&gct->gc_spin); #ifdef USE_PAPI hunk ./rts/sm/GC.c 1181 papi_thread_start_gc1_count(gct->papi_events); #endif + prepare_gc_thread(); + // Every thread evacuates some roots. hunk ./rts/sm/GC.c 1184 - gct->evac_gen = 0; - markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads, - rtsTrue/*prune sparks*/); - scavenge_capability_mut_lists(&capabilities[gct->thread_index]); + gct->evac_gen_ix = 0; + markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/); + scavenge_capability_mut_lists(cap); scavenge_until_all_done(); hunk ./rts/sm/GC.c 1190 + // can't do this here, because we might have to shuffle entries + // betwee mut_lists after parallel GC, so the main thread does it all. + // globalise_mut_lists (cap); + #ifdef THREADED_RTS // Now that the whole heap is marked, we discard any sparks that // were found to be unreachable. The main GC thread is currently hunk ./rts/sm/GC.c 1213 RELEASE_SPIN_LOCK(&gct->gc_spin); gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE; debugTrace(DEBUG_gc, "GC thread %d waiting to continue...", - gct->thread_index); + gct->index); ACQUIRE_SPIN_LOCK(&gct->mut_spin); hunk ./rts/sm/GC.c 1215 - debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index); + debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->index); SET_GCT(saved_gct); } hunk ./rts/sm/GC.c 1258 #endif // THREADED_RTS static void -start_gc_threads (void) -{ -#if defined(THREADED_RTS) - gc_running_threads = 0; -#endif -} - -static void -wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS) +wakeup_gc_threads (nat me USED_IF_THREADS, + nat N USED_IF_THREADS, nat gc_type USED_IF_THREADS) { #if defined(THREADED_RTS) nat i; hunk ./rts/sm/GC.c 1263 - for (i=0; i < n_threads; i++) { + + if (n_gc_threads == 1) return; + + for (i=0; i < n_gc_threads; i++) { if (i == me) continue; inc_running(); debugTrace(DEBUG_gc, "waking up gc thread %d", i); hunk ./rts/sm/GC.c 1270 + gc_threads[i]->collect_gen = N; + gc_threads[i]->gc_type = gc_type; if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads"); gc_threads[i]->wakeup = GC_THREAD_RUNNING; hunk ./rts/sm/GC.c 1285 // standby state, otherwise they may still be executing inside // any_work(), and may even remain awake until the next GC starts. static void -shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS) +shutdown_gc_threads (nat me USED_IF_THREADS) { #if defined(THREADED_RTS) nat i; hunk ./rts/sm/GC.c 1289 - for (i=0; i < n_threads; i++) { + for (i=0; i < n_gc_threads; i++) { if (i == me) continue; while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); } } hunk ./rts/sm/GC.c 1320 ------------------------------------------------------------------------- */ static void -init_collected_gen (nat g, nat n_threads) +prepare_collected_gen (generation *gen) { hunk ./rts/sm/GC.c 1322 - nat t, i; + nat i, g, n; gen_workspace *ws; hunk ./rts/sm/GC.c 1324 - generation *gen; - bdescr *bd; + bdescr *bd, *next; // Throw away the current mutable list. Invariant: the mutable // list always has at least one block; this means we can avoid a hunk ./rts/sm/GC.c 1329 // check for NULL in recordMutable(). + g = gen->no; if (g != 0) { hunk ./rts/sm/GC.c 1331 - freeChain(generations[g].mut_list); - generations[g].mut_list = allocBlock(); for (i = 0; i < n_capabilities; i++) { freeChain(capabilities[i].mut_lists[g]); capabilities[i].mut_lists[g] = allocBlock(); hunk ./rts/sm/GC.c 1337 } } - gen = &generations[g]; - ASSERT(gen->no == g); - // we'll construct a new list of threads in this step // during GC, throw away the current list. gen->old_threads = gen->threads; hunk ./rts/sm/GC.c 1351 gen->live_estimate = 0; // initialise the large object queues. - gen->scavenged_large_objects = NULL; - gen->n_scavenged_large_blocks = 0; + ASSERT(gen->scavenged_large_objects == NULL); + ASSERT(gen->n_scavenged_large_blocks == 0); hunk ./rts/sm/GC.c 1354 + // grab all the partial blocks stashed in the gc_thread workspaces and + // move them to the old_threads list of this gen. + for (n = 0; n < n_capabilities; n++) { + ws = &gc_threads[n]->gens[gen->ix]; + for (bd = ws->part_list; bd != NULL; bd = next) { + next = bd->link; + bd->link = gen->old_blocks; + gen->old_blocks = bd; + gen->n_old_blocks += bd->blocks; + } + for (bd = ws->scavd_list; bd != NULL; bd = next) { + next = bd->link; + bd->link = gen->old_blocks; + gen->old_blocks = bd; + gen->n_old_blocks += bd->blocks; + } + if (ws->todo_bd != NULL) { + ws->todo_bd->link = gen->old_blocks; + gen->old_blocks = ws->todo_bd; + gen->n_old_blocks += ws->todo_bd->blocks; + } + ws->todo_bd = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; + } + // mark the small objects as from-space for (bd = gen->old_blocks; bd; bd = bd->link) { bd->flags &= ~BF_EVACUATED; hunk ./rts/sm/GC.c 1433 } } } - - // For each GC thread, for each step, allocate a "todo" block to - // store evacuated objects to be scavenged, and a block to store - // evacuated objects that do not need to be scavenged. - for (t = 0; t < n_threads; t++) { - ws = &gc_threads[t]->gens[g]; - - ws->todo_large_objects = NULL; - - ws->part_list = NULL; - ws->n_part_blocks = 0; - - // allocate the first to-space block; extra blocks will be - // chained on as necessary. - ws->todo_bd = NULL; - ASSERT(looksEmptyWSDeque(ws->todo_q)); - alloc_todo_block(ws,0); - - ws->todo_overflow = NULL; - ws->n_todo_overflow = 0; - - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - } } hunk ./rts/sm/GC.c 1441 ------------------------------------------------------------------------- */ static void -init_uncollected_gen (nat g, nat threads) +stash_mut_list (Capability *cap, nat gen_no) { hunk ./rts/sm/GC.c 1443 - nat t, n; - gen_workspace *ws; - generation *gen; - bdescr *bd; + cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no]; + cap->mut_lists[gen_no] = allocBlock_sync(); +} + +/* ---------------------------------------------------------------------------- + Initialise a generation that is *not* to be collected + ------------------------------------------------------------------------- */ + +static void +prepare_uncollected_gen (nat gc_type, generation *gen) +{ + nat i; + + ASSERT(gen->no > 0); // save the current mutable lists for this generation, and // allocate a fresh block for each one. We'll traverse these hunk ./rts/sm/GC.c 1461 // mutable lists as roots early on in the GC. - generations[g].saved_mut_list = generations[g].mut_list; - generations[g].mut_list = allocBlock(); - for (n = 0; n < n_capabilities; n++) { - capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g]; - capabilities[n].mut_lists[g] = allocBlock(); + if (gc_type == GC_LOCAL) { + // for a local GC, we use the private mutable lists for this + // capability only. + stash_mut_list(gct->cap, gen->no); + } else { + // for a global GC, we use the private mutable lists of every + // capability. + for (i = 0; i < n_capabilities; i++) { + stash_mut_list(&capabilities[i], gen->no); + } } hunk ./rts/sm/GC.c 1473 - gen = &generations[g]; + ASSERT(gen->scavenged_large_objects == NULL); + ASSERT(gen->n_scavenged_large_blocks == 0); +} hunk ./rts/sm/GC.c 1477 - gen->scavenged_large_objects = NULL; - gen->n_scavenged_large_blocks = 0; +/* ----------------------------------------------------------------------------- + Initialise a gc_thread before GC + -------------------------------------------------------------------------- */ hunk ./rts/sm/GC.c 1481 - for (t = 0; t < threads; t++) { - ws = &gc_threads[t]->gens[g]; +void +prepare_gen_workspace (nat g) +{ + gen_workspace *ws; + + ws = &gct->gens[g]; hunk ./rts/sm/GC.c 1488 - ASSERT(looksEmptyWSDeque(ws->todo_q)); - ws->todo_large_objects = NULL; - - ws->part_list = NULL; - ws->n_part_blocks = 0; - - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - - // If the block at the head of the list in this generation - // is less than 3/4 full, then use it as a todo block. - if (gen->blocks && isPartiallyFull(gen->blocks)) - { - ws->todo_bd = gen->blocks; - ws->todo_free = ws->todo_bd->free; - ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; - gen->blocks = gen->blocks->link; - gen->n_blocks -= 1; - gen->n_words -= ws->todo_bd->free - ws->todo_bd->start; - ws->todo_bd->link = NULL; - // we must scan from the current end point. - ws->todo_bd->u.scan = ws->todo_bd->free; - } - else - { - ws->todo_bd = NULL; - alloc_todo_block(ws,0); - } + ASSERT(looksEmptyWSDeque(ws->todo_q)); + ASSERT(ws->todo_large_objects == NULL); + + ASSERT(countBlocks(ws->part_list) == ws->n_part_blocks); + ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); + // scavd_list can be populated by globalise() during mutation. + + // allocate a todo block if necessary. + if (ws->todo_bd == NULL) { + alloc_todo_block(ws,0); } hunk ./rts/sm/GC.c 1499 + // don't scavenge anything we allocated using allocateInGen + // XXX there should be a better place for this + ws->todo_bd->u.scan = ws->todo_free; + + ASSERT(ws->todo_overflow == NULL); + ASSERT(ws->n_todo_overflow == 0); +} hunk ./rts/sm/GC.c 1507 - // deal out any more partial blocks to the threads' part_lists - t = 0; - while (gen->blocks && isPartiallyFull(gen->blocks)) - { - bd = gen->blocks; - gen->blocks = bd->link; - ws = &gc_threads[t]->gens[g]; - bd->link = ws->part_list; - ws->part_list = bd; - ws->n_part_blocks += 1; - bd->u.scan = bd->free; - gen->n_blocks -= 1; - gen->n_words -= bd->free - bd->start; - t++; - if (t == n_gc_threads) t = 0; +static void +prepare_gc_thread (void) +{ + nat g; + + init_gc_thread(gct); + + // Initialise workspaces for all generations + for (g = 0; g < total_generations; g++) { + // If this gen is the local G0 for another Capability and + // we're doing a local GC, then we don't bother allocating a + // todo block. + if (gct->gc_type != GC_LOCAL + || all_generations[g].no != 0 + || g == gct->index) { + prepare_gen_workspace(g); + } } } hunk ./rts/sm/GC.c 1527 -/* ----------------------------------------------------------------------------- - Initialise a gc_thread before GC - -------------------------------------------------------------------------- */ - static void hunk ./rts/sm/GC.c 1528 -init_gc_thread (gc_thread *t) +collect_gct_blocks (void) { hunk ./rts/sm/GC.c 1530 - t->static_objects = END_OF_STATIC_LIST; - t->scavenged_static_objects = END_OF_STATIC_LIST; - t->scan_bd = NULL; - t->mut_lists = capabilities[t->thread_index].mut_lists; - t->evac_gen = 0; - t->failed_to_evac = rtsFalse; - t->eager_promotion = rtsTrue; - t->thunk_selector_depth = 0; - t->copied = 0; - t->scanned = 0; - t->any_work = 0; - t->no_work = 0; - t->scav_find_work = 0; + nat g; + gen_workspace *ws; + bdescr *bd, *prev; + + for (g = 0; g < total_generations; g++) { + ws = &gct->gens[g]; + + if (gct->gc_type != GC_SEQ && isNonLocalGen(ws->gen)) + continue; + + // there may still be a block attached to ws->todo_bd; + // leave it there to use next time. + + if (ws->scavd_list != NULL) { + ACQUIRE_SPIN_LOCK(&ws->gen->sync); + + ASSERT(gct->scan_bd == NULL); + ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); + + prev = NULL; + for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { + ws->gen->n_words += bd->free - bd->start; + prev = bd; + } + if (prev != NULL) { + prev->link = ws->gen->blocks; + ws->gen->blocks = ws->scavd_list; + } + ws->gen->n_blocks += ws->n_scavd_blocks; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; + + RELEASE_SPIN_LOCK(&ws->gen->sync); + } + } } /* ----------------------------------------------------------------------------- hunk ./rts/sm/GC.c 1584 saved_gct = gct; SET_GCT(user); - evacuate(root); +#ifdef THREADED_RTS + if (gct->gc_type == GC_LOCAL) { + evacuate_local(root); + } else +#endif + { + evacuate(root); + } SET_GCT(saved_gct); } hunk ./rts/sm/GC.c 1706 min_alloc, size, max); #endif - for (g = 0; g < gens; g++) { - generations[g].max_blocks = size; + for (g = 1; g < gens; g++) { + old_generations[g].max_blocks = size; } } } hunk ./rts/sm/GC.c 1717 -------------------------------------------------------------------------- */ static void -resize_nursery (void) +resize_nursery (lnat copied, nat N) { const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities; hunk ./rts/sm/GC.c 1739 * performance we get from 3L bytes, reducing to the same * performance at 2L bytes. */ - blocks = generations[0].n_blocks; + blocks = all_generations[0].n_blocks; if ( RtsFlags.GcFlags.maxHeapSize != 0 && blocks * RtsFlags.GcFlags.oldGenFactor * 2 > hunk ./rts/sm/GC.h 26 StgClosure * isAlive ( StgClosure *p ); void markCAFs ( evac_fn evac, void *user ); -extern nat N; -extern rtsBool major_gc; +StgPtr allocateInGen (Capability *cap, nat gen_ix, nat size); + +extern rtsBool major_gc; // collecting the oldest gen? +extern rtsBool work_stealing; // work stealing is enabled? +extern nat next_gc_gen; // generation to collect next time extern bdescr *mark_stack_bd; extern bdescr *mark_stack_top_bd; hunk ./rts/sm/GC.h 36 extern StgPtr mark_sp; -extern long copied; - -extern rtsBool work_stealing; - #ifdef DEBUG extern nat mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS; #endif hunk ./rts/sm/GC.h 53 void releaseGCThreads (Capability *cap); #endif +// used in globalise() +void prepare_gen_workspace (nat g); + #define WORK_UNIT_WORDS 128 #include "EndPrivate.h" hunk ./rts/sm/GCAux.c 20 #include "Capability.h" #include "Trace.h" #include "Schedule.h" -// DO NOT include "GCThread.h", we don't want the register variable +// DO NOT include "GCTDecl.h", we don't want the register variable /* ----------------------------------------------------------------------------- isAlive determines whether the given closure is still alive (after hunk ./rts/sm/GCAux.c 153 evac(user, &c->indirectee); } } + addfile ./rts/sm/GCTDecl.h hunk ./rts/sm/GCTDecl.h 1 +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2009 + * + * Documentation on the architecture of the Garbage Collector can be + * found in the online commentary: + * + * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC + * + * ---------------------------------------------------------------------------*/ + +#ifndef SM_GCTDECL_H +#define SM_GCTDECL_H + +#include "BeginPrivate.h" + +/* ----------------------------------------------------------------------------- + The gct variable is thread-local and points to the current thread's + gc_thread structure. It is heavily accessed, so we try to put gct + into a global register variable if possible; if we don't have a + register then use gcc's __thread extension to create a thread-local + variable. + -------------------------------------------------------------------------- */ + +#if defined(THREADED_RTS) + +#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg); + +#define SET_GCT(to) gct = (to) + + + +#if (defined(i386_HOST_ARCH) && defined(linux_HOST_OS)) +// Using __thread is better than stealing a register on x86/Linux, because +// we have too few registers available. In my tests it was worth +// about 5% in GC performance, but of course that might change as gcc +// improves. -- SDM 2009/04/03 +// +// We ought to do the same on MacOS X, but __thread is not +// supported there yet (gcc 4.0.1). + +extern __thread gc_thread* gct; +#define DECLARE_GCT __thread gc_thread* gct; + + +#elif defined(sparc_HOST_ARCH) +// On SPARC we can't pin gct to a register. Names like %l1 are just offsets +// into the register window, which change on each function call. +// +// There are eight global (non-window) registers, but they're used for other purposes. +// %g0 -- always zero +// %g1 -- volatile over function calls, used by the linker +// %g2-%g3 -- used as scratch regs by the C compiler (caller saves) +// %g4 -- volatile over function calls, used by the linker +// %g5-%g7 -- reserved by the OS + +extern __thread gc_thread* gct; +#define DECLARE_GCT __thread gc_thread* gct; + + +#elif defined(REG_Base) && !defined(i386_HOST_ARCH) +// on i386, REG_Base is %ebx which is also used for PIC, so we don't +// want to steal it + +GLOBAL_REG_DECL(gc_thread*, gct, REG_Base) +#define DECLARE_GCT /* nothing */ + + +#elif defined(REG_R1) + +GLOBAL_REG_DECL(gc_thread*, gct, REG_R1) +#define DECLARE_GCT /* nothing */ + + +#elif defined(__GNUC__) + +extern __thread gc_thread* gct; +#define DECLARE_GCT __thread gc_thread* gct; + +#else + +#error Cannot find a way to declare the thread-local gct + +#endif + +#else // not the threaded RTS + +extern StgWord8 the_gc_thread[]; + +#define gct ((gc_thread*)&the_gc_thread) +#define SET_GCT(to) /*nothing*/ +#define DECLARE_GCT /*nothing*/ + +#endif // THREADED_RTS + +#include "EndPrivate.h" + +#endif // SM_GCTDECL_H hunk ./rts/sm/GCThread.h 18 #define SM_GCTHREAD_H #include "WSDeque.h" +#include "GetTime.h" // for Ticks #include "BeginPrivate.h" hunk ./rts/sm/GCThread.h 119 ------------------------------------------------------------------------- */ typedef struct gc_thread_ { + Capability *cap; + #ifdef THREADED_RTS hunk ./rts/sm/GCThread.h 122 - OSThreadId id; // The OS thread that this struct belongs to SpinLock gc_spin; SpinLock mut_spin; volatile rtsBool wakeup; hunk ./rts/sm/GCThread.h 126 #endif - nat thread_index; // a zero based index identifying the thread + nat index; // a zero based index identifying the thread + // equal to cap->no + + generation *localg0; // The local G0 for this Capability bdescr * free_blocks; // a buffer of free blocks for this thread // during GC without accessing the block hunk ./rts/sm/GCThread.h 154 // -------------------- // evacuate flags - generation *evac_gen; // Youngest generation that objects + nat evac_gen_ix; // Youngest generation that objects // should be evacuated to in // evacuate(). (Logically an // argument to evacuate, but it's hunk ./rts/sm/GCThread.h 170 // instead of the to-space // corresponding to the object - lnat thunk_selector_depth; // ummm.... not used as of now + lnat thunk_selector_depth; // used to avoid unbounded recursion in + // evacuate() for THUNK_SELECTOR + + nat collect_gen; // maximum generation (no) to collect + + nat gc_type; // The gc type (GC_SEQ, GC_PAR, GC_LOCAL) + + StgTSO *resurrected_threads; // threads found to be unreachable, + // linked by ->global_link field. + + StgTSO *exception_threads; // List of blocked threads found to + // have pending throwTos + + /* Which stage of processing various kinds of weak pointer are we at? + * (see traverse_weak_ptr_list() below for discussion). + */ + enum { WeakPtrs, WeakThreads, WeakDone } weak_stage; + + StgWeak *old_weak_ptrs; #ifdef USE_PAPI int papi_events; hunk ./rts/sm/GCThread.h 203 lnat no_work; lnat scav_find_work; + Ticks gc_start_time; + Ticks gce_start_time; + lnat gc_start_faults; + // ------------------- // workspaces hunk ./rts/sm/GCThread.h 210 - // array of workspaces, indexed by stp->abs_no. This is placed + // array of workspaces, indexed by gen->abs_no. This is placed // directly at the end of the gc_thread structure so that we can get from // the gc_thread pointer to a workspace using only pointer // arithmetic, no memory access. This happens in the inner loop hunk ./rts/sm/GCThread.h 221 extern nat n_gc_threads; -/* ----------------------------------------------------------------------------- - The gct variable is thread-local and points to the current thread's - gc_thread structure. It is heavily accessed, so we try to put gct - into a global register variable if possible; if we don't have a - register then use gcc's __thread extension to create a thread-local - variable. - - Even on x86 where registers are scarce, it is worthwhile using a - register variable here: I measured about a 2-5% slowdown with the - __thread version. - -------------------------------------------------------------------------- */ - extern gc_thread **gc_threads; hunk ./rts/sm/GCThread.h 223 -#if defined(THREADED_RTS) - -#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg); - -#define SET_GCT(to) gct = (to) - - - -#if (defined(i386_HOST_ARCH) && defined(linux_HOST_OS)) -// Using __thread is better than stealing a register on x86/Linux, because -// we have too few registers available. In my tests it was worth -// about 5% in GC performance, but of course that might change as gcc -// improves. -- SDM 2009/04/03 -// -// We ought to do the same on MacOS X, but __thread is not -// supported there yet (gcc 4.0.1). - -extern __thread gc_thread* gct; -#define DECLARE_GCT __thread gc_thread* gct; - - -#elif defined(sparc_HOST_ARCH) -// On SPARC we can't pin gct to a register. Names like %l1 are just offsets -// into the register window, which change on each function call. -// -// There are eight global (non-window) registers, but they're used for other purposes. -// %g0 -- always zero -// %g1 -- volatile over function calls, used by the linker -// %g2-%g3 -- used as scratch regs by the C compiler (caller saves) -// %g4 -- volatile over function calls, used by the linker -// %g5-%g7 -- reserved by the OS - -extern __thread gc_thread* gct; -#define DECLARE_GCT __thread gc_thread* gct; - - -#elif defined(REG_Base) && !defined(i386_HOST_ARCH) -// on i386, REG_Base is %ebx which is also used for PIC, so we don't -// want to steal it - -GLOBAL_REG_DECL(gc_thread*, gct, REG_Base) -#define DECLARE_GCT /* nothing */ - - -#elif defined(REG_R1) - -GLOBAL_REG_DECL(gc_thread*, gct, REG_R1) -#define DECLARE_GCT /* nothing */ - - -#elif defined(__GNUC__) - -extern __thread gc_thread* gct; -#define DECLARE_GCT __thread gc_thread* gct; - -#else - -#error Cannot find a way to declare the thread-local gct - -#endif - -#else // not the threaded RTS - -extern StgWord8 the_gc_thread[]; - -#define gct ((gc_thread*)&the_gc_thread) -#define SET_GCT(to) /*nothing*/ -#define DECLARE_GCT /*nothing*/ - -#endif - #include "EndPrivate.h" #endif // SM_GCTHREAD_H hunk ./rts/sm/GCUtils.c 21 #include "Storage.h" #include "GC.h" #include "GCThread.h" +#include "GCTDecl.h" #include "GCUtils.h" #include "Printer.h" #include "Trace.h" hunk ./rts/sm/GCUtils.c 36 bdescr * allocBlock_sync(void) { - bdescr *bd; - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); - bd = allocBlock(); - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); - return bd; + // GC_LOCAL uses the ordinary locking protocol for the block + // allocator because it runs concurrently with the mutator. + if (gct->gc_type == GC_LOCAL) { + return allocBlock_lock(); + } else { + bdescr *bd; + ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); + bd = allocBlock(); + RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + return bd; + } } static bdescr * hunk ./rts/sm/GCUtils.c 52 allocGroup_sync(nat n) { - bdescr *bd; - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); - bd = allocGroup(n); - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); - return bd; + // GC_LOCAL uses the ordinary locking protocol for the block + // allocator because it runs concurrently with the mutator. + if (gct->gc_type == GC_LOCAL) { + return allocGroup_lock(n); + } else { + bdescr *bd; + ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); + bd = allocGroup(n); + RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + return bd; + } } hunk ./rts/sm/GCUtils.c 138 // look for work to steal for (n = 0; n < n_gc_threads; n++) { - if (n == gct->thread_index) continue; + if (n == gct->index) continue; bd = stealWSDeque(gc_threads[n]->gens[g].todo_q); if (bd) { return bd; hunk ./rts/sm/GCUtils.c 295 ws->todo_bd = bd; ws->todo_free = bd->free; - ws->todo_lim = stg_min(bd->start + bd->blocks * BLOCK_SIZE_W, - bd->free + stg_max(WORK_UNIT_WORDS,size)); + + if (gct->gc_type == GC_PAR && work_stealing) { + // use a smaller limit if we're doing load-balancing, so we + // can share available work more quickly. + ws->todo_lim = stg_min(bd->start + bd->blocks * BLOCK_SIZE_W, + bd->free + stg_max(WORK_UNIT_WORDS,size)); + } else { + ws->todo_lim = bd->start + bd->blocks * BLOCK_SIZE_W; + } debugTrace(DEBUG_gc, "alloc new todo block %p for gen %d", bd->free, ws->gen->no); hunk ./rts/sm/GCUtils.c 317 #if DEBUG void -printMutableList(generation *gen) +printMutableList(bdescr *bd) { hunk ./rts/sm/GCUtils.c 319 - bdescr *bd; StgPtr p; hunk ./rts/sm/GCUtils.c 321 - debugBelch("mutable list %p: ", gen->mut_list); + debugBelch("mutable list %p: ", bd); hunk ./rts/sm/GCUtils.c 323 - for (bd = gen->mut_list; bd != NULL; bd = bd->link) { + for (; bd != NULL; bd = bd->link) { for (p = bd->start; p < bd->free; p++) { debugBelch("%p (%s), ", (void *)*p, info_type((StgClosure *)*p)); } hunk ./rts/sm/GCUtils.h 19 #include "BeginPrivate.h" +#include "GCTDecl.h" + bdescr *allocBlock_sync(void); void freeChain_sync(bdescr *bd); hunk ./rts/sm/GCUtils.h 44 #if DEBUG -void printMutableList (generation *gen); +void printMutableList (bdescr *bd); #endif hunk ./rts/sm/GCUtils.h 47 +// returns True if the given generation index belongs to +// another GC thread. +INLINE_HEADER rtsBool isNonLocalGen (generation *gen) +{ + return (gen->is_local && gen->cap != gct->index); +} + +INLINE_HEADER rtsBool isNonLocalGenIx (nat ix) +{ + return isNonLocalGen(&all_generations[ix]); +} + // Version of recordMutableGen for use during GC. This uses the // mutable lists attached to the current gc_thread structure, which // are the same as the mutable lists on the Capability. hunk ./rts/sm/GCUtils.h 70 bd = gct->mut_lists[gen_no]; if (bd->free >= bd->start + BLOCK_SIZE_W) { bdescr *new_bd; - new_bd = allocBlock_sync(); + new_bd = allocBlock_sync(); new_bd->link = bd; bd = new_bd; gct->mut_lists[gen_no] = bd; addfile ./rts/sm/Globalise.c hunk ./rts/sm/Globalise.c 1 +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2010 + * + * Globalise data from the local heap to the global heap. + * + * Documentation on the architecture of the Garbage Collector can be + * found in the online commentary: + * + * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC + * + * ---------------------------------------------------------------------------*/ + +#include "PosixSource.h" +#include "Rts.h" + +#include "Evac.h" +#include "GC.h" +#include "GCThread.h" +#include "GCTDecl.h" +#include "GCUtils.h" +#include "Capability.h" +#include "Globalise.h" +#include "Prelude.h" +#include "Storage.h" +#include "Trace.h" +#include "Apply.h" +#include "Printer.h" +#include "Updates.h" + +static REGPARM1 GNUC_ATTR_HOT void globalise_evac (StgClosure **p); +static void globalise_scavenge (void); +STATIC_INLINE void globalise_large (StgPtr p); +static void globalise_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, + nat size); + +rtsBool +globalise_wrt (Capability *cap USED_IF_THREADS, + StgClosure *parent, StgClosure **root) +{ + bdescr *bd; + bd = Bdescr((StgPtr)parent); + if (bd->gen_ix >= global_gen_ix) { + return globalise(cap,root); + } + return rtsTrue; +} + +rtsBool +globalise (Capability *cap USED_IF_THREADS, StgClosure **root) +{ + gc_thread *saved_gct; + generation *gen; + + // necessary if we stole a callee-saves register for gct: + saved_gct = gct; + SET_GCT(gc_threads[cap->no]); + + gct->gc_type = GC_LOCAL; + + gen = &old_generations[1]; + + prepare_gen_workspace(gen->ix); + + globalise_evac(root); + globalise_scavenge(); + + if (gct->failed_to_evac) { + barf("globalise: could not globalise the whole structure"); + return rtsFalse; + } + + SET_GCT(saved_gct); + return rtsTrue; +} + +StgClosure * +globalise_ (Capability *cap USED_IF_THREADS, StgClosure *root) +{ + StgClosure *tmp = root; + rtsBool r; + + r = globalise(cap, &tmp); + + if (r) { + return tmp; + } else { + return NULL; + } +} + +// Use globalise_TSO() if you want to migrate a thread from one +// Capability to another. It ensures that all the fields of the TSO +// are published, whereas the ordinary globalise() only ensures that +// the StgTSO structure itself is published. + +StgTSO * +globalise_TSO (Capability *cap USED_IF_THREADS, StgTSO *tso) +{ + gc_thread *saved_gct; + generation *gen; + + // necessary if we stole a callee-saves register for gct: + saved_gct = gct; + SET_GCT(gc_threads[cap->no]); + + gct->gc_type = GC_LOCAL; + + gen = &old_generations[1]; + + // ToDo: inline/tidyup + prepare_gen_workspace(gen->ix); + + globalise_evac((StgClosure**)&tso); + tso->flags |= TSO_GLOBALISE; // reset by globalise_scavenge_TSO(); + globalise_scavenge(); + + if (gct->failed_to_evac) { + barf("globalise: could not globalise the whole structure"); + } + + SET_GCT(saved_gct); + return tso; +} + + +STATIC_INLINE void +globalise_large (StgPtr p) +{ + bdescr *bd; + generation *gen, *new_gen; + nat gen_ix; + gen_workspace *ws; + + // XXX do not globalise mutable arrays. It's feasible in + // principle, but hard in practice until we sort out the mutable + // array write barrier properly (e.g. it should publish the + // pointer, and we probably don't need card marking). + switch (get_itbl((StgClosure*)p)->type) { + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + gct->failed_to_evac = rtsTrue; + return; + } + + bd = Bdescr(p); + gen_ix = bd->gen_ix; + gen = bd->gen; + + ACQUIRE_SPIN_LOCK(&gen->sync); + + // remove from large_object list + if (bd->u.back) { + bd->u.back->link = bd->link; + } else { // first object in the list + gen->large_objects = bd->link; + } + if (bd->link) { + bd->link->u.back = bd->u.back; + } + gen->n_large_blocks -= bd->blocks; + + // link it on to the evacuated large object list of the destination gen + new_gen = &all_generations[global_gen_ix]; + ws = &gct->gens[global_gen_ix]; + + bd->flags |= BF_EVACUATED; + initBdescr(bd, new_gen, new_gen->to); + + // If this is a block of pinned objects, we don't have to scan + // these objects, because they aren't allowed to contain any + // pointers. For these blocks, we skip the scavenge stage and put + // them straight on the large_objects list. + if (bd->flags & BF_PINNED) { + ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS); + if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); } + dbl_link_onto(bd, &new_gen->large_objects); + new_gen->n_large_blocks += bd->blocks; + if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync); } + } else { + bd->link = ws->todo_large_objects; + ws->todo_large_objects = bd; + } + + RELEASE_SPIN_LOCK(&gen->sync); +} + + +STATIC_INLINE StgPtr +alloc_for_copy (nat size, nat gen_ix) +{ + StgPtr to; + gen_workspace *ws; + + ws = &gct->gens[gen_ix]; + + /* chain a new block onto the to-space for the destination gen if + * necessary. + */ + to = ws->todo_free; + ws->todo_free += size; + if (ws->todo_free > ws->todo_lim) { + to = todo_block_full(size, ws); + } + ASSERT(ws->todo_free >= ws->todo_bd->free && ws->todo_free <= ws->todo_lim); + + return to; +} + +STATIC_INLINE StgPtr +alloc_for_copy_global (nat size) +{ + return alloc_for_copy(size, global_gen_ix); +} + +STATIC_INLINE GNUC_ATTR_HOT void +copy_closure(StgPtr from, StgPtr to, const StgInfoTable *info, nat size) +{ + nat i; + + to[0] = (W_)info; + for (i = 1; i < size; i++) { // unroll for small i + to[i] = from[i]; + } +} + +STATIC_INLINE GNUC_ATTR_HOT void +copy_tag(StgClosure **p, const StgInfoTable *info, + StgClosure *src, nat size, StgWord tag) +{ + StgPtr to; + + to = alloc_for_copy_global(size); + copy_closure((StgPtr)src, to, info, size); + src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to); + *p = TAG_CLOSURE(tag,(StgClosure*)to); +} + +STATIC_INLINE GNUC_ATTR_HOT void +copy_IND(StgClosure **p, const StgInfoTable *info, + StgClosure *src, nat size) +{ + StgPtr to; + + to = alloc_for_copy_global(size); + copy_closure((StgPtr)src, to, info, size); +#ifdef DEBUG + // can't use DEBUG_FILL_SLOP, because it doesn't work for PAPs + ZERO_SLOP((StgPtr)src + sizeofW(StgInd), size - sizeofW(StgInd)); +#endif + src->header.info = &stg_IND_info; + ((StgInd *)src)->indirectee = (StgClosure *)to; + *p = (StgClosure*)to; +} + +STATIC_INLINE GNUC_ATTR_HOT void +nocopy_IND_LOCAL(StgClosure **p, StgClosure *src, nat tag) +{ + StgPtr to; + + to = alloc_for_copy_global(sizeofW(StgInd)); + ((StgInd *)to)->header.info = stg_IND_LOCAL_tbl[gct->index]; + ((StgInd *)to)->indirectee = TAG_CLOSURE(tag,src); + *p = (StgClosure*)to; + recordMutableGen_GC((StgClosure*)to, global_gen_no); +} + +STATIC_INLINE void +copy_part(StgClosure **p, StgClosure *src, nat size_to_reserve, + nat size_to_copy) +{ + StgPtr to; + + to = alloc_for_copy_global(size_to_reserve); + *p = (StgClosure *)to; + copy_closure((StgPtr)src,to,src->header.info,size_to_copy); +} + +REGPARM1 GNUC_ATTR_HOT void +globalise_evac (StgClosure **p) +{ + bdescr *bd = NULL; + StgClosure *q; + const StgInfoTable *info; + StgWord tag; + + q = *p; + +loop: + /* The tag and the pointer are split, to be merged after evacing */ + tag = GET_CLOSURE_TAG(q); + q = UNTAG_CLOSURE(q); + + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); + + if (!HEAP_ALLOCED(q)) return; // already global + + bd = Bdescr((P_)q); + + if (bd->gen_no > 0) return; + + // make sure this isn't a pointer into the wrong local heap + // FALSE: we might be doing a global GC, and got here via + // globalise_mut_lists. + // ASSERT(bd->gen_ix == gct->index); + + if (bd->flags & BF_LARGE) { + info = get_itbl(q); + if (info->type == TSO && + ((StgTSO *)q)->what_next == ThreadRelocated) { + q = (StgClosure *)((StgTSO *)q)->_link; + *p = q; + goto loop; + } + globalise_large((P_)q); + return; + } + + info = q->header.info; + if (IS_FORWARDING_PTR(info)) + { + StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info); + *p = TAG_CLOSURE(tag,e); + return; + } + + if (INFO_PTR_TO_STRUCT(info)->flags & HAS_UNLIFTED_FIELDS) { + // These are the ones we can't promote because they point to + // primitive objects. + nocopy_IND_LOCAL(p,q,tag); + return; + } + + switch (INFO_PTR_TO_STRUCT(info)->type) { + + case WHITEHOLE: + goto loop; + + // For ints and chars of low value, save space by replacing references to + // these with closures with references to common, shared ones in the RTS. + // + // * Except when compiling into Windows DLLs which don't support cross-package + // data references very well. + // + case CONSTR_0_1: + { +#if defined(__PIC__) && defined(mingw32_HOST_OS) + copy_tag(p,info,q,sizeofW(StgHeader)+1,tag); +#else + StgWord w = (StgWord)q->payload[0]; + if (info == Czh_con_info && + // unsigned, so always true: (StgChar)w >= MIN_CHARLIKE && + (StgChar)w <= MAX_CHARLIKE) { + *p = TAG_CLOSURE(tag, + (StgClosure *)CHARLIKE_CLOSURE((StgChar)w) + ); + } + else if (info == Izh_con_info && + (StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) { + *p = TAG_CLOSURE(tag, + (StgClosure *)INTLIKE_CLOSURE((StgInt)w) + ); + } + else { + copy_tag(p,info,q,sizeofW(StgHeader)+1,tag); + } +#endif + return; + } + + case FUN_0_1: + case FUN_1_0: + case CONSTR_1_0: + copy_tag(p,info,q,sizeofW(StgHeader)+1,tag); + return; + + case THUNK_1_0: + case THUNK_0_1: + copy_IND(p,info,q,sizeofW(StgThunk)+1); + return; + + case THUNK_1_1: + case THUNK_2_0: + case THUNK_0_2: + copy_IND(p,info,q,sizeofW(StgThunk)+2); + return; + + case FUN_1_1: + case FUN_2_0: + case FUN_0_2: + case CONSTR_1_1: + case CONSTR_2_0: + case CONSTR_0_2: + copy_tag(p,info,q,sizeofW(StgHeader)+2,tag); + return; + + case THUNK: + copy_IND(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info))); + return; + + case FUN: + case CONSTR: + copy_tag(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),tag); + return; + + case THUNK_SELECTOR: + copy_IND(p,info,q,THUNK_SELECTOR_sizeW()); + return; + + case IND: + // follow chains of indirections, don't evacuate them + q = ((StgInd*)q)->indirectee; + *p = q; + goto loop; + + case PAP: + copy_IND(p,info,q,pap_sizeW((StgPAP*)q)); + return; + + case AP: + copy_IND(p,info,q,ap_sizeW((StgAP*)q)); + return; + + case AP_STACK: + copy_IND(p,info,q,ap_stack_sizeW((StgAP_STACK*)q)); + return; + + case TSO: + { + StgTSO *tso = (StgTSO *)q; + + /* Deal with relocated TSOs + */ + if (tso->what_next == ThreadRelocated) { + q = (StgClosure *)tso->_link; + *p = q; + goto loop; + } + + /* To evacuate a small TSO, we need to adjust the stack pointer + */ + { + StgTSO *new_tso; + StgPtr r, s; + + copy_part(p,(StgClosure *)tso, tso_sizeW(tso), + sizeofW(StgTSO)); + new_tso = (StgTSO *)*p; + move_TSO(tso, new_tso); + for (r = tso->sp, s = new_tso->sp; + r < tso->stack+tso->stack_size;) { + *s++ = *r++; + } + + // Link the new TSO onto the generation's threads list + ACQUIRE_SM_LOCK; + new_tso->global_link = global_gen->threads; + global_gen->threads = new_tso; + RELEASE_SM_LOCK; + + // mark the old TSO as relocated + tso->what_next = ThreadRelocated; + tso->_link = new_tso; + return; + } + } + + case BLACKHOLE: + // don't promote: we can't move BLACKHOLEs, because the update + // frame points to them, so leave an IND_LOCAL instead + nocopy_IND_LOCAL(p,q,0); + return; + + case ARR_WORDS: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + case MVAR_CLEAN: + case MVAR_DIRTY: + case BCO: + case WEAK: + case PRIM: + case MUT_PRIM: + case BLOCKING_QUEUE: + gct->failed_to_evac = rtsTrue; + return; + + case IND_PERM: + case TREC_CHUNK: + barf("globalise_evac: cannot globalise type %d", (int)(INFO_PTR_TO_STRUCT(info)->type)); + + default: + barf("globalise_evac: strange closure type %d", (int)(INFO_PTR_TO_STRUCT(info)->type)); + } + + barf("globalise_evac"); +} + +/* ----------------------------------------------------------------------------- + Blocks of function args occur on the stack (at the top) and + in PAPs. + -------------------------------------------------------------------------- */ + +STATIC_INLINE StgPtr +globalise_arg_block (StgFunInfoTable *fun_info, StgClosure **args) +{ + StgPtr p; + StgWord bitmap; + nat size; + + p = (StgPtr)args; + switch (fun_info->f.fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); + size = BITMAP_SIZE(fun_info->f.b.bitmap); + goto small_bitmap; + case ARG_GEN_BIG: + size = GET_FUN_LARGE_BITMAP(fun_info)->size; + globalise_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size); + p += size; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); + size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]); + small_bitmap: + while (size > 0) { + if ((bitmap & 1) == 0) { + globalise_evac((StgClosure **)p); + } + p++; + bitmap = bitmap >> 1; + size--; + } + break; + } + return p; +} + +STATIC_INLINE GNUC_ATTR_HOT StgPtr +globalise_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size) +{ + StgPtr p; + StgWord bitmap; + StgFunInfoTable *fun_info; + + fun_info = get_fun_itbl(UNTAG_CLOSURE(fun)); + ASSERT(fun_info->i.type != PAP); + p = (StgPtr)payload; + + switch (fun_info->f.fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); + goto small_bitmap; + case ARG_GEN_BIG: + globalise_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size); + p += size; + break; + case ARG_BCO: + globalise_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), size); + p += size; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); + small_bitmap: + while (size > 0) { + if ((bitmap & 1) == 0) { + globalise_evac((StgClosure **)p); + } + p++; + bitmap = bitmap >> 1; + size--; + } + break; + } + return p; +} + +STATIC_INLINE GNUC_ATTR_HOT StgPtr +globalise_PAP (StgPAP *pap) +{ + globalise_evac(&pap->fun); + return globalise_PAP_payload (pap->fun, pap->payload, pap->n_args); +} + +STATIC_INLINE StgPtr +globalise_AP (StgAP *ap) +{ + globalise_evac(&ap->fun); + return globalise_PAP_payload (ap->fun, ap->payload, ap->n_args); +} + +/* ----------------------------------------------------------------------------- + scavenge a chunk of memory described by a bitmap + -------------------------------------------------------------------------- */ + +static void +globalise_large_bitmap( StgPtr p, StgLargeBitmap *large_bitmap, nat size ) +{ + nat i, b; + StgWord bitmap; + + b = 0; + bitmap = large_bitmap->bitmap[b]; + for (i = 0; i < size; ) { + if ((bitmap & 1) == 0) { + globalise_evac((StgClosure **)p); + } + i++; + p++; + if (i % BITS_IN(W_) == 0) { + b++; + bitmap = large_bitmap->bitmap[b]; + } else { + bitmap = bitmap >> 1; + } + } +} + +STATIC_INLINE StgPtr +globalise_small_bitmap (StgPtr p, nat size, StgWord bitmap) +{ + while (size > 0) { + if ((bitmap & 1) == 0) { + globalise_evac((StgClosure **)p); + } + p++; + bitmap = bitmap >> 1; + size--; + } + return p; +} + +/* ----------------------------------------------------------------------------- + globalise_stack walks over a section of stack and evacuates all the + objects pointed to by it. We can use the same code for walking + AP_STACK_UPDs, since these are just sections of copied stack. + -------------------------------------------------------------------------- */ + +static void +globalise_stack(StgPtr p, StgPtr stack_end) +{ + const StgRetInfoTable* info; + StgWord bitmap; + nat size; + + /* + * Each time around this loop, we are looking at a chunk of stack + * that starts with an activation record. + */ + while (p < stack_end) { + info = get_ret_itbl((StgClosure *)p); + + switch (info->i.type) { + + case UPDATE_FRAME: + { + nat type; + const StgInfoTable *i; + StgClosure *updatee; + + // see comment about UPDATE_FRAME in scavenge_stack(), which + // also applies here. + updatee = ((StgUpdateFrame *)p)->updatee; + i = updatee->header.info; + ASSERT(!IS_FORWARDING_PTR(i)); + type = INFO_PTR_TO_STRUCT(i)->type; + switch (type) { + // globalise_evac() won't move a BLACKHOLE, but we really + // want to move it here since we're globalising the thread + // that points to it. + case BLACKHOLE: + if (Bdescr((P_)updatee)->gen_ix < global_gen_ix) { + copy_IND(&((StgUpdateFrame *)p)->updatee,i,updatee, + BLACKHOLE_sizeW()); + } + break; + + default: + barf("globalise_stack: %d", type); + } + ASSERT(GET_CLOSURE_TAG(((StgUpdateFrame *)p)->updatee) == 0); + p += sizeofW(StgUpdateFrame); + continue; + } + + // small bitmap (< 32 entries, or 64 on a 64-bit machine) + case CATCH_STM_FRAME: + case CATCH_RETRY_FRAME: + case ATOMICALLY_FRAME: + case STOP_FRAME: + case CATCH_FRAME: + case RET_SMALL: + bitmap = BITMAP_BITS(info->i.layout.bitmap); + size = BITMAP_SIZE(info->i.layout.bitmap); + // NOTE: the payload starts immediately after the info-ptr, we + // don't have an StgHeader in the same sense as a heap closure. + p++; + p = globalise_small_bitmap(p, size, bitmap); + continue; + + case RET_BCO: { + StgBCO *bco; + nat size; + + p++; + globalise_evac((StgClosure **)p); + bco = (StgBCO *)*p; + p++; + size = BCO_BITMAP_SIZE(bco); + globalise_large_bitmap(p, BCO_BITMAP(bco), size); + p += size; + continue; + } + + // large bitmap (> 32 entries, or > 64 on a 64-bit machine) + case RET_BIG: + { + nat size; + + size = GET_LARGE_BITMAP(&info->i)->size; + p++; + globalise_large_bitmap(p, GET_LARGE_BITMAP(&info->i), size); + p += size; + continue; + } + + // Dynamic bitmap: the mask is stored on the stack, and + // there are a number of non-pointers followed by a number + // of pointers above the bitmapped area. (see StgMacros.h, + // HEAP_CHK_GEN). + case RET_DYN: + { + StgWord dyn; + dyn = ((StgRetDyn *)p)->liveness; + + // traverse the bitmap first + bitmap = RET_DYN_LIVENESS(dyn); + p = (P_)&((StgRetDyn *)p)->payload[0]; + size = RET_DYN_BITMAP_SIZE; + p = globalise_small_bitmap(p, size, bitmap); + + // skip over the non-ptr words + p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE; + + // follow the ptr words + for (size = RET_DYN_PTRS(dyn); size > 0; size--) { + globalise_evac((StgClosure **)p); + p++; + } + continue; + } + + case RET_FUN: + { + StgRetFun *ret_fun = (StgRetFun *)p; + StgFunInfoTable *fun_info; + + globalise_evac(&ret_fun->fun); + fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun)); + p = globalise_arg_block(fun_info, ret_fun->payload); + continue; + } + + default: + barf("globalise_stack: weird activation record found on stack: %d", (int)(info->i.type)); + } + } +} + +static void +globalise_scavengeTSO (StgTSO *tso) +{ + // if we just globalised this TSO, then we want to mark + // it dirty and put it on the mutable list. However, we don't + // have to globalise any of its fields, because the contents of a + // TSO is private to the Capability and is allowed to contain + // global->local pointers. + + if ((tso->flags & TSO_GLOBALISE) == 0) { + tso->dirty = 1; + gct->failed_to_evac = rtsTrue; + } + else + { + debugTrace(DEBUG_gc,"globalising thread %d",(int)tso->id); + + // update the pointer from the Task. + if (tso->bound != NULL) { + tso->bound->tso = tso; + } + + if ( tso->why_blocked == BlockedOnMVar + || tso->why_blocked == BlockedOnBlackHole + || tso->why_blocked == BlockedOnMsgThrowTo + || tso->why_blocked == NotBlocked + ) { + globalise_evac(&tso->block_info.closure); + } + globalise_evac((StgClosure **)&tso->blocked_exceptions); + + // scavange current transaction record + globalise_evac((StgClosure **)&tso->trec); + + // scavenge this thread's stack + globalise_stack(tso->sp, &(tso->stack[tso->stack_size])); + + tso->dirty = gct->failed_to_evac; + + globalise_evac((StgClosure **)&tso->_link); + if ( tso->why_blocked == BlockedOnMVar + || tso->why_blocked == BlockedOnBlackHole + || tso->why_blocked == BlockedOnMsgThrowTo + || tso->why_blocked == NotBlocked + ) { + globalise_evac(&tso->block_info.closure); + } + + if (tso->dirty == 0 && gct->failed_to_evac) { + tso->flags |= TSO_LINK_DIRTY; + } else { + tso->flags &= ~TSO_LINK_DIRTY; + } + + tso->flags &= ~TSO_GLOBALISE; + + // gct->failed_to_evac indicates whether we managed to globalise it + } +} + + +static StgPtr globalise_scavenge_mut_arr_ptrs (StgMutArrPtrs *a) +{ + lnat m; + StgPtr p, q; + + for (m = 0; m < mutArrPtrsCards(a->ptrs); m++) + { + if (*mutArrPtrsCard(a,m) != 0) { + p = (StgPtr)&a->payload[m << MUT_ARR_PTRS_CARD_BITS]; + q = stg_min(p + (1 << MUT_ARR_PTRS_CARD_BITS), + (StgPtr)&a->payload[a->ptrs]); + for (; p < q; p++) { + globalise_evac((StgClosure**)p); + } + } + } + + return (StgPtr)a + mut_arr_ptrs_sizeW(a); +} + + +static StgPtr +globalise_scavenge_prim (StgPtr p, nat type) +{ + switch (type) { + + case ARR_WORDS: + // nothing to do; these can happen if a large ARR_WORDS gets + // globalised. + return p += arr_words_sizeW((StgArrWords *)p); + + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + return globalise_scavenge_mut_arr_ptrs((StgMutArrPtrs *)p); + + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + return globalise_scavenge_mut_arr_ptrs((StgMutArrPtrs *)p); + + case TREC_CHUNK: + // shouldn't happen: we do not globalise TREC_CHUNKS, + // and they stay on the mut list. + case BLOCKING_QUEUE: + // shouldn't happen: we refuse to globalise BLOCKING_QUEUE, + // and if we end up with one on the mut list after GC then it + // is dealt with by globalise_mut_list. + default: + barf("globalise_scavenge_one: unimplemented/strange closure type %d @ %p", + type, p); + } +} + +STATIC_INLINE GNUC_ATTR_HOT StgPtr +globalise_scavenge_one (StgPtr p) +{ + const StgInfoTable *info; + + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + info = get_itbl((StgClosure *)p); + + switch (info->type) { + + case FUN_2_0: + globalise_evac(&((StgClosure *)p)->payload[1]); + globalise_evac(&((StgClosure *)p)->payload[0]); + return p + sizeofW(StgHeader) + 2; + + case THUNK_2_0: + globalise_evac(&((StgThunk *)p)->payload[1]); + globalise_evac(&((StgThunk *)p)->payload[0]); + return p + sizeofW(StgThunk) + 2; + + case CONSTR_2_0: + globalise_evac(&((StgClosure *)p)->payload[1]); + globalise_evac(&((StgClosure *)p)->payload[0]); + return p + sizeofW(StgHeader) + 2; + + case THUNK_1_0: + globalise_evac(&((StgThunk *)p)->payload[0]); + return p + sizeofW(StgThunk) + 1; + + case FUN_1_0: + case CONSTR_1_0: + globalise_evac(&((StgClosure *)p)->payload[0]); + return p + sizeofW(StgHeader) + 1; + + case THUNK_0_1: + return p + sizeofW(StgThunk) + 1; + + case FUN_0_1: + case CONSTR_0_1: + return p + sizeofW(StgHeader) + 1; + + case THUNK_0_2: + return p + sizeofW(StgThunk) + 2; + + case FUN_0_2: + case CONSTR_0_2: + return p + sizeofW(StgHeader) + 2; + + case THUNK_1_1: + globalise_evac(&((StgThunk *)p)->payload[0]); + return p + sizeofW(StgThunk) + 2; + + case FUN_1_1: + case CONSTR_1_1: + globalise_evac(&((StgClosure *)p)->payload[0]); + return p + sizeofW(StgHeader) + 2; + + case THUNK: + { + StgPtr end; + + end = (P_)((StgThunk *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgThunk *)p)->payload; p < end; p++) { + globalise_evac((StgClosure **)p); + } + return p + info->layout.payload.nptrs; + } + + case FUN: + case CONSTR: + case BLACKHOLE: + case PRIM: + case MUT_PRIM: + case MVAR_CLEAN: + case MVAR_DIRTY: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + { + StgPtr end; + + end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgClosure *)p)->payload; p < end; p++) { + globalise_evac((StgClosure **)p); + } + return p + info->layout.payload.nptrs; + } + + case WEAK: + { + StgWeak *w = (StgWeak *)p; + globalise_evac(&w->cfinalizer); + globalise_evac(&w->key); + globalise_evac(&w->value); + globalise_evac(&w->finalizer); + // link field already points into this gen + return p + sizeofW(StgWeak); + } + + case THUNK_SELECTOR: + { + StgSelector *s = (StgSelector *)p; + globalise_evac(&s->selectee); + return p + THUNK_SELECTOR_sizeW(); + } + + // A chunk of stack saved in a heap object + case AP_STACK: + { + StgAP_STACK *ap = (StgAP_STACK *)p; + + globalise_evac(&ap->fun); + globalise_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size); + return (StgPtr)ap->payload + ap->size; + } + + case PAP: + return globalise_PAP((StgPAP *)p); + + case AP: + return globalise_AP((StgAP *)p); + + case TSO: + { + StgTSO *tso = (StgTSO *)p; + globalise_scavengeTSO(tso); + return p + tso_sizeW(tso); + } + + // an IND_LOCAL is "already globalised" + case IND_LOCAL: + return p + sizeofW(StgInd); + + case IND: + case IND_STATIC: + globalise_evac(&((StgInd*)p)->indirectee); + return p + sizeofW(StgInd); + + default: + return globalise_scavenge_prim(p, info->type); + } +} + + +static void +globalise_scavenge_large (gen_workspace *ws) +{ + bdescr *bd; + StgPtr p; + + bd = ws->todo_large_objects; + + for (; bd != NULL; bd = ws->todo_large_objects) { + + // take this object *off* the large objects list and put it on + // the scavenged large objects list. This is so that we can + // treat new_large_objects as a stack and push new objects on + // the front when evacuating. + ws->todo_large_objects = bd->link; + + dbl_link_onto(bd, &ws->gen->large_objects); + ws->gen->n_large_blocks += bd->blocks; + + p = bd->start; + globalise_scavenge_one(p); + if (gct->failed_to_evac) { + gct->failed_to_evac = rtsFalse; + if (ws->gen->no > 0) { + recordMutableGen_GC((StgClosure *)p, ws->gen->no); + } + } + // stats + gct->scanned += closure_sizeW((StgClosure*)p); + } +} + + +static GNUC_ATTR_HOT void +globalise_scavenge_block (bdescr *bd) +{ + StgPtr p, q; + gen_workspace *ws; + + debugTrace(DEBUG_gc, "globalise: scavenging block %p (gen %d) @ %p", + bd->start, bd->gen_no, bd->u.scan); + + gct->scan_bd = bd; + ws = &gct->gens[bd->gen_ix]; + + p = bd->u.scan; + + // we might be evacuating into the very block that we're + // scavenging, so we have to check the real bd->free pointer each + // time around the loop. + while (p < bd->free || (bd == ws->todo_bd && p < ws->todo_free)) { + ASSERT(bd->link == NULL); + q = globalise_scavenge_one(p); + + if (gct->failed_to_evac) { + gct->failed_to_evac = rtsFalse; + recordMutableGen_GC((StgClosure*)p, ws->gen->no); + + switch (get_itbl((StgClosure*)p)->type) { + case TSO: + case IND_LOCAL: + case BLOCKING_QUEUE: // allowed to point to local BLACKHOLEs + debugTrace(DEBUG_gc, "not demoting %p", p); + break; + default: + barf("globalise_scavenge_block: failed_to_evac"); + } + } + + p = q; + } + + if (p > bd->free) { + gct->copied += ws->todo_free - bd->free; + bd->free = p; + } + + debugTrace(DEBUG_gc, " scavenged %ld bytes", + (unsigned long)((bd->free - bd->u.scan) * sizeof(W_))); + + // update stats: this is a block that has been scavenged + gct->scanned += bd->free - bd->u.scan; + bd->u.scan = bd->free; + + if (bd != ws->todo_bd) { + // we're not going to evac any more objects into + // this block, so push it now. + push_scanned_block(bd, ws); + } + + gct->scan_bd = NULL; +} + + +static void +globalise_scavenge (void) +{ + gen_workspace *ws; + bdescr *bd; + + do { + ws = &gct->gens[global_gen_ix]; // XXX assumes structure of generations + + gct->scan_bd = NULL; + + // If we have a scan block with some work to do, + // scavenge everything up to the free pointer. + if (ws->todo_bd->u.scan < ws->todo_free) + { + globalise_scavenge_block(ws->todo_bd); + continue; + } + + // If we have any large objects to scavenge, do them now. + if (ws->todo_large_objects) { + globalise_scavenge_large(ws); + continue; + } + + if ((bd = grab_local_todo_block(ws)) != NULL) { + globalise_scavenge_block(bd); + continue; + } + + return; + + } while (1); +} + +/* ----------------------------------------------------------------------------- + Mutable lists + -------------------------------------------------------------------------- */ + +static void +globalise_mut_list (Capability *cap USED_IF_DEBUG, bdescr *mut_list, nat g) +{ + bdescr *bd; + StgPtr p, r; + StgClosure *q; + +// gct->gc_type = GC_LOCAL; + gct->failed_to_evac = rtsFalse; + + for (bd = mut_list; bd != NULL; bd = bd->link) { + + for (p = bd->start; p < bd->free; p++) { + + q = (StgClosure *)*p; + + // we don't care about TSOs or IND_LOCALs: their + // contents are private to this Capability + switch (get_itbl((StgClosure*)q)->type) { + + case TSO: + // put it on the correct mutable list; during parallel + // GC it may have ended up on the wrong one. + recordMutableCap(((StgTSO*)q)->cap, q, g); + continue; + + case IND_LOCAL: + // XXX: this assertion may trigger? + ASSERT(Bdescr((P_)((StgInd*)q)->indirectee)->gen_ix == cap->no); + recordMutableGen_GC(q, g); + continue; + + case BLOCKING_QUEUE: { + bdescr *bd; + StgBlockingQueue *bq = (StgBlockingQueue *)q; + bd = Bdescr((P_)bq->bh); + if (bd->gen->is_local) { + recordMutableCap(&capabilities[bd->gen->cap], q, g); + } + continue; + } + + case MUT_PRIM: + if (q->header.info == &stg_TREC_HEADER_info) { + recordMutableCap(&capabilities[((StgTRecHeader*)q)->cap_no], + q, g); + continue; + } else { + goto other_closure; + } + + case TREC_CHUNK: + recordMutableCap(&capabilities[((StgTRecChunk*)q)->cap_no], + q, g); + continue; + + default: + other_closure: + r = globalise_scavenge_one((StgPtr)q); + if (gct->failed_to_evac) { + // cannot happen, because we just GC'd and + // promoted all the primitive objects, so + // globalisation should never fail at this point. + barf("globalise_mut_lists: failed_to_evac"); + } + } + } + } +} + + +// called after GC to clean up any global->local pointers that arose. +// c.f. scavenge_capability_mut_lists() +void +globalise_capability_mut_lists (Capability *cap) +{ + nat g; + + for (g = RtsFlags.GcFlags.generations-1; g > 0; g--) { + globalise_mut_list(cap, cap->saved_mut_lists[g], g); + freeChain_sync(cap->saved_mut_lists[g]); + cap->saved_mut_lists[g] = NULL; + } + + globalise_scavenge(); +} + +/* ----------------------------------------------------------------------------- + Publishing + -------------------------------------------------------------------------- */ + +StgClosure * +publish (Capability *cap, StgClosure *p) +{ + StgInd *i; + + if (HEAP_ALLOCED(p) && Bdescr((P_)p)->gen_ix < global_gen_ix) { + // ToDo: inline part of allocateInGen, like alloc_for_copy + i = (StgInd*) allocateInGen(cap, global_gen_ix, sizeofW(StgInd)); + SET_HDR(i, stg_IND_LOCAL_tbl[cap->no], CCS_SYSTEM); + i->indirectee = p; + recordMutableCap(cap,(StgClosure*)i,global_gen_no); + return (StgClosure *)i; + } else { + return p; + } +} + +StgClosure * +publish_gen (Capability *cap, StgClosure *p, generation *gen) +{ + StgInd *i; + + // ToDo: inline part of allocateInGen, like alloc_for_copy + i = (StgInd*) allocateInGen(cap, gen->ix, sizeofW(StgInd)); + SET_HDR(i, stg_IND_LOCAL_tbl[cap->no], CCS_SYSTEM); + i->indirectee = p; + recordMutableCap(cap,(StgClosure*)i,gen->no); + return (StgClosure *)i; +} addfile ./rts/sm/Globalise.h hunk ./rts/sm/Globalise.h 1 +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2010 + * + * Globalise data from the local heap to the global heap. + * + * Documentation on the architecture of the Garbage Collector can be + * found in the online commentary: + * + * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC + * + * ---------------------------------------------------------------------------*/ + +#ifndef SM_GLOBALISE_H +#define SM_GLOBALISE_H + +#include "BeginPrivate.h" + +StgClosure * publish (Capability *cap, StgClosure *p); +StgClosure * publish_gen (Capability *cap, StgClosure *p, generation *gen); + +rtsBool globalise (Capability *cap, StgClosure **root); +StgClosure * globalise_ (Capability *cap, StgClosure *root); +rtsBool globalise_wrt (Capability *cap, StgClosure *parent, + StgClosure **root); + +void globalise_capability_mut_lists (Capability *cap); + +// for use when migrating a TSO +StgTSO * globalise_TSO (Capability *cap, StgTSO *tso); + +#include "EndPrivate.h" + +#endif /* SM_GLOBALISE_H */ hunk ./rts/sm/MarkWeak.c 3 /* ----------------------------------------------------------------------------- * - * (c) The GHC Team 1998-2008 + * (c) The GHC Team 1998-2010 * * Weak pointers and weak-like things in the GC * hunk ./rts/sm/MarkWeak.c 20 #include "MarkWeak.h" #include "GC.h" #include "GCThread.h" +#include "GCTDecl.h" +#include "GCUtils.h" #include "Evac.h" #include "Trace.h" #include "Schedule.h" hunk ./rts/sm/MarkWeak.c 73 -------------------------------------------------------------------------- */ -/* Which stage of processing various kinds of weak pointer are we at? - * (see traverse_weak_ptr_list() below for discussion). - */ -typedef enum { WeakPtrs, WeakThreads, WeakDone } WeakStage; -static WeakStage weak_stage; - -/* Weak pointers - */ -StgWeak *old_weak_ptr_list; // also pending finaliser list - -// List of threads found to be unreachable -StgTSO *resurrected_threads; - static void resurrectUnreachableThreads (generation *gen); static rtsBool tidyThreadList (generation *gen); hunk ./rts/sm/MarkWeak.c 79 void initWeakForGC(void) { - old_weak_ptr_list = weak_ptr_list; - weak_ptr_list = NULL; - weak_stage = WeakPtrs; - resurrected_threads = END_TSO_QUEUE; + gct->old_weak_ptrs = NULL; + gct->weak_stage = WeakPtrs; + gct->resurrected_threads = END_TSO_QUEUE; } rtsBool hunk ./rts/sm/MarkWeak.c 88 traverseWeakPtrList(void) { StgWeak *w, **last_w, *next_w; - StgClosure *new; rtsBool flag = rtsFalse; hunk ./rts/sm/MarkWeak.c 89 + generation *gen; + bdescr *bd; const StgInfoTable *info; hunk ./rts/sm/MarkWeak.c 92 + StgClosure *new; + nat g; hunk ./rts/sm/MarkWeak.c 95 - switch (weak_stage) { + switch (gct->weak_stage) { case WeakDone: return rtsFalse; hunk ./rts/sm/MarkWeak.c 101 case WeakPtrs: - /* doesn't matter where we evacuate values/finalizers to, since - * these pointers are treated as roots (iff the keys are alive). - */ - gct->evac_gen = 0; - - last_w = &old_weak_ptr_list; - for (w = old_weak_ptr_list; w != NULL; w = next_w) { - - /* There might be a DEAD_WEAK on the list if finalizeWeak# was - * called on a live weak pointer object. Just remove it. - */ - if (w->header.info == &stg_DEAD_WEAK_info) { - next_w = ((StgDeadWeak *)w)->link; - *last_w = next_w; - continue; - } - - info = get_itbl(w); - switch (info->type) { - - case WEAK: - /* Now, check whether the key is reachable. - */ - new = isAlive(w->key); - if (new != NULL) { - w->key = new; - // evacuate the value and finalizer - evacuate(&w->value); - evacuate(&w->finalizer); - // remove this weak ptr from the old_weak_ptr list - *last_w = w->link; - // and put it on the new weak ptr list - next_w = w->link; - w->link = weak_ptr_list; - weak_ptr_list = w; - flag = rtsTrue; hunk ./rts/sm/MarkWeak.c 102 - debugTrace(DEBUG_weak, - "weak pointer still alive at %p -> %p", - w, w->key); - continue; - } - else { - last_w = &(w->link); - next_w = w->link; - continue; - } + last_w = &gct->old_weak_ptrs; + for (w = gct->old_weak_ptrs; w != NULL; w = next_w) { + + /* There might be a DEAD_WEAK on the list if finalizeWeak# was + * called on a live weak pointer object. Just remove it. + */ + if (w->header.info == &stg_DEAD_WEAK_info) { + next_w = ((StgDeadWeak *)w)->link; + *last_w = next_w; + continue; + } + + info = get_itbl(w); + switch (info->type) { + + case WEAK: + /* Now, check whether the key is reachable. + */ + new = isAlive(w->key); + if (new != NULL) { + w->key = new; hunk ./rts/sm/MarkWeak.c 124 - default: - barf("traverseWeakPtrList: not WEAK"); - } + bd = Bdescr((P_)w); + gct->evac_gen_ix = bd->gen_ix; + + // evacuate the value and finalizer + evacuate(&w->value); + evacuate(&w->finalizer); + // remove this weak ptr from the old_weak_ptrs list + *last_w = w->link; + // and put it on the new weak ptr list + next_w = w->link; + w->link = bd->gen->weak_ptrs; + bd->gen->weak_ptrs = w; + flag = rtsTrue; + + debugTrace(DEBUG_weak, + "weak pointer still alive at %p -> %p", + w, w->key); + + if (gct->failed_to_evac || + Bdescr((P_)w->key)->gen_ix < bd->gen_ix) { + gct->failed_to_evac = rtsFalse; + recordMutableGen_GC((StgClosure *)w,bd->gen_no); + } + + continue; + } + else { + last_w = &(w->link); + next_w = w->link; + continue; + } + + default: + barf("traverseWeakPtrListGen: not WEAK"); + } } hunk ./rts/sm/MarkWeak.c 160 - + /* If we didn't make any changes, then we can go round and kill all * the dead weak pointers. The old_weak_ptr list is used as a list * of pending finalizers later on. hunk ./rts/sm/MarkWeak.c 166 */ if (flag == rtsFalse) { - for (w = old_weak_ptr_list; w; w = w->link) { - evacuate(&w->finalizer); - } - - // Next, move to the WeakThreads stage after fully - // scavenging the finalizers we've just evacuated. - weak_stage = WeakThreads; + for (w = gct->old_weak_ptrs; w; w = w->link) { + bd = Bdescr((P_)w); + gct->evac_gen_ix = bd->gen_ix; + evacuate(&w->finalizer); + if (gct->failed_to_evac) { + gct->failed_to_evac = rtsFalse; + recordMutableGen_GC((StgClosure *)w,Bdescr((P_)w)->gen_no); + } + } + + // Next, move to the WeakThreads stage after fully + // scavenging the finalizers we've just evacuated. + gct->weak_stage = WeakThreads; } return rtsTrue; hunk ./rts/sm/MarkWeak.c 184 case WeakThreads: - /* Now deal with the step->threads lists, which behave somewhat like + /* Now deal with the gen->threads lists, which behave somewhat like * the weak ptr list. If we discover any threads that are about to * become garbage, we wake them up and administer an exception. hunk ./rts/sm/MarkWeak.c 187 + * + * We can't do in the same phase as WeakPtrs, because we want + * the property that a reachable finalizer can keep a thread + * alive (see test conc031). */ { hunk ./rts/sm/MarkWeak.c 193 - nat g; // Traverse thread lists for generations we collected... hunk ./rts/sm/MarkWeak.c 195 -// ToDo when we have one gen per capability: -// for (n = 0; n < n_capabilities; n++) { -// if (tidyThreadList(&nurseries[n])) { -// flag = rtsTrue; -// } -// } - for (g = 0; g <= N; g++) { - if (tidyThreadList(&generations[g])) { + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; + if (gen->no > gct->collect_gen) break; + if (gct->gc_type == GC_LOCAL && isNonLocalGen(gen)) continue; + if (tidyThreadList(gen)) { flag = rtsTrue; } } hunk ./rts/sm/MarkWeak.c 203 - + /* If we evacuated any threads, we need to go back to the scavenger. */ if (flag) return rtsTrue; hunk ./rts/sm/MarkWeak.c 210 /* And resurrect any threads which were about to become garbage. */ - { - nat g; - for (g = 0; g <= N; g++) { - resurrectUnreachableThreads(&generations[g]); - } + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; + if (gen->no > gct->collect_gen) break; + if (gct->gc_type == GC_LOCAL && isNonLocalGen(gen)) continue; + resurrectUnreachableThreads(gen); } hunk ./rts/sm/MarkWeak.c 216 - - weak_stage = WeakDone; // *now* we're done, + + gct->weak_stage = WeakDone; // *now* we're done, return rtsTrue; // but one more round of scavenging, please } hunk ./rts/sm/MarkWeak.c 227 } } - static void resurrectUnreachableThreads (generation *gen) +static void resurrectUnreachableThreads (generation *gen) { StgTSO *t, *tmp, *next; hunk ./rts/sm/MarkWeak.c 239 // become garbage, because they might get // pending exceptions. switch (t->what_next) { + case ThreadKilled: case ThreadComplete: continue; hunk ./rts/sm/MarkWeak.c 243 + default: tmp = t; evacuate((StgClosure **)&tmp); hunk ./rts/sm/MarkWeak.c 247 - tmp->global_link = resurrected_threads; - resurrected_threads = tmp; + + if (gct->gc_type == GC_LOCAL) { + // We don't currently resurrect threads with + // exceptions in GC_LOCAL. It's hard to get right, + // becuase we would have to do real throwTo rather + // than throwToSingleThhreaded. + generation *gen = Bdescr((P_)tmp)->gen; + tmp->global_link = gen->threads; + gen->threads = tmp; + } else { + tmp->global_link = gct->resurrected_threads; + gct->resurrected_threads = tmp; + } } } } hunk ./rts/sm/MarkWeak.c 264 + static rtsBool tidyThreadList (generation *gen) { StgTSO *t, *tmp, *next, **prev; hunk ./rts/sm/MarkWeak.c 274 for (t = gen->old_threads; t != END_TSO_QUEUE; t = next) { - tmp = (StgTSO *)isAlive((StgClosure *)t); - - if (tmp != NULL) { - t = tmp; - } - - ASSERT(get_itbl(t)->type == TSO); if (t->what_next == ThreadRelocated) { hunk ./rts/sm/MarkWeak.c 275 - next = t->_link; + // Just short it out; the real thread is on another list. + // When we relocate a TSO we always put the new TSO on the + // correct gen->threads list. + next = t->global_link; *prev = next; continue; } hunk ./rts/sm/MarkWeak.c 283 + if (gct->gc_type == GC_LOCAL) { + tmp = t; + evacuate((StgClosure **)&tmp); + } else { + tmp = (StgTSO *)isAlive((StgClosure *)t); + } + + if (tmp != NULL) { + t = tmp; + } + next = t->global_link; hunk ./rts/sm/MarkWeak.c 296 + ASSERT(get_itbl(t)->type == TSO); + // if the thread is not masking exceptions but there are // pending exceptions on its queue, then something has gone // wrong. However, pending exceptions are OK if there is an hunk ./rts/sm/MarkWeak.c 328 } /* ----------------------------------------------------------------------------- - Evacuate every weak pointer object on the weak_ptr_list, and update - the link fields. + Every weak pointer object (WEAK) is treated as implicitly alive to + begin with. Only when w->key is found to be unreachable does the + WEAK turn into a DEAD_WEAK which may be subsequently GC'd. hunk ./rts/sm/MarkWeak.c 332 - ToDo: with a lot of weak pointers, this will be expensive. We - should have a per-GC weak pointer list, just like threads. - -------------------------------------------------------------------------- */ + All WEAK objects are chained together by their w->link fields. + Since we're doing generational GC, it makes sense to divide this + list per-generation, so that we don't have to traverse the weak + pointers of old generations when collecting young generations, so + each generation has its own gen->weak_ptrs list. + + In markWeakPtr list we traverse the gen->weak_ptrs lists of + generations we are collecting, and evacuate each one. The + resulting weak pointer objects are chained together on a temporary + list gct->old_weak_ptrs, which we will traverse again (possibly + multiple times) in traverseWeakPtrList(). Eventually WEAK objects + will be placed on the appropriate gen->weak_ptrs list of the + generation that they now belong to. + -------------------------------------------------------------------------- */ void markWeakPtrList ( void ) hunk ./rts/sm/MarkWeak.c 350 { - StgWeak *w, **last_w; - - last_w = &weak_ptr_list; - for (w = weak_ptr_list; w; w = w->link) { - // w might be WEAK, EVACUATED, or DEAD_WEAK (actually CON_STATIC) here + StgWeak *w, *next; + generation *gen; + nat g; hunk ./rts/sm/MarkWeak.c 354 + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; + if (gen->no > gct->collect_gen) break; + if (gct->gc_type == GC_LOCAL && isNonLocalGen(gen)) continue; + + for (w = gen->weak_ptrs; w; w = next) { + // w might be WEAK, a forwarding ptr, or DEAD_WEAK (actually + // CONSTR_STATIC) here + #ifdef DEBUG hunk ./rts/sm/MarkWeak.c 364 - { // careful to do this assertion only reading the info ptr - // once, because during parallel GC it might change under our feet. - const StgInfoTable *info; - info = w->header.info; - ASSERT(IS_FORWARDING_PTR(info) - || info == &stg_DEAD_WEAK_info - || INFO_PTR_TO_STRUCT(info)->type == WEAK); - } + { // careful to do this assertion only reading the info ptr + // once, because during parallel GC it might change under our feet. + const StgInfoTable *info; + info = w->header.info; + ASSERT(IS_FORWARDING_PTR(info) + || info == &stg_DEAD_WEAK_info + || INFO_PTR_TO_STRUCT(info)->type == WEAK); + } #endif hunk ./rts/sm/MarkWeak.c 373 + + evacuate((StgClosure**)&w); hunk ./rts/sm/MarkWeak.c 376 - evacuate((StgClosure **)last_w); - w = *last_w; - if (w->header.info == &stg_DEAD_WEAK_info) { - last_w = &(((StgDeadWeak*)w)->link); - } else { - last_w = &(w->link); - } - } -} + next = w->link; + // We drop DEAD_WEAK objects from the list here. + if (w->header.info != &stg_DEAD_WEAK_info) { + w->link = gct->old_weak_ptrs; + gct->old_weak_ptrs = w; + } + } hunk ./rts/sm/MarkWeak.c 384 + gen->weak_ptrs = NULL; + // we'll re-populate these lists later in traverseWeakPtrList() + } +} hunk ./rts/sm/MarkWeak.h 19 #include "BeginPrivate.h" -extern StgWeak *old_weak_ptr_list; -extern StgTSO *resurrected_threads; -extern StgTSO *exception_threads; - void initWeakForGC ( void ); rtsBool traverseWeakPtrList ( void ); void markWeakPtrList ( void ); hunk ./rts/sm/Sanity.c 21 #ifdef DEBUG /* whole file */ +#include "Storage.h" +#include "BlockAlloc.h" +#include "GCThread.h" + #include "RtsUtils.h" hunk ./rts/sm/Sanity.c 26 -#include "sm/Storage.h" -#include "sm/BlockAlloc.h" #include "Sanity.h" #include "Schedule.h" #include "Apply.h" hunk ./rts/sm/Sanity.c 87 checkClosureShallow( StgClosure* p ) { StgClosure *q; + const StgInfoTable *info; q = UNTAG_CLOSURE(p); hunk ./rts/sm/Sanity.c 90 + + // forwarding pointers are allowed in the local heap + info = q->header.info; + if (IS_FORWARDING_PTR(info)) { + q = (StgClosure*)UN_FORWARDING_PTR(info); + } + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); /* Is it a static closure? */ hunk ./rts/sm/Sanity.c 268 { const StgInfoTable *info; - ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); - p = UNTAG_CLOSURE(p); /* Is it a static closure (i.e. in the data segment)? */ hunk ./rts/sm/Sanity.c 270 - if (!HEAP_ALLOCED(p)) { - ASSERT(closure_STATIC(p)); - } else { - ASSERT(!closure_STATIC(p)); - } - info = p->header.info; if (IS_FORWARDING_PTR(info)) { hunk ./rts/sm/Sanity.c 273 - barf("checkClosure: found EVACUATED closure %d", info->type); + return checkClosure((StgClosure *)UN_FORWARDING_PTR(info)); } info = INFO_PTR_TO_STRUCT(info); hunk ./rts/sm/Sanity.c 277 + ASSERT(HEAP_ALLOCED(p) || closure_STATIC(p)); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + switch (info->type) { case MVAR_CLEAN: hunk ./rts/sm/Sanity.c 319 case CONSTR_0_2: case CONSTR_2_0: case IND_PERM: + case IND_LOCAL: case BLACKHOLE: case PRIM: case MUT_PRIM: hunk ./rts/sm/Sanity.c 347 ASSERT(get_itbl(bq->owner)->type == TSO); ASSERT(bq->queue == (MessageBlackHole*)END_TSO_QUEUE - || get_itbl(bq->queue)->type == TSO); + || bq->queue->header.info == &stg_MSG_BLACKHOLE_info); ASSERT(bq->link == (StgBlockingQueue*)END_TSO_QUEUE || get_itbl(bq->link)->type == IND || get_itbl(bq->link)->type == BLOCKING_QUEUE); hunk ./rts/sm/Sanity.c 382 } case THUNK_SELECTOR: - ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgSelector *)p)->selectee)); - return THUNK_SELECTOR_sizeW(); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgSelector *)p)->selectee)); + return THUNK_SELECTOR_sizeW(); case IND: { hunk ./rts/sm/Sanity.c 395 return sizeofW(StgInd); } - case RET_BCO: - case RET_SMALL: - case RET_BIG: - case RET_DYN: - case UPDATE_FRAME: - case STOP_FRAME: - case CATCH_FRAME: - case ATOMICALLY_FRAME: - case CATCH_RETRY_FRAME: - case CATCH_STM_FRAME: - barf("checkClosure: stack frame"); - case AP: { StgAP* ap = (StgAP *)p; hunk ./rts/sm/Sanity.c 418 } case ARR_WORDS: - return arr_words_sizeW((StgArrWords *)p); + return arr_words_sizeW((StgArrWords *)p); case MUT_ARR_PTRS_CLEAN: case MUT_ARR_PTRS_DIRTY: hunk ./rts/sm/Sanity.c 451 } default: - barf("checkClosure (closure type %d)", info->type); + barf("checkClosure (closure type %d)", info->type); } } hunk ./rts/sm/Sanity.c 455 +static void +checkGlobalPtr (StgClosure *p) +{ + bdescr *bd; + if (!HEAP_ALLOCED(p)) return; + bd = Bdescr((StgPtr)p); + ASSERT (bd->gen_no != 0); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); +} + +// Check that an object only points to global heap, unless it is +// IND_LOCAL or TSO. We use this to test that the global heap +// invariant is satisfied. +StgOffset +checkGlobalClosure (StgClosure* p) +{ + const StgInfoTable *info; + + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + + p = UNTAG_CLOSURE(p); + + info = p->header.info; + + if (IS_FORWARDING_PTR(info)) { + barf("checkClosure: found EVACUATED closure %d", info->type); + } + info = INFO_PTR_TO_STRUCT(info); + + switch (info->type) { + + case MVAR_CLEAN: + case MVAR_DIRTY: + { + StgMVar *mvar = (StgMVar *)p; + checkGlobalPtr((StgClosure *)mvar->head); + checkGlobalPtr((StgClosure *)mvar->tail); + checkGlobalPtr((StgClosure *)mvar->value); + return sizeofW(StgMVar); + } + + case THUNK: + case THUNK_1_0: + case THUNK_0_1: + case THUNK_1_1: + case THUNK_0_2: + case THUNK_2_0: + { + nat i; + for (i = 0; i < info->layout.payload.ptrs; i++) { + checkGlobalPtr(((StgThunk *)p)->payload[i]); + } + return thunk_sizeW_fromITBL(info); + } + + case FUN: + case FUN_1_0: + case FUN_0_1: + case FUN_1_1: + case FUN_0_2: + case FUN_2_0: + case CONSTR: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_1_1: + case CONSTR_0_2: + case CONSTR_2_0: + case IND_PERM: + case IND_STATIC: + case BLACKHOLE: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + case CONSTR_STATIC: + case CONSTR_NOCAF_STATIC: + case THUNK_STATIC: + case FUN_STATIC: + case PRIM: + case MUT_PRIM: + { + nat i; + for (i = 0; i < info->layout.payload.ptrs; i++) { + checkGlobalPtr(p->payload[i]); + } + return sizeW_fromITBL(info); + } + + case IND_LOCAL: + ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgInd*)p)->indirectee)); + return sizeofW(StgInd); + + case BLOCKING_QUEUE: + { + StgBlockingQueue *bq = (StgBlockingQueue *)p; + + // NO: the BH might have been updated now + // ASSERT(get_itbl(bq->bh)->type == BLACKHOLE); + + // NO: we allow global->local BH pointers + // checkGlobalPtr((StgClosure*)bq->bh); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure*)bq->bh))); + + ASSERT(get_itbl(bq->owner)->type == TSO); + ASSERT(bq->queue == (MessageBlackHole*)END_TSO_QUEUE + || bq->queue->header.info == &stg_MSG_BLACKHOLE_info); + ASSERT(bq->link == (StgBlockingQueue*)END_TSO_QUEUE || + get_itbl(bq->link)->type == IND || + get_itbl(bq->link)->type == BLOCKING_QUEUE); + + return sizeofW(StgBlockingQueue); + } + + case BCO: { + StgBCO *bco = (StgBCO *)p; + checkGlobalPtr((StgClosure *)bco->instrs); + checkGlobalPtr((StgClosure *)bco->literals); + checkGlobalPtr((StgClosure *)bco->ptrs); + return bco_sizeW(bco); + } + + case WEAK: + /* deal with these specially - the info table isn't + * representative of the actual layout. + */ + { StgWeak *w = (StgWeak *)p; + checkGlobalPtr(w->key); + checkGlobalPtr(w->value); + checkGlobalPtr(w->finalizer); + if (w->link) { + checkGlobalPtr((StgClosure *)w->link); + } + return sizeW_fromITBL(info); + } + + case THUNK_SELECTOR: + checkGlobalPtr(((StgSelector *)p)->selectee); + return THUNK_SELECTOR_sizeW(); + + case IND: + { + /* we don't expect to see any of these after GC + * but they might appear during execution + */ + StgInd *ind = (StgInd *)p; + checkGlobalPtr(ind->indirectee); + return sizeofW(StgInd); + } + + case AP: + { + StgAP* ap = (StgAP *)p; + checkPAP (ap->fun, ap->payload, ap->n_args); + return ap_sizeW(ap); + } + + case PAP: + { + StgPAP* pap = (StgPAP *)p; + checkPAP (pap->fun, pap->payload, pap->n_args); + return pap_sizeW(pap); + } + + case AP_STACK: + { + StgAP_STACK *ap = (StgAP_STACK *)p; + checkGlobalPtr(ap->fun); + checkStackChunk((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size); + return ap_stack_sizeW(ap); + } + + case ARR_WORDS: + return arr_words_sizeW((StgArrWords *)p); + + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + { + StgMutArrPtrs* a = (StgMutArrPtrs *)p; + nat i; + for (i = 0; i < a->ptrs; i++) { + // checkGlobalPtr(a->payload[i]); + // XXX: mutable arrays can have local ptrs at the + // moment, see globalise_scavenge_large(). + ASSERT(LOOKS_LIKE_CLOSURE_PTR(a->payload[i])); + } + return mut_arr_ptrs_sizeW(a); + } + + case TSO: + checkTSO((StgTSO *)p); + return tso_sizeW((StgTSO *)p); + + case TREC_CHUNK: + // these are allowed to point to the local heap, so we don't + // use checkGlobalPtr(). + { + nat i; + StgTRecChunk *tc = (StgTRecChunk *)p; + ASSERT(LOOKS_LIKE_CLOSURE_PTR((StgClosure *)tc->prev_chunk)); + for (i = 0; i < tc -> next_entry_idx; i ++) { + ASSERT(LOOKS_LIKE_CLOSURE_PTR((StgClosure *)tc->entries[i].tvar)); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(tc->entries[i].expected_value)); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(tc->entries[i].new_value)); + } + return sizeofW(StgTRecChunk); + } + + default: + barf("checkGlobalClosure (closure type %d)", info->type); + } +} /* ----------------------------------------------------------------------------- Check Heap Sanity hunk ./rts/sm/Sanity.c 676 all the objects in the remainder of the chain. -------------------------------------------------------------------------- */ -void -checkHeap(bdescr *bd) +void checkHeapChain (bdescr *bd) { StgPtr p; hunk ./rts/sm/Sanity.c 680 -#if defined(THREADED_RTS) - // heap sanity checking doesn't work with SMP, because we can't - // zero the slop (see Updates.h). - return; -#endif - for (; bd != NULL; bd = bd->link) { if(!(bd->flags & BF_SWEPT)) { p = bd->start; hunk ./rts/sm/Sanity.c 697 } } +void checkGlobalHeapChain (bdescr *bd) +{ + StgPtr p; + + for (; bd != NULL; bd = bd->link) { + p = bd->start; + while (p < bd->free) { + nat size = checkGlobalClosure((StgClosure *)p); + /* This is the smallest size of closure that can live in the heap */ + ASSERT( size >= MIN_PAYLOAD_SIZE + sizeofW(StgHeader) ); + p += size; + + /* skip over slop */ + while (p < bd->free && + (*p < 0x1000 || !LOOKS_LIKE_INFO_PTR(*p))) { p++; } + } + } +} + void checkHeapChunk(StgPtr start, StgPtr end) { hunk ./rts/sm/Sanity.c 744 void checkTSO(StgTSO *tso) { - StgPtr sp = tso->sp; - StgPtr stack = tso->stack; - StgOffset stack_size = tso->stack_size; - StgPtr stack_end = stack + stack_size; + StgPtr sp, stack, stack_end; hunk ./rts/sm/Sanity.c 746 - if (tso->what_next == ThreadRelocated) { - checkTSO(tso->_link); - return; + while (tso->what_next == ThreadRelocated) { + tso = tso->_link; } hunk ./rts/sm/Sanity.c 750 + sp = tso->sp; + stack = tso->stack; + stack_end = stack + tso->stack_size; + if (tso->what_next == ThreadKilled) { hunk ./rts/sm/Sanity.c 755 - /* The garbage collector doesn't bother following any pointers - * from dead threads, so don't check sanity here. - */ - return; + /* The garbage collector doesn't bother following any pointers + * from dead threads, so don't check sanity here. + */ + return; } ASSERT(tso->_link == END_TSO_QUEUE || hunk ./rts/sm/Sanity.c 764 tso->_link->header.info == &stg_MVAR_TSO_QUEUE_info || tso->_link->header.info == &stg_TSO_info); - ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->block_info.closure)); + + switch (tso->why_blocked) { + case NotBlocked: + case BlockedOnDelay: + case BlockedOnWrite: + case BlockedOnRead: + break; + default: + ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->block_info.closure)); + } + ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->bq)); ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->blocked_exceptions)); hunk ./rts/sm/Sanity.c 793 StgTSO *tso; nat g; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (tso=generations[g].threads; tso != END_TSO_QUEUE; + for (g = 0; g < total_generations; g++) { + for (tso = all_generations[g].threads; tso != END_TSO_QUEUE; tso = tso->global_link) { ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso)); ASSERT(get_itbl(tso)->type == TSO); hunk ./rts/sm/Sanity.c 801 if (checkTSOs) checkTSO(tso); - tso = deRefTSO(tso); + if (tso->what_next == ThreadRelocated) continue; // If this TSO is dirty and in an old generation, it better // be on the mutable list. hunk ./rts/sm/Sanity.c 817 Check mutable list sanity. -------------------------------------------------------------------------- */ -void -checkMutableList( bdescr *mut_bd, nat gen ) +static void +checkMutableList (bdescr *mut_bd, nat gen, nat cap_no) { bdescr *bd; StgPtr q; hunk ./rts/sm/Sanity.c 828 for (q = bd->start; q < bd->free; q++) { p = (StgClosure *)*q; ASSERT(!HEAP_ALLOCED(p) || Bdescr((P_)p)->gen_no == gen); + checkGlobalClosure(p); if (get_itbl(p)->type == TSO) { hunk ./rts/sm/Sanity.c 830 + // TSOs on the mutable list must belong to this capability + ASSERT(((StgTSO*)p)->cap->no == cap_no); ((StgTSO *)p)->flags |= TSO_MARKED; } } hunk ./rts/sm/Sanity.c 838 } } -void -checkMutableLists (rtsBool checkTSOs) +static void +checkLocalMutableLists (nat cap_no) { hunk ./rts/sm/Sanity.c 841 - nat g, i; + nat g; + for (g = 1; g < RtsFlags.GcFlags.generations; g++) { + checkMutableList(capabilities[cap_no].mut_lists[g], g, cap_no); + } +} hunk ./rts/sm/Sanity.c 847 - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - checkMutableList(generations[g].mut_list, g); - for (i = 0; i < n_capabilities; i++) { - checkMutableList(capabilities[i].mut_lists[g], g); - } +static void +checkMutableLists (void) +{ + nat i; + for (i = 0; i < n_capabilities; i++) { + checkLocalMutableLists(i); } hunk ./rts/sm/Sanity.c 854 - checkGlobalTSOList(checkTSOs); } /* hunk ./rts/sm/Sanity.c 916 } +static void checkGeneration (generation *gen, + rtsBool after_major_gc USED_IF_THREADS) +{ + nat n; + gen_workspace *ws; + + ASSERT(countBlocks(gen->blocks) == gen->n_blocks); + ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); + +#if defined(THREADED_RTS) + // heap sanity checking doesn't work with SMP, because we can't + // zero the slop (see Updates.h). However, we can sanity-check + // the heap after a major gc, because there is no slop. + if (!after_major_gc) return; +#endif + + if (gen->is_local) { + checkHeapChain(gen->blocks); + } else { + checkGlobalHeapChain(gen->blocks); + } + + for (n = 0; n < n_capabilities; n++) { + ws = &gc_threads[n]->gens[gen->ix]; + if (gen->is_local) { + checkHeapChain(ws->todo_bd); + checkHeapChain(ws->part_list); + checkHeapChain(ws->scavd_list); + } else { + checkGlobalHeapChain(ws->todo_bd); + checkGlobalHeapChain(ws->part_list); + checkGlobalHeapChain(ws->scavd_list); + } + } + + checkLargeObjects(gen->large_objects); +} + +/* Local heap sanity check. */ +static void checkLocalHeap (nat cap_no) +{ + nat g; + generation *gen; + + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; + if (gen->is_local && gen->cap == cap_no) { + checkGeneration(gen, rtsFalse); + } + } + checkNurserySanity(&nurseries[cap_no]); +} + /* Full heap sanity check. */ hunk ./rts/sm/Sanity.c 970 -void -checkSanity( rtsBool check_heap ) +static void checkFullHeap (rtsBool after_major_gc) { nat g, n; hunk ./rts/sm/Sanity.c 974 - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - ASSERT(countBlocks(generations[g].blocks) - == generations[g].n_blocks); - ASSERT(countBlocks(generations[g].large_objects) - == generations[g].n_large_blocks); - if (check_heap) { - checkHeap(generations[g].blocks); - } - checkLargeObjects(generations[g].large_objects); + for (g = 0; g < total_generations; g++) { + checkGeneration(&all_generations[g], after_major_gc); } hunk ./rts/sm/Sanity.c 977 - for (n = 0; n < n_capabilities; n++) { checkNurserySanity(&nurseries[n]); } hunk ./rts/sm/Sanity.c 980 - +} + +void checkSanity(rtsBool local_only, rtsBool after_gc, rtsBool major_gc, + nat cap_no) +{ + if (local_only) { + checkLocalHeap(cap_no); + } else { + checkFullHeap(after_gc && major_gc); + } + checkFreeListSanity(); hunk ./rts/sm/Sanity.c 993 -#if defined(THREADED_RTS) // always check the stacks in threaded mode, because checkHeap() // does nothing in this case. hunk ./rts/sm/Sanity.c 995 - checkMutableLists(rtsTrue); -#else - if (check_heap) { - checkMutableLists(rtsFalse); - } else { - checkMutableLists(rtsTrue); + if (after_gc) { + if (local_only) { + checkLocalMutableLists(cap_no); + } else { + checkMutableLists(); + checkGlobalTSOList(rtsTrue); + } } hunk ./rts/sm/Sanity.c 1003 -#endif } // If memInventory() calculates that we have a memory leak, this hunk ./rts/sm/Sanity.c 1015 static void findMemoryLeak (void) { - nat g, i; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (i = 0; i < n_capabilities; i++) { - markBlocks(capabilities[i].mut_lists[g]); - } - markBlocks(generations[g].mut_list); - markBlocks(generations[g].blocks); - markBlocks(generations[g].large_objects); + nat g, i, n; + for (n = 0; n < total_generations; n++) { + g = all_generations[n].no; + for (i = 0; i < n_capabilities; i++) { + if (g > 0) { markBlocks(capabilities[i].mut_lists[g]); } + markBlocks(gc_threads[i]->gens[n].part_list); + markBlocks(gc_threads[i]->gens[n].scavd_list); + markBlocks(gc_threads[i]->gens[n].todo_bd); + } + markBlocks(all_generations[n].blocks); + markBlocks(all_generations[n].large_objects); } hunk ./rts/sm/Sanity.c 1028 - for (i = 0; i < n_capabilities; i++) { - markBlocks(nurseries[i].blocks); - } + for (i = 0; i < n_capabilities; i++) { + markBlocks(nurseries[i].blocks); + } #ifdef PROFILING // TODO: hunk ./rts/sm/Sanity.c 1056 prev = END_TSO_QUEUE; for (tso = cap->run_queue_hd; tso != END_TSO_QUEUE; prev = tso, tso = tso->_link) { - ASSERT(prev == END_TSO_QUEUE || prev->_link == tso); - ASSERT(tso->block_info.prev == prev); + tso = deRefTSO(tso); + ASSERT(prev == END_TSO_QUEUE || deRefTSO(prev->_link) == tso); + ASSERT(deRefTSO(tso->block_info.prev) == prev); } ASSERT(cap->run_queue_tl == prev); } hunk ./rts/sm/Sanity.c 1063 +void +findBlockInList(bdescr *bd, bdescr *list) +{ + bdescr *p; + + for (p = list; p != NULL; p = p->link) { + ASSERT(bd != p); + } +} + +// useful for locating a block from within gdb. +void +findBlock (bdescr *bd) +{ + nat n, g, i; + generation *gen; + + for (n = 0; n < total_generations; n++) { + g = all_generations[n].no; + for (i = 0; i < n_capabilities; i++) { + if (g > 0) { + findBlockInList(bd, capabilities[i].mut_lists[g]); + } + findBlockInList(bd, gc_threads[i]->gens[n].part_list); + findBlockInList(bd, gc_threads[i]->gens[n].scavd_list); + findBlockInList(bd, gc_threads[i]->gens[n].todo_bd); + } + + gen = &all_generations[n]; + findBlockInList(bd, gen->blocks); + findBlockInList(bd, gen->old_blocks); + findBlockInList(bd, gen->large_objects); + findBlockInList(bd, gen->scavenged_large_objects); + } + + for (i = 0; i < n_capabilities; i++) { + findBlockInList(bd, nurseries[i].blocks); + } +} + + /* ----------------------------------------------------------------------------- Memory leak detection hunk ./rts/sm/Sanity.c 1133 ASSERT(countBlocks(gen->blocks) == gen->n_blocks); ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); return gen->n_blocks + gen->n_old_blocks + - countAllocdBlocks(gen->large_objects); + countAllocdBlocks(gen->large_objects); } void hunk ./rts/sm/Sanity.c 1139 memInventory (rtsBool show) { - nat g, i; - lnat gen_blocks[RtsFlags.GcFlags.generations]; + nat g, i, n; + lnat gen_blocks[total_generations]; lnat nursery_blocks, retainer_blocks, arena_blocks, exec_blocks; lnat live_blocks = 0, free_blocks = 0; hunk ./rts/sm/Sanity.c 1146 rtsBool leak; - // count the blocks we current have + // count the blocks we currently have hunk ./rts/sm/Sanity.c 1148 - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen_blocks[g] = 0; + for (n = 0; n < total_generations; n++) { + g = all_generations[n].no; + gen_blocks[n] = 0; for (i = 0; i < n_capabilities; i++) { hunk ./rts/sm/Sanity.c 1152 - gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]); + if (g > 0) { + gen_blocks[n] += countBlocks(capabilities[i].mut_lists[g]); + } + gen_blocks[n] += countBlocks(gc_threads[i]->gens[n].part_list); + gen_blocks[n] += countBlocks(gc_threads[i]->gens[n].scavd_list); + gen_blocks[n] += countBlocks(gc_threads[i]->gens[n].todo_bd); } hunk ./rts/sm/Sanity.c 1159 - gen_blocks[g] += countAllocdBlocks(generations[g].mut_list); - gen_blocks[g] += genBlocks(&generations[g]); + gen_blocks[n] += genBlocks(&all_generations[n]); } nursery_blocks = 0; hunk ./rts/sm/Sanity.c 1185 free_blocks = countFreeList(); live_blocks = 0; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (g = 0; g < total_generations; g++) { live_blocks += gen_blocks[g]; } live_blocks += nursery_blocks + hunk ./rts/sm/Sanity.c 1202 } else { debugBelch("Memory inventory:\n"); } - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (g = 0; g < total_generations; g++) { debugBelch(" gen %d blocks : %5lu blocks (%lu MB)\n", g, gen_blocks[g], MB(gen_blocks[g])); } hunk ./rts/sm/Sanity.h 24 # endif /* debugging routines */ -void checkSanity ( rtsBool check_heap ); +void checkSanity (rtsBool local_only, rtsBool after_gc, rtsBool major_gc, + nat cap_no); void checkNurserySanity ( nursery *nursery ); hunk ./rts/sm/Sanity.h 27 -void checkHeap ( bdescr *bd ); + +void checkHeapChain (bdescr *bd); +void checkGlobalHeapChain (bdescr *bd); + void checkHeapChunk ( StgPtr start, StgPtr end ); void checkLargeObjects ( bdescr *bd ); void checkTSO ( StgTSO* tso ); hunk ./rts/sm/Sanity.h 37 void checkGlobalTSOList ( rtsBool checkTSOs ); void checkStaticObjects ( StgClosure* static_objects ); void checkStackChunk ( StgPtr sp, StgPtr stack_end ); -StgOffset checkStackFrame ( StgPtr sp ); -StgOffset checkClosure ( StgClosure* p ); +StgOffset checkStackFrame (StgPtr sp); hunk ./rts/sm/Sanity.h 39 -void checkMutableList ( bdescr *bd, nat gen ); -void checkMutableLists ( rtsBool checkTSOs ); +StgOffset checkClosure (StgClosure* p); +StgOffset checkGlobalClosure (StgClosure* p); void checkRunQueue (Capability *cap); hunk ./rts/sm/Sanity.h 48 void checkBQ (StgTSO *bqe, StgClosure *closure); +void findBlockInList (bdescr *bd, bdescr *list); +void findBlock (bdescr *bd); + #include "EndPrivate.h" #endif /* DEBUG */ hunk ./rts/sm/Scav.c 37 StgLargeBitmap *large_bitmap, nat size ); -#if defined(THREADED_RTS) && !defined(PARALLEL_GC) -# define evacuate(a) evacuate1(a) -# define scavenge_loop(a) scavenge_loop1(a) -# define scavenge_block(a) scavenge_block1(a) -# define scavenge_mutable_list(bd,g) scavenge_mutable_list1(bd,g) -# define scavenge_capability_mut_lists(cap) scavenge_capability_mut_Lists1(cap) +#if defined(THREADED_RTS) +#if defined(LOCAL_GC) +# define evacuate(a) evacuate_local(a) +# define scavenge_loop(a) scavenge_loop_local(a) +# define scavenge_mutable_list(bd,g) scavenge_mutable_list_local(bd,g) +# define scavenge_capability_mut_lists(cap) scavenge_capability_mut_lists_local(cap) +# define scavengeTSO(tso) scavengeTSO_local(tso) +# define scavenge_large(ws) scavenge_large_local(ws) +#elif defined(PARALLEL_GC) +// use the thread-safe evacuate() for evacuation in parallel GC +# define scavenge_loop(a) scavenge_loop_par(a) +# define scavenge_mutable_list(bd,g) scavenge_mutable_list_par(bd,g) +# define scavenge_capability_mut_lists(cap) scavenge_capability_mut_lists_par(cap) +# define scavengeTSO(tso) scavengeTSO_par(tso) +# define scavenge_large(ws) scavenge_large_par(ws) +#else +// sequential GC in the threaded RTS can call evacuate_seq() +# define evacuate(a) evacuate_seq(a) +#endif #endif /* ----------------------------------------------------------------------------- hunk ./rts/sm/Scav.c 292 return scavenge_PAP_payload (ap->fun, ap->payload, ap->n_args); } +#ifndef LOCAL_GC /* ----------------------------------------------------------------------------- Scavenge SRTs -------------------------------------------------------------------------- */ hunk ./rts/sm/Scav.c 368 bitmap = bitmap >> 1; } } +#endif // not LOCAL_GC hunk ./rts/sm/Scav.c 370 +#ifdef LOCAL_GC +#define USED_IF_NOT_LOCAL STG_UNUSED +#else +#define USED_IF_NOT_LOCAL STG_UNUSED +#endif STATIC_INLINE GNUC_ATTR_HOT void hunk ./rts/sm/Scav.c 377 -scavenge_thunk_srt(const StgInfoTable *info) +scavenge_thunk_srt(const StgInfoTable *info USED_IF_NOT_LOCAL) { hunk ./rts/sm/Scav.c 379 +#ifdef LOCAL_GC + return; +#else StgThunkInfoTable *thunk_info; if (!major_gc) return; hunk ./rts/sm/Scav.c 388 thunk_info = itbl_to_thunk_itbl(info); scavenge_srt((StgClosure **)GET_SRT(thunk_info), thunk_info->i.srt_bitmap); +#endif } STATIC_INLINE GNUC_ATTR_HOT void hunk ./rts/sm/Scav.c 392 -scavenge_fun_srt(const StgInfoTable *info) +scavenge_fun_srt(const StgInfoTable *info USED_IF_NOT_LOCAL) { hunk ./rts/sm/Scav.c 394 +#ifdef LOCAL_GC + return; +#else StgFunInfoTable *fun_info; if (!major_gc) return; hunk ./rts/sm/Scav.c 403 fun_info = itbl_to_fun_itbl(info); scavenge_srt((StgClosure **)GET_FUN_SRT(fun_info), fun_info->i.srt_bitmap); +#endif } /* ----------------------------------------------------------------------------- hunk ./rts/sm/Scav.c 423 { StgPtr p, q; StgInfoTable *info; + nat saved_evac_gen_ix; rtsBool saved_eager_promotion; gen_workspace *ws; hunk ./rts/sm/Scav.c 431 bd->start, bd->gen_no, bd->u.scan); gct->scan_bd = bd; - gct->evac_gen = bd->gen; + gct->evac_gen_ix = bd->gen_ix; + saved_evac_gen_ix = gct->evac_gen_ix; saved_eager_promotion = gct->eager_promotion; gct->failed_to_evac = rtsFalse; hunk ./rts/sm/Scav.c 436 - ws = &gct->gens[bd->gen->no]; + ws = &gct->gens[bd->gen_ix]; p = bd->u.scan; hunk ./rts/sm/Scav.c 582 break; } + case IND_LOCAL: + evacuate(&((StgInd *)p)->indirectee); + // evacuated to the current gen: no need for IND_LOCAL any more + if (!gct->failed_to_evac) { + ((StgClosure *)p)->header.info = &stg_IND_info; + } + p += sizeofW(StgInd); + break; + case IND_PERM: case BLACKHOLE: evacuate(&((StgInd *)p)->indirectee); hunk ./rts/sm/Scav.c 731 StgWord i; StgTRecChunk *tc = ((StgTRecChunk *) p); TRecEntry *e = &(tc -> entries[0]); - gct->eager_promotion = rtsFalse; + gct->evac_gen_ix = 0; evacuate((StgClosure **)&tc->prev_chunk); for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { evacuate((StgClosure **)&e->tvar); hunk ./rts/sm/Scav.c 738 evacuate((StgClosure **)&e->expected_value); evacuate((StgClosure **)&e->new_value); } - gct->eager_promotion = saved_eager_promotion; + gct->evac_gen_ix = saved_evac_gen_ix; gct->failed_to_evac = rtsTrue; // mutable p += sizeofW(StgTRecChunk); break; hunk ./rts/sm/Scav.c 797 { StgPtr p, q; StgInfoTable *info; + nat saved_evac_gen_ix; rtsBool saved_eager_promotion; hunk ./rts/sm/Scav.c 800 - gct->evac_gen = oldest_gen; + gct->evac_gen_ix = oldest_gen->ix; + saved_evac_gen_ix = gct->evac_gen_ix; saved_eager_promotion = gct->eager_promotion; while ((p = pop_mark_stack())) { hunk ./rts/sm/Scav.c 922 // no "old" generation. break; + case IND_LOCAL: + evacuate(&((StgInd *)p)->indirectee); + // evacuated to the current gen: no need for IND_LOCAL any more + if (!gct->failed_to_evac) { + ((StgClosure *)p)->header.info = &stg_IND_info; + } + break; + case IND: case BLACKHOLE: evacuate(&((StgInd *)p)->indirectee); hunk ./rts/sm/Scav.c 1064 StgWord i; StgTRecChunk *tc = ((StgTRecChunk *) p); TRecEntry *e = &(tc -> entries[0]); - gct->eager_promotion = rtsFalse; + gct->evac_gen_ix = 0; evacuate((StgClosure **)&tc->prev_chunk); for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { evacuate((StgClosure **)&e->tvar); hunk ./rts/sm/Scav.c 1071 evacuate((StgClosure **)&e->expected_value); evacuate((StgClosure **)&e->new_value); } - gct->eager_promotion = saved_eager_promotion; + gct->evac_gen_ix = saved_evac_gen_ix; gct->failed_to_evac = rtsTrue; // mutable break; } hunk ./rts/sm/Scav.c 1083 if (gct->failed_to_evac) { gct->failed_to_evac = rtsFalse; - if (gct->evac_gen) { - recordMutableGen_GC((StgClosure *)q, gct->evac_gen->no); + if (gct->evac_gen_ix) { + recordMutableGen_GC((StgClosure *)q, oldest_gen->no); } } } // while (p = pop_mark_stack()) hunk ./rts/sm/Scav.c 1102 scavenge_one(StgPtr p) { const StgInfoTable *info; + nat saved_evac_gen_ix = gct->evac_gen_ix; rtsBool no_luck; rtsBool saved_eager_promotion; hunk ./rts/sm/Scav.c 1294 gct->eager_promotion = saved_eager_promotion; gct->failed_to_evac = rtsTrue; // mutable break; - } case TREC_CHUNK: hunk ./rts/sm/Scav.c 1301 StgWord i; StgTRecChunk *tc = ((StgTRecChunk *) p); TRecEntry *e = &(tc -> entries[0]); - gct->eager_promotion = rtsFalse; + gct->evac_gen_ix = 0; evacuate((StgClosure **)&tc->prev_chunk); for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { evacuate((StgClosure **)&e->tvar); hunk ./rts/sm/Scav.c 1308 evacuate((StgClosure **)&e->expected_value); evacuate((StgClosure **)&e->new_value); } - gct->eager_promotion = saved_eager_promotion; + gct->evac_gen_ix = saved_evac_gen_ix; gct->failed_to_evac = rtsTrue; // mutable break; } hunk ./rts/sm/Scav.c 1320 case BLACKHOLE: case IND_STATIC: evacuate(&((StgInd *)p)->indirectee); + break; hunk ./rts/sm/Scav.c 1322 -#if 0 && defined(DEBUG) - if (RtsFlags.DebugFlags.gc) - /* Debugging code to print out the size of the thing we just - * promoted - */ - { - StgPtr start = gen->scan; - bdescr *start_bd = gen->scan_bd; - nat size = 0; - scavenge(&gen); - if (start_bd != gen->scan_bd) { - size += (P_)BLOCK_ROUND_UP(start) - start; - start_bd = start_bd->link; - while (start_bd != gen->scan_bd) { - size += BLOCK_SIZE_W; - start_bd = start_bd->link; - } - size += gen->scan - - (P_)BLOCK_ROUND_DOWN(gen->scan); - } else { - size = gen->scan - start; - } - debugBelch("evac IND_OLDGEN: %ld bytes", size * sizeof(W_)); - } -#endif - break; + case IND_LOCAL: + evacuate(&((StgInd *)p)->indirectee); + // if we succeeded in evacuating; turn it into an ordinary IND + if (!gct->failed_to_evac) { + ((StgInd *)p)->header.info = &stg_IND_info; + } + break; default: barf("scavenge_one: strange object %d", (int)(info->type)); hunk ./rts/sm/Scav.c 1352 { StgPtr p, q; - gct->evac_gen = gen; + gct->evac_gen_ix = gen->ix; for (; bd != NULL; bd = bd->link) { hunk ./rts/sm/Scav.c 1354 + + debugTrace(DEBUG_gc, "scavenging mut_list block %p", bd); + for (q = bd->start; q < bd->free; q++) { p = (StgPtr)*q; ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); hunk ./rts/sm/Scav.c 1381 } #endif - // Check whether this object is "clean", that is it - // definitely doesn't point into a young generation. - // Clean objects don't need to be scavenged. Some clean - // objects (MUT_VAR_CLEAN) are not kept on the mutable - // list at all; others, such as TSO - // are always on the mutable list. - // - switch (get_itbl((StgClosure *)p)->type) { - case MUT_ARR_PTRS_CLEAN: - recordMutableGen_GC((StgClosure *)p,gen->no); - continue; - case MUT_ARR_PTRS_DIRTY: + // Check whether this object is "clean", that is it + // definitely doesn't point into a young generation. + // Clean objects don't need to be scavenged. Some clean + // objects (MUT_VAR_CLEAN) are not kept on the mutable + // list at all; others, such as TSO + // are always on the mutable list. + // + switch (get_itbl((StgClosure *)p)->type) { + + case MUT_ARR_PTRS_CLEAN: + recordMutableGen_GC((StgClosure *)p,gen->no); + continue; + case MUT_ARR_PTRS_DIRTY: { rtsBool saved_eager_promotion; saved_eager_promotion = gct->eager_promotion; hunk ./rts/sm/Scav.c 1398 gct->eager_promotion = rtsFalse; - + scavenge_mut_arr_ptrs_marked((StgMutArrPtrs *)p); hunk ./rts/sm/Scav.c 1400 - + if (gct->failed_to_evac) { ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; } else { hunk ./rts/sm/Scav.c 1406 ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; } - + gct->eager_promotion = saved_eager_promotion; gct->failed_to_evac = rtsFalse; hunk ./rts/sm/Scav.c 1409 - recordMutableGen_GC((StgClosure *)p,gen->no); - continue; + recordMutableGen_GC((StgClosure *)p,gen->no); + continue; } hunk ./rts/sm/Scav.c 1412 - case TSO: { - StgTSO *tso = (StgTSO *)p; - if (tso->dirty == 0) { + + case TSO: { + StgTSO *tso = (StgTSO *)p; + + debugTrace(DEBUG_gc, "mut_list TSO %p", tso); + +#ifdef THREADED_RTS + if (gct->gc_type == GC_LOCAL && tso->cap->no != gct->index) { + // this TSO must have been migrated from here to + // another Capability, so we must ignore its entry + // on the local mut list. + debugTrace(DEBUG_gc, + "not scavenging TSO %u, belongs to cap %u", + (nat)tso->id, (nat)tso->cap->no); + continue; + } +#endif + + if (tso->dirty == 0) { // Should be on the mutable list because its link // field is dirty. However, in parallel GC we may // have a thread on multiple mutable lists, so hunk ./rts/sm/Scav.c 1446 evacuate((StgClosure **)&tso->block_info.prev); } if (gct->failed_to_evac) { + tso->flags |= TSO_LINK_DIRTY; recordMutableGen_GC((StgClosure *)p,gen->no); gct->failed_to_evac = rtsFalse; } else { hunk ./rts/sm/Scav.c 1452 tso->flags &= ~TSO_LINK_DIRTY; } - continue; - } - } - default: - ; - } + continue; + } + break; + } + default: + ; + } hunk ./rts/sm/Scav.c 1460 - if (scavenge_one(p)) { - // didn't manage to promote everything, so put the - // object back on the list. - recordMutableGen_GC((StgClosure *)p,gen->no); - } - } + if (scavenge_one(p)) { + // didn't manage to promote everything, so put the + // object back on the list. + recordMutableGen_GC((StgClosure *)p,gen->no); + } + } } } hunk ./rts/sm/Scav.c 1480 * Also do them in reverse generation order, for the usual reason: * namely to reduce the likelihood of spurious old->new pointers. */ - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { - scavenge_mutable_list(cap->saved_mut_lists[g], &generations[g]); + for (g = RtsFlags.GcFlags.generations-1; g > gct->collect_gen; g--) { + scavenge_mutable_list(cap->saved_mut_lists[g], &old_generations[g]); freeChain_sync(cap->saved_mut_lists[g]); cap->saved_mut_lists[g] = NULL; } hunk ./rts/sm/Scav.c 1505 /* Always evacuate straight to the oldest generation for static * objects */ - gct->evac_gen = oldest_gen; + gct->evac_gen_ix = oldest_gen->ix; /* keep going until we've scavenged all the objects on the linked list... */ hunk ./rts/sm/Scav.c 1710 p = scavenge_small_bitmap(p, size, bitmap); follow_srt: +#ifndef LOCAL_GC if (major_gc) scavenge_srt((StgClosure **)GET_SRT(info), info->i.srt_bitmap); hunk ./rts/sm/Scav.c 1713 +#endif continue; case RET_BCO: { hunk ./rts/sm/Scav.c 1789 /*----------------------------------------------------------------------------- scavenge the large object list. - evac_gen set by caller; similar games played with evac_gen as with + evac_gen_ix set by caller; similar games played with evac_gen_ix as with scavenge() - see comment at the top of scavenge(). Most large hunk ./rts/sm/Scav.c 1791 - objects are (repeatedly) mutable, so most of the time evac_gen will + objects are (repeatedly) mutable, so most of the time evac_gen_ix will be zero. --------------------------------------------------------------------------- */ hunk ./rts/sm/Scav.c 1801 bdescr *bd; StgPtr p; - gct->evac_gen = ws->gen; + gct->evac_gen_ix = ws->gen->ix; bd = ws->todo_large_objects; hunk ./rts/sm/Scav.c 1813 // the front when evacuating. ws->todo_large_objects = bd->link; - ACQUIRE_SPIN_LOCK(&ws->gen->sync_large_objects); + ACQUIRE_SPIN_LOCK(&ws->gen->sync); dbl_link_onto(bd, &ws->gen->scavenged_large_objects); ws->gen->n_scavenged_large_blocks += bd->blocks; hunk ./rts/sm/Scav.c 1816 - RELEASE_SPIN_LOCK(&ws->gen->sync_large_objects); + RELEASE_SPIN_LOCK(&ws->gen->sync); p = bd->start; if (scavenge_one(p)) { hunk ./rts/sm/Scav.c 1863 loop: did_something = rtsFalse; - for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) { + for (g = total_generations-1; g >= 0; g--) { ws = &gct->gens[g]; hunk ./rts/sm/Scav.c 1866 + if (gct->gc_type != GC_SEQ && isNonLocalGen(ws->gen)) + continue; + gct->scan_bd = NULL; // If we have a scan block with some work to do, hunk ./rts/sm/Scav.h 24 void scavenge_capability_mut_lists (Capability *cap); #ifdef THREADED_RTS -void scavenge_loop1 (void); -void scavenge_mutable_list1 (bdescr *bd, generation *gen); -void scavenge_capability_mut_Lists1 (Capability *cap); + +void scavenge_loop_par (void); +void scavenge_mutable_list_par (bdescr *bd, generation *gen); +void scavenge_capability_mut_lists_par (Capability *cap); + +void scavenge_loop_local (void); +void scavenge_mutable_list_local (bdescr *bd, generation *gen); +void scavenge_capability_mut_lists_local (Capability *cap); + +#else + +#define scavenge_loop1 scavenge_loop +#define scavenge_mutable_list1 scavenge_mutable_lists +#define scavenge_capability_mut_lists1 scavenge_capability_mut_lists + #endif #include "EndPrivate.h" hunk ./rts/sm/Storage.c 31 #include "Trace.h" #include "GC.h" #include "Evac.h" +#include "Globalise.h" #include hunk ./rts/sm/Storage.c 70 bdescr *exec_block; -generation *generations = NULL; /* all the generations */ -generation *g0 = NULL; /* generation 0, for convenience */ -generation *oldest_gen = NULL; /* oldest generation, for convenience */ +nat total_generations; // size of all_generations +generation * all_generations; // indexed by gen->ix +generation * old_generations; // indexed by gen->no +generation * g0; // == &old_generations[0] +generation * oldest_gen; // == &old_generations[G-1] +generation * global_gen; // == &all_generations[global_gen_ix] +nat global_gen_ix; // ix of first global gen +nat global_gen_no; // no of first global gen nursery *nurseries = NULL; /* array of nurseries, size == n_capabilities */ hunk ./rts/sm/Storage.c 92 static void allocNurseries ( void ); static void -initGeneration (generation *gen, int g) +initGeneration (generation *gen, int g, int ix) { gen->no = g; hunk ./rts/sm/Storage.c 95 + gen->ix = ix; gen->collections = 0; gen->par_collections = 0; gen->failed_promotions = 0; hunk ./rts/sm/Storage.c 109 gen->large_objects = NULL; gen->n_large_blocks = 0; gen->n_new_large_blocks = 0; - gen->mut_list = allocBlock(); gen->scavenged_large_objects = NULL; gen->n_scavenged_large_blocks = 0; gen->mark = 0; hunk ./rts/sm/Storage.c 115 gen->compact = 0; gen->bitmap = NULL; #ifdef THREADED_RTS - initSpinLock(&gen->sync_large_objects); + initSpinLock(&gen->sync); #endif gen->threads = END_TSO_QUEUE; gen->old_threads = END_TSO_QUEUE; hunk ./rts/sm/Storage.c 119 + gen->weak_ptrs = NULL; } void hunk ./rts/sm/Storage.c 125 initStorage( void ) { - nat g, n; + nat g, n; hunk ./rts/sm/Storage.c 127 - if (generations != NULL) { + if (all_generations != NULL) { // multi-init protection return; } hunk ./rts/sm/Storage.c 163 ACQUIRE_SM_LOCK; - /* allocate generation info array */ - generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations - * sizeof(struct generation_), - "initStorage: gens"); + // There are N young generations, one for each Capability. + total_generations = n_capabilities + RtsFlags.GcFlags.generations - 1; hunk ./rts/sm/Storage.c 166 - /* Initialise all generations */ - for(g = 0; g < RtsFlags.GcFlags.generations; g++) { - initGeneration(&generations[g], g); + // allocate generation array. all_generations is indexed by + // gen->ix. + all_generations = (generation *)stgMallocBytes(total_generations + * sizeof(struct generation_), + "initStorage: gens"); + + // generations[] is indexed by the generation number, and is only + // valid for generation[0] when n_capabilities==1. + old_generations = &all_generations[n_capabilities - 1]; + + // Initialise all generations + for(n = 0; n < n_capabilities; n++) { + initGeneration(&all_generations[n], 0, n); + all_generations[n].is_local = rtsTrue; + all_generations[n].cap = n; + } + for(g = 1; g < RtsFlags.GcFlags.generations; g++) { + initGeneration(&old_generations[g], g, g + n_capabilities - 1); + old_generations[g].is_local = rtsFalse; + old_generations[g].cap = 0; } hunk ./rts/sm/Storage.c 188 - /* A couple of convenience pointers */ - g0 = &generations[0]; - oldest_gen = &generations[RtsFlags.GcFlags.generations-1]; + // A couple of convenience pointers + g0 = &old_generations[0]; + oldest_gen = &old_generations[RtsFlags.GcFlags.generations-1]; + if (RtsFlags.GcFlags.generations == 1) { + global_gen_ix = 0; + global_gen_no = 0; + } else { + global_gen_ix = n_capabilities; + global_gen_no = 1; + } + global_gen = &all_generations[global_gen_ix]; hunk ./rts/sm/Storage.c 200 - nurseries = stgMallocBytes(n_capabilities * sizeof(struct nursery_), - "initStorage: nurseries"); - - /* Set up the destination pointers in each younger gen. step */ - for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) { - generations[g].to = &generations[g+1]; + // Set up the destination pointers in each younger gen. step + { + generation *g0_dest; + + if (RtsFlags.GcFlags.generations == 1) { + g0_dest = &old_generations[0]; + } else { + g0_dest = &old_generations[1]; + } + for(n = 0; n < n_capabilities; n++) { + all_generations[n].to = g0_dest; + } + for (g = 1; g < RtsFlags.GcFlags.generations-1; g++) { + old_generations[g].to = &old_generations[g+1]; + } + oldest_gen->to = oldest_gen; } hunk ./rts/sm/Storage.c 217 - oldest_gen->to = oldest_gen; /* The oldest generation has one step. */ if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) { hunk ./rts/sm/Storage.c 229 } } - generations[0].max_blocks = 0; - + nurseries = stgMallocBytes(n_capabilities * sizeof(struct nursery_), + "initStorage: nurseries"); + /* The allocation area. Policy: keep the allocation area * small to begin with, even if we have a large suggested heap * size. Reason: we're going to do a major collection first, and we hunk ./rts/sm/Storage.c 240 */ allocNurseries(); - weak_ptr_list = NULL; caf_list = END_OF_STATIC_LIST; revertible_caf_list = END_OF_STATIC_LIST; hunk ./rts/sm/Storage.c 253 whitehole_spin = 0; #endif - N = 0; + next_gc_gen = 0; // allocate a block for each mut list for (n = 0; n < n_capabilities; n++) { hunk ./rts/sm/Storage.c 278 void freeStorage (rtsBool free_heap) { - stgFree(generations); + stgFree(all_generations); if (free_heap) freeAllMBlocks(); #if defined(THREADED_RTS) closeMutex(&sm_mutex); hunk ./rts/sm/Storage.c 328 -------------------------------------------------------------------------- */ void -newCAF(StgRegTable *reg, StgClosure* caf) +newCAF(StgRegTable *reg, StgClosure* caf, StgClosure *bh) { hunk ./rts/sm/Storage.c 330 - if(keepCAFs) - { - // HACK: - // If we are in GHCi _and_ we are using dynamic libraries, - // then we can't redirect newCAF calls to newDynCAF (see below), - // so we make newCAF behave almost like newDynCAF. - // The dynamic libraries might be used by both the interpreted - // program and GHCi itself, so they must not be reverted. - // This also means that in GHCi with dynamic libraries, CAFs are not - // garbage collected. If this turns out to be a problem, we could - // do another hack here and do an address range test on caf to figure - // out whether it is from a dynamic library. - ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info; - - ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex - ((StgIndStatic *)caf)->static_link = caf_list; - caf_list = caf; - RELEASE_SM_LOCK; - } - else - { - // Put this CAF on the mutable list for the old generation. - ((StgIndStatic *)caf)->saved_info = NULL; - if (oldest_gen->no != 0) { - recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no); + StgIndStatic *caf_ind = (StgIndStatic *)caf; + Capability *cap = regTableToCapability(reg); + + if(keepCAFs) + { + // HACK: + // If we are in GHCi _and_ we are using dynamic libraries, + // then we can't redirect newCAF calls to newDynCAF (see below), + // so we make newCAF behave almost like newDynCAF. + // The dynamic libraries might be used by both the interpreted + // program and GHCi itself, so they must not be reverted. + // This also means that in GHCi with dynamic libraries, CAFs are not + // garbage collected. If this turns out to be a problem, we could + // do another hack here and do an address range test on caf to figure + // out whether it is from a dynamic library. + caf_ind->saved_info = caf_ind->header.info; + + ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex + caf_ind->static_link = caf_list; + caf_list = caf; + RELEASE_SM_LOCK; } hunk ./rts/sm/Storage.c 352 - } + + caf_ind->indirectee = publish_gen(cap,bh,oldest_gen); + SET_HDR(caf_ind, &stg_IND_STATIC_info, CCS_SYSTEM); } // External API for setting the keepCAFs flag. see #3900. hunk ./rts/sm/Storage.c 374 // The linker hackily arranges that references to newCaf from dynamic // code end up pointing to newDynCAF. void -newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf) +newDynCAF(StgRegTable *reg, StgClosure* caf, StgClosure *bh) { hunk ./rts/sm/Storage.c 376 + StgIndStatic *caf_ind = (StgIndStatic *)caf; + Capability *cap = regTableToCapability(reg); + ACQUIRE_SM_LOCK; ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info; hunk ./rts/sm/Storage.c 386 revertible_caf_list = caf; RELEASE_SM_LOCK; + + caf_ind->indirectee = publish_gen(cap,bh,oldest_gen); + SET_HDR(caf_ind, &stg_IND_STATIC_info, CCS_SYSTEM); } /* ----------------------------------------------------------------------------- hunk ./rts/sm/Storage.c 396 -------------------------------------------------------------------------- */ static bdescr * -allocNursery (bdescr *tail, nat blocks) +allocNursery (bdescr *tail, nat blocks, generation *gen) { bdescr *bd = NULL; nat i, n; hunk ./rts/sm/Storage.c 413 bd = allocGroup(n); for (i = 0; i < n; i++) { - initBdescr(&bd[i], g0, g0); + initBdescr(&bd[i], gen, gen); bd[i].blocks = 1; bd[i].flags = 0; hunk ./rts/sm/Storage.c 443 } static void -assignNurseriesToCapabilities (void) +assignNurseryToCapability (nat n) { hunk ./rts/sm/Storage.c 445 - nat i; - - for (i = 0; i < n_capabilities; i++) { - capabilities[i].r.rNursery = &nurseries[i]; - capabilities[i].r.rCurrentNursery = nurseries[i].blocks; - capabilities[i].r.rCurrentAlloc = NULL; - } + capabilities[n].r.rNursery = &nurseries[n]; + capabilities[n].r.rG0 = &all_generations[n]; + capabilities[n].r.rCurrentNursery = nurseries[n].blocks; + capabilities[n].r.rCurrentAlloc = NULL; } static void hunk ./rts/sm/Storage.c 456 { nat i; - for (i = 0; i < n_capabilities; i++) { + for (i = 0; i < n_capabilities; i++) + { nurseries[i].blocks = hunk ./rts/sm/Storage.c 459 - allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize); + allocNursery(NULL, + RtsFlags.GcFlags.minAllocAreaSize, + &all_generations[i]); + nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize; hunk ./rts/sm/Storage.c 465 + + assignNurseryToCapability(i); } hunk ./rts/sm/Storage.c 468 - assignNurseriesToCapabilities(); } hunk ./rts/sm/Storage.c 470 +void +resetNursery (nat n) +{ + bdescr *bd; + for (bd = nurseries[n].blocks; bd; bd = bd->link) { + bd->free = bd->start; + ASSERT(bd->gen_no == 0); + ASSERT(bd->gen == &all_generations[n]); + IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE)); + } + assignNurseryToCapability(n); +} + + void resetNurseries( void ) { hunk ./rts/sm/Storage.c 488 nat i; - bdescr *bd; for (i = 0; i < n_capabilities; i++) { hunk ./rts/sm/Storage.c 490 - for (bd = nurseries[i].blocks; bd; bd = bd->link) { - bd->free = bd->start; - ASSERT(bd->gen_no == 0); - ASSERT(bd->gen == g0); - IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE)); - } + resetNursery(i); } hunk ./rts/sm/Storage.c 492 - assignNurseriesToCapabilities(); } lnat hunk ./rts/sm/Storage.c 507 } static void -resizeNursery ( nursery *nursery, nat blocks ) +resizeNursery ( nursery *nursery, nat blocks, nat cap_no) { bdescr *bd; nat nursery_blocks; hunk ./rts/sm/Storage.c 518 if (nursery_blocks < blocks) { debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks", blocks); - nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks); + nursery->blocks = allocNursery(nursery->blocks, + blocks-nursery_blocks, + &all_generations[cap_no]); } else { bdescr *next_bd; hunk ./rts/sm/Storage.c 540 // might have gone just under, by freeing a large block, so make // up the difference. if (nursery_blocks < blocks) { - nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks); + nursery->blocks = allocNursery(nursery->blocks, + blocks-nursery_blocks, + &all_generations[cap_no]); } } hunk ./rts/sm/Storage.c 558 { nat i; for (i = 0; i < n_capabilities; i++) { - resizeNursery(&nurseries[i], blocks); + resizeNursery(&nurseries[i], blocks, i); } } hunk ./rts/sm/Storage.c 646 { bdescr *bd; StgPtr p; + generation *gen; if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; hunk ./rts/sm/Storage.c 667 ACQUIRE_SM_LOCK bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0->large_objects); - g0->n_large_blocks += bd->blocks; // might be larger than req_blocks - g0->n_new_large_blocks += bd->blocks; + gen = cap->r.rG0; + dbl_link_onto(bd, &gen->large_objects); + gen->n_large_blocks += bd->blocks; // might be larger than req_blocks + gen->n_new_large_blocks += bd->blocks; RELEASE_SM_LOCK; hunk ./rts/sm/Storage.c 672 - initBdescr(bd, g0, g0); + initBdescr(bd, gen, gen); bd->flags = BF_LARGE; bd->free = bd->start + n; return bd->start; hunk ./rts/sm/Storage.c 698 bd = allocBlock(); cap->r.rNursery->n_blocks++; RELEASE_SM_LOCK; - initBdescr(bd, g0, g0); + initBdescr(bd, cap->r.rG0, cap->r.rG0); bd->flags = 0; // If we had to allocate a new block, then we'll GC // pretty quickly now, because MAYBE_GC() will hunk ./rts/sm/Storage.c 751 { StgPtr p; bdescr *bd; + generation *gen; // If the request is for a large object, then allocate() // will give us a pinned object anyway. hunk ./rts/sm/Storage.c 771 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) { ACQUIRE_SM_LOCK; cap->pinned_object_block = bd = allocBlock(); - dbl_link_onto(bd, &g0->large_objects); - g0->n_large_blocks++; - g0->n_new_large_blocks++; + gen = cap->r.rG0; // use our local G0 + dbl_link_onto(bd, &gen->large_objects); + gen->n_large_blocks++; + gen->n_new_large_blocks++; RELEASE_SM_LOCK; hunk ./rts/sm/Storage.c 776 - initBdescr(bd, g0, g0); + initBdescr(bd, gen, gen); bd->flags = BF_PINNED | BF_LARGE; bd->free = bd->start; } hunk ./rts/sm/Storage.c 797 and is put on the mutable list. */ void -dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) +dirty_MUT_VAR (StgRegTable *reg, StgClosure *p) +{ + Capability *cap = regTableToCapability(reg); + bdescr *bd; + bd = Bdescr((StgPtr)p); + if (bd->gen_ix >= global_gen_ix) { + ((StgMutVar*)p)->var = publish(cap, ((StgMutVar*)p)->var); + } +} + +// The write barrier for arrays (or part of it). +void +dirty_MUT_ARR (StgRegTable *reg, StgMutArrPtrs *arr, nat ix) { Capability *cap = regTableToCapability(reg); hunk ./rts/sm/Storage.c 812 - if (p->header.info == &stg_MUT_VAR_CLEAN_info) { - p->header.info = &stg_MUT_VAR_DIRTY_info; - recordClosureMutated(cap,p); + bdescr *bd; + bd = Bdescr((StgPtr)arr); + if (bd->gen_ix >= global_gen_ix) { + arr->payload[ix] = publish(cap, arr->payload[ix]); } } hunk ./rts/sm/Storage.c 822 // Setting a TSO's link field with a write barrier. // It is *not* necessary to call this function when // * setting the link field to END_TSO_QUEUE -// * putting a TSO on the blackhole_queue // * setting the link field of the currently running TSO, as it // will already be dirty. void hunk ./rts/sm/Storage.c 829 { if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) { tso->flags |= TSO_LINK_DIRTY; - recordClosureMutated(cap,(StgClosure*)tso); + recordClosureMutated_(cap,(StgClosure*)tso); } tso->_link = target; } hunk ./rts/sm/Storage.c 839 { if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) { tso->flags |= TSO_LINK_DIRTY; - recordClosureMutated(cap,(StgClosure*)tso); + recordClosureMutated_(cap,(StgClosure*)tso); } tso->block_info.prev = target; } hunk ./rts/sm/Storage.c 848 dirty_TSO (Capability *cap, StgTSO *tso) { if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) { - recordClosureMutated(cap,(StgClosure*)tso); + recordClosureMutated_(cap,(StgClosure*)tso); } tso->dirty = 1; } hunk ./rts/sm/Storage.c 879 * This leaves a little slop at the end of each block. * -------------------------------------------------------------------------- */ +lnat +calcAllocatedCap (Capability *cap) +{ + nat allocated; + bdescr *bd; + + allocated = cap->r.rNursery->n_blocks * BLOCK_SIZE_W; + + for ( bd = cap->r.rCurrentNursery->link; bd != NULL; bd = bd->link ) { + allocated -= BLOCK_SIZE_W; + } + if (cap->r.rCurrentNursery->free < + cap->r.rCurrentNursery->start + BLOCK_SIZE_W) { + allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W) + - cap->r.rCurrentNursery->free; + } + if (cap->pinned_object_block != NULL) { + allocated -= (cap->pinned_object_block->start + BLOCK_SIZE_W) - + cap->pinned_object_block->free; + } + + allocated += cap->r.rG0->n_new_large_blocks * BLOCK_SIZE_W; + + return allocated; +} + lnat calcAllocated( void ) { hunk ./rts/sm/Storage.c 909 nat allocated; - bdescr *bd; nat i; hunk ./rts/sm/Storage.c 911 - allocated = countNurseryBlocks() * BLOCK_SIZE_W; - + allocated = 0; for (i = 0; i < n_capabilities; i++) { hunk ./rts/sm/Storage.c 913 - Capability *cap; - for ( bd = capabilities[i].r.rCurrentNursery->link; - bd != NULL; bd = bd->link ) { - allocated -= BLOCK_SIZE_W; - } - cap = &capabilities[i]; - if (cap->r.rCurrentNursery->free < - cap->r.rCurrentNursery->start + BLOCK_SIZE_W) { - allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W) - - cap->r.rCurrentNursery->free; - } - if (cap->pinned_object_block != NULL) { - allocated -= (cap->pinned_object_block->start + BLOCK_SIZE_W) - - cap->pinned_object_block->free; - } + allocated += calcAllocatedCap(&capabilities[i]); } hunk ./rts/sm/Storage.c 916 - allocated += g0->n_new_large_blocks * BLOCK_SIZE_W; - return allocated; } hunk ./rts/sm/Storage.c 925 lnat calcLiveBlocks (void) { nat g; - lnat live = 0; - generation *gen; + lnat live; hunk ./rts/sm/Storage.c 927 - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - /* approximate amount of live data (doesn't take into account slop - * at end of each block). - */ - gen = &generations[g]; - live += gen->n_large_blocks + gen->n_blocks; + live = 0; + for (g = 0; g < total_generations; g++) { + // approximate amount of live data (doesn't take into account slop + // at end of each block). + live += all_generations[g].n_blocks + all_generations[g].n_large_blocks; } return live; } hunk ./rts/sm/Storage.c 948 return words; } -// Return an accurate count of the live data in the heap, excluding -// generation 0. +// Return an accurate count of the live data in the heap lnat calcLiveWords (void) { nat g; hunk ./rts/sm/Storage.c 956 generation *gen; live = 0; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen = &generations[g]; + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; live += gen->n_words + countOccupied(gen->large_objects); } return live; hunk ./rts/sm/Storage.c 977 nat g; generation *gen; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen = &generations[g]; + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; // we need at least this much space needed += gen->n_blocks + gen->n_large_blocks; hunk ./rts/sm/Storage.c 984 // any additional space needed to collect this gen next time? - if (g == 0 || // always collect gen 0 + if (gen->no == 0 || // always collect gen 0 (gen->n_blocks + gen->n_large_blocks > gen->max_blocks)) { // we will collect this gen next time if (gen->mark) { hunk ./rts/sm/Storage.h 41 /* ----------------------------------------------------------------------------- Generational garbage collection support - recordMutable(StgPtr p) Informs the garbage collector that a - previously immutable object has - become (permanently) mutable. Used - by thawArray and similar. - updateWithIndirection(p1,p2) Updates the object at p1 with an indirection pointing to p2. This is normally called for objects in an old hunk ./rts/sm/Storage.h 46 generation (>0) when they are updated. - updateWithPermIndirection(p1,p2) As above but uses a permanent indir. - -------------------------------------------------------------------------- */ /* hunk ./rts/sm/Storage.h 65 #define ASSERT_SM_LOCK() #endif -INLINE_HEADER void -recordMutableGen(StgClosure *p, nat gen_no) -{ - bdescr *bd; - - bd = generations[gen_no].mut_list; - if (bd->free >= bd->start + BLOCK_SIZE_W) { - bdescr *new_bd; - new_bd = allocBlock(); - new_bd->link = bd; - bd = new_bd; - generations[gen_no].mut_list = bd; - } - *bd->free++ = (StgWord)p; - -} - -INLINE_HEADER void -recordMutableGenLock(StgClosure *p, nat gen_no) -{ - ACQUIRE_SM_LOCK; - recordMutableGen(p,gen_no); - RELEASE_SM_LOCK; -} - -INLINE_HEADER void -recordMutable(StgClosure *p) -{ - bdescr *bd; - ASSERT(closure_MUTABLE(p)); - bd = Bdescr((P_)p); - if (bd->gen_no > 0) recordMutableGen(p, bd->gen_no); -} - -INLINE_HEADER void -recordMutableLock(StgClosure *p) -{ - ACQUIRE_SM_LOCK; - recordMutable(p); - RELEASE_SM_LOCK; -} - -/* ----------------------------------------------------------------------------- - The write barrier for MVARs - -------------------------------------------------------------------------- */ - -void dirty_MVAR(StgRegTable *reg, StgClosure *p); - /* ----------------------------------------------------------------------------- Nursery manipulation -------------------------------------------------------------------------- */ hunk ./rts/sm/Storage.h 71 extern nursery *nurseries; +void resetNursery ( nat n ); void resetNurseries ( void ); void resizeNurseries ( nat blocks ); void resizeNurseriesFixed ( nat blocks ); hunk ./rts/sm/Storage.h 82 -------------------------------------------------------------------------- */ lnat calcAllocated (void); +lnat calcAllocatedCap (Capability *cap); lnat calcLiveBlocks (void); lnat calcLiveWords (void); lnat countOccupied (bdescr *bd); hunk ./utils/genapply/GenApply.hs 383 reg_doc, text " Sp_adj(" <> int sp' <> text ");", -- enter, but adjust offset with tag - text " jump " <> text "%GET_ENTRY(R1-" <> int tag <> text ");", + text " info = %INFO_PTR(R1-" <> int tag <> text ");", + text " if (IS_FORWARDING_PTR(info)) { R1 = UN_FORWARDING_PTR(info) + " <> int tag <> text "; info = %INFO_PTR(R1-" <> int tag <> text "); }", + text " jump " <> text "%ENTRY_CODE(info);", text "}" ] -- I don't totally understand this code, I copied it from hunk ./utils/genapply/GenApply.hs 570 text "case AP,", text " AP_STACK,", text " BLACKHOLE,", + text " IND_LOCAL,", text " WHITEHOLE,", text " THUNK,", text " THUNK_1_0,", hunk ./utils/genapply/GenApply.hs 644 -- Functions can be tagged, so we untag them! text "R1 = UNTAG(R1);", - text "info = %GET_STD_INFO(R1);", + text "info = %INFO_PTR(R1);", + -- check for forwarding pointers + text "if (IS_FORWARDING_PTR(info)) { R1 = UN_FORWARDING_PTR(info); info = %INFO_PTR(R1); }", + text "info = %STD_INFO(info);", text "switch [INVALID_OBJECT .. N_CLOSURE_TYPES] (TO_W_(%INFO_TYPE(info))) {", nest 4 (vcat [ text "case FUN,", } [snapshot of local GC work at a relatively stable point Simon Marlow **20101021095602 Ignore-this: f82e78a78cd301465f576909ccd8b774 ] { hunk ./PLAN 4 ----------------------------------------------------------------------------- -- Plan - - conc023(threaded1) - - throwto001 + - IND_LOCAL pointing to BLACKHOLE + + - scavenge_mark_stack should probably set evac_gen_ix to the old gen + if the marked closure is global. + + - nofib/parallel benchmarks + + - concurrent(threaded2) tests + + - right now we're not promoting anything from the local prim heap + unless it is referred to by the global heap. Is this the best + option? Alternative: mark closures global in evacuate(). + + - sort out where we allocate prim memory from. Using the nursery is + a bit of a hack, and the nursery has to be resized after each + collection. + + - options for global mark bits: + - word before each object + - bitmap at the beginning of the block + - low bit in the info pointer + + - we could inline part of allocatePrim if newMVar, newMutVar + performance is important. + + - An alternative: allocate prim objects in the nursery as normal, + but pin the whole block if they become global. Other private + objects would be copied out of the block as normal. The pinned + block then becomes part of the global heap (if we did Immix + marking then we could reclaim the free parts of the block). + + We still have to mark individual objects as global/local, and we + can't use word-before-object. + + - the first time we sweep a prim block that has a global object in it, + mark the block BF_GLOBAL so we don't sweep it again? + + - checking whether a closure pointer is global is too complex: + + EXTERN_INLINE rtsBool isGlobal (StgClosure *p) + { + bdescr *bd; + bd = Bdescr((P_)p); + return bd->gen_no != 0 || ((bd->flags & BF_PRIM) && isGlobalPrim(p)); + } + + (used in globalise_wrt, checkMutableList, + dirty_MUT_VAR, dirty_MUT_ARR, recordClosureMutated_) + + how can we make this easier? + + 1. use MUT_VAR_GLOBAL iff the MUT_VAR is in the global gen, + otherwise use MUT_VAR_LOCAL. Only MUT_VAR_GLOBAL writes need to + publish. + + 2. similarly for TSO: !tso->dirty means global and clean. + + 3. can omit the BF_PRIM check for objects we know to be prim. + + - recordClosureMutated_: gen is wrong if we have >2 gens + + - calcAllocated: take into account allocations from allocatePrim(). + + - threadStackOverflow: hacky -1s to account for global flags + + - PROFILING??? + - selector thunks: must check for forwarding ptr before entering - when allocating messages that point to TSOs, we need to globalise the TSO. hunk ./PLAN 77 - is HEAP_ALLOCED_GC() safe to call during local GC? - - implement messages for requesting globalisation + - if a Cap is idle, it should probably do a local GC, otherwise it + will keep receiving messages requesting globalisation. - re-enable thread migration - to migrate a thread: hunk ./PLAN 384 that they stay forward-pointers, and hence we never get into a state where C/F/T has been promoted but not U. +Should we change ENTER() to check for forwarding pointers? why has +this not gone wrong so far? - because forwarded closures are always +tagged, so ENTER just returns. + ----------------------------------------------------------------------------- How to globalise large-family constructors? hunk ./PLAN 649 Compared to the other scheme: - - no need for eager promotion of primitive objects! + - no need for eager promotion of primitive objects! (well actually, + primitive objects do get eagerly promoted, but without moving + them). - we can globalise everything, globalise is simpler. hunk ./PLAN 656 - write barriers are a bit simpler and cheaper: check info pointer, - maybe globalise + maybe globalise (well actually, currently the check is worse + because it is a bdescr->gen check plus isGlobal). - we need to duplicate all info tables for primitive objects, but private/global can be a bit in the flags field of the info table, hunk ./PLAN 670 - TSOs don't need to be globalised, they can be implicitly global, similarly for BLOCKING_QUEUE and other private objects. + - Evac: + - in local GC: + if the closure is global, we're done + if the closure is private, mark it and push it on the mark stack. + - in global GC: + copy the closure as normal. + + Even though the prim areas are local heap, we don't have to + record pointers from the global heap to the prim heap in the + remembered set. Instead we transitively mark all the global + objects in the prim area, so that they don't get discarded by + local GC. + + What about clean/dirty? - for TSO: still need clean/dirty hunk ./PLAN 695 MUT_VAR_CLEAN, MUT_VAR_DIRTY, MUT_VAR_GLOBAL and a 3-way write barrier. + + +----------------------------------------------------------------------------- +Things to measure + + - publish vs. globalises vs. globalise-to-depth + - for updates + - for MUT_VAR / MUT_ARR / TVAR / MVAR + + - eager promotion vs. lazy promotion for prim objects in local heap + +----------------------------------------------------------------------------- +Sources of overhead + + - checking for forwarding pointers in apply code, PAP code, large + family constructor cases. + + - slower allocation of primitive objects hunk ./compiler/cmm/CLabel.hs 60 mkSplitMarkerLabel, mkDirty_MUT_VAR_Label, + mkDirty_MUT_ARR_Label, mkUpdInfoLabel, mkBHUpdInfoLabel, mkIndStaticInfoLabel, hunk ./compiler/cmm/CLabel.hs 404 -- Constructing Cmm Labels mkSplitMarkerLabel = CmmLabel rtsPackageId (fsLit "__stg_split_marker") CmmCode mkDirty_MUT_VAR_Label = CmmLabel rtsPackageId (fsLit "dirty_MUT_VAR") CmmCode +mkDirty_MUT_ARR_Label = CmmLabel rtsPackageId (fsLit "dirty_MUT_ARR") CmmCode mkUpdInfoLabel = CmmLabel rtsPackageId (fsLit "stg_upd_frame") CmmInfo mkBHUpdInfoLabel = CmmLabel rtsPackageId (fsLit "stg_bh_upd_frame" ) CmmInfo mkIndStaticInfoLabel = CmmLabel rtsPackageId (fsLit "stg_IND_STATIC") CmmInfo hunk ./compiler/codeGen/CgClosure.lhs 39 import CLabel import StgSyn import CostCentre -import Type import Id import Name import Module hunk ./compiler/codeGen/CgClosure.lhs 79 ; srt_info <- getSRTInfo ; mod_name <- getModuleName ; let descr = closureDescription mod_name name - closure_info = mkClosureInfo True False id lf_info 0 0 srt_info descr + closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr closure_label = mkLocalClosureLabel name $ idCafInfo id cg_id_info = stableIdInfo id (mkLblExpr closure_label) lf_info closure_rep = mkStaticClosureFields closure_info ccs True [] hunk ./compiler/codeGen/CgClosure.lhs 122 descr = closureDescription mod_name (idName bndr) closure_info = mkClosureInfo False -- Not static - False -- no unlifted/pointed fields bndr lf_info tot_wds ptr_wds NoC_SRT -- No SRT for a std-form closure descr hunk ./compiler/codeGen/CgClosure.lhs 172 add_rep info = (cgIdInfoArgRep info, info) - any_unlifted = any unlifted fv_infos - where unlifted info = isFollowableArg (cgIdInfoArgRep info) - && isUnLiftedType (idType (cgIdInfoId info)) - descr = closureDescription mod_name name closure_info = mkClosureInfo False -- Not static hunk ./compiler/codeGen/CgClosure.lhs 174 - any_unlifted bndr lf_info tot_wds ptr_wds srt_info descr hunk ./compiler/codeGen/CgCon.lhs 82 lf_info = mkConLFInfo con closure_label = mkClosureLabel name $ idCafInfo id caffy = any stgArgHasCafRefs args - (closure_info, amodes_w_offsets) = layOutStaticConstr False con amodes + (closure_info, amodes_w_offsets) = layOutStaticConstr con amodes closure_rep = mkStaticClosureFields closure_info dontCareCCS -- Because it's static data hunk ./compiler/codeGen/CgCon.lhs 209 buildDynCon binder ccs con args = do { ; let - (closure_info, amodes_w_offsets) = layOutDynConstr unlifted_fields con args + (closure_info, amodes_w_offsets) = layOutDynConstr con args ; hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets ; returnFC (taggedHeapIdInfo binder hp_off lf_info con) } hunk ./compiler/codeGen/CgCon.lhs 221 | otherwise = CmmLit (mkCCostCentreStack ccs) blame_cc = use_cc -- cost-centre on which to blame the alloc (same) - - unlifted_fields = dataConHasUnliftedPtrFields con - -dataConHasUnliftedPtrFields :: DataCon -> Bool -dataConHasUnliftedPtrFields con = any unlifted (dataConRepArgTys con) - where unlifted ty = isUnLiftedType ty && typePrimRep ty == PtrRep \end{code} hunk ./compiler/codeGen/CgCon.lhs 420 -- time), we take care that info-table contains the -- information we need. (static_cl_info, _) = - layOutStaticConstr False data_con arg_reps + layOutStaticConstr data_con arg_reps (dyn_cl_info, arg_things) = hunk ./compiler/codeGen/CgCon.lhs 423 - layOutDynConstr unlifted_fields data_con arg_reps - - unlifted_fields = dataConHasUnliftedPtrFields data_con + layOutDynConstr data_con arg_reps emit_info cl_info ticky_code = do { code_blks <- getCgStmts the_code hunk ./compiler/codeGen/CgHeapery.lhs 117 \begin{code} layOutDynConstr, layOutStaticConstr - :: Bool -- any fields are unlifted/pointed? - -> DataCon + :: DataCon -> [(CgRep,a)] -> (ClosureInfo, [(a,VirtualHpOffset)]) hunk ./compiler/codeGen/CgHeapery.lhs 125 layOutDynConstr = layOutConstr False layOutStaticConstr = layOutConstr True -layOutConstr :: Bool -> Bool -> DataCon -> [(CgRep, a)] +layOutConstr :: Bool -> DataCon -> [(CgRep, a)] -> (ClosureInfo, [(a, VirtualHpOffset)]) hunk ./compiler/codeGen/CgHeapery.lhs 127 -layOutConstr is_static unlifted_fields data_con args - = (mkConInfo is_static unlifted_fields data_con tot_wds ptr_wds, +layOutConstr is_static data_con args + = (mkConInfo is_static data_con tot_wds ptr_wds, things_w_offsets) where (tot_wds, -- #ptr_wds + #nonptr_wds hunk ./compiler/codeGen/CgPrimOp.hs 212 emitPrimOp [r] ReadArrayOp [obj,ix] _ = doReadPtrArrayOp r obj ix emitPrimOp [r] IndexArrayOp [obj,ix] _ = doReadPtrArrayOp r obj ix -emitPrimOp [] WriteArrayOp [obj,ix,v] _ = doWritePtrArrayOp obj ix v +emitPrimOp [] WriteArrayOp [obj,ix,v] live = doWritePtrArrayOp obj ix v live -- IndexXXXoffAddr hunk ./compiler/codeGen/CgPrimOp.hs 569 doWriteByteArrayOp _ _ _ _ = panic "CgPrimOp: doWriteByteArrayOp" -doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> Code -doWritePtrArrayOp addr idx val +doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars -> Code +doWritePtrArrayOp addr idx val live = do mkBasicIndexedWrite arrPtrsHdrSize Nothing bWord addr idx val stmtC (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) -- the write barrier. We must write a byte into the mark table: hunk ./compiler/codeGen/CgPrimOp.hs 582 (CmmMachOp mo_wordUShr [idx, CmmLit (mkIntCLit mUT_ARR_PTRS_CARD_BITS)]) ) (CmmLit (CmmInt 1 W8)) + vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [{-no results-}] + (CmmCallee (CmmLit (CmmLabel mkDirty_MUT_ARR_Label)) + CCallConv) + [ (CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint) + , (CmmHinted addr AddrHint) + , (CmmHinted idx NoHint) ] + (Just vols) + NoC_SRT -- No SRT b/c we do PlayRisky + CmmMayReturn loadArrPtrsSize :: CmmExpr -> CmmExpr loadArrPtrsSize addr = CmmLoad (cmmOffsetB addr off) bWord hunk ./compiler/codeGen/ClosureInfo.lhs 332 \begin{code} mkClosureInfo :: Bool -- Is static - -> Bool -- Has one or more unlifted pointer fields -> Id -> LambdaFormInfo -> Int -> Int -- Total and pointer words hunk ./compiler/codeGen/ClosureInfo.lhs 338 -> C_SRT -> String -- String descriptor -> ClosureInfo -mkClosureInfo is_static unlifted_fields id lf_info tot_wds ptr_wds srt_info descr +mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr = ClosureInfo { closureName = name, closureLFInfo = lf_info, closureSMRep = sm_rep, hunk ./compiler/codeGen/ClosureInfo.lhs 347 closureDescr = descr } where name = idName id - sm_rep = chooseSMRep is_static unlifted_fields lf_info tot_wds ptr_wds + sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds mkConInfo :: Bool -- Is static hunk ./compiler/codeGen/ClosureInfo.lhs 350 - -> Bool -- Has one or more unlifted pointer fields -> DataCon -> Int -> Int -- Total and pointer words -> ClosureInfo hunk ./compiler/codeGen/ClosureInfo.lhs 353 -mkConInfo is_static unlifted_fields data_con tot_wds ptr_wds +mkConInfo is_static data_con tot_wds ptr_wds = ConInfo { closureSMRep = sm_rep, closureCon = data_con } where hunk ./compiler/codeGen/ClosureInfo.lhs 357 - sm_rep = chooseSMRep is_static unlifted_fields (mkConLFInfo data_con) tot_wds ptr_wds + sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds \end{code} %************************************************************************ hunk ./compiler/codeGen/ClosureInfo.lhs 395 -- not exported: sizes_from_SMRep :: SMRep -> (WordOff,WordOff) -sizes_from_SMRep (GenericRep _ _ ptrs nonptrs _) = (ptrs, nonptrs) +sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs) sizes_from_SMRep BlackHoleRep = (0, 0) \end{code} hunk ./compiler/codeGen/ClosureInfo.lhs 430 minPayloadSize :: SMRep -> Bool -> WordOff minPayloadSize smrep updatable = case smrep of - BlackHoleRep -> min_upd_size - GenericRep _ _ _ _ _ | updatable -> min_upd_size - GenericRep True _ _ _ _ -> 0 -- static - GenericRep False _ _ _ _ -> mIN_PAYLOAD_SIZE + BlackHoleRep -> min_upd_size + GenericRep _ _ _ _ | updatable -> min_upd_size + GenericRep True _ _ _ -> 0 -- static + GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE -- ^^^^^___ dynamic where min_upd_size = hunk ./compiler/codeGen/ClosureInfo.lhs 452 \begin{code} chooseSMRep :: Bool -- True <=> static closure - -> Bool -- Has one or more unlifted pointer fields -> LambdaFormInfo -> WordOff -> WordOff -- Tot wds, ptr wds -> SMRep hunk ./compiler/codeGen/ClosureInfo.lhs 456 -chooseSMRep is_static unlifted_fields lf_info tot_wds ptr_wds +chooseSMRep is_static lf_info tot_wds ptr_wds = let nonptr_wds = tot_wds - ptr_wds closure_type = getClosureType is_static ptr_wds lf_info hunk ./compiler/codeGen/ClosureInfo.lhs 461 in - GenericRep is_static unlifted_fields ptr_wds nonptr_wds closure_type + GenericRep is_static ptr_wds nonptr_wds closure_type -- We *do* get non-updatable top-level thunks sometimes. eg. f = g -- gets compiled to a jump to g (if g has non-zero arity), instead of hunk ./compiler/codeGen/ClosureInfo.lhs 734 where not_nocaf_constr = case sm_rep of - GenericRep _ _ _ _ ConstrNoCaf -> False - _other -> True + GenericRep _ _ _ ConstrNoCaf -> False + _other -> True \end{code} Note [Entering error thunks] hunk ./compiler/codeGen/SMRep.lhs 242 = GenericRep -- GC routines consult sizes in info tbl Bool -- True <=> This is a static closure. Affects how -- we garbage-collect it - Bool -- True <=> Has any unlifted pointer fields. !Int -- # ptr words !Int -- # non-ptr words ClosureType -- closure type hunk ./compiler/codeGen/SMRep.lhs 282 \begin{code} isStaticRep :: SMRep -> Bool -isStaticRep (GenericRep is_static _ _ _ _) = is_static +isStaticRep (GenericRep is_static _ _ _) = is_static isStaticRep BlackHoleRep = False \end{code} hunk ./compiler/codeGen/SMRep.lhs 293 -- krc: only called by tickyDynAlloc in CgTicky; return -- Nothing for a black hole so we can at least make something work. smRepClosureType :: SMRep -> Maybe ClosureType -smRepClosureType (GenericRep _ _ _ _ ty) = Just ty +smRepClosureType (GenericRep _ _ _ ty) = Just ty smRepClosureType BlackHoleRep = Nothing smRepClosureTypeInt :: SMRep -> StgHalfWord hunk ./compiler/codeGen/SMRep.lhs 297 -smRepClosureTypeInt (GenericRep False False 1 0 Constr) = CONSTR_1_0 -smRepClosureTypeInt (GenericRep False False 0 1 Constr) = CONSTR_0_1 -smRepClosureTypeInt (GenericRep False False 2 0 Constr) = CONSTR_2_0 -smRepClosureTypeInt (GenericRep False False 1 1 Constr) = CONSTR_1_1 -smRepClosureTypeInt (GenericRep False False 0 2 Constr) = CONSTR_0_2 -smRepClosureTypeInt (GenericRep False False _ _ Constr) = CONSTR -smRepClosureTypeInt (GenericRep False True _ _ Constr) = CONSTR_PRIMWRAP +smRepClosureTypeInt (GenericRep False 1 0 Constr) = CONSTR_1_0 +smRepClosureTypeInt (GenericRep False 0 1 Constr) = CONSTR_0_1 +smRepClosureTypeInt (GenericRep False 2 0 Constr) = CONSTR_2_0 +smRepClosureTypeInt (GenericRep False 1 1 Constr) = CONSTR_1_1 +smRepClosureTypeInt (GenericRep False 0 2 Constr) = CONSTR_0_2 +smRepClosureTypeInt (GenericRep False _ _ Constr) = CONSTR hunk ./compiler/codeGen/SMRep.lhs 304 -smRepClosureTypeInt (GenericRep False False 1 0 Fun) = FUN_1_0 -smRepClosureTypeInt (GenericRep False False 0 1 Fun) = FUN_0_1 -smRepClosureTypeInt (GenericRep False False 2 0 Fun) = FUN_2_0 -smRepClosureTypeInt (GenericRep False False 1 1 Fun) = FUN_1_1 -smRepClosureTypeInt (GenericRep False False 0 2 Fun) = FUN_0_2 -smRepClosureTypeInt (GenericRep False False _ _ Fun) = FUN -smRepClosureTypeInt (GenericRep False _ _ _ Fun) = FUN_PRIMWRAP +smRepClosureTypeInt (GenericRep False 1 0 Fun) = FUN_1_0 +smRepClosureTypeInt (GenericRep False 0 1 Fun) = FUN_0_1 +smRepClosureTypeInt (GenericRep False 2 0 Fun) = FUN_2_0 +smRepClosureTypeInt (GenericRep False 1 1 Fun) = FUN_1_1 +smRepClosureTypeInt (GenericRep False 0 2 Fun) = FUN_0_2 +smRepClosureTypeInt (GenericRep False _ _ Fun) = FUN hunk ./compiler/codeGen/SMRep.lhs 311 -smRepClosureTypeInt (GenericRep False _ 1 0 Thunk) = THUNK_1_0 -smRepClosureTypeInt (GenericRep False _ 0 1 Thunk) = THUNK_0_1 -smRepClosureTypeInt (GenericRep False _ 2 0 Thunk) = THUNK_2_0 -smRepClosureTypeInt (GenericRep False _ 1 1 Thunk) = THUNK_1_1 -smRepClosureTypeInt (GenericRep False _ 0 2 Thunk) = THUNK_0_2 -smRepClosureTypeInt (GenericRep False _ _ _ Thunk) = THUNK +smRepClosureTypeInt (GenericRep False 1 0 Thunk) = THUNK_1_0 +smRepClosureTypeInt (GenericRep False 0 1 Thunk) = THUNK_0_1 +smRepClosureTypeInt (GenericRep False 2 0 Thunk) = THUNK_2_0 +smRepClosureTypeInt (GenericRep False 1 1 Thunk) = THUNK_1_1 +smRepClosureTypeInt (GenericRep False 0 2 Thunk) = THUNK_0_2 +smRepClosureTypeInt (GenericRep False _ _ Thunk) = THUNK hunk ./compiler/codeGen/SMRep.lhs 318 -smRepClosureTypeInt (GenericRep False _ _ _ ThunkSelector) = THUNK_SELECTOR +smRepClosureTypeInt (GenericRep False _ _ ThunkSelector) = THUNK_SELECTOR hunk ./compiler/codeGen/SMRep.lhs 320 -smRepClosureTypeInt (GenericRep True _ _ _ Constr) = CONSTR_STATIC -smRepClosureTypeInt (GenericRep True _ _ _ ConstrNoCaf) = CONSTR_NOCAF_STATIC -smRepClosureTypeInt (GenericRep True _ _ _ Fun) = FUN_STATIC -smRepClosureTypeInt (GenericRep True _ _ _ Thunk) = THUNK_STATIC +smRepClosureTypeInt (GenericRep True _ _ Constr) = CONSTR_STATIC +smRepClosureTypeInt (GenericRep True _ _ ConstrNoCaf) = CONSTR_NOCAF_STATIC +smRepClosureTypeInt (GenericRep True _ _ Fun) = FUN_STATIC +smRepClosureTypeInt (GenericRep True _ _ Thunk) = THUNK_STATIC smRepClosureTypeInt BlackHoleRep = BLACKHOLE hunk ./compiler/codeGen/StgCmm.hs 328 -- static data structures (ie those built at compile -- time), we take care that info-table contains the -- information we need. - (static_cl_info, _) = layOutStaticConstr False data_con arg_reps - (dyn_cl_info, arg_things) = - layOutDynConstr unlifted_fields data_con arg_reps - - unlifted_fields = dataConHasUnliftedPtrFields data_con + (static_cl_info, _) = layOutStaticConstr data_con arg_reps + (dyn_cl_info, arg_things) = layOutDynConstr data_con arg_reps emit_info cl_info ticky_code = emitClosureAndInfoTable cl_info NativeDirectCall [] hunk ./compiler/codeGen/StgCmmBind.hs 38 import CLabel import StgSyn import CostCentre -import Type import Id import Control.Monad import Name hunk ./compiler/codeGen/StgCmmBind.hs 73 ; srt_info <- getSRTInfo srt ; mod_name <- getModuleName ; let descr = closureDescription mod_name name - closure_info = mkClosureInfo True False id lf_info 0 0 srt_info descr + closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr closure_label = mkLocalClosureLabel name (idCafInfo id) cg_id_info = litIdInfo id lf_info (CmmLabel closure_label) closure_rep = mkStaticClosureFields closure_info ccs True [] hunk ./compiler/codeGen/StgCmmBind.hs 283 (tot_wds, ptr_wds, fv_details) = mkVirtHeapOffsets (isLFThunk lf_info) fv_infos closure_info = mkClosureInfo False -- Not static - any_unlifted bndr lf_info tot_wds ptr_wds c_srt descr hunk ./compiler/codeGen/StgCmmBind.hs 286 - any_unlifted = any unlifted fv_infos - where unlifted (rep,id) = rep == PtrRep - && isUnLiftedType (idType id) - -- BUILD ITS INFO TABLE AND CODE ; forkClosureBody $ -- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere hunk ./compiler/codeGen/StgCmmBind.hs 327 descr = closureDescription mod_name (idName bndr) closure_info = mkClosureInfo False -- Not static - False -- no unlifted/pointed fields bndr lf_info tot_wds ptr_wds NoC_SRT -- No SRT for a std-form closure descr hunk ./compiler/codeGen/StgCmmClosure.hs 369 chooseSMRep :: Bool -- True <=> static closure - -> Bool -- Has one or more unlifted pointer fields -> LambdaFormInfo -> WordOff -> WordOff -- Tot wds, ptr wds -> SMRep hunk ./compiler/codeGen/StgCmmClosure.hs 373 -chooseSMRep is_static unlifted_fields lf_info tot_wds ptr_wds +chooseSMRep is_static lf_info tot_wds ptr_wds = let nonptr_wds = tot_wds - ptr_wds closure_type = getClosureType is_static ptr_wds lf_info hunk ./compiler/codeGen/StgCmmClosure.hs 378 in - GenericRep is_static unlifted_fields ptr_wds nonptr_wds closure_type + GenericRep is_static ptr_wds nonptr_wds closure_type -- We *do* get non-updatable top-level thunks sometimes. eg. f = g -- gets compiled to a jump to g (if g has non-zero arity), instead of hunk ./compiler/codeGen/StgCmmClosure.hs 714 -------------------------------------- mkClosureInfo :: Bool -- Is static - -> Bool -- Has one or more unlifted pointer fields -> Id -> LambdaFormInfo -> Int -> Int -- Total and pointer words hunk ./compiler/codeGen/StgCmmClosure.hs 720 -> C_SRT -> String -- String descriptor -> ClosureInfo -mkClosureInfo is_static unlifted_fields id lf_info tot_wds ptr_wds srt_info descr +mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr = ClosureInfo { closureName = name, closureLFInfo = lf_info, closureSMRep = sm_rep, hunk ./compiler/codeGen/StgCmmClosure.hs 730 closureCafs = idCafInfo id } where name = idName id - sm_rep = chooseSMRep is_static unlifted_fields lf_info tot_wds ptr_wds + sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds mkConInfo :: Bool -- Is static hunk ./compiler/codeGen/StgCmmClosure.hs 733 - -> Bool -- Has one or more unlifted pointer fields -> DataCon -> Int -> Int -- Total and pointer words -> ClosureInfo hunk ./compiler/codeGen/StgCmmClosure.hs 736 -mkConInfo is_static unlifted_fields data_con tot_wds ptr_wds +mkConInfo is_static data_con tot_wds ptr_wds = ConInfo { closureSMRep = sm_rep, closureCon = data_con } where hunk ./compiler/codeGen/StgCmmClosure.hs 740 - sm_rep = chooseSMRep is_static unlifted_fields (mkConLFInfo data_con) tot_wds ptr_wds + sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds + -- We need a black-hole closure info to pass to @allocDynClosure@ when we -- want to allocate the black hole on entry to a CAF. These are the only hunk ./compiler/codeGen/StgCmmClosure.hs 841 -- not exported: sizes_from_SMRep :: SMRep -> (WordOff,WordOff) -sizes_from_SMRep (GenericRep _ _ ptrs nonptrs _) = (ptrs, nonptrs) +sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs) sizes_from_SMRep BlackHoleRep = (0, 0) -- Computing slop size. WARNING: this looks dodgy --- it has deep hunk ./compiler/codeGen/StgCmmClosure.hs 874 minPayloadSize :: SMRep -> Bool -> WordOff minPayloadSize smrep updatable = case smrep of - BlackHoleRep -> min_upd_size - GenericRep _ _ _ _ _ | updatable -> min_upd_size - GenericRep True _ _ _ _ -> 0 -- static - GenericRep False _ _ _ _ -> mIN_PAYLOAD_SIZE + BlackHoleRep -> min_upd_size + GenericRep _ _ _ _ | updatable -> min_upd_size + GenericRep True _ _ _ -> 0 -- static + GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE -- ^^^^^___ dynamic where min_upd_size = hunk ./compiler/codeGen/StgCmmClosure.hs 930 where not_nocaf_constr = case sm_rep of - GenericRep _ _ _ _ ConstrNoCaf -> False - _other -> True + GenericRep _ _ _ ConstrNoCaf -> False + _other -> True isStaticClosure :: ClosureInfo -> Bool isStaticClosure cl_info = isStaticRep (closureSMRep cl_info) hunk ./compiler/codeGen/StgCmmCon.hs 37 import Constants import DataCon import FastString -import Type import Id import Literal import PrelInfo hunk ./compiler/codeGen/StgCmmCon.hs 74 closure_label = mkClosureLabel name $ idCafInfo id caffy = any stgArgHasCafRefs args (closure_info, nv_args_w_offsets) - = layOutStaticConstr False con (addArgReps args) + = layOutStaticConstr con (addArgReps args) get_lit (arg, _offset) = do { CmmLit lit <- getArgAmode arg ; return lit } hunk ./compiler/codeGen/StgCmmCon.hs 193 -------- buildDynCon: the general case ----------- buildDynCon binder ccs con args - = do { let (cl_info, args_w_offsets) = - layOutDynConstr unlifted_fields con (addArgReps args) + = do { let (cl_info, args_w_offsets) = layOutDynConstr con (addArgReps args) -- No void args in args_w_offsets ; (tmp, init) <- allocDynClosure cl_info use_cc blame_cc args_w_offsets ; return (regIdInfo binder lf_info tmp, init) } hunk ./compiler/codeGen/StgCmmCon.hs 206 blame_cc = use_cc -- cost-centre on which to blame the alloc (same) - unlifted_fields = dataConHasUnliftedPtrFields con - - -dataConHasUnliftedPtrFields :: DataCon -> Bool -dataConHasUnliftedPtrFields con = any unlifted (dataConRepArgTys con) - where unlifted ty = isUnLiftedType ty && typePrimRep ty == PtrRep --------------------------------------------------------------- -- Binding constructor arguments hunk ./compiler/codeGen/StgCmmHeap.hs 53 ----------------------------------------------------------- layOutDynConstr, layOutStaticConstr - :: Bool -> DataCon -> [(PrimRep, a)] + :: DataCon -> [(PrimRep, a)] -> (ClosureInfo, [(NonVoid a, VirtualHpOffset)]) -- No Void arguments in result hunk ./compiler/codeGen/StgCmmHeap.hs 60 layOutDynConstr = layOutConstr False layOutStaticConstr = layOutConstr True -layOutConstr :: Bool -> Bool -> DataCon -> [(PrimRep, a)] +layOutConstr :: Bool -> DataCon -> [(PrimRep, a)] -> (ClosureInfo, [(NonVoid a, VirtualHpOffset)]) hunk ./compiler/codeGen/StgCmmHeap.hs 62 -layOutConstr is_static unlifted_fields data_con args - = (mkConInfo is_static unlifted_fields data_con tot_wds ptr_wds, +layOutConstr is_static data_con args + = (mkConInfo is_static data_con tot_wds ptr_wds, things_w_offsets) where (tot_wds, -- #ptr_wds + #nonptr_wds hunk ./compiler/codeGen/StgCmmUtils.hs 61 import ForeignCall import IdInfo import Type -import DataCon import TyCon import Constants import SMRep hunk ./compiler/codeGen/StgCmmUtils.hs 283 where closure_tbl = CmmLit (CmmLabel lbl) lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs -------------------------------------------------------------------------- --- --- Does a DataCon have any fields that point to unlifted (primitive) --- types. This property is important for local GC. --- -------------------------------------------------------------------------- - -dataConHasUnliftedPtrFields :: DataCon -> Bool -dataConHasUnliftedPtrFields con = any unlifted (dataConRepArgTys con) - where unlifted ty = isUnLiftedType ty && typePrimRep ty == PtrRep - ------------------------------------------------------------------------- -- -- Conditionals and rts calls hunk ./includes/Cmm.h 474 #define mutArrPtrsCardWords(n) \ ROUNDUP_BYTES_TO_WDS(((n) + (1 << MUT_ARR_PTRS_CARD_BITS) - 1) >> MUT_ARR_PTRS_CARD_BITS) +#define isGlobalPrim(p) (W_[p - WDS(1)] != 0) + /* ----------------------------------------------------------------------------- Voluntary Yields/Blocks hunk ./includes/rts/Constants.h 236 by tryWakeupThread() */ #define ThreadMigrating 13 +/* Involved in a message sent to tso->msg_cap */ +#define BlockedOnMsgGlobalise 14 + /* * These constants are returned to the scheduler by a thread that has * stopped for one reason or another. See typedef StgThreadReturnCode hunk ./includes/rts/prof/LDV.h 34 #ifdef CMINUSMINUS +#define LDV_RECORD_DEAD(c) \ + foreign "C" LDV_recordDead(c "ptr") #define LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(c) \ foreign "C" LDV_recordDead_FILL_SLOP_DYNAMIC(c "ptr") hunk ./includes/rts/prof/LDV.h 45 LDVW((c)) = ((StgWord)RTS_DEREF(era) << LDV_SHIFT) | LDV_STATE_CREATE void LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p ); +void LDV_recordDead (StgClosure *p, nat size); hunk ./includes/rts/prof/LDV.h 47 +#define LDV_RECORD_DEAD(c) \ + LDV_recordDead(c) #define LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(c) \ LDV_recordDead_FILL_SLOP_DYNAMIC(c) hunk ./includes/rts/prof/LDV.h 57 #else /* !PROFILING */ #define LDV_RECORD_CREATE(c) /* nothing */ +#define LDV_RECORD_DEAD(c, size) /* nothing */ #define LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(c) /* nothing */ #endif /* PROFILING */ hunk ./includes/rts/storage/Block.h 106 #define BF_KNOWN 128 /* Block was swept in the last generation */ #define BF_SWEPT 256 +/* contains primitive objects */ +#define BF_PRIM 512 /* Finding the block descriptor for a given block -------------------------- */ hunk ./includes/rts/storage/ClosureMacros.h 138 SET_INFO((c), (new_info)); \ LDV_RECORD_CREATE(c); +#define OVERWRITE_PRIM_INFO(c, new_info, old_size, new_size) \ + LDV_RECORD_DEAD((StgClosure *)(c), old_size); \ + fillPrimSlop((StgClosure *)c, old_size, new_size); \ + SET_INFO((c), (new_info)); \ + LDV_RECORD_CREATE(c); + /* ----------------------------------------------------------------------------- How to get hold of the static link field for a static closure. -------------------------------------------------------------------------- */ hunk ./includes/rts/storage/ClosureTypes.h 83 #define WHITEHOLE 59 #define N_CLOSURE_TYPES 60 -/* Closure flags, for the info->flags field. */ -#define HAS_UNLIFTED_FIELDS 1 - #endif /* RTS_STORAGE_CLOSURETYPES_H */ hunk ./includes/rts/storage/Closures.h 466 StgClosure *bh; } MessageBlackHole; +typedef struct MessageGlobalise_ { + StgHeader header; + struct Message_ *link; + StgTSO *tso; + StgClosure *req; // closure to globalise +} MessageGlobalise; + #endif /* RTS_STORAGE_CLOSURES_H */ hunk ./includes/rts/storage/GC.h 77 unsigned int n_large_blocks; // no. of blocks used by large objs unsigned int n_new_large_blocks; // count freshly allocated large objects + bdescr * prim_blocks; // blocks of primitive objects + unsigned int n_prim_blocks; // these blocks are marked/swept, + unsigned int n_prim_words; // not copied. + unsigned int max_blocks; // max blocks StgTSO * threads; // threads in this gen hunk ./includes/rts/storage/GC.h 164 StgPtr allocate ( Capability *cap, lnat n ); StgPtr allocatePinned ( Capability *cap, lnat n ); +StgPtr allocatePrim ( Capability *cap, lnat n ); /* memory allocator for executable memory */ void * allocateExec(unsigned int len, void **exec_addr); hunk ./includes/rts/storage/GC.h 239 bd->dest_ix = dest->ix; } +/* ----------------------------------------------------------------------------- + Primitive heap area + + For now, just use a word before the primitive object to indicate + global/private. Later try using a bitmap. + -------------------------------------------------------------------------- */ + +EXTERN_INLINE rtsBool isGlobalPrim (StgClosure *p); +EXTERN_INLINE rtsBool isGlobalPrim (StgClosure *p) +{ + return (rtsBool) *((P_)p - 1); +} + +EXTERN_INLINE rtsBool isGlobal (StgClosure *p); +EXTERN_INLINE rtsBool isGlobal (StgClosure *p) +{ + bdescr *bd; + bd = Bdescr((P_)p); + return bd->gen_no != 0 || ((bd->flags & BF_PRIM) && isGlobalPrim(p)); +} + +INLINE_HEADER void setGlobal (StgClosure *p) +{ + *((P_)p - 1) = 1; +} + +// slop in the prim area is marked with '-size' +INLINE_HEADER void fillPrimSlop (StgClosure *p, + StgWord old_size, StgWord new_size) +{ + *((P_)p + new_size) = old_size - new_size + 2; +} + #endif /* RTS_STORAGE_GC_H */ hunk ./includes/rts/storage/InfoTables.h 233 StgClosureInfo layout; /* closure layout info (one word) */ - StgQtrWord type; /* closure type */ - StgQtrWord flags; + StgHalfWord type; /* closure type */ StgHalfWord srt_bitmap; /* In a CONSTR: - the constructor tag hunk ./includes/rts/storage/InfoTables.h 239 In a FUN/THUNK - a bitmap of SRT entries + In an IND_LOCAL + - the owning Capability */ #ifdef TABLES_NEXT_TO_CODE hunk ./includes/stg/MiscClosures.h 191 RTS_ENTRY(stg_MSG_BLACKHOLE); RTS_ENTRY(stg_STUB_MSG_BLACKHOLE); RTS_ENTRY(stg_MSG_NULL); +RTS_ENTRY(stg_MSG_GLOBALISE); RTS_ENTRY(stg_MVAR_TSO_QUEUE); RTS_ENTRY(stg_catch); RTS_ENTRY(stg_PAP); hunk ./includes/stg/MiscClosures.h 380 RTS_FUN_DECL(stg_gen_block); RTS_FUN_DECL(stg_block_noregs); RTS_FUN_DECL(stg_block_1); -RTS_FUN_DECL(stg_block_blackhole); -RTS_FUN_DECL(stg_block_blackhole_finally); +RTS_FUN_DECL(stg_block_enter); RTS_FUN_DECL(stg_block_takemvar); RTS_RET(stg_block_takemvar); RTS_FUN_DECL(stg_block_putmvar); hunk ./rts/Apply.cmm 123 // Off we go! TICK_ENT_VIA_NODE(); + info = %INFO_PTR(UNTAG(R1)); + if (IS_FORWARDING_PTR(info)) { + R1 = UN_FORWARDING_PTR(info) | GETTAG(R1); + } + #ifdef NO_ARG_REGS jump %GET_ENTRY(UNTAG(R1)); #else hunk ./rts/Apply.cmm 205 // Off we go! TICK_ENT_VIA_NODE(); + info = %INFO_PTR(UNTAG(R1)); + if (IS_FORWARDING_PTR(info)) { + R1 = UN_FORWARDING_PTR(info) | GETTAG(R1); + } + #ifdef NO_ARG_REGS jump %GET_ENTRY(UNTAG(R1)); #else hunk ./rts/Apply.cmm 280 // Off we go! TICK_ENT_VIA_NODE(); + info = %INFO_PTR(UNTAG(R1)); + if (IS_FORWARDING_PTR(info)) { + R1 = UN_FORWARDING_PTR(info) | GETTAG(R1); + } + #ifdef NO_ARG_REGS jump %GET_ENTRY(UNTAG(R1)); #else hunk ./rts/Capability.h 334 EXTERN_INLINE void recordClosureMutated_ (Capability *cap, StgClosure *p) { - bdescr *bd; - bd = Bdescr((StgPtr)p); - if (bd->gen_no != 0) { - recordMutableCap(cap,p,bd->gen_no); + if (isGlobal(p)) { + recordMutableCap(cap,p,global_gen_no); // XXX gen wrong } } hunk ./rts/HeapStackCheck.cmm 334 W_ info; W_ type; + info = %INFO_PTR(UNTAG(R1)); + if (IS_FORWARDING_PTR(info)) { + R1 = UN_FORWARDING_PTR(info) | GETTAG(R1); + } info = %GET_FUN_INFO(UNTAG(R1)); // cache the size hunk ./rts/HeapStackCheck.cmm 640 BLOCK_BUT_FIRST(stg_block_putmvar_finally); } -stg_block_blackhole +stg_block_enter { Sp_adj(-2); Sp(1) = R1; hunk ./rts/LdvProfile.h 26 // Invoked when: // 1) Hp is incremented and exceeds HpLim (in Updates.hc). // 2) copypart() is called (in GC.c). -#define LDV_FILL_SLOP(from, howMany) \ +#define LDV_FILL_SLOP(from, howMany) \ if (era > 0) { \ hunk ./rts/LdvProfile.h 28 - int i; \ - for (i = 0;i < (howMany); i++) \ - ((StgWord *)(from))[i] = 0; \ + ZERO_SLOP(from, i); \ } // Informs the LDV profiler that closure c has just been evacuated. hunk ./rts/Messages.c 36 i != &stg_MSG_BLACKHOLE_info && i != &stg_MSG_TRY_WAKEUP_info && i != &stg_STUB_MSG_BLACKHOLE_info && // can happen if a MSG_BLACKHOLE is revoked + i != &stg_MSG_GLOBALISE_info && i != &stg_WHITEHOLE_info) { barf("sendMessage: %p", i); } hunk ./rts/Messages.c 49 // not necessary, since all messages are in the global heap: // recordClosureMutated(from_cap,(StgClosure*)msg); - ASSERT(!(Bdescr((StgPtr)msg)->gen->is_local)); + ASSERT(isGlobal((StgClosure*)msg)); if (to_cap->running_task == NULL) { to_cap->running_task = myTask(); hunk ./rts/Messages.c 127 tryWakeupThread(cap, b->tso); } return; + } + else if (i == &stg_MSG_GLOBALISE_info) + { + MessageGlobalise *g = (MessageGlobalise*)m; + StgClosure *p; + const StgInfoTable *info; + + debugTraceCap(DEBUG_sched, cap, "message: globalise %p for thread %lu", + g->req, (lnat)g->tso->id); + + p = g->req; + ASSERT(isGlobal((StgClosure*)p)); + info = get_itbl(p); + + // paranoia + if (info->type == IND_LOCAL && info->srt_bitmap == cap->no) { + globalise(cap, &((StgInd*)p)->indirectee); + SET_INFO(p, &stg_IND_info); + } + + // even if we didn't globalise: wake up the thread. If the + // message came to the wrong place, then the source thread + // will try again and emit a message to the right place. + tryWakeupThread(cap, g->tso); + return; + } else if (i == &stg_STUB_MSG_BLACKHOLE_info || i == &stg_MSG_NULL_info) { hunk ./rts/Messages.c 241 #ifdef THREADED_RTS if (owner->cap != cap) { - // if we got here, then the msg should already be global, - // because for the BH to be owned by another cap it must - // be global, so stg_BLACKHOLE_info would have allocated - // the msg in global memory. sendMessage() will assert. + msg->link = (MessageBlackHole*)END_TSO_QUEUE; // just make it valid + globalise_(cap, (StgClosure*)msg); sendMessage(cap, owner->cap, (Message*)msg); debugTraceCap(DEBUG_sched, cap, "forwarding message to cap %d", owner->cap->no); return 1; hunk ./rts/Messages.c 252 // Capability. msg->tso is the first thread to block on this // BLACKHOLE, so we first create a BLOCKING_QUEUE object. - if (Bdescr((P_)bh)->gen_ix >= global_gen_ix) { - // allocate the BQ in global memory iff the BH is global - bq = (StgBlockingQueue*)allocateInGen(cap, global_gen_ix, - sizeofW(StgBlockingQueue)); - } else { - bq = (StgBlockingQueue*)allocate(cap, sizeofW(StgBlockingQueue)); - } - + bq = (StgBlockingQueue*)allocatePrim(cap, sizeofW(StgBlockingQueue)); + // initialise the BLOCKING_QUEUE object SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM); bq->bh = bh; hunk ./rts/Messages.c 344 return 0; // not blocked } + +/* ---------------------------------------------------------------------------- + Requesting globalisation of a closure + -------------------------------------------------------------------------- */ + + +void +#ifndef THREADED_RTS + GNUC3_ATTRIBUTE(__noreturn__) +#endif +messageGlobalise (Capability *cap USED_IF_THREADS, + StgTSO *tso USED_IF_THREADS, + StgClosure *p USED_IF_THREADS) +{ +#ifndef THREADED_RTS + + barf("messageGlobalise in non-THREADED_RTS"); + +#else + + Capability *dest; + MessageGlobalise *msg; + + ASSERT(get_itbl(p)->type == IND_LOCAL); + ASSERT(Bdescr((P_)p)->gen_ix == global_gen_ix); + + dest = &capabilities[get_itbl(p)->srt_bitmap]; + + debugTraceCap(DEBUG_sched, cap, + "thread %lu requesting globalisation of closure at %p from cap %u", + (lnat)tso->id, p, (nat)dest->no); + + msg = (MessageGlobalise*)allocatePrim(cap, sizeofW(MessageGlobalise)); + setGlobal((StgClosure*)msg); + + SET_HDR(msg, &stg_MSG_GLOBALISE_info, CCS_SYSTEM); + msg->tso = tso; + msg->req = p; + + sendMessage(cap, dest, (Message*)msg); + + tso->block_info.closure = (StgClosure*)msg; + tso->why_blocked = BlockedOnMsgGlobalise; +#endif +} + hunk ./rts/Messages.h 11 #include "BeginPrivate.h" -nat messageBlackHole(Capability *cap, MessageBlackHole *msg); +nat messageBlackHole (Capability *cap, MessageBlackHole *msg); +void messageGlobalise (Capability *cap, StgTSO *tso, StgClosure *p); #ifdef THREADED_RTS void executeMessage (Capability *cap, Message *m); hunk ./rts/PrimOps.cmm 64 n = R1; payload_words = ROUNDUP_BYTES_TO_WDS(n); words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) []; + ("ptr" p) = foreign "C" allocatePrim(MyCapability() "ptr",words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_bytes(p) = n; hunk ./rts/PrimOps.cmm 157 // number of words. size = n + mutArrPtrsCardWords(n); words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; - ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2]; + ("ptr" arr) = foreign "C" allocatePrim(MyCapability() "ptr",words) [R2]; TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); hunk ./rts/PrimOps.cmm 224 W_ mv; /* Args: R1 = initialisation value */ - ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh); - - mv = Hp - SIZEOF_StgMutVar + WDS(1); + MAYBE_GC(R1_PTR, stg_newMutVarzh); + ("ptr" mv) = foreign "C" allocatePrim(MyCapability() "ptr", + BYTES_TO_WDS(SIZEOF_StgMutVar)) [R1]; + SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]); StgMutVar_var(mv) = R1; hunk ./rts/PrimOps.cmm 334 R3 = stg_NO_FINALIZER_closure; } - ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh ); + MAYBE_GC(R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh); + + ("ptr" w) = foreign "C" allocatePrim(MyCapability() "ptr", + BYTES_TO_WDS(SIZEOF_StgWeak)) [R1,R2,R3]; hunk ./rts/PrimOps.cmm 339 - w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, W_[CCCS]); // We don't care about cfinalizer here. hunk ./rts/PrimOps.cmm 380 flag = R5; eptr = R6; - ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh ); + MAYBE_GC(R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh); + ("ptr" w) = foreign "C" allocatePrim(MyCapability() "ptr", + BYTES_TO_WDS(SIZEOF_StgWeak)) [R1,R2]; hunk ./rts/PrimOps.cmm 384 - w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, W_[CCCS]); payload_words = 4; hunk ./rts/PrimOps.cmm 388 words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) []; + ("ptr" p) = foreign "C" allocatePrim(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); hunk ./rts/PrimOps.cmm 1133 /* args: none */ W_ mvar; - ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh ); + MAYBE_GC(NO_PTRS, stg_newMVarzh); + ("ptr" mvar) = foreign "C" allocatePrim(MyCapability(), + BYTES_TO_WDS(SIZEOF_StgMVar)) []; hunk ./rts/PrimOps.cmm 1137 - mvar = Hp - SIZEOF_StgMVar + WDS(1); SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]); // MVARs start dirty: generation 0 has no mutable list StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure; hunk ./rts/PrimOps.cmm 1179 */ if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { - bd = Bdescr(mvar); - if (TO_W_(bdescr_gen_ix(bd)) >= TO_W_(CInt[global_gen_ix])) - { - ("ptr" q) = foreign "C" allocateInGen(MyCapability(), - CInt[global_gen_ix], - BYTES_TO_WDS(SIZEOF_StgMVarTSOQueue)); - - SAVE_THREAD_STATE(); - ("ptr" new_tso) = foreign "C" globalise_(MyCapability(), - CurrentTSO); - CurrentTSO = new_tso; - LOAD_THREAD_STATE(); - } - else - { - // Note [mvar-heap-check] We want to do the heap check in the - // branch here, to avoid the conditional in the common case. - // However, we've already locked the MVar above, so we better - // be careful to unlock it again if the the heap check fails. - // Unfortunately we don't have an easy way to inject any code - // into the heap check generated by the code generator, so we - // have to do it in stg_gc_gen (see HeapStackCheck.cmm). - HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh); - - q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); - } - + ("ptr" q) = foreign "C" allocatePrim(MyCapability(), + BYTES_TO_WDS(SIZEOF_StgMVarTSOQueue)); SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); StgMVarTSOQueue_link(q) = END_TSO_QUEUE; StgMVarTSOQueue_tso(q) = CurrentTSO; hunk ./rts/PrimOps.cmm 1185 + bd = Bdescr(mvar); + if (TO_W_(bdescr_gen_ix(bd)) >= TO_W_(CInt[global_gen_ix]) || + isGlobalPrim(mvar)) { + ("ptr" new_tso) = foreign "C" globalise_(MyCapability(), q); + } + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_head(mvar) = q; } else { hunk ./rts/PrimOps.cmm 1359 if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { - bd = Bdescr(mvar); - if (TO_W_(bdescr_gen_ix(bd)) >= TO_W_(CInt[global_gen_ix])) - { - ("ptr" q) = foreign "C" allocateInGen(MyCapability(), - CInt[global_gen_ix], - BYTES_TO_WDS(SIZEOF_StgMVarTSOQueue)); - - SAVE_THREAD_STATE(); - ("ptr" new_tso) = foreign "C" globalise_(MyCapability(), - CurrentTSO); - CurrentTSO = new_tso; - LOAD_THREAD_STATE(); - } - else - { - // see Note [mvar-heap-check] above - HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh); - - q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); - } - + ("ptr" q) = foreign "C" allocatePrim(MyCapability(), + BYTES_TO_WDS(SIZEOF_StgMVarTSOQueue)); SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); StgMVarTSOQueue_link(q) = END_TSO_QUEUE; StgMVarTSOQueue_tso(q) = CurrentTSO; hunk ./rts/PrimOps.cmm 1365 + bd = Bdescr(mvar); + if (TO_W_(bdescr_gen_ix(bd)) >= TO_W_(CInt[global_gen_ix]) || + isGlobalPrim(mvar)) { + ("ptr" new_tso) = foreign "C" globalise_(MyCapability(), q); + } + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_head(mvar) = q; } else { hunk ./rts/PrimOps.cmm 1525 { W_ index, sn_obj; - ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh ); - + MAYBE_GC(R1_PTR, stg_makeStableNamezh); + (index) = foreign "C" lookupStableName(R1 "ptr") []; /* Is there already a StableName for this heap object? hunk ./rts/PrimOps.cmm 1533 * stable_ptr_table is a pointer to an array of snEntry structs. */ if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) { - sn_obj = Hp - SIZEOF_StgStableName + WDS(1); + ("ptr" sn_obj) = foreign "C" allocatePrim(MyCapability() "ptr", + BYTES_TO_WDS(SIZEOF_StgStableName)) [R1]; SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]); StgStableName_sn(sn_obj) = index; snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj; hunk ./rts/PrimOps.cmm 1583 words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr); bytes = WDS(words); - ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh ); + MAYBE_GC(R1_PTR & R2_PTR & R3_PTR & R5_PTR, stg_newBCOzh); + ("ptr" bco) = foreign "C" allocatePrim(MyCapability() "ptr", words) + [R1,R2,R3,R5]; hunk ./rts/PrimOps.cmm 1587 - bco = Hp - bytes + WDS(1); SET_HDR(bco, stg_BCO_info, W_[CCCS]); StgBCO_instrs(bco) = R1; hunk ./rts/PrimOps.cmm 1642 W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr; info = %GET_STD_INFO(UNTAG(R1)); + MAYBE_GC(R1_PTR, stg_unpackClosurezh); + // Some closures have non-standard layout, so we omit those here. W_ type; type = TO_W_(%INFO_TYPE(info)); hunk ./rts/PrimOps.cmm 1671 ptrs_arr_cards = mutArrPtrsCardWords(ptrs); ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards); - ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh); - W_ clos; clos = UNTAG(R1); hunk ./rts/PrimOps.cmm 1674 - ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1); - nptrs_arr = Hp - nptrs_arr_sz + WDS(1); + ("ptr" ptrs_arr ) = foreign "C" allocatePrim(MyCapability(), ptrs_arr_sz) []; + ("ptr" nptrs_arr) = foreign "C" allocatePrim(MyCapability(), nptrs_arr_sz) []; SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]); StgMutArrPtrs_ptrs(ptrs_arr) = ptrs; hunk ./rts/RaiseAsync.c 167 globalise(cap, (StgClosure**)&target); globalise(cap, (StgClosure**)&exception); - msg = (MessageThrowTo *) allocateInGen(cap, global_gen_ix, - sizeofW(MessageThrowTo)); + msg = (MessageThrowTo *) allocatePrim(cap, sizeofW(MessageThrowTo)); + setGlobal((StgClosure*)msg); + // message starts locked; the caller has to unlock it when it is // ready. SET_HDR(msg, &stg_WHITEHOLE_info, CCS_SYSTEM); hunk ./rts/RaiseAsync.c 380 // might still get a TRY_WAKEUP message from the owner of the // blackhole some time in the future, but that doesn't matter. ASSERT(target->block_info.bh->header.info == &stg_MSG_BLACKHOLE_info); - OVERWRITE_INFO(target->block_info.bh, &stg_STUB_MSG_BLACKHOLE_info); + OVERWRITE_PRIM_INFO(target->block_info.bh, + &stg_STUB_MSG_BLACKHOLE_info, + sizeofW(MessageBlackHole), + sizeofW(StgInd)); + raiseAsync(cap, target, msg->exception, rtsFalse, NULL); + return THROWTO_SUCCESS; + } + } + + case BlockedOnMsgGlobalise: + { + if (target->flags & TSO_BLOCKEX) { + // BlockedOnMsgGlobalise is not interruptible. + blockedThrowTo(cap,target,msg); + return THROWTO_BLOCKED; + } else { + // We could revoke the message, but it won't do any harm + // if we just leave it in flight. raiseAsync(cap, target, msg->exception, rtsFalse, NULL); return THROWTO_SUCCESS; } hunk ./rts/RaiseAsync.c 619 if (mvar->head == q) { mvar->head = q->link; - q->header.info = &stg_IND_info; + OVERWRITE_PRIM_INFO(q, &stg_IND_info, + sizeofW(StgMVarTSOQueue), sizeofW(StgInd)); if (mvar->tail == q) { mvar->tail = (StgMVarTSOQueue*)END_TSO_QUEUE; } hunk ./rts/RaiseAsync.c 630 // we lose the tail pointer when the GC shorts out the IND. // So we use MSG_NULL as a kind of non-dupable indirection; // these are ignored by takeMVar/putMVar. - q->header.info = &stg_MSG_NULL_info; + OVERWRITE_PRIM_INFO(q, &stg_MSG_NULL_info, + sizeofW(StgMVarTSOQueue), sizeofW(Message)); } else { hunk ./rts/RaiseAsync.c 634 - q->header.info = &stg_IND_info; + OVERWRITE_PRIM_INFO(q, &stg_IND_info, + sizeofW(StgMVarTSOQueue), sizeofW(StgInd)); } hunk ./rts/RaiseAsync.c 637 -#ifdef DEBUG - ZERO_SLOP((StgPtr)q + sizeofW(StgInd), - sizeofW(StgMVarTSOQueue) - sizeofW(StgInd)); -#endif // revoke the MVar operation tso->_link = END_TSO_QUEUE; hunk ./rts/RaiseAsync.c 681 break; } + case BlockedOnMsgGlobalise: + // nothing to do + goto done; + #if !defined(THREADED_RTS) case BlockedOnRead: case BlockedOnWrite: hunk ./rts/RtsFlags.c 1108 error = rtsTrue; break; - case 'b': /* heapBase in hex; undocumented */ + case 'b': /* undocumented: heapBase in hex; 0x prefix optional */ if (rts_argv[arg][3] != '\0') { hunk ./rts/RtsFlags.c 1110 - RtsFlags.GcFlags.heapBase - = strtol(rts_argv[arg]+3, (char **) NULL, 16); + if (rts_argv[arg][4] == '0' && + rts_argv[arg][5] == 'x') { + RtsFlags.GcFlags.heapBase + = strtol(rts_argv[arg]+6, (char **) NULL, 16); + } else { + RtsFlags.GcFlags.heapBase + = strtol(rts_argv[arg]+3, (char **) NULL, 16); + } } else { errorBelch("-xb: requires argument"); error = rtsTrue; hunk ./rts/STM.c 415 static StgInvariantCheckQueue *new_stg_invariant_check_queue(Capability *cap, StgAtomicInvariant *invariant) { StgInvariantCheckQueue *result; - result = (StgInvariantCheckQueue *)allocate(cap, sizeofW(StgInvariantCheckQueue)); + result = (StgInvariantCheckQueue *)allocatePrim(cap, sizeofW(StgInvariantCheckQueue)); SET_HDR (result, &stg_INVARIANT_CHECK_QUEUE_info, CCS_SYSTEM); result -> invariant = invariant; result -> my_execution = NO_TREC; hunk ./rts/STM.c 424 static StgTRecChunk *new_stg_trec_chunk(Capability *cap) { StgTRecChunk *result; - result = (StgTRecChunk *)allocate(cap, sizeofW(StgTRecChunk)); + result = (StgTRecChunk *)allocatePrim(cap, sizeofW(StgTRecChunk)); SET_HDR (result, &stg_TREC_CHUNK_info, CCS_SYSTEM); result -> prev_chunk = END_STM_CHUNK_LIST; result -> next_entry_idx = 0; hunk ./rts/STM.c 435 static StgTRecHeader *new_stg_trec_header(Capability *cap) { StgTRecHeader *result; - result = (StgTRecHeader *) allocateInGen(cap, global_gen_ix, - sizeofW(StgTRecHeader)); + result = (StgTRecHeader *) allocatePrim(cap, sizeofW(StgTRecHeader)); + setGlobal((StgClosure*)result); SET_HDR (result, &stg_TREC_HEADER_info, CCS_SYSTEM); result -> current_chunk = new_stg_trec_chunk(cap); hunk ./rts/STM.c 469 StgClosure *closure, rtsBool is_local) { StgTVarWatchQueue *result = NULL; - if (is_local) { - result = (StgTVarWatchQueue *)allocate(cap, sizeofW(StgTVarWatchQueue)); - } else { - result = (StgTVarWatchQueue *)allocateInGen(cap, global_gen_ix, - sizeofW(StgTVarWatchQueue)); + result = (StgTVarWatchQueue *)allocatePrim(cap, sizeofW(StgTVarWatchQueue)); + if (!is_local) { + setGlobal((StgClosure*)result); } SET_HDR (result, &stg_TVAR_WATCH_QUEUE_info, CCS_SYSTEM); result -> closure = closure; hunk ./rts/STM.c 1164 // 1. Allocate an StgAtomicInvariant, set last_execution to NO_TREC // to signal that this is a new invariant in the current atomic block - invariant = (StgAtomicInvariant *) allocate(cap, sizeofW(StgAtomicInvariant)); + invariant = (StgAtomicInvariant *) allocatePrim(cap, sizeofW(StgAtomicInvariant)); TRACE("%p : stmAddInvariantToCheck allocated invariant=%p", trec, invariant); SET_HDR (invariant, &stg_ATOMIC_INVARIANT_info, CCS_SYSTEM); invariant -> code = code; hunk ./rts/STM.c 1485 // Better globalise the TSO. We don't know for sure that this is // necessary, but checking would be tedious: if any TVar is global // then the TSO needs to be. - tso = globalise_(cap, tso); + tso = globalise_(cap, (StgClosure*)tso); // Put ourselves to sleep. We retain locks on all the TVars involved // until we are sound asleep : (a) on the wait queues, (b) BlockedOnSTM hunk ./rts/STM.c 1655 StgTVar *stmNewTVar(Capability *cap, StgClosure *new_value) { StgTVar *result; - result = (StgTVar *)allocate(cap, sizeofW(StgTVar)); + result = (StgTVar *)allocatePrim(cap, sizeofW(StgTVar)); SET_HDR (result, &stg_TVAR_info, CCS_SYSTEM); result -> current_value = new_value; result -> first_watch_queue_entry = END_STM_WATCH_QUEUE; hunk ./rts/Schedule.c 1071 // This assert can be a killer if the app is doing lots // of large block allocations. - IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); + IF_DEBUG(sanity, checkNurserySanity(cap->no)); // now update the nursery to point to the new block cap->r.rCurrentNursery = bd; hunk ./rts/Schedule.c 1327 heap_census = scheduleNeedHeapProfile(rtsTrue); + if (heap_census || force_major) { + N = RtsFlags.GcFlags.generations - 1; + } else { + N = next_gc_gen; + } + if (heap_census || sched_state >= SCHED_INTERRUPTING) { gc_type = GC_SEQ; #ifdef THREADED_RTS hunk ./rts/Schedule.c 1348 gc_type = GC_SEQ; } - if (heap_census || force_major) { - N = RtsFlags.GcFlags.generations - 1; - } else { - N = next_gc_gen; - } - // In order to GC, there must be no threads running Haskell code. // Therefore, the GC thread needs to hold *all* the capabilities, // and release them after the GC has completed. hunk ./rts/Schedule.c 2144 IF_DEBUG(sanity,checkTSO(tso)); - if (tso->stack_size >= tso->max_stack_size + if (tso->stack_size >= tso->max_stack_size-1 // XXX -1 for global flag && !(tso->flags & TSO_BLOCKEX)) { // NB. never raise a StackOverflow exception if the thread is // inside Control.Exceptino.block. It is impractical to protect hunk ./rts/Schedule.c 2208 new_tso_size = (lnat)BLOCK_ROUND_UP(new_stack_size * sizeof(W_) + TSO_STRUCT_SIZE)/sizeof(W_); new_tso_size = round_to_mblocks(new_tso_size); /* Be MBLOCK-friendly */ + + new_tso_size --; // XXX space for global flag + new_stack_size = new_tso_size - TSO_STRUCT_SIZEW; debugTrace(DEBUG_sched, hunk ./rts/Schedule.c 2217 "increasing stack size from %ld words to %d.", (long)tso->stack_size, new_stack_size); - dest = (StgTSO *)allocate(cap,new_tso_size); + + dest = (StgTSO *)allocatePrim(cap,new_tso_size); TICK_ALLOC_TSO(new_stack_size,0); /* copy the TSO block and the old stack into the new area */ hunk ./rts/Stats.c 781 generation *gen; debugBelch( -"---------------------------------------------------------------\n" -" Gen Max Large Cap Mut-list Blocks Live Slop\n" -" Blocks Objects Bytes \n" -"---------------------------------------------------------------\n"); +"------------------------------------------------------------------------\n" +" Gen Max Large Prim Cap Mut-list Total Live Slop\n" +" Blocks Objects Blocks Bytes Blocks \n" +"------------------------------------------------------------------------\n"); tot_live = 0; tot_slop = 0; hunk ./rts/Stats.c 795 for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) { lge++; } - gen_live = gen->n_words + countOccupied(gen->large_objects); - gen_blocks = gen->n_blocks + gen->n_large_blocks; + gen_live = gen->n_words + gen->n_prim_words + countOccupied(gen->large_objects); + gen_blocks = gen->n_blocks + gen->n_large_blocks + gen->n_prim_blocks; slop = gen_blocks * BLOCK_SIZE_W - gen_live; hunk ./rts/Stats.c 800 - debugBelch("%5d %7d %8d %4s %8s %8d %8ld %8ld\n", - g, gen->max_blocks, lge, "", "", gen_blocks, + debugBelch("%5d %7d %8d %8d %4s %8s %8d %8ld %8ld\n", + g, gen->max_blocks, lge, gen->n_prim_blocks, "", "", gen_blocks, gen_live*sizeof(W_), slop*sizeof(W_)); for (i = 0; i < n_capabilities; i++) { hunk ./rts/Stats.c 817 slop = cap_blocks * BLOCK_SIZE_W - cap_live; - debugBelch("%5s %7s %8s %4d %8d %8d %8ld %8ld\n", - "", "", "", i, cap_mut*sizeof(W_), cap_blocks, + debugBelch("%5s %7s %8s %8s %4d %8ld %8d %8ld %8ld\n", + "", "", "", "", i, cap_mut*sizeof(W_), cap_blocks, cap_live*sizeof(W_), slop*sizeof(W_)); gen_mut += cap_mut; hunk ./rts/Stats.c 826 gen_blocks += cap_blocks; } - debugBelch("%46s-----------------\n",""); - debugBelch("%5s %7s %8s %4s %8s %8s %8ld %8ld\n\n", - "", "", "", "", "", "", + debugBelch("%55s-----------------\n",""); + debugBelch("%5s %7s %8s %4s %8s %8s %8s %8ld %8ld\n\n", + "", "", "", "", "", "", "", gen_live*sizeof(W_), slop*sizeof(W_)); slop = gen_blocks * BLOCK_SIZE_W - gen_live; hunk ./rts/Stats.c 836 tot_live += gen_live; tot_slop += slop; } - debugBelch("---------------------------------------------------------------\n"); - debugBelch("%46s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); - debugBelch("---------------------------------------------------------------\n"); + debugBelch("------------------------------------------------------------------------\n"); + debugBelch("%55s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); + debugBelch("------------------------------------------------------------------------\n"); debugBelch("\n"); } hunk ./rts/StgMiscClosures.cmm 241 STRING(ind_local_msg,"IND_LOCAL: not mine") #define DEF_IND_LOCAL(n) \ - INFO_TABLE(stg_IND_LOCAL##n,1,0,IND_LOCAL,"IND_LOCAL","IND_LOCAL") \ + INFO_TABLE_CONSTR(stg_IND_LOCAL##n,1,0,n,IND_LOCAL,"IND_LOCAL","IND_LOCAL") \ { \ if (TO_W_(Capability_no(MyCapability())) != n) { \ hunk ./rts/StgMiscClosures.cmm 244 - foreign "C" barf(ind_local_msg "ptr") never returns; \ + foreign "C" messageGlobalise(MyCapability(), CurrentTSO, R1) [R1]; \ + jump stg_block_enter; \ } else { \ R1 = StgInd_indirectee(R1); \ ENTER(); \ hunk ./rts/StgMiscClosures.cmm 421 info == stg_BLOCKING_QUEUE_CLEAN_info || info == stg_BLOCKING_QUEUE_DIRTY_info) { - - // msg is allocated in global memory iff the BLAKCHOLE is global - - bd = Bdescr(R1); - if (TO_W_(bdescr_gen_ix(bd)) >= TO_W_(CInt[global_gen_ix])) - { - SAVE_THREAD_STATE(); - ("ptr" new_tso) = foreign "C" globalise_(MyCapability(), - CurrentTSO); - CurrentTSO = new_tso; - LOAD_THREAD_STATE(); - - ("ptr" msg) = foreign "C" allocateInGen(MyCapability() "ptr", - CInt[global_gen_ix], + ("ptr" msg) = foreign "C" allocatePrim(MyCapability() "ptr", BYTES_TO_WDS(SIZEOF_MessageBlackHole)) [R1]; hunk ./rts/StgMiscClosures.cmm 423 - } - else - { - ("ptr" msg) = foreign "C" allocate(MyCapability() "ptr", - BYTES_TO_WDS(SIZEOF_MessageBlackHole)) [R1]; - } - SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM); MessageBlackHole_tso(msg) = CurrentTSO; MessageBlackHole_bh(msg) = R1; hunk ./rts/StgMiscClosures.cmm 434 } else { StgTSO_why_blocked(CurrentTSO) = BlockedOnBlackHole::I16; StgTSO_block_info(CurrentTSO) = msg; - jump stg_block_blackhole; + jump stg_block_enter; } } else hunk ./rts/StgMiscClosures.cmm 628 INFO_TABLE_CONSTR(stg_MSG_NULL,1,0,0,PRIM,"MSG_NULL","MSG_NULL") { foreign "C" barf("MSG_NULL object entered!") never returns; } +INFO_TABLE_CONSTR(stg_MSG_GLOBALISE,3,0,0,PRIM,"MSG_GLOBALISE","MSG_GLOBALISE") +{ foreign "C" barf("MSG_GLOBALISE object entered!") never returns; } + /* ---------------------------------------------------------------------------- END_TSO_QUEUE hunk ./rts/Threads.c 71 } size = round_to_mblocks(size); - tso = (StgTSO *)allocate(cap, size); + tso = (StgTSO *)allocatePrim(cap, size); stack_size = size - TSO_STRUCT_SIZEW; TICK_ALLOC_TSO(stack_size, 0); hunk ./rts/Threads.c 110 /* Link the new thread on the global thread list. */ - tso->id = atomic_inc(&next_thread_id) - 1; + tso->id = atomic_inc((StgVolatilePtr)&next_thread_id) - 1; tso->global_link = cap->r.rG0->threads; cap->r.rG0->threads = tso; hunk ./rts/Threads.c 275 case BlockedOnBlackHole: case BlockedOnSTM: case ThreadMigrating: + case BlockedOnMsgGlobalise: goto unblock; default: hunk ./rts/Threads.c 351 // checking the owner field at the same time. bq->bh = 0; bq->queue = 0; bq->owner = 0; #endif - OVERWRITE_INFO(bq, &stg_STUB_BLOCKING_QUEUE_info); + OVERWRITE_PRIM_INFO(bq, &stg_STUB_BLOCKING_QUEUE_info, + sizeofW(StgBlockingQueue), sizeofW(StgInd)); } // If we update a closure that we know we BLACKHOLE'd, and the closure hunk ./rts/Threads.c 517 case BlockedOnSTM: debugBelch("is blocked on an STM operation"); break; + case BlockedOnMsgGlobalise: + debugBelch("is blocked on globalisation of %p", + ((MessageGlobalise*)tso->block_info.closure)->req); + break; default: barf("printThreadBlockage: strange tso->why_blocked: %d for TSO %d (%d)", tso->why_blocked, tso->id, tso); hunk ./rts/Updates.h 78 } \ } \ } \ - ZERO_SLOP(p + SIZEOF_StgThunk, sz); \ + ZERO_SLOP(p + SIZEOF_StgThunkHeader, sz); \ } #else /* !CMINUSMINUS */ hunk ./rts/Updates.h 83 -#define ZERO_SLOP(p,words) \ - { \ - nat i; \ - for (i = 0; i < words; i++) { \ - *((StgPtr)p + i) = 0; \ - } \ - } +INLINE_HEADER void +ZERO_SLOP (StgPtr p, nat words) +{ + nat i; + for (i = 0; i < words; i++) { + *(p + i) = 0; + } +} INLINE_HEADER void FILL_THUNK_SLOP(StgClosure *p) hunk ./rts/Updates.h 117 sz = inf->layout.payload.ptrs + inf->layout.payload.nptrs; break; } - ZERO_SLOP((StgPtr)p + sizeofW(StgThunk), sz); + ZERO_SLOP((StgPtr)p + sizeofW(StgThunkHeader), sz); no_slop: ; } hunk ./rts/Weak.c 102 for (w = list; w; w = w->link) { StgArrWords *farr; - // Better not be a DEAD_WEAK at this stage; the garbage - // collector removes DEAD_WEAKs from the weak pointer list. - ASSERT(w->header.info != &stg_DEAD_WEAK_info); + // Better be a DEAD_WEAK + ASSERT(w->header.info == &stg_DEAD_WEAK_info); if (w->finalizer != &stg_NO_FINALIZER_closure) { n++; hunk ./rts/Weak.c 117 (void *)farr->payload[2], farr->payload[3]); -#ifdef PROFILING - // A weak pointer is inherently used, so we do not need to call - // LDV_recordDead(). - // - // Furthermore, when PROFILING is turned on, dead weak - // pointers are exactly as large as weak pointers, so there is - // no need to fill the slop, either. See stg_DEAD_WEAK_info - // in StgMiscClosures.hc. -#endif - SET_HDR(w, &stg_DEAD_WEAK_info, w->header.prof.ccs); } if (task != NULL) { hunk ./rts/sm/BlockAlloc.c 481 // Todo: not true in multithreaded GC // ASSERT_SM_LOCK(); + IF_DEBUG(sanity, checkFreeListSanity()); + ASSERT(p->free != (P_)-1); p->free = (void *)-1; /* indicates that this block is free */ hunk ./rts/sm/Compact.h 23 INLINE_HEADER void mark(StgPtr p, bdescr *bd) { - nat offset_within_block = p - bd->start; // in words + StgWord offset_within_block = ((W_)p & BLOCK_MASK) / sizeof(W_); // in words StgPtr bitmap_word = (StgPtr)bd->u.bitmap + (offset_within_block / (sizeof(W_)*BITS_PER_BYTE)); StgWord bit_mask = (StgWord)1 << (offset_within_block & (sizeof(W_)*BITS_PER_BYTE - 1)); hunk ./rts/sm/Compact.h 27 + ASSERT(((W_)bitmap_word & MBLOCK_MASK) >= FIRST_BLOCK_OFF); *bitmap_word |= bit_mask; } hunk ./rts/sm/Compact.h 34 INLINE_HEADER void unmark(StgPtr p, bdescr *bd) { - nat offset_within_block = p - bd->start; // in words + StgWord offset_within_block = ((W_)p & BLOCK_MASK) / sizeof(W_); // in words StgPtr bitmap_word = (StgPtr)bd->u.bitmap + (offset_within_block / (sizeof(W_)*BITS_PER_BYTE)); StgWord bit_mask = (StgWord)1 << (offset_within_block & (sizeof(W_)*BITS_PER_BYTE - 1)); hunk ./rts/sm/Compact.h 44 INLINE_HEADER StgWord is_marked(StgPtr p, bdescr *bd) { - nat offset_within_block = p - bd->start; // in words + StgWord offset_within_block = ((W_)p & BLOCK_MASK) / sizeof(W_); // in words StgPtr bitmap_word = (StgPtr)bd->u.bitmap + (offset_within_block / (sizeof(W_)*BITS_PER_BYTE)); StgWord bit_mask = (StgWord)1 << (offset_within_block & (sizeof(W_)*BITS_PER_BYTE - 1)); hunk ./rts/sm/Evac.c 477 #if defined(LOCAL_GC) // make sure this isn't a pointer into the wrong local heap - ASSERT(bd->gen_no > gct->collect_gen || bd->gen_ix == gct->index); + ASSERT(bd->gen_no > gct->collect_gen || bd->gen_ix == gct->index || + ((bd->flags & BF_PRIM) && isGlobalPrim(q))); #endif hunk ./rts/sm/Evac.c 481 - if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED)) != 0) { + // in local GC, we mark/sweep BF_PRIM blocks, but in global GC + // we copy them. + if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_PRIM)) != 0) { // pointer into to-space: just return it. It might be a pointer // into a generation that we aren't collecting (> N), or it hunk ./rts/sm/Evac.c 516 return; } + if ((bd->flags & BF_PRIM)) { + // we may have to record a failed evac here, so that the + // parent object is recorded in the remembered set. + // However, at the end of GC globalise_mut_list will remove + // the mut_list entry and globalise the transitive closure + // of objects pointed to by it. + if (bd->gen_ix < gct->evac_gen_ix) { + gct->failed_to_evac = rtsTrue; + TICK_GC_FAILED_PROMOTION(); + } + // beware, this object may be in another Capability's local + // heap. If so, it ought to be global. + if (!isGlobalPrim((StgClosure*)q)) { + ASSERT(gct->gc_type == GC_LOCAL ? bd->gen_ix == gct->index : 1); + if (!is_marked((P_)q,bd)) { + mark((P_)q,bd); + push_mark_stack(q); + } + } + return; + } + /* If the object is in a gen that we're compacting, then we * need to use an alternative evacuate procedure. */ hunk ./rts/sm/Evac.c 543 if (!is_marked((P_)q,bd)) { mark((P_)q,bd); - push_mark_stack((P_)q); + push_mark_stack(q); } return; } hunk ./rts/sm/Evac.c 547 - + info = q->header.info; if (IS_FORWARDING_PTR(info)) { hunk ./rts/sm/Evac.c 663 case WEAK: case PRIM: case MUT_PRIM: - copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),global_gen_ix); + copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_ix); return; case BCO: hunk ./rts/sm/Evac.c 667 - copy(p,info,q,bco_sizeW((StgBCO *)q),global_gen_ix); + copy(p,info,q,bco_sizeW((StgBCO *)q),gen_ix); return; case THUNK_SELECTOR: hunk ./rts/sm/Evac.c 694 case ARR_WORDS: // just copy the block - copy(p,info,q,arr_words_sizeW((StgArrWords *)q),global_gen_ix); + copy(p,info,q,arr_words_sizeW((StgArrWords *)q),gen_ix); return; case MUT_VAR_CLEAN: hunk ./rts/sm/Evac.c 701 case MUT_VAR_DIRTY: case MVAR_CLEAN: case MVAR_DIRTY: - copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),global_gen_ix); + copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_ix); return; case MUT_ARR_PTRS_CLEAN: hunk ./rts/sm/Evac.c 709 case MUT_ARR_PTRS_FROZEN: case MUT_ARR_PTRS_FROZEN0: // just copy the block - copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),global_gen_ix); + copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),gen_ix); return; case TSO: hunk ./rts/sm/Evac.c 746 } case TREC_CHUNK: - copy(p,info,q,sizeofW(StgTRecChunk),global_gen_ix); + copy(p,info,q,sizeofW(StgTRecChunk),gen_ix); return; default: hunk ./rts/sm/GC.c 160 static void shutdown_gc_threads (nat me); static void collect_gct_blocks (void); static lnat count_part_lists (nat start_gen); +static void freeMarkStack (void); #if 0 && defined(DEBUG) static void gcCAFs (void); hunk ./rts/sm/GC.c 166 #endif -/* ----------------------------------------------------------------------------- - The mark stack. - -------------------------------------------------------------------------- */ - -bdescr *mark_stack_top_bd; // topmost block in the mark stack -bdescr *mark_stack_bd; // current block in the mark stack -StgPtr mark_sp; // pointer to the next unallocated mark stack entry - /* ----------------------------------------------------------------------------- GarbageCollect: the main entry point to the garbage collector. hunk ./rts/sm/GC.c 323 // Prepare the workspaces attached to this gc_thread prepare_gc_thread(); - /* Allocate a mark stack if we're doing a major collection. - */ - if (major_gc && oldest_gen->mark) { - mark_stack_bd = allocBlock(); - mark_stack_top_bd = mark_stack_bd; - mark_stack_bd->link = NULL; - mark_stack_bd->u.back = NULL; - mark_sp = mark_stack_bd->start; - } else { - mark_stack_bd = NULL; - mark_stack_top_bd = NULL; - mark_sp = NULL; - } - /* ----------------------------------------------------------------------- * follow all the roots that we know about: */ hunk ./rts/sm/GC.c 564 else // not compacted { freeChain(gen->old_blocks); + + if (N >= global_gen_no) { + freeChain(gen->prim_blocks); + gen->prim_blocks = NULL; + gen->n_prim_blocks = 0; + gen->n_prim_words = 0; + } else { + sweepPrimArea(gen); + } + gen->n_prim_words = countOccupied(gen->prim_blocks); } gen->old_blocks = NULL; hunk ./rts/sm/GC.c 632 } } - if (gc_type != GC_LOCAL) { - // Free the mark stack. - if (mark_stack_top_bd != NULL) { - debugTrace(DEBUG_gc, "mark stack: %d blocks", - countBlocks(mark_stack_top_bd)); - freeChain(mark_stack_top_bd); - } - // Free any bitmaps. - for (g = 0; g < total_generations; g++) { - gen = &all_generations[g]; - if (gen->bitmap != NULL) { - freeGroup(gen->bitmap); - gen->bitmap = NULL; - } + // Free the mark stack, leaving one block. + freeMarkStack(); + + // Free any bitmaps. + for (g = 0; g < total_generations; g++) { + gen = &all_generations[g]; + if (gct->gc_type == GC_LOCAL && isNonLocalGen(gen)) + continue; + if (gen->bitmap != NULL) { + freeGroup(gen->bitmap); + gen->bitmap = NULL; } } hunk ./rts/sm/GC.c 647 // Resize the nursery if necessary - if (gc_type != GC_LOCAL) { - // for local GC we don't know how big the nursery was supposed - // to be, so just leave it as is. It might be a bit bigger than - // before due to adding new large blocks and/or new blocks in - // allocate(), but we'll resize at the next major GC. - resize_nursery(copied,N); - } + resize_nursery(copied,N); #ifdef PROFILING // resetStaticObjectForRetainerProfiling() must be called before hunk ./rts/sm/GC.c 916 init_gc_thread(t); + // The mark stack always has one block in it. + t->mark_stack_bd = allocBlock(); + t->mark_stack_top_bd = t->mark_stack_bd; + t->mark_stack_bd->link = NULL; + t->mark_stack_bd->u.back = NULL; + t->mark_sp = t->mark_stack_bd->start; + #ifdef USE_PAPI t->papi_events = -1; #endif hunk ./rts/sm/GC.c 1054 write_barrier(); // scavenge objects in compacted generation - if (mark_stack_bd != NULL && !mark_stack_empty()) { + if (!mark_stack_empty()) { return rtsTrue; } hunk ./rts/sm/GC.c 1191 pruneSparkQueue(cap); #endif + // free this thread's mark stack. + freeMarkStack(); + #ifdef USE_PAPI // count events in this thread towards the GC totals papi_thread_stop_gc1_count(gct->papi_events); hunk ./rts/sm/GC.c 1345 ASSERT(gen->n_scavenged_large_blocks == 0); // grab all the partial blocks stashed in the gc_thread workspaces and - // move them to the old_threads list of this gen. + // move them to the old_blocks list of this gen. for (n = 0; n < n_capabilities; n++) { ws = &gc_threads[n]->gens[gen->ix]; for (bd = ws->part_list; bd != NULL; bd = next) { hunk ./rts/sm/GC.c 1382 bd->flags &= ~BF_EVACUATED; } - // for a compacted generation, we need to allocate the bitmap - if (gen->mark) { + // if we're doing global GC, mark all the prim blocks as ordinary + // blocks, so we'll copy them rather than marking. + if (gct->collect_gen >= global_gen_no) { + for (bd = gen->prim_blocks; bd; bd = bd->link) { + bd->flags &= ~(BF_PRIM | BF_MARKED); + } + } + + // allocate the mark bitmap for any blocks that will be marked, as + // opposed to copied, during this collection. + { nat bitmap_size; // in bytes bdescr *bitmap_bdescr; StgWord *bitmap; hunk ./rts/sm/GC.c 1396 - - bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); - + bdescr *marked_blocks; + nat n_marked_blocks; + + if (gen->mark) { + ASSERT(gen->prim_blocks == NULL); + marked_blocks = gen->old_blocks; + n_marked_blocks = gen->n_old_blocks; + } else if (gct->collect_gen < global_gen_no) { + ASSERT(countBlocks(gen->prim_blocks) == gen->n_prim_blocks); + marked_blocks = gen->prim_blocks; + n_marked_blocks = gen->n_prim_blocks; + } else { + marked_blocks = NULL; + n_marked_blocks = 0; + } + + bitmap_size = n_marked_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); + if (bitmap_size > 0) { bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size) / BLOCK_SIZE); hunk ./rts/sm/GC.c 1428 // For each block in this step, point to its bitmap from the // block descriptor. - for (bd=gen->old_blocks; bd != NULL; bd = bd->link) { + for (bd = marked_blocks; bd != NULL; bd = bd->link) { bd->u.bitmap = bitmap; bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE); hunk ./rts/sm/GC.c 1533 // If this gen is the local G0 for another Capability and // we're doing a local GC, then we don't bother allocating a // todo block. - if (gct->gc_type != GC_LOCAL - || all_generations[g].no != 0 - || g == gct->index) { + if (!(gct->gc_type == GC_LOCAL && isNonLocalGenIx(g))) { prepare_gen_workspace(g); } } hunk ./rts/sm/GC.c 1580 } } +/* ----------------------------------------------------------------------------- + Free excess blocks on the mark stack + -------------------------------------------------------------------------- */ + +static void +freeMarkStack (void) +{ + if (gct->mark_stack_top_bd->link != NULL) { + debugTrace(DEBUG_gc, "mark stack: %d blocks", + countBlocks(gct->mark_stack_top_bd)); + freeChain(gct->mark_stack_top_bd->link); + gct->mark_stack_top_bd->link = NULL; + ASSERT(gct->mark_stack_top_bd == gct->mark_stack_bd); + } +} + /* ----------------------------------------------------------------------------- Function we pass to evacuate roots. -------------------------------------------------------------------------- */ hunk ./rts/sm/GC.c 1749 { const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities; - if (RtsFlags.GcFlags.generations == 1) + if (gct->gc_type == GC_LOCAL) + { + if (RtsFlags.GcFlags.heapSizeSuggestion == 0) + { + resizeNursery(gct->cap, RtsFlags.GcFlags.minAllocAreaSize); + } + else + { + // we don't know how big the nursery was supposed + // to be, so just leave it as is. It might be a bit bigger than + // before due to adding new large blocks and/or new blocks in + // allocate(), but we'll resize at the next major GC. + } + } + else if (RtsFlags.GcFlags.generations == 1) { // Two-space collector: nat blocks; hunk ./rts/sm/GC.h 32 extern rtsBool work_stealing; // work stealing is enabled? extern nat next_gc_gen; // generation to collect next time -extern bdescr *mark_stack_bd; -extern bdescr *mark_stack_top_bd; -extern StgPtr mark_sp; - #ifdef DEBUG extern nat mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS; #endif hunk ./rts/sm/GCAux.c 68 return p; } + // BF_PRIM things can be either copied or marked, depending on + // kind of GC we're doing, so we check for both here. + if (bd->flags & BF_PRIM) { + info = q->header.info; + if (is_marked((P_)q,bd) || isGlobalPrim(q)) { + return p; + } else { + return NULL; + } + } + // large objects use the evacuated flag if (bd->flags & BF_LARGE) { if (get_itbl(q)->type == TSO && hunk ./rts/sm/GCThread.h 190 StgWeak *old_weak_ptrs; + // -------------------- + // The mark stack + + bdescr *mark_stack_top_bd; // topmost block in the mark stack + bdescr *mark_stack_bd; // current block in the mark stack + StgPtr mark_sp; // pointer to the next unallocated mark + // stack entry + #ifdef USE_PAPI int papi_events; #endif hunk ./rts/sm/Globalise.c 30 #include "Apply.h" #include "Printer.h" #include "Updates.h" +#include "MarkStack.h" static REGPARM1 GNUC_ATTR_HOT void globalise_evac (StgClosure **p); static void globalise_scavenge (void); hunk ./rts/sm/Globalise.c 37 STATIC_INLINE void globalise_large (StgPtr p); static void globalise_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size); +static void globalise_maybe_record_mutable (StgClosure *q); rtsBool globalise_wrt (Capability *cap USED_IF_THREADS, hunk ./rts/sm/Globalise.c 43 StgClosure *parent, StgClosure **root) { - bdescr *bd; - bd = Bdescr((StgPtr)parent); - if (bd->gen_ix >= global_gen_ix) { + if (isGlobal(parent)) { return globalise(cap,root); } return rtsTrue; hunk ./rts/sm/Globalise.c 258 *p = (StgClosure*)to; } -STATIC_INLINE GNUC_ATTR_HOT void -nocopy_IND_LOCAL(StgClosure **p, StgClosure *src, nat tag) -{ - StgPtr to; - - to = alloc_for_copy_global(sizeofW(StgInd)); - ((StgInd *)to)->header.info = stg_IND_LOCAL_tbl[gct->index]; - ((StgInd *)to)->indirectee = TAG_CLOSURE(tag,src); - *p = (StgClosure*)to; - recordMutableGen_GC((StgClosure*)to, global_gen_no); -} - -STATIC_INLINE void -copy_part(StgClosure **p, StgClosure *src, nat size_to_reserve, - nat size_to_copy) -{ - StgPtr to; - - to = alloc_for_copy_global(size_to_reserve); - *p = (StgClosure *)to; - copy_closure((StgPtr)src,to,src->header.info,size_to_copy); -} - REGPARM1 GNUC_ATTR_HOT void globalise_evac (StgClosure **p) { hunk ./rts/sm/Globalise.c 297 globalise_large((P_)q); return; } - + + if (bd->flags & BF_PRIM) { + if (!isGlobalPrim(q)) { + setGlobal(q); + push_mark_stack(q); + } + return; + } + info = q->header.info; if (IS_FORWARDING_PTR(info)) { hunk ./rts/sm/Globalise.c 314 return; } - if (INFO_PTR_TO_STRUCT(info)->flags & HAS_UNLIFTED_FIELDS) { - // These are the ones we can't promote because they point to - // primitive objects. - nocopy_IND_LOCAL(p,q,tag); - return; - } - switch (INFO_PTR_TO_STRUCT(info)->type) { case WHITEHOLE: hunk ./rts/sm/Globalise.c 359 case THUNK_1_0: case THUNK_0_1: + case IND_PERM: copy_IND(p,info,q,sizeofW(StgThunk)+1); return; hunk ./rts/sm/Globalise.c 409 copy_IND(p,info,q,ap_stack_sizeW((StgAP_STACK*)q)); return; - case TSO: - { - StgTSO *tso = (StgTSO *)q; - - /* Deal with relocated TSOs - */ - if (tso->what_next == ThreadRelocated) { - q = (StgClosure *)tso->_link; - *p = q; - goto loop; - } - - /* To evacuate a small TSO, we need to adjust the stack pointer - */ - { - StgTSO *new_tso; - StgPtr r, s; - - copy_part(p,(StgClosure *)tso, tso_sizeW(tso), - sizeofW(StgTSO)); - new_tso = (StgTSO *)*p; - move_TSO(tso, new_tso); - for (r = tso->sp, s = new_tso->sp; - r < tso->stack+tso->stack_size;) { - *s++ = *r++; - } - - // Link the new TSO onto the generation's threads list - ACQUIRE_SM_LOCK; - new_tso->global_link = global_gen->threads; - global_gen->threads = new_tso; - RELEASE_SM_LOCK; - - // mark the old TSO as relocated - tso->what_next = ThreadRelocated; - tso->_link = new_tso; - return; - } - } - case BLACKHOLE: // don't promote: we can't move BLACKHOLEs, because the update // frame points to them, so leave an IND_LOCAL instead hunk ./rts/sm/Globalise.c 412 - nocopy_IND_LOCAL(p,q,0); + { + StgPtr to; + + to = alloc_for_copy_global(sizeofW(StgInd)); + ((StgInd *)to)->header.info = stg_IND_LOCAL_tbl[bd->gen_ix]; + ((StgInd *)to)->indirectee = TAG_CLOSURE(tag,q); + *p = (StgClosure*)to; + recordMutableCap(&capabilities[bd->gen_ix], + (StgClosure*)to, global_gen_no); return; hunk ./rts/sm/Globalise.c 422 + } case ARR_WORDS: hunk ./rts/sm/Globalise.c 425 - case MUT_ARR_PTRS_CLEAN: - case MUT_ARR_PTRS_DIRTY: - case MUT_ARR_PTRS_FROZEN: - case MUT_ARR_PTRS_FROZEN0: - case MUT_VAR_CLEAN: - case MUT_VAR_DIRTY: - case MVAR_CLEAN: - case MVAR_DIRTY: - case BCO: - case WEAK: - case PRIM: - case MUT_PRIM: - case BLOCKING_QUEUE: - gct->failed_to_evac = rtsTrue; + // just copy the block + copy_tag(p,info,q,arr_words_sizeW((StgArrWords *)q),tag); return; hunk ./rts/sm/Globalise.c 429 - case IND_PERM: - case TREC_CHUNK: - barf("globalise_evac: cannot globalise type %d", (int)(INFO_PTR_TO_STRUCT(info)->type)); - default: barf("globalise_evac: strange closure type %d", (int)(INFO_PTR_TO_STRUCT(info)->type)); } hunk ./rts/sm/Globalise.c 770 static StgPtr globalise_scavenge_mut_arr_ptrs (StgMutArrPtrs *a) { - lnat m; - StgPtr p, q; + StgPtr p; hunk ./rts/sm/Globalise.c 772 - for (m = 0; m < mutArrPtrsCards(a->ptrs); m++) - { - if (*mutArrPtrsCard(a,m) != 0) { - p = (StgPtr)&a->payload[m << MUT_ARR_PTRS_CARD_BITS]; - q = stg_min(p + (1 << MUT_ARR_PTRS_CARD_BITS), - (StgPtr)&a->payload[a->ptrs]); - for (; p < q; p++) { - globalise_evac((StgClosure**)p); - } - } + for (p = (P_)&a->payload[0]; p < (P_)&a->payload[a->ptrs]; p++) { + globalise_evac((StgClosure**)p); } return (StgPtr)a + mut_arr_ptrs_sizeW(a); hunk ./rts/sm/Globalise.c 798 case MUT_ARR_PTRS_FROZEN0: return globalise_scavenge_mut_arr_ptrs((StgMutArrPtrs *)p); - case TREC_CHUNK: - // shouldn't happen: we do not globalise TREC_CHUNKS, - // and they stay on the mut list. - case BLOCKING_QUEUE: - // shouldn't happen: we refuse to globalise BLOCKING_QUEUE, - // and if we end up with one on the mut list after GC then it - // is dealt with by globalise_mut_list. default: barf("globalise_scavenge_one: unimplemented/strange closure type %d @ %p", type, p); hunk ./rts/sm/Globalise.c 804 } } -STATIC_INLINE GNUC_ATTR_HOT StgPtr +static GNUC_ATTR_HOT REGPARM1 StgPtr globalise_scavenge_one (StgPtr p) { const StgInfoTable *info; hunk ./rts/sm/Globalise.c 881 case MVAR_DIRTY: case MUT_VAR_CLEAN: case MUT_VAR_DIRTY: + case TREC_CHUNK: + case BLOCKING_QUEUE: { StgPtr end; hunk ./rts/sm/Globalise.c 934 return p + tso_sizeW(tso); } - // an IND_LOCAL is "already globalised" + // an IND_LOCAL could be here as a result of a BLACKHOLE being + // globalised, so we don't want to try to globalise it. case IND_LOCAL: hunk ./rts/sm/Globalise.c 937 + gct->failed_to_evac = rtsTrue; return p + sizeofW(StgInd); case IND: hunk ./rts/sm/Globalise.c 974 globalise_scavenge_one(p); if (gct->failed_to_evac) { gct->failed_to_evac = rtsFalse; - if (ws->gen->no > 0) { - recordMutableGen_GC((StgClosure *)p, ws->gen->no); - } + globalise_maybe_record_mutable((StgClosure*)p); } // stats gct->scanned += closure_sizeW((StgClosure*)p); hunk ./rts/sm/Globalise.c 981 } } - static GNUC_ATTR_HOT void globalise_scavenge_block (bdescr *bd) { hunk ./rts/sm/Globalise.c 1004 if (gct->failed_to_evac) { gct->failed_to_evac = rtsFalse; - recordMutableGen_GC((StgClosure*)p, ws->gen->no); - - switch (get_itbl((StgClosure*)p)->type) { - case TSO: - case IND_LOCAL: - case BLOCKING_QUEUE: // allowed to point to local BLACKHOLEs - debugTrace(DEBUG_gc, "not demoting %p", p); - break; - default: - barf("globalise_scavenge_block: failed_to_evac"); - } + globalise_maybe_record_mutable((StgClosure*)p); } p = q; hunk ./rts/sm/Globalise.c 1030 gct->scan_bd = NULL; } - + static void globalise_scavenge (void) hunk ./rts/sm/Globalise.c 1037 { gen_workspace *ws; bdescr *bd; + StgPtr p; do { ws = &gct->gens[global_gen_ix]; // XXX assumes structure of generations hunk ./rts/sm/Globalise.c 1044 gct->scan_bd = NULL; + // scavenge objects in compacted generation + while ((p = pop_mark_stack())) { + globalise_scavenge_one(p); + if (gct->failed_to_evac) { + gct->failed_to_evac = rtsFalse; + globalise_maybe_record_mutable((StgClosure*)p); + } + } + // If we have a scan block with some work to do, // scavenge everything up to the free pointer. if (ws->todo_bd->u.scan < ws->todo_free) hunk ./rts/sm/Globalise.c 1082 -------------------------------------------------------------------------- */ static void -globalise_mut_list (Capability *cap USED_IF_DEBUG, bdescr *mut_list, nat g) +globalise_maybe_record_mutable (StgClosure *q) +{ + nat g = global_gen_no; + + switch (get_itbl((StgClosure*)q)->type) { + + case TSO: + // put it on the correct mutable list; during parallel + // GC it may have ended up on the wrong one. + recordMutableCap(((StgTSO*)q)->cap, q, g); + break; + + case IND_LOCAL: + // put it on the correct mutable list; during parallel + // GC it may have ended up on the wrong one. + recordMutableCap(&capabilities[get_itbl(q)->srt_bitmap], + q, g); + break; + + case BLOCKING_QUEUE: { + bdescr *bd; + StgBlockingQueue *bq = (StgBlockingQueue *)q; + bd = Bdescr((P_)bq->bh); + if (bd->gen->is_local) { + recordMutableCap(&capabilities[bd->gen->cap], q, g); + } + break; + } + + case MUT_PRIM: + if (q->header.info == &stg_TREC_HEADER_info) { + recordMutableCap(&capabilities[((StgTRecHeader*)q)->cap_no], + q, g); + break; + } else { + goto other_closure; + } + + case TREC_CHUNK: + recordMutableCap(&capabilities[((StgTRecChunk*)q)->cap_no], + q, g); + break; + + default: + other_closure: + barf("globalise_maybe_record_mutable: %d", get_itbl(q)->type); + } +} + +static void +globalise_mut_list (Capability *cap STG_UNUSED, bdescr *mut_list, + nat g STG_UNUSED) { bdescr *bd; hunk ./rts/sm/Globalise.c 1136 - StgPtr p, r; + StgPtr p; StgClosure *q; hunk ./rts/sm/Globalise.c 1139 + // if g != 1, then we'll need to do something more clever below + ASSERT(g == 1); + // gct->gc_type = GC_LOCAL; gct->failed_to_evac = rtsFalse; hunk ./rts/sm/Globalise.c 1151 q = (StgClosure *)*p; - // we don't care about TSOs or IND_LOCALs: their - // contents are private to this Capability - switch (get_itbl((StgClosure*)q)->type) { - - case TSO: - // put it on the correct mutable list; during parallel - // GC it may have ended up on the wrong one. - recordMutableCap(((StgTSO*)q)->cap, q, g); - continue; - - case IND_LOCAL: - // XXX: this assertion may trigger? - ASSERT(Bdescr((P_)((StgInd*)q)->indirectee)->gen_ix == cap->no); - recordMutableGen_GC(q, g); - continue; - - case BLOCKING_QUEUE: { - bdescr *bd; - StgBlockingQueue *bq = (StgBlockingQueue *)q; - bd = Bdescr((P_)bq->bh); - if (bd->gen->is_local) { - recordMutableCap(&capabilities[bd->gen->cap], q, g); - } - continue; + globalise_scavenge_one((StgPtr)q); + if (gct->failed_to_evac) { + gct->failed_to_evac = rtsFalse; + globalise_maybe_record_mutable(q); } hunk ./rts/sm/Globalise.c 1157 - case MUT_PRIM: - if (q->header.info == &stg_TREC_HEADER_info) { - recordMutableCap(&capabilities[((StgTRecHeader*)q)->cap_no], - q, g); - continue; - } else { - goto other_closure; - } - - case TREC_CHUNK: - recordMutableCap(&capabilities[((StgTRecChunk*)q)->cap_no], - q, g); - continue; - - default: - other_closure: - r = globalise_scavenge_one((StgPtr)q); - if (gct->failed_to_evac) { - // cannot happen, because we just GC'd and - // promoted all the primitive objects, so - // globalisation should never fail at this point. - barf("globalise_mut_lists: failed_to_evac"); - } - } } } } hunk ./rts/sm/MarkStack.h 20 #include "BeginPrivate.h" INLINE_HEADER void -push_mark_stack(StgPtr p) +push_mark_stack(StgClosure *p) { bdescr *bd; hunk ./rts/sm/MarkStack.h 24 - *mark_sp++ = (StgWord)p; + *gct->mark_sp++ = (StgWord)p; hunk ./rts/sm/MarkStack.h 26 - if (((W_)mark_sp & BLOCK_MASK) == 0) + if (((W_)gct->mark_sp & BLOCK_MASK) == 0) { hunk ./rts/sm/MarkStack.h 28 - if (mark_stack_bd->u.back != NULL) + if (gct->mark_stack_bd->link != NULL) { hunk ./rts/sm/MarkStack.h 30 - mark_stack_bd = mark_stack_bd->u.back; + gct->mark_stack_bd = gct->mark_stack_bd->link; } else { hunk ./rts/sm/MarkStack.h 35 bd = allocBlock_sync(); - bd->link = mark_stack_bd; - bd->u.back = NULL; - mark_stack_bd->u.back = bd; // double-link the new block on - mark_stack_top_bd = bd; - mark_stack_bd = bd; + bd->u.back = gct->mark_stack_bd; + bd->link = NULL; + gct->mark_stack_bd->link = bd; // double-link the new block on + gct->mark_stack_bd = bd; } hunk ./rts/sm/MarkStack.h 40 - mark_sp = mark_stack_bd->start; + gct->mark_sp = gct->mark_stack_bd->start; } } hunk ./rts/sm/MarkStack.h 47 INLINE_HEADER StgPtr pop_mark_stack(void) { - if (((W_)mark_sp & BLOCK_MASK) == 0) + if (((W_)gct->mark_sp & BLOCK_MASK) == 0) { hunk ./rts/sm/MarkStack.h 49 - if (mark_stack_bd->link == NULL) + if (gct->mark_stack_bd->u.back == NULL) { return NULL; } hunk ./rts/sm/MarkStack.h 55 else { - mark_stack_bd = mark_stack_bd->link; - mark_sp = mark_stack_bd->start + BLOCK_SIZE_W; + gct->mark_stack_bd = gct->mark_stack_bd->u.back; + gct->mark_sp = gct->mark_stack_bd->start + BLOCK_SIZE_W; } } hunk ./rts/sm/MarkStack.h 59 - return (StgPtr)*--mark_sp; + return (StgPtr)*--gct->mark_sp; } INLINE_HEADER rtsBool hunk ./rts/sm/MarkStack.h 65 mark_stack_empty(void) { - return (((W_)mark_sp & BLOCK_MASK) == 0 && mark_stack_bd->link == NULL); + return (((W_)gct->mark_sp & BLOCK_MASK) == 0 && gct->mark_stack_bd->u.back == NULL); } #include "EndPrivate.h" hunk ./rts/sm/MarkWeak.c 174 gct->failed_to_evac = rtsFalse; recordMutableGen_GC((StgClosure *)w,Bdescr((P_)w)->gen_no); } + +#ifdef PROFILING + // A weak pointer is inherently used, so we do not need to call + // LDV_recordDead(). + // + // Furthermore, when PROFILING is turned on, dead weak + // pointers are exactly as large as weak pointers, so there is + // no need to fill the slop, either. See stg_DEAD_WEAK_info + // in StgMiscClosures.hc. +#endif + SET_INFO(w, &stg_DEAD_WEAK_info); } // Next, move to the WeakThreads stage after fully hunk ./rts/sm/Sanity.c 7 * * Sanity checking code for the heap and stack. * - * Used when debugging: check that everything reasonable. + * Used when debugging: check that everything satisfies the invariants. * * - All things that are supposed to be pointers look like pointers. * hunk ./rts/sm/Sanity.c 177 { StgFunInfoTable *fun_info; StgRetFun *ret_fun; + StgClosure *fun; ret_fun = (StgRetFun *)c; hunk ./rts/sm/Sanity.c 180 - fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun)); + fun = UNTAG_CLOSURE(ret_fun->fun); + if (IS_FORWARDING_PTR(fun->header.info)) { + fun = (StgClosure*)UN_FORWARDING_PTR(fun->header.info); + } + fun_info = get_fun_itbl(UNTAG_CLOSURE(fun)); size = ret_fun->size; switch (fun_info->f.fun_type) { case ARG_GEN: hunk ./rts/sm/Sanity.c 463 static void checkGlobalPtr (StgClosure *p) { - bdescr *bd; - if (!HEAP_ALLOCED(p)) return; - bd = Bdescr((StgPtr)p); - ASSERT (bd->gen_no != 0); - ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + if (HEAP_ALLOCED(p)) { + ASSERT(isGlobal(p)); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + } } // Check that an object only points to global heap, unless it is hunk ./rts/sm/Sanity.c 514 return thunk_sizeW_fromITBL(info); } + case MUT_PRIM: + if (p->header.info == &stg_TREC_HEADER_info) { + // TREC_HEADERs are private and are allowed to point to + // the local heap. + return checkClosure(p); + } // else fall through... case FUN: case FUN_1_0: case FUN_0_1: hunk ./rts/sm/Sanity.c 542 case THUNK_STATIC: case FUN_STATIC: case PRIM: - case MUT_PRIM: { nat i; for (i = 0; i < info->layout.payload.ptrs; i++) { hunk ./rts/sm/Sanity.c 551 } case IND_LOCAL: - ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgInd*)p)->indirectee)); + { + StgClosure *q = ((StgInd*)p)->indirectee; + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); + // the indirectee should live in the correct local heap + ASSERT(Bdescr((P_)q)->gen_ix == get_itbl(p)->srt_bitmap); return sizeofW(StgInd); hunk ./rts/sm/Sanity.c 557 + } case BLOCKING_QUEUE: { hunk ./rts/sm/Sanity.c 730 } } +void checkPrimHeapChain (bdescr *bd) +{ + StgPtr p; + StgWord flag; + + for (; bd != NULL; bd = bd->link) { + p = bd->start; + while (p < bd->free) { + flag = *p; + p += 1; + switch (flag) { + case 0: // not global + p += checkClosure((StgClosure*)p); + break; + case 1: // global + p += checkGlobalClosure((StgClosure*)p); + break; + default: // reclaimed free space of size 'flag - 2' + p += flag - 2; + break; + } + } + } +} + void checkHeapChunk(StgPtr start, StgPtr end) { hunk ./rts/sm/Sanity.c 859 static void checkMutableList (bdescr *mut_bd, nat gen, nat cap_no) { - bdescr *bd; + bdescr *bd, *pbd; StgPtr q; StgClosure *p; hunk ./rts/sm/Sanity.c 866 for (bd = mut_bd; bd != NULL; bd = bd->link) { for (q = bd->start; q < bd->free; q++) { p = (StgClosure *)*q; - ASSERT(!HEAP_ALLOCED(p) || Bdescr((P_)p)->gen_no == gen); + pbd = Bdescr((P_)p); + ASSERT(!HEAP_ALLOCED(p) || pbd->gen_no == gen || + ((pbd->flags & BF_PRIM) && isGlobalPrim(p))); checkGlobalClosure(p); if (get_itbl(p)->type == TSO) { // TSOs on the mutable list must belong to this capability hunk ./rts/sm/Sanity.c 941 /* Nursery sanity check */ void -checkNurserySanity (nursery *nursery) +checkNurserySanity (nat cap_no) { bdescr *bd, *prev; nat blocks = 0; hunk ./rts/sm/Sanity.c 945 + nursery *nursery; + + nursery = &nurseries[cap_no]; prev = NULL; for (bd = nursery->blocks; bd != NULL; bd = bd->link) { hunk ./rts/sm/Sanity.c 951 + ASSERT(bd->gen == &all_generations[cap_no]); + ASSERT(bd->gen_ix == cap_no); ASSERT(bd->u.back == prev); prev = bd; blocks += bd->blocks; hunk ./rts/sm/Sanity.c 969 gen_workspace *ws; ASSERT(countBlocks(gen->blocks) == gen->n_blocks); + ASSERT(countBlocks(gen->prim_blocks) == gen->n_prim_blocks); ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); #if defined(THREADED_RTS) hunk ./rts/sm/Sanity.c 981 if (gen->is_local) { checkHeapChain(gen->blocks); + checkPrimHeapChain(gen->prim_blocks); } else { checkGlobalHeapChain(gen->blocks); hunk ./rts/sm/Sanity.c 984 + ASSERT(gen->prim_blocks == NULL); } for (n = 0; n < n_capabilities; n++) { hunk ./rts/sm/Sanity.c 1015 checkGeneration(gen, rtsFalse); } } - checkNurserySanity(&nurseries[cap_no]); + checkNurserySanity(cap_no); } /* Full heap sanity check. */ hunk ./rts/sm/Sanity.c 1027 checkGeneration(&all_generations[g], after_major_gc); } for (n = 0; n < n_capabilities; n++) { - checkNurserySanity(&nurseries[n]); + checkNurserySanity(n); } } hunk ./rts/sm/Sanity.c 1074 markBlocks(gc_threads[i]->gens[n].todo_bd); } markBlocks(all_generations[n].blocks); + markBlocks(all_generations[n].prim_blocks); markBlocks(all_generations[n].large_objects); } hunk ./rts/sm/Sanity.c 1080 for (i = 0; i < n_capabilities; i++) { markBlocks(nurseries[i].blocks); + markBlocks(gc_threads[i]->mark_stack_top_bd); } #ifdef PROFILING hunk ./rts/sm/Sanity.c 1145 gen = &all_generations[n]; findBlockInList(bd, gen->blocks); findBlockInList(bd, gen->old_blocks); + findBlockInList(bd, gen->prim_blocks); findBlockInList(bd, gen->large_objects); findBlockInList(bd, gen->scavenged_large_objects); } hunk ./rts/sm/Sanity.c 1152 for (i = 0; i < n_capabilities; i++) { findBlockInList(bd, nurseries[i].blocks); + findBlockInList(bd, gc_threads[i]->mark_stack_top_bd); } } hunk ./rts/sm/Sanity.c 1155 - + /* ----------------------------------------------------------------------------- Memory leak detection hunk ./rts/sm/Sanity.c 1184 genBlocks (generation *gen) { ASSERT(countBlocks(gen->blocks) == gen->n_blocks); + ASSERT(countBlocks(gen->prim_blocks) == gen->n_prim_blocks); ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); hunk ./rts/sm/Sanity.c 1186 - return gen->n_blocks + gen->n_old_blocks + + return gen->n_blocks + gen->n_prim_blocks + gen->n_old_blocks + countAllocdBlocks(gen->large_objects); } hunk ./rts/sm/Sanity.c 1196 nat g, i, n; lnat gen_blocks[total_generations]; lnat nursery_blocks, retainer_blocks, - arena_blocks, exec_blocks; + arena_blocks, exec_blocks, mark_stack_blocks; lnat live_blocks = 0, free_blocks = 0; rtsBool leak; hunk ./rts/sm/Sanity.c 1222 nursery_blocks += nurseries[i].n_blocks; } + mark_stack_blocks = 0; + for (i = 0; i < n_capabilities; i++) { + mark_stack_blocks += countBlocks(gc_threads[i]->mark_stack_top_bd); + } + retainer_blocks = 0; #ifdef PROFILING if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { hunk ./rts/sm/Sanity.c 1247 for (g = 0; g < total_generations; g++) { live_blocks += gen_blocks[g]; } - live_blocks += nursery_blocks + + live_blocks += nursery_blocks + mark_stack_blocks + retainer_blocks + arena_blocks + exec_blocks; #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_))) hunk ./rts/sm/Sanity.c 1267 } debugBelch(" nursery : %5lu blocks (%lu MB)\n", nursery_blocks, MB(nursery_blocks)); + debugBelch(" mark stacks : %5lu blocks (%lu MB)\n", + mark_stack_blocks, MB(mark_stack_blocks)); debugBelch(" retainer : %5lu blocks (%lu MB)\n", retainer_blocks, MB(retainer_blocks)); debugBelch(" arena blocks : %5lu blocks (%lu MB)\n", hunk ./rts/sm/Sanity.h 26 /* debugging routines */ void checkSanity (rtsBool local_only, rtsBool after_gc, rtsBool major_gc, nat cap_no); -void checkNurserySanity ( nursery *nursery ); +void checkNurserySanity (nat cap_no); void checkHeapChain (bdescr *bd); void checkGlobalHeapChain (bdescr *bd); hunk ./rts/sm/Sanity.h 30 +void checkPrimHeapChain (bdescr *bd); void checkHeapChunk ( StgPtr start, StgPtr end ); void checkLargeObjects ( bdescr *bd ); hunk ./rts/sm/Scav.c 121 tso->flags &= ~TSO_LINK_DIRTY; } + debugTrace(DEBUG_gc,"dirty: %d %d",(int)tso->dirty, tso->flags & TSO_LINK_DIRTY); + gct->eager_promotion = saved_eager; } hunk ./rts/sm/Scav.c 795 -------------------------------------------------------------------------- */ static void -scavenge_mark_stack(void) +scavenge_mark_stack (generation *gen) { StgPtr p, q; StgInfoTable *info; hunk ./rts/sm/Scav.c 802 nat saved_evac_gen_ix; rtsBool saved_eager_promotion; - gct->evac_gen_ix = oldest_gen->ix; saved_evac_gen_ix = gct->evac_gen_ix; saved_eager_promotion = gct->eager_promotion; hunk ./rts/sm/Scav.c 804 + gct->evac_gen_ix = gen->ix; while ((p = pop_mark_stack())) { hunk ./rts/sm/Scav.c 1085 if (gct->failed_to_evac) { gct->failed_to_evac = rtsFalse; - if (gct->evac_gen_ix) { + if (gen->no > 0) { recordMutableGen_GC((StgClosure *)q, oldest_gen->no); hunk ./rts/sm/Scav.c 1087 - } + } else { + ASSERT(Bdescr(q)->flags & BF_PRIM); + if (isGlobalPrim((StgClosure*)q)) { + recordMutableGen_GC((StgClosure *)q, oldest_gen->no); + } + } } } // while (p = pop_mark_stack()) } hunk ./rts/sm/Scav.c 1948 } // scavenge objects in compacted generation - if (mark_stack_bd != NULL && !mark_stack_empty()) { - scavenge_mark_stack(); + if (!mark_stack_empty()) { + if (gct->collect_gen >= global_gen_no) { + scavenge_mark_stack(global_gen); + } else { + scavenge_mark_stack(gct->cap->r.rG0); + } work_to_do = rtsTrue; } hunk ./rts/sm/Storage.c 109 gen->large_objects = NULL; gen->n_large_blocks = 0; gen->n_new_large_blocks = 0; + gen->prim_blocks = NULL; + gen->n_prim_blocks = 0; + gen->n_prim_words = 0; gen->scavenged_large_objects = NULL; gen->n_scavenged_large_blocks = 0; gen->mark = 0; hunk ./rts/sm/Storage.c 509 return blocks; } -static void -resizeNursery ( nursery *nursery, nat blocks, nat cap_no) +void +resizeNursery ( Capability *cap, nat blocks ) { bdescr *bd; nat nursery_blocks; hunk ./rts/sm/Storage.c 514 + nursery *nursery; + nat cap_no; + + cap_no = cap->no; + nursery = &nurseries[cap_no]; nursery_blocks = nursery->n_blocks; if (nursery_blocks == blocks) return; hunk ./rts/sm/Storage.c 555 } nursery->n_blocks = blocks; + IF_DEBUG(sanity, checkNurserySanity(cap->no)); ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks); } hunk ./rts/sm/Storage.c 567 { nat i; for (i = 0; i < n_capabilities; i++) { - resizeNursery(&nurseries[i], blocks, i); + resizeNursery(&capabilities[i], blocks); } } hunk ./rts/sm/Storage.c 723 } dbl_link_onto(bd, &cap->r.rNursery->blocks); cap->r.rCurrentAlloc = bd; - IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); + IF_DEBUG(sanity, checkNurserySanity(cap->no)); } p = bd->free; bd->free += n; hunk ./rts/sm/Storage.c 795 return p; } +StgPtr +allocatePrim (Capability *cap, lnat n) +{ + StgPtr p; + bdescr *bd; + + // If the request is for a large object, then allocate() + // will give us a BF_LARGE object anyway and these are safe to be + // returned from allocatePrim. + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + p = allocate(cap, n); + ASSERT(Bdescr(p)->flags & BF_LARGE); + return p; + } + + // allocate an extra word for the global flag + n = n + 1; + + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); + + bd = cap->r.rG0->prim_blocks; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + + // The current prim block is full, we need to find another + // one. First, we try taking the next block from the + // nursery: + bd = cap->r.rCurrentNursery->link; + + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + // The nursery is empty, or the next block is already + // full: allocate a fresh block (we can't fail here). + ACQUIRE_SM_LOCK; + bd = allocBlock(); + RELEASE_SM_LOCK; + // the bd->dest for prim blocks is the global gen. If we + // didn't do this, then a global collection + // would have to copy live non-global prim objects into a + // new BF_PRIM block. + initBdescr(bd, cap->r.rG0, global_gen); + // If we had to allocate a new block, then we'll GC + // pretty quickly now, because MAYBE_GC() will + // notice that CurrentNursery->link is NULL. + } else { + // we have a block in the nursery: take it out and use it to + // to allocatePrim() from. + cap->r.rCurrentNursery->link = bd->link; + cap->r.rNursery->n_blocks--; + if (bd->link != NULL) { + bd->link->u.back = cap->r.rCurrentNursery; + } + } + bd->flags = BF_PRIM; + bd->link = cap->r.rG0->prim_blocks; + bd->dest_ix = global_gen_ix; + cap->r.rG0->prim_blocks = bd; + cap->r.rG0->n_prim_blocks++; + IF_DEBUG(sanity, checkNurserySanity(cap->no)); + IF_DEBUG(sanity, ASSERT(countBlocks(cap->r.rG0->prim_blocks) == cap->r.rG0->n_prim_blocks)); + } + + *bd->free = 0; // set the global flag to 0 + p = bd->free + 1; + bd->free += n; + + IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa)); + return p; +} + + /* ----------------------------------------------------------------------------- Write Barriers -------------------------------------------------------------------------- */ hunk ./rts/sm/Storage.c 879 dirty_MUT_VAR (StgRegTable *reg, StgClosure *p) { Capability *cap = regTableToCapability(reg); - bdescr *bd; - bd = Bdescr((StgPtr)p); - if (bd->gen_ix >= global_gen_ix) { + bdescr *bd = Bdescr((StgPtr)p); + if (bd->gen_ix >= global_gen_ix || isGlobalPrim(p)) { ((StgMutVar*)p)->var = publish(cap, ((StgMutVar*)p)->var); } } hunk ./rts/sm/Storage.c 890 dirty_MUT_ARR (StgRegTable *reg, StgMutArrPtrs *arr, nat ix) { Capability *cap = regTableToCapability(reg); - bdescr *bd; - bd = Bdescr((StgPtr)arr); - if (bd->gen_ix >= global_gen_ix) { + bdescr *bd = Bdescr((StgPtr)arr); + if (bd->gen_ix >= global_gen_ix || isGlobalPrim((StgClosure*)arr)) { arr->payload[ix] = publish(cap, arr->payload[ix]); } } hunk ./rts/sm/Storage.c 979 allocated += cap->r.rG0->n_new_large_blocks * BLOCK_SIZE_W; + // XXX: take into account allocations from allocatePrim + return allocated; } hunk ./rts/sm/Storage.c 1037 live = 0; for (g = 0; g < total_generations; g++) { gen = &all_generations[g]; - live += gen->n_words + countOccupied(gen->large_objects); + live += gen->n_words + gen->n_prim_words + countOccupied(gen->large_objects); } return live; } hunk ./rts/sm/Storage.h 73 void resetNursery ( nat n ); void resetNurseries ( void ); +void resizeNursery ( Capability *cap, nat blocks ); void resizeNurseries ( nat blocks ); void resizeNurseriesFixed ( nat blocks ); lnat countNurseryBlocks ( void ); hunk ./rts/sm/Sweep.c 19 #include "BlockAlloc.h" #include "Sweep.h" +#include "Compact.h" #include "Trace.h" void hunk ./rts/sm/Sweep.c 88 ASSERT(countBlocks(gen->old_blocks) == gen->n_old_blocks); } + + +static rtsBool +empty (bdescr *bd) +{ + nat i; + StgPtr p; + StgWord flag, size; + rtsBool empty; + + empty = rtsTrue; + p = bd->start; + while (p < bd->free) { + flag = *p; + p += 1; + switch (flag) { + case 0: // not global + size = closure_sizeW((StgClosure*)p); + if (!is_marked(p,bd)) { + *(p-1) = size+2; + } else { + empty = rtsFalse; + } + p += size; + break; + case 1: // global + empty = rtsFalse; + size = closure_sizeW((StgClosure*)p); + p += size; + break; + default: // reclaimed free space of size 'flag-2' + p += flag-2; + break; + } + } + + return empty; +} + +void +sweepPrimArea (generation *gen) +{ + bdescr *bd, *prev, *next; + + prev = NULL; + for (bd = gen->prim_blocks; bd != NULL; bd = next) + { + next = bd->link; + if (empty(bd)) { + if (prev == NULL) { + gen->prim_blocks = next; + } else { + prev->link = next; + } + debugTrace(DEBUG_gc, "sweepPrimArea: free block at %p", bd->start); + freeGroup(bd); + gen->n_prim_blocks--; + } else { + prev = bd; + } + } +} hunk ./rts/sm/Sweep.h 18 #define SM_SWEEP_H RTS_PRIVATE void sweep(generation *gen); +RTS_PRIVATE void sweepPrimArea (generation *gen); #endif /* SM_SWEEP_H */ } [fix various bugs Simon Marlow **20101021095632 Ignore-this: a8ab2e693258e03cbc300a8eab34adda ] { hunk ./rts/Messages.c 308 #ifdef THREADED_RTS if (owner->cap != cap) { - // if we got here, then the msg should already be global, - // because for the BH to be owned by another cap it must - // be global, so stg_BLACKHOLE_info would have allocated - // the msg in global memory. sendMessage() will assert. + msg->link = (MessageBlackHole*)END_TSO_QUEUE; // just make it valid + globalise_(cap, (StgClosure*)msg); sendMessage(cap, owner->cap, (Message*)msg); debugTraceCap(DEBUG_sched, cap, "forwarding message to cap %d", owner->cap->no); return 1; hunk ./rts/Messages.c 353 GNUC3_ATTRIBUTE(__noreturn__) #endif messageGlobalise (Capability *cap USED_IF_THREADS, - StgTSO *tso USED_IF_THREADS, - StgClosure *p USED_IF_THREADS) + StgTSO *tso USED_IF_THREADS, + StgClosure *p USED_IF_THREADS, + nat owner USED_IF_THREADS) { #ifndef THREADED_RTS hunk ./rts/Messages.c 366 Capability *dest; MessageGlobalise *msg; - ASSERT(get_itbl(p)->type == IND_LOCAL); + // No: The owner might be turning it into an IND. Don't look at + // the info table. + // ASSERT(get_itbl(p)->type == IND_LOCAL); + ASSERT(Bdescr((P_)p)->gen_ix == global_gen_ix); hunk ./rts/Messages.c 372 - dest = &capabilities[get_itbl(p)->srt_bitmap]; + dest = &capabilities[owner]; debugTraceCap(DEBUG_sched, cap, "thread %lu requesting globalisation of closure at %p from cap %u", hunk ./rts/Messages.h 12 #include "BeginPrivate.h" nat messageBlackHole (Capability *cap, MessageBlackHole *msg); -void messageGlobalise (Capability *cap, StgTSO *tso, StgClosure *p); +void messageGlobalise (Capability *cap, StgTSO *tso, StgClosure *p, nat owner); #ifdef THREADED_RTS void executeMessage (Capability *cap, Message *m); hunk ./rts/StgMiscClosures.cmm 244 INFO_TABLE_CONSTR(stg_IND_LOCAL##n,1,0,n,IND_LOCAL,"IND_LOCAL","IND_LOCAL") \ { \ if (TO_W_(Capability_no(MyCapability())) != n) { \ - foreign "C" messageGlobalise(MyCapability(), CurrentTSO, R1) [R1]; \ + foreign "C" messageGlobalise(MyCapability(), CurrentTSO, R1, n) [R1]; \ jump stg_block_enter; \ } else { \ hunk ./rts/StgMiscClosures.cmm 247 - R1 = StgInd_indirectee(R1); \ - ENTER(); \ + R1 = StgInd_indirectee(R1); \ + ENTER(); \ } \ } hunk ./rts/sm/GC.c 368 if (gc_type == GC_SEQ) { for (n = 0; n < n_capabilities; n++) { markCapability(mark_root, gct, &capabilities[n], - rtsTrue/*prune sparks*/); + rtsTrue/*don't mark sparks*/); } } else { hunk ./rts/sm/GC.c 371 - markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/); + markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/); } markScheduler(mark_root, gct); hunk ./rts/sm/GC.c 412 if (gc_type != GC_LOCAL) gcStablePtrTable(); #ifdef THREADED_RTS - if (n_gc_threads == 1) { + switch (gc_type) + { + case GC_LOCAL: + // don't touch the spark pool for GC_LOCAL: other Capabilities may + // be stealing from it. It only contains global pointers, so we + // are safe to ignore it. + break; + + case GC_SEQ: for (n = 0; n < n_capabilities; n++) { pruneSparkQueue(&capabilities[n]); } hunk ./rts/sm/GC.c 424 - } else { + break; + + case GC_PAR: pruneSparkQueue(gct->cap); hunk ./rts/sm/GC.c 428 + break; } #endif hunk ./rts/sm/GC.c 541 } else { prev->link = next; } - freeGroup(bd); + freeGroup_sync(bd); gen->n_old_blocks--; } else hunk ./rts/sm/GC.c 574 } else // not compacted { - freeChain(gen->old_blocks); + freeChain_sync(gen->old_blocks); if (N >= global_gen_no) { hunk ./rts/sm/GC.c 577 - freeChain(gen->prim_blocks); + freeChain_sync(gen->prim_blocks); gen->prim_blocks = NULL; gen->n_prim_blocks = 0; gen->n_prim_words = 0; hunk ./rts/sm/GC.c 595 * collection from large_objects. Any objects left on the * large_objects list are therefore dead, so we free them here. */ - freeChain(gen->large_objects); + freeChain_sync(gen->large_objects); gen->large_objects = gen->scavenged_large_objects; gen->n_large_blocks = gen->n_scavenged_large_blocks; gen->n_new_large_blocks = 0; hunk ./rts/sm/GC.c 652 if (gct->gc_type == GC_LOCAL && isNonLocalGen(gen)) continue; if (gen->bitmap != NULL) { - freeGroup(gen->bitmap); + freeGroup_sync(gen->bitmap); gen->bitmap = NULL; } } hunk ./rts/sm/GC.c 1333 g = gen->no; if (g != 0) { for (i = 0; i < n_capabilities; i++) { - freeChain(capabilities[i].mut_lists[g]); - capabilities[i].mut_lists[g] = allocBlock(); + freeChain_sync(capabilities[i].mut_lists[g]); + capabilities[i].mut_lists[g] = allocBlock_sync(); } } hunk ./rts/sm/GC.c 1601 if (gct->mark_stack_top_bd->link != NULL) { debugTrace(DEBUG_gc, "mark stack: %d blocks", countBlocks(gct->mark_stack_top_bd)); - freeChain(gct->mark_stack_top_bd->link); + freeChain_sync(gct->mark_stack_top_bd->link); gct->mark_stack_top_bd->link = NULL; ASSERT(gct->mark_stack_top_bd == gct->mark_stack_bd); } hunk ./rts/sm/GC.c 1764 { if (RtsFlags.GcFlags.heapSizeSuggestion == 0) { + ACQUIRE_SM_LOCK; // needed due to use of allocGroup/freeGroup resizeNursery(gct->cap, RtsFlags.GcFlags.minAllocAreaSize); hunk ./rts/sm/GC.c 1766 + RELEASE_SM_LOCK; } else { hunk ./rts/sm/GCUtils.c 93 void freeChain_sync(bdescr *bd) { - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); - freeChain(bd); - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + // GC_LOCAL uses the ordinary locking protocol for the block + // allocator because it runs concurrently with the mutator. + if (gct->gc_type == GC_LOCAL) { + return freeChain_lock(bd); + } else { + ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); + freeChain(bd); + RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + } +} + +void +freeGroup_sync(bdescr *bd) +{ + // GC_LOCAL uses the ordinary locking protocol for the block + // allocator because it runs concurrently with the mutator. + if (gct->gc_type == GC_LOCAL) { + return freeGroup_lock(bd); + } else { + ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); + freeGroup(bd); + RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + } } /* ----------------------------------------------------------------------------- hunk ./rts/sm/GCUtils.h 23 bdescr *allocBlock_sync(void); void freeChain_sync(bdescr *bd); +void freeGroup_sync(bdescr *bd); void push_scanned_block (bdescr *bd, gen_workspace *ws); StgPtr todo_block_full (nat size, gen_workspace *ws); hunk ./rts/sm/Sweep.c 21 #include "Sweep.h" #include "Compact.h" #include "Trace.h" +#include "GCUtils.h" void sweep(generation *gen) hunk ./rts/sm/Sweep.c 144 prev->link = next; } debugTrace(DEBUG_gc, "sweepPrimArea: free block at %p", bd->start); - freeGroup(bd); + freeGroup_sync(bd); gen->n_prim_blocks--; } else { prev = bd; } [move debugging utils into a separate file Simon Marlow **20101021095634 Ignore-this: c9534c23e252d90f3a197a097dadf1bb ] { hunk ./rts/Printer.c 899 #endif /* HAVE_BFD_H */ -void findPtr(P_ p, int); /* keep gcc -Wall happy */ - -int searched = 0; - -static int -findPtrBlocks (StgPtr p, bdescr *bd, StgPtr arr[], int arr_size, int i) -{ - StgPtr q, r, end; - for (; bd; bd = bd->link) { - searched++; - for (q = bd->start; q < bd->free; q++) { - if (UNTAG_CLOSURE((StgClosure*)*q) == (StgClosure *)p) { - if (i < arr_size) { - for (r = bd->start; r < bd->free; r = end) { - // skip over zeroed-out slop - while (*r == 0) r++; - if (!LOOKS_LIKE_CLOSURE_PTR(r)) { - debugBelch("%p found at %p, no closure at %p\n", - p, q, r); - break; - } - end = r + closure_sizeW((StgClosure*)r); - if (q < end) { - debugBelch("%p = ", r); - printClosure((StgClosure *)r); - arr[i++] = r; - break; - } - } - if (r >= bd->free) { - debugBelch("%p found at %p, closure?", p, q); - } - } else { - return i; - } - } - } - } - return i; -} - -void -findPtr(P_ p, int follow) -{ - nat g; - bdescr *bd; - const int arr_size = 1024; - StgPtr arr[arr_size]; - int i = 0; - searched = 0; - - for (g = 0; g < total_generations; g++) { - bd = all_generations[g].blocks; - i = findPtrBlocks(p,bd,arr,arr_size,i); - bd = all_generations[g].large_objects; - i = findPtrBlocks(p,bd,arr,arr_size,i); - if (i >= arr_size) return; - } - if (follow && i == 1) { - debugBelch("-->\n"); - findPtr(arr[0], 1); - } -} /* prettyPrintClosure() is for printing out a closure using the data constructor names found in the info tables. Closures are printed in a fashion that resembles hunk ./rts/package.conf.in 141 , "-u", "base_GHCziConcziSignal_runHandlers_closure" #endif +/* force DebugUtils to be linked in */ +#ifdef DEBUG +#ifdef LEADING_UNDERSCORE + , "-u", "_findPtr" +#else + , "-u", "findPtr" +#endif +#endif + /* Pick up static libraries in preference over dynamic if in earlier search * path. This is important to use the static gmp in preference on Mac OS. * The used option is specific to the Darwin linker. addfile ./rts/sm/DebugUtils.c hunk ./rts/sm/DebugUtils.c 1 +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1994-2010. + * + * Tools for use in gdb. + * + * ---------------------------------------------------------------------------*/ + +#include "PosixSource.h" +#include "Rts.h" + +#include "Capability.h" +#include "Printer.h" + +#include "GCThread.h" +#include "Storage.h" +#include "DebugUtils.h" + +#ifdef DEBUG + +void +findBlockInList(bdescr *bd, bdescr *list) +{ + bdescr *p; + + for (p = list; p != NULL; p = p->link) { + ASSERT(bd != p); + } +} + +// useful for locating a block from within gdb. +void +findBlock (bdescr *bd) +{ + nat n, g, i; + generation *gen; + + for (n = 0; n < total_generations; n++) { + g = all_generations[n].no; + for (i = 0; i < n_capabilities; i++) { + if (g > 0) { + findBlockInList(bd, capabilities[i].mut_lists[g]); + } + findBlockInList(bd, gc_threads[i]->gens[n].part_list); + findBlockInList(bd, gc_threads[i]->gens[n].scavd_list); + findBlockInList(bd, gc_threads[i]->gens[n].todo_bd); + } + + gen = &all_generations[n]; + findBlockInList(bd, gen->blocks); + findBlockInList(bd, gen->old_blocks); + findBlockInList(bd, gen->prim_blocks); + findBlockInList(bd, gen->large_objects); + findBlockInList(bd, gen->scavenged_large_objects); + } + + for (i = 0; i < n_capabilities; i++) { + findBlockInList(bd, nurseries[i].blocks); + findBlockInList(bd, gc_threads[i]->mark_stack_top_bd); + } +} + +int searched = 0; + +static int +findPtrBlocks (char *msg, StgPtr p, bdescr *bd, StgPtr arr[], int arr_size, int i) +{ + StgPtr q, r, end; + nat block_no = 0; + + for (; bd; bd = bd->link, block_no++) { + searched++; + for (q = bd->start; q < bd->free; q++) { + if (UNTAG_CLOSURE((StgClosure*)*q) == (StgClosure *)p) { + debugBelch("found at %p in block %d of %s\n", + q, block_no, msg); + if (i < arr_size) { + for (r = bd->start; r < bd->free; r = end) { + // skip over zeroed-out slop + while (*r == 0) r++; + if (!LOOKS_LIKE_CLOSURE_PTR(r)) { + debugBelch("no closure at %p\n", r); + break; + } + end = r + closur