Index: memcheck/mc_main.c
===================================================================
--- memcheck/mc_main.c	(revision 6776)
+++ memcheck/mc_main.c	(working copy)
@@ -4964,6 +4964,7 @@
       "Copyright (C) 2002-2007, and GNU GPL'd, by Julian Seward et al.");
    VG_(details_bug_reports_to)  (VG_BUGS_TO);
    VG_(details_avg_translation_sizeB) ( 556 );
+   VG_(details_shadow_guest_multiplier)(1);
 
    VG_(basic_tool_funcs)          (mc_post_clo_init,
                                    MC_(instrument),
Index: memcheck/mc_translate.c
===================================================================
--- memcheck/mc_translate.c	(revision 6776)
+++ memcheck/mc_translate.c	(working copy)
@@ -1034,7 +1034,7 @@
       /* complainIfUndefined(mce, atom); */
    } else {
       /* Do a plain shadow Put. */
-      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
+      stmt( mce->bb, IRStmt_Put(offset + mce->layout->shadow_offsetB, vatom) );
    }
 }
 
@@ -1074,7 +1074,7 @@
       /* Do a cloned version of the Put that refers to the shadow
          area. */
       IRRegArray* new_descr 
-         = mkIRRegArray( descr->base + mce->layout->total_sizeB, 
+         = mkIRRegArray( descr->base + mce->layout->shadow_offsetB, 
                          tyS, descr->nElems);
       stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
    }
@@ -1095,7 +1095,7 @@
    } else {
       /* return a cloned version of the Get that refers to the shadow
          area. */
-      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
+      return IRExpr_Get( offset + mce->layout->shadow_offsetB, tyS );
    }
 }
 
@@ -1120,7 +1120,7 @@
       /* return a cloned version of the Get that refers to the shadow
          area. */
       IRRegArray* new_descr 
-         = mkIRRegArray( descr->base + mce->layout->total_sizeB, 
+         = mkIRRegArray( descr->base + mce->layout->shadow_offsetB, 
                          tyS, descr->nElems);
       return IRExpr_GetI( new_descr, ix, bias );
    }
Index: include/pub_tool_tooliface.h
===================================================================
--- include/pub_tool_tooliface.h	(revision 6776)
+++ include/pub_tool_tooliface.h	(working copy)
@@ -268,6 +268,13 @@
    setting is optional. */
 extern void VG_(details_avg_translation_sizeB) ( UInt size );
 
+/* How much space should be allocated for shadow data describing the
+   guest program's registers? The size will be this multiplier times
+   the size of the registers themselves. So 0 means no shadow guest
+   state, 1 means shadow guest state of the same size as the regular
+   guest state (as Memcheck has one V bit per data bit), and so on. */
+extern void VG_(details_shadow_guest_multiplier)( Int size );
+
 /* String printed if an `tl_assert' assertion fails or VG_(tool_panic)
    is called.  Should probably be an email address. */
 extern void VG_(details_bug_reports_to)   ( Char* bug_reports_to );
Index: coregrind/m_sigframe/sigframe-ppc32-linux.c
===================================================================
--- coregrind/m_sigframe/sigframe-ppc32-linux.c	(revision 6776)
+++ coregrind/m_sigframe/sigframe-ppc32-linux.c	(working copy)
@@ -94,7 +94,7 @@
 struct vg_sig_private {
    UInt magicPI;
    UInt sigNo_private;
-   VexGuestPPC32State shadow;
+   VexGuestPPC32State shadow[MAX_SHADOW_GUEST_MULTIPLIER];
 };
 
 /* Structure put on stack for signal handlers with SA_SIGINFO clear. */
@@ -116,14 +116,14 @@
 };
 
 #define SET_SIGNAL_LR(zztst, zzval)                          \
-   do { tst->arch.vex.guest_LR = (zzval);                    \
+   do { tst->arch->vex.guest_LR = (zzval);                    \
       VG_TRACK( post_reg_write, Vg_CoreSignal, tst->tid,     \
                 offsetof(VexGuestPPC32State,guest_LR),       \
                 sizeof(UWord) );                             \
    } while (0)
 
 #define SET_SIGNAL_GPR(zztst, zzn, zzval)                    \
-   do { tst->arch.vex.guest_GPR##zzn = (zzval);              \
+   do { tst->arch->vex.guest_GPR##zzn = (zzval);              \
       VG_TRACK( post_reg_write, Vg_CoreSignal, tst->tid,     \
                 offsetof(VexGuestPPC32State,guest_GPR##zzn), \
                 sizeof(UWord) );                             \
@@ -139,20 +139,20 @@
    VG_TRACK( pre_mem_write, Vg_CoreSignal, tst->tid, "signal frame mcontext",
              (Addr)mc, sizeof(struct vki_pt_regs) );
 
-#  define DO(gpr)  mc->mc_gregs[VKI_PT_R0+gpr] = tst->arch.vex.guest_GPR##gpr
+#  define DO(gpr)  mc->mc_gregs[VKI_PT_R0+gpr] = tst->arch->vex.guest_GPR##gpr
    DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
    DO(8);  DO(9);  DO(10); DO(11); DO(12); DO(13); DO(14); DO(15);
    DO(16); DO(17); DO(18); DO(19); DO(20); DO(21); DO(22); DO(23);
    DO(24); DO(25); DO(26); DO(27); DO(28); DO(29); DO(30); DO(31);
 #  undef DO
 
-   mc->mc_gregs[VKI_PT_NIP]     = tst->arch.vex.guest_CIA;
+   mc->mc_gregs[VKI_PT_NIP]     = tst->arch->vex.guest_CIA;
    mc->mc_gregs[VKI_PT_MSR]     = 0xf032;   /* pretty arbitrary */
-   mc->mc_gregs[VKI_PT_ORIG_R3] = tst->arch.vex.guest_GPR3;
-   mc->mc_gregs[VKI_PT_CTR]     = tst->arch.vex.guest_CTR;
-   mc->mc_gregs[VKI_PT_LNK]     = tst->arch.vex.guest_LR;
-   mc->mc_gregs[VKI_PT_XER]     = LibVEX_GuestPPC32_get_XER(&tst->arch.vex);
-   mc->mc_gregs[VKI_PT_CCR]     = LibVEX_GuestPPC32_get_CR(&tst->arch.vex);
+   mc->mc_gregs[VKI_PT_ORIG_R3] = tst->arch->vex.guest_GPR3;
+   mc->mc_gregs[VKI_PT_CTR]     = tst->arch->vex.guest_CTR;
+   mc->mc_gregs[VKI_PT_LNK]     = tst->arch->vex.guest_LR;
+   mc->mc_gregs[VKI_PT_XER]     = LibVEX_GuestPPC32_get_XER(&tst->arch->vex);
+   mc->mc_gregs[VKI_PT_CCR]     = LibVEX_GuestPPC32_get_CR(&tst->arch->vex);
    mc->mc_gregs[VKI_PT_MQ]      = 0;
    mc->mc_gregs[VKI_PT_TRAP]    = 0;
    mc->mc_gregs[VKI_PT_DAR]     = fault_addr;
@@ -455,7 +455,7 @@
 //.. 
 //..    // FIXME: save_i387(&tst->arch, fpstate);
 //.. 
-//.. #  define SC2(reg,REG)  sc->reg = tst->arch.vex.guest_##REG
+//.. #  define SC2(reg,REG)  sc->reg = tst->arch->vex.guest_##REG
 //..    SC2(gs,GS);
 //..    SC2(fs,FS);
 //..    SC2(es,ES);
@@ -472,7 +472,7 @@
 //.. 
 //..    SC2(eip,EIP);
 //..    SC2(cs,CS);
-//..    sc->eflags = LibVEX_GuestX86_get_eflags(&tst->arch.vex);
+//..    sc->eflags = LibVEX_GuestX86_get_eflags(&tst->arch->vex);
 //..    SC2(ss,SS);
 //..    /* XXX esp_at_signal */
 //..    /* XXX trapno */
@@ -540,9 +540,9 @@
 //.. {
 //..    frame->sigNo_private = sigNo;
 //..    frame->magicPI       = 0x31415927;
-//..    frame->vex_shadow    = tst->arch.vex_shadow;
+//..    frame->vex_shadow    = tst->arch->vex_shadow;
 //..    /* HACK ALERT */
-//..    frame->vex           = tst->arch.vex;
+//..    frame->vex           = tst->arch->vex;
 //..    /* end HACK ALERT */
 //..    frame->mask          = tst->sig_mask;
 //..    frame->handlerflags  = flags;
@@ -637,7 +637,7 @@
 //..    /* SIGILL defines addr to be the faulting address */
 //..    if (sigNo == VKI_SIGILL && siginfo->si_code > 0)
 //..       frame->sigInfo._sifields._sigfault._addr 
-//..          = (void*)tst->arch.vex.guest_CIA;
+//..          = (void*)tst->arch->vex.guest_CIA;
 //.. 
 //..    synth_ucontext(tst->tid, siginfo, mask, &frame->uContext, &frame->fpstate);
 //.. 
@@ -684,13 +684,13 @@
    /* Set up the stack chain pointer */
    VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame",
              sp, sizeof(UWord) );
-   *(Addr *)sp = tst->arch.vex.guest_GPR1;
+   *(Addr *)sp = tst->arch->vex.guest_GPR1;
    VG_TRACK( post_mem_write, Vg_CoreSignal, tid, 
              sp, sizeof(UWord) );
 
    faultaddr = (Addr)siginfo->_sifields._sigfault._addr;
    if (sigNo == VKI_SIGILL && siginfo->si_code > 0)
-      faultaddr = tst->arch.vex.guest_CIA;
+      faultaddr = tst->arch->vex.guest_CIA;
 
    if (flags & VKI_SA_SIGINFO) {
       struct rt_sigframe *frame = (struct rt_sigframe *) sp;
@@ -749,11 +749,13 @@
 
    priv->magicPI       = 0x31415927;
    priv->sigNo_private = sigNo;
-   priv->shadow        = tst->arch.vex_shadow;
+   VG_(memcpy)(priv->shadow, tst->arch->vex_shadow,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestPPC32State));
 
    SET_SIGNAL_GPR(tid, 1, sp);
    SET_SIGNAL_GPR(tid, 3, sigNo);
-   tst->arch.vex.guest_CIA = (Addr) handler;
+   tst->arch->vex.guest_CIA = (Addr) handler;
 
 //..    Addr		esp;
 //..    ThreadState* tst = VG_(get_ThreadState)(tid);
@@ -770,14 +772,14 @@
 //..    SET_SIGNAL_ESP(tid, esp);
 //.. 
 //..    //VG_(printf)("handler = %p\n", handler);
-//..    tst->arch.vex.guest_CIA = (Addr) handler;
+//..    tst->arch->vex.guest_CIA = (Addr) handler;
 //..    /* This thread needs to be marked runnable, but we leave that the
 //..       caller to do. */
 
    if (0)
       VG_(printf)("pushed signal frame; %R1 now = %p, "
                   "next %%CIA = %p, status=%d\n", 
-		  sp, tst->arch.vex.guest_CIA, tst->status);
+		  sp, tst->arch->vex.guest_CIA, tst->status);
 }
 
 
@@ -803,9 +805,9 @@
 //..    }
 //..    tst->sig_mask        = frame->mask;
 //..    tst->tmp_sig_mask    = frame->mask;
-//..    tst->arch.vex_shadow = frame->vex_shadow;
+//..    tst->arch->vex_shadow = frame->vex_shadow;
 //..    /* HACK ALERT */
-//..    tst->arch.vex        = frame->vex;
+//..    tst->arch->vex        = frame->vex;
 //..    /* end HACK ALERT */
 //..    *sigNo               = frame->sigNo_private;
 //..    return True;
@@ -816,23 +818,23 @@
 //..                          struct vki_sigcontext *sc )
 //.. //..                          struct vki_sigcontext *sc, struct _vki_fpstate *fpstate )
 //.. {
-//..    tst->arch.vex.guest_EAX     = sc->eax;
-//..    tst->arch.vex.guest_ECX     = sc->ecx;
-//..    tst->arch.vex.guest_EDX     = sc->edx;
-//..    tst->arch.vex.guest_EBX     = sc->ebx;
-//..    tst->arch.vex.guest_EBP     = sc->ebp; 
-//..    tst->arch.vex.guest_ESP     = sc->esp;
-//..    tst->arch.vex.guest_ESI     = sc->esi;
-//..    tst->arch.vex.guest_EDI     = sc->edi;
-//.. //::    tst->arch.vex.guest_eflags  = sc->eflags;
-//.. //::    tst->arch.vex.guest_EIP     = sc->eip;
+//..    tst->arch->vex.guest_EAX     = sc->eax;
+//..    tst->arch->vex.guest_ECX     = sc->ecx;
+//..    tst->arch->vex.guest_EDX     = sc->edx;
+//..    tst->arch->vex.guest_EBX     = sc->ebx;
+//..    tst->arch->vex.guest_EBP     = sc->ebp; 
+//..    tst->arch->vex.guest_ESP     = sc->esp;
+//..    tst->arch->vex.guest_ESI     = sc->esi;
+//..    tst->arch->vex.guest_EDI     = sc->edi;
+//.. //::    tst->arch->vex.guest_eflags  = sc->eflags;
+//.. //::    tst->arch->vex.guest_EIP     = sc->eip;
 //.. 
-//..    tst->arch.vex.guest_CS      = sc->cs; 
-//..    tst->arch.vex.guest_SS      = sc->ss;
-//..    tst->arch.vex.guest_DS      = sc->ds;
-//..    tst->arch.vex.guest_ES      = sc->es;
-//..    tst->arch.vex.guest_FS      = sc->fs;
-//..    tst->arch.vex.guest_GS      = sc->gs;
+//..    tst->arch->vex.guest_CS      = sc->cs; 
+//..    tst->arch->vex.guest_SS      = sc->ss;
+//..    tst->arch->vex.guest_DS      = sc->ds;
+//..    tst->arch->vex.guest_ES      = sc->es;
+//..    tst->arch->vex.guest_FS      = sc->fs;
+//..    tst->arch->vex.guest_GS      = sc->gs;
 //.. 
 //.. //::    restore_i387(&tst->arch, fpstate);
 //.. }
@@ -872,7 +874,7 @@
    tst = VG_(get_ThreadState)(tid);
 
    /* Check that the stack frame looks valid */
-   sp = tst->arch.vex.guest_GPR1;
+   sp = tst->arch->vex.guest_GPR1;
    vg_assert(VG_IS_16_ALIGNED(sp));
    /* JRS 17 Nov 05: This code used to check that *sp -- which should
       have been set by the stwu at the start of the handler -- points
@@ -901,32 +903,34 @@
 
    sigNo = priv->sigNo_private;
 
-#  define DO(gpr)  tst->arch.vex.guest_GPR##gpr = mc->mc_gregs[VKI_PT_R0+gpr]
+#  define DO(gpr)  tst->arch->vex.guest_GPR##gpr = mc->mc_gregs[VKI_PT_R0+gpr]
    DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
    DO(8);  DO(9);  DO(10); DO(11); DO(12); DO(13); DO(14); DO(15);
    DO(16); DO(17); DO(18); DO(19); DO(20); DO(21); DO(22); DO(23);
    DO(24); DO(25); DO(26); DO(27); DO(28); DO(29); DO(30); DO(31);
 #  undef DO
 
-   tst->arch.vex.guest_CIA = mc->mc_gregs[VKI_PT_NIP];
+   tst->arch->vex.guest_CIA = mc->mc_gregs[VKI_PT_NIP];
 
    // Umm ... ? (jrs 2005 July 8)
-   // tst->arch.m_orig_gpr3 = mc->mc_gregs[VKI_PT_ORIG_R3];
+   // tst->arch->m_orig_gpr3 = mc->mc_gregs[VKI_PT_ORIG_R3];
 
-   LibVEX_GuestPPC32_put_CR( mc->mc_gregs[VKI_PT_CCR], &tst->arch.vex );
+   LibVEX_GuestPPC32_put_CR( mc->mc_gregs[VKI_PT_CCR], &tst->arch->vex );
 
-   tst->arch.vex.guest_LR  = mc->mc_gregs[VKI_PT_LNK];
-   tst->arch.vex.guest_CTR = mc->mc_gregs[VKI_PT_CTR];
-   LibVEX_GuestPPC32_put_XER( mc->mc_gregs[VKI_PT_XER], &tst->arch.vex );
+   tst->arch->vex.guest_LR  = mc->mc_gregs[VKI_PT_LNK];
+   tst->arch->vex.guest_CTR = mc->mc_gregs[VKI_PT_CTR];
+   LibVEX_GuestPPC32_put_XER( mc->mc_gregs[VKI_PT_XER], &tst->arch->vex );
 
-   tst->arch.vex_shadow = priv->shadow;
+   VG_(memcpy)(tst->arch->vex_shadow, priv->shadow,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestPPC32State));
 
    VG_TRACK(die_mem_stack_signal, sp, frame_size);
 
    if (VG_(clo_trace_signals))
       VG_(message)(Vg_DebugMsg,
                    "vg_pop_signal_frame (thread %d): isRT=%d valid magic; EIP=%p",
-                   tid, has_siginfo, tst->arch.vex.guest_CIA);
+                   tid, has_siginfo, tst->arch->vex.guest_CIA);
 
    /* tell the tools */
    VG_TRACK( post_deliver_signal, tid, sigNo );
@@ -939,7 +943,7 @@
 //..    tst = VG_(get_ThreadState)(tid);
 //.. 
 //..    /* Correctly reestablish the frame base address. */
-//..    esp   = tst->arch.vex.guest_ESP;
+//..    esp   = tst->arch->vex.guest_ESP;
 //.. 
 //..    if (!isRT)
 //..       size = restore_sigframe(tst, (struct sigframe *)esp, &sigNo);
@@ -952,7 +956,7 @@
 //..       VG_(message)(
 //..          Vg_DebugMsg, 
 //..          "VG_(signal_return) (thread %d): isRT=%d valid magic; EIP=%p", 
-//..          tid, isRT, tst->arch.vex.guest_EIP);
+//..          tid, isRT, tst->arch->vex.guest_EIP);
 //.. 
 //..    /* tell the tools */
 //..    VG_TRACK( post_deliver_signal, tid, sigNo );
Index: coregrind/m_sigframe/sigframe-amd64-linux.c
===================================================================
--- coregrind/m_sigframe/sigframe-amd64-linux.c	(revision 6776)
+++ coregrind/m_sigframe/sigframe-amd64-linux.c	(working copy)
@@ -93,7 +93,7 @@
 
    /* XXX This is wrong.  Surely we should store the shadow values
       into the shadow memory behind the actual values? */
-   VexGuestAMD64State vex_shadow;
+   VexGuestAMD64State vex_shadow[MAX_SHADOW_GUEST_MULTIPLIER];
 
    /* HACK ALERT */
    VexGuestAMD64State vex;
@@ -337,7 +337,7 @@
 
    // FIXME: save_i387(&tst->arch, fpstate);
 
-#  define SC2(reg,REG)  sc->reg = tst->arch.vex.guest_##REG
+#  define SC2(reg,REG)  sc->reg = tst->arch->vex.guest_##REG
    SC2(r8,R8);
    SC2(r9,R9);
    SC2(r10,R10);
@@ -356,7 +356,7 @@
    SC2(rsp,RSP);
 
    SC2(rip,RIP);
-   sc->eflags = LibVEX_GuestAMD64_get_rflags(&tst->arch.vex);
+   sc->eflags = LibVEX_GuestAMD64_get_rflags(&tst->arch->vex);
    // FIXME: SC2(cs,CS);
    // FIXME: SC2(gs,GS);
    // FIXME: SC2(fs,FS);
@@ -422,9 +422,10 @@
 {
    frame->sigNo_private = sigNo;
    frame->magicPI       = 0x31415927;
-   frame->vex_shadow    = tst->arch.vex_shadow;
+   VG_(memcpy)(frame->vex_shadow, tst->arch->vex_shadow,
+	       VG_(details).shadow_guest_multiplier*sizeof(VexGuestArchState));
    /* HACK ALERT */
-   frame->vex           = tst->arch.vex;
+   frame->vex           = tst->arch->vex;
    /* end HACK ALERT */
    frame->mask          = tst->sig_mask;
    frame->handlerflags  = flags;
@@ -464,7 +465,7 @@
    /* SIGILL defines addr to be the faulting address */
    if (sigNo == VKI_SIGILL && siginfo->si_code > 0)
       frame->sigInfo._sifields._sigfault._addr 
-         = (void*)tst->arch.vex.guest_RIP;
+         = (void*)tst->arch->vex.guest_RIP;
 
    synth_ucontext(tst->tid, siginfo, mask, &frame->uContext, &frame->fpstate);
 
@@ -499,17 +500,17 @@
    VG_TRACK( post_reg_write, Vg_CoreSignal, tid, VG_O_STACK_PTR, sizeof(Addr));
 
    //VG_(printf)("handler = %p\n", handler);
-   tst->arch.vex.guest_RIP = (Addr) handler;
-   tst->arch.vex.guest_RDI = (ULong) siginfo->si_signo;
-   tst->arch.vex.guest_RSI = (Addr) &frame->sigInfo;
-   tst->arch.vex.guest_RDX = (Addr) &frame->uContext;
+   tst->arch->vex.guest_RIP = (Addr) handler;
+   tst->arch->vex.guest_RDI = (ULong) siginfo->si_signo;
+   tst->arch->vex.guest_RSI = (Addr) &frame->sigInfo;
+   tst->arch->vex.guest_RDX = (Addr) &frame->uContext;
    /* This thread needs to be marked runnable, but we leave that the
       caller to do. */
 
    if (0)
       VG_(printf)("pushed signal frame; %%RSP now = %p, "
                   "next %%RIP = %p, status=%d\n", 
-		  rsp, tst->arch.vex.guest_RIP, tst->status);
+		  rsp, tst->arch->vex.guest_RIP, tst->status);
 }
 
 
@@ -535,9 +536,10 @@
    }
    tst->sig_mask        = frame->mask;
    tst->tmp_sig_mask    = frame->mask;
-   tst->arch.vex_shadow = frame->vex_shadow;
+   VG_(memcpy)(tst->arch->vex_shadow, frame->vex_shadow,
+	       VG_(details).shadow_guest_multiplier*sizeof(VexGuestArchState));
    /* HACK ALERT */
-   tst->arch.vex        = frame->vex;
+   tst->arch->vex       = frame->vex;
    /* end HACK ALERT */
    *sigNo               = frame->sigNo_private;
    return True;
@@ -548,28 +550,28 @@
                          struct vki_sigcontext *sc, 
                          struct _vki_fpstate *fpstate )
 {
-   tst->arch.vex.guest_RAX     = sc->rax;
-   tst->arch.vex.guest_RCX     = sc->rcx;
-   tst->arch.vex.guest_RDX     = sc->rdx;
-   tst->arch.vex.guest_RBX     = sc->rbx;
-   tst->arch.vex.guest_RBP     = sc->rbp; 
-   tst->arch.vex.guest_RSP     = sc->rsp;
-   tst->arch.vex.guest_RSI     = sc->rsi;
-   tst->arch.vex.guest_RDI     = sc->rdi;
-   tst->arch.vex.guest_R8      = sc->r8;
-   tst->arch.vex.guest_R9      = sc->r9;
-   tst->arch.vex.guest_R10     = sc->r10;
-   tst->arch.vex.guest_R11     = sc->r11;
-   tst->arch.vex.guest_R12     = sc->r12;
-   tst->arch.vex.guest_R13     = sc->r13;
-   tst->arch.vex.guest_R14     = sc->r14;
-   tst->arch.vex.guest_R15     = sc->r15;
-//::    tst->arch.vex.guest_rflags  = sc->rflags;
-   tst->arch.vex.guest_RIP     = sc->rip;
+   tst->arch->vex.guest_RAX     = sc->rax;
+   tst->arch->vex.guest_RCX     = sc->rcx;
+   tst->arch->vex.guest_RDX     = sc->rdx;
+   tst->arch->vex.guest_RBX     = sc->rbx;
+   tst->arch->vex.guest_RBP     = sc->rbp; 
+   tst->arch->vex.guest_RSP     = sc->rsp;
+   tst->arch->vex.guest_RSI     = sc->rsi;
+   tst->arch->vex.guest_RDI     = sc->rdi;
+   tst->arch->vex.guest_R8      = sc->r8;
+   tst->arch->vex.guest_R9      = sc->r9;
+   tst->arch->vex.guest_R10     = sc->r10;
+   tst->arch->vex.guest_R11     = sc->r11;
+   tst->arch->vex.guest_R12     = sc->r12;
+   tst->arch->vex.guest_R13     = sc->r13;
+   tst->arch->vex.guest_R14     = sc->r14;
+   tst->arch->vex.guest_R15     = sc->r15;
+//::    tst->arch->vex.guest_rflags  = sc->rflags;
+   tst->arch->vex.guest_RIP     = sc->rip;
 
-//::    tst->arch.vex.guest_CS      = sc->cs; 
-//::    tst->arch.vex.guest_FS      = sc->fs;
-//::    tst->arch.vex.guest_GS      = sc->gs;
+//::    tst->arch->vex.guest_CS      = sc->cs; 
+//::    tst->arch->vex.guest_FS      = sc->fs;
+//::    tst->arch->vex.guest_GS      = sc->gs;
 
 //::    restore_i387(&tst->arch, fpstate);
 }
@@ -598,7 +600,7 @@
    tst = VG_(get_ThreadState)(tid);
 
    /* Correctly reestablish the frame base address. */
-   rsp   = tst->arch.vex.guest_RSP;
+   rsp   = tst->arch->vex.guest_RSP;
 
    size = restore_rt_sigframe(tst, (struct rt_sigframe *)rsp, &sigNo);
 
@@ -609,7 +611,7 @@
       VG_(message)(
          Vg_DebugMsg, 
          "VG_(signal_return) (thread %d): isRT=%d valid magic; RIP=%p", 
-         tid, isRT, tst->arch.vex.guest_RIP);
+         tid, isRT, tst->arch->vex.guest_RIP);
 
    /* tell the tools */
    VG_TRACK( post_deliver_signal, tid, sigNo );
Index: coregrind/m_sigframe/sigframe-ppc64-linux.c
===================================================================
--- coregrind/m_sigframe/sigframe-ppc64-linux.c	(revision 6776)
+++ coregrind/m_sigframe/sigframe-ppc64-linux.c	(working copy)
@@ -97,7 +97,7 @@
 struct vg_sig_private {
    UInt magicPI;
    UInt sigNo_private;
-   VexGuestPPC64State shadow;
+   VexGuestPPC64State shadow[MAX_SHADOW_GUEST_MULTIPLIER];
 };
 
 /* Structure put on stack for all signal handlers. */
@@ -113,14 +113,14 @@
 };
 
 #define SET_SIGNAL_LR(zztst, zzval)                          \
-   do { tst->arch.vex.guest_LR = (zzval);                    \
+   do { tst->arch->vex.guest_LR = (zzval);                    \
       VG_TRACK( post_reg_write, Vg_CoreSignal, tst->tid,     \
                 offsetof(VexGuestPPC64State,guest_LR),       \
                 sizeof(UWord) );                             \
    } while (0)
 
 #define SET_SIGNAL_GPR(zztst, zzn, zzval)                    \
-   do { tst->arch.vex.guest_GPR##zzn = (zzval);              \
+   do { tst->arch->vex.guest_GPR##zzn = (zzval);              \
       VG_TRACK( post_reg_write, Vg_CoreSignal, tst->tid,     \
                 offsetof(VexGuestPPC64State,guest_GPR##zzn), \
                 sizeof(UWord) );                             \
@@ -215,13 +215,13 @@
    /* Set up the stack chain pointer */
    VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame",
              sp, sizeof(UWord) );
-   *(Addr *)sp = tst->arch.vex.guest_GPR1;
+   *(Addr *)sp = tst->arch->vex.guest_GPR1;
    VG_TRACK( post_mem_write, Vg_CoreSignal, tid, 
              sp, sizeof(UWord) );
 
    faultaddr = (Addr)siginfo->_sifields._sigfault._addr;
    if (sigNo == VKI_SIGILL && siginfo->si_code > 0)
-      faultaddr = tst->arch.vex.guest_CIA;
+      faultaddr = tst->arch->vex.guest_CIA;
 
    VG_(memcpy)(&frame->info, siginfo, sizeof(*siginfo));
    VG_TRACK( post_mem_write, Vg_CoreSignal, tid,
@@ -235,22 +235,22 @@
              (Addr)(&frame->uc), sizeof(frame->uc) );
 
 #  define DO(gpr)  frame->uc.uc_mcontext.gp_regs[VKI_PT_R0+gpr] \
-                      = tst->arch.vex.guest_GPR##gpr 
+                      = tst->arch->vex.guest_GPR##gpr 
    DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
    DO(8);  DO(9);  DO(10); DO(11); DO(12); DO(13); DO(14); DO(15);
    DO(16); DO(17); DO(18); DO(19); DO(20); DO(21); DO(22); DO(23);
    DO(24); DO(25); DO(26); DO(27); DO(28); DO(29); DO(30); DO(31);
 #  undef DO
 
-   frame->uc.uc_mcontext.gp_regs[VKI_PT_NIP]     = tst->arch.vex.guest_CIA;
+   frame->uc.uc_mcontext.gp_regs[VKI_PT_NIP]     = tst->arch->vex.guest_CIA;
    frame->uc.uc_mcontext.gp_regs[VKI_PT_MSR]     = 0xf032;   /* pretty arbitrary */
-   frame->uc.uc_mcontext.gp_regs[VKI_PT_ORIG_R3] = tst->arch.vex.guest_GPR3;
-   frame->uc.uc_mcontext.gp_regs[VKI_PT_CTR]     = tst->arch.vex.guest_CTR;
-   frame->uc.uc_mcontext.gp_regs[VKI_PT_LNK]     = tst->arch.vex.guest_LR;
+   frame->uc.uc_mcontext.gp_regs[VKI_PT_ORIG_R3] = tst->arch->vex.guest_GPR3;
+   frame->uc.uc_mcontext.gp_regs[VKI_PT_CTR]     = tst->arch->vex.guest_CTR;
+   frame->uc.uc_mcontext.gp_regs[VKI_PT_LNK]     = tst->arch->vex.guest_LR;
    frame->uc.uc_mcontext.gp_regs[VKI_PT_XER]     = LibVEX_GuestPPC64_get_XER(
-                                                      &tst->arch.vex);
+                                                      &tst->arch->vex);
    frame->uc.uc_mcontext.gp_regs[VKI_PT_CCR]     = LibVEX_GuestPPC64_get_CR(
-                                                      &tst->arch.vex);
+                                                      &tst->arch->vex);
    //mc->mc_gregs[VKI_PT_MQ]      = 0;
    //mc->mc_gregs[VKI_PT_TRAP]    = 0;
    //mc->mc_gregs[VKI_PT_DAR]     = fault_addr;
@@ -286,17 +286,19 @@
    /* Handler is in fact a standard ppc64-linux function descriptor, 
       so extract the function entry point and also the toc ptr to use. */
    SET_SIGNAL_GPR(tid, 2, (Addr) ((ULong*)handler)[1]);
-   tst->arch.vex.guest_CIA = (Addr) ((ULong*)handler)[0];
+   tst->arch->vex.guest_CIA = (Addr) ((ULong*)handler)[0];
 
    priv = &frame->priv;
    priv->magicPI       = 0x31415927;
    priv->sigNo_private = sigNo;
-   priv->shadow        = tst->arch.vex_shadow;
+   /* Stash the shadow state; each element is a VexGuestPPC64State. */
+   VG_(memcpy)(priv->shadow, tst->arch->vex_shadow,
+               VG_(details).shadow_guest_multiplier*sizeof(VexGuestPPC64State));
 
    if (0)
       VG_(printf)("pushed signal frame; %R1 now = %p, "
                   "next %%CIA = %p, status=%d\n", 
-		  sp, tst->arch.vex.guest_CIA, tst->status);
+		  sp, tst->arch->vex.guest_CIA, tst->status);
 }
 
 
@@ -319,7 +321,7 @@
    tst = VG_(get_ThreadState)(tid);
 
    /* Check that the stack frame looks valid */
-   sp = tst->arch.vex.guest_GPR1;
+   sp = tst->arch->vex.guest_GPR1;
    vg_assert(VG_IS_16_ALIGNED(sp));
    /* JRS 17 Nov 05: This code used to check that *sp -- which should
       have been set by the stwu at the start of the handler -- points
@@ -337,7 +339,7 @@
 
    sigNo = priv->sigNo_private;
 
-#  define DO(gpr)  tst->arch.vex.guest_GPR##gpr \
+#  define DO(gpr)  tst->arch->vex.guest_GPR##gpr \
                       = frame->uc.uc_mcontext.gp_regs[VKI_PT_R0+gpr]
    DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
    DO(8);  DO(9);  DO(10); DO(11); DO(12); DO(13); DO(14); DO(15);
@@ -345,24 +347,26 @@
    DO(24); DO(25); DO(26); DO(27); DO(28); DO(29); DO(30); DO(31);
 #  undef DO
 
-   tst->arch.vex.guest_CIA = frame->uc.uc_mcontext.gp_regs[VKI_PT_NIP];
+   tst->arch->vex.guest_CIA = frame->uc.uc_mcontext.gp_regs[VKI_PT_NIP];
 
    LibVEX_GuestPPC64_put_CR( frame->uc.uc_mcontext.gp_regs[VKI_PT_CCR], 
-                             &tst->arch.vex );
+                             &tst->arch->vex );
 
-   tst->arch.vex.guest_LR  = frame->uc.uc_mcontext.gp_regs[VKI_PT_LNK];
-   tst->arch.vex.guest_CTR = frame->uc.uc_mcontext.gp_regs[VKI_PT_CTR];
+   tst->arch->vex.guest_LR  = frame->uc.uc_mcontext.gp_regs[VKI_PT_LNK];
+   tst->arch->vex.guest_CTR = frame->uc.uc_mcontext.gp_regs[VKI_PT_CTR];
    LibVEX_GuestPPC64_put_XER( frame->uc.uc_mcontext.gp_regs[VKI_PT_XER], 
-                              &tst->arch.vex );
+                              &tst->arch->vex );
 
-   tst->arch.vex_shadow = priv->shadow;
+   /* Restore the shadow state; each element is a VexGuestPPC64State. */
+   VG_(memcpy)(tst->arch->vex_shadow, priv->shadow,
+               VG_(details).shadow_guest_multiplier*sizeof(VexGuestPPC64State));
 
    VG_TRACK(die_mem_stack_signal, sp, frame_size);
 
    if (VG_(clo_trace_signals))
       VG_(message)(Vg_DebugMsg,
                    "vg_pop_signal_frame (thread %d): isRT=%d valid magic; EIP=%p",
-                   tid, has_siginfo, tst->arch.vex.guest_CIA);
+                   tid, has_siginfo, tst->arch->vex.guest_CIA);
 
    /* tell the tools */
    VG_TRACK( post_deliver_signal, tid, sigNo );
Index: coregrind/m_sigframe/sigframe-ppc32-aix5.c
===================================================================
--- coregrind/m_sigframe/sigframe-ppc32-aix5.c	(revision 6776)
+++ coregrind/m_sigframe/sigframe-ppc32-aix5.c	(working copy)
@@ -63,7 +63,7 @@
 struct hacky_sigframe {
    UChar              lower_guardzone[512];  // put nothing here
    VexGuestPPC32State gst;
-   VexGuestPPC32State gshadow;
+   VexGuestPPC32State gshadow[MAX_SHADOW_GUEST_MULTIPLIER];
    UInt               magicPI;
    UInt               sigNo_private;
    UInt               tramp[2];
@@ -85,14 +85,14 @@
 }
 
 #define SET_SIGNAL_LR(zztst, zzval)                          \
-   do { tst->arch.vex.guest_LR = (zzval);                    \
+   do { tst->arch->vex.guest_LR = (zzval);                    \
       VG_TRACK( post_reg_write, Vg_CoreSignal, tst->tid,     \
                 offsetof(VexGuestPPC32State,guest_LR),       \
                 sizeof(UWord) );                             \
    } while (0)
 
 #define SET_SIGNAL_GPR(zztst, zzn, zzval)                    \
-   do { tst->arch.vex.guest_GPR##zzn = (zzval);              \
+   do { tst->arch->vex.guest_GPR##zzn = (zzval);              \
       VG_TRACK( post_reg_write, Vg_CoreSignal, tst->tid,     \
                 offsetof(VexGuestPPC32State,guest_GPR##zzn), \
                 sizeof(UWord) );                             \
@@ -130,11 +130,14 @@
    /* clear it (very conservatively) */
    VG_(memset)(&frame->lower_guardzone, 0, 512);
    VG_(memset)(&frame->gst,     0, sizeof(VexGuestPPC32State));
-   VG_(memset)(&frame->gshadow, 0, sizeof(VexGuestPPC32State));
+   VG_(memset)(&frame->gshadow, 0, (MAX_SHADOW_GUEST_MULTIPLIER *
+				    sizeof(VexGuestPPC32State)));
 
    /* save stuff in frame */
-   frame->gst           = tst->arch.vex;
-   frame->gshadow       = tst->arch.vex_shadow;
+   frame->gst           = tst->arch->vex;
+   VG_(memcpy)(frame->gshadow, tst->arch->vex_shadow,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestPPC32State));
    frame->sigNo_private = sigNo;
    frame->magicPI       = 0x31415927;
 
@@ -143,7 +146,7 @@
    vg_assert(sp == (Addr)&frame->lower_guardzone[256]);
    VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame",
              sp, sizeof(UWord) );
-   *(Addr*)sp = tst->arch.vex.guest_GPR1;
+   *(Addr*)sp = tst->arch->vex.guest_GPR1;
    VG_TRACK( post_mem_write, Vg_CoreSignal, tid,
              sp, sizeof(UWord) );
 
@@ -153,7 +156,7 @@
    SET_SIGNAL_GPR(tid, 3, sigNo);
    SET_SIGNAL_GPR(tid, 4, 0); /* XXX: the siginfo* */
    SET_SIGNAL_GPR(tid, 5, 0); /* XXX: the ucontext* */
-   tst->arch.vex.guest_CIA = ((UWord*)handler)[0];
+   tst->arch->vex.guest_CIA = ((UWord*)handler)[0];
 
    /* set up return trampoline */
    vg_assert(__NR_FAKE_SIGRETURN >= 10000);
@@ -175,7 +178,7 @@
       VG_(printf)("pushed signal frame for sig %d; R1 now = %p, "
                   "next %%CIA = %p, status=%d\n", 
                   sigNo,
-	          sp, tst->arch.vex.guest_CIA, tst->status);
+	          sp, tst->arch->vex.guest_CIA, tst->status);
       VG_(printf)("trampoline is at %p\n",  &frame->tramp[0]);
    }
 }
@@ -194,7 +197,7 @@
    tst = VG_(get_ThreadState)(tid);
 
    /* Check that the stack frame looks valid */
-   sp = tst->arch.vex.guest_GPR1;
+   sp = tst->arch->vex.guest_GPR1;
    vg_assert(VG_IS_16_ALIGNED(sp));
 
    frame = (struct hacky_sigframe*)(sp - 256);
@@ -203,14 +206,16 @@
    /* restore the entire guest state, and shadow, from the
       frame.  Note, as per comments above, this is a kludge - should
       restore it from saved ucontext.  Oh well. */
-   tst->arch.vex = frame->gst;
-   tst->arch.vex_shadow = frame->gshadow;
+   tst->arch->vex = frame->gst;
+   VG_(memcpy)(tst->arch->vex_shadow, frame->gshadow,
+               VG_(details).shadow_guest_multiplier
+               * sizeof(VexGuestPPC32State));
    sigNo = frame->sigNo_private;
 
    if (VG_(clo_trace_signals))
       VG_(message)(Vg_DebugMsg,
                    "vg_pop_signal_frame (thread %d): valid magic; CIA=%p",
-                   tid, tst->arch.vex.guest_CIA);
+                   tid, tst->arch->vex.guest_CIA);
 
    VG_TRACK( die_mem_stack_signal, 
              (Addr)frame, 
Index: coregrind/m_sigframe/sigframe-ppc64-aix5.c
===================================================================
--- coregrind/m_sigframe/sigframe-ppc64-aix5.c	(revision 6776)
+++ coregrind/m_sigframe/sigframe-ppc64-aix5.c	(working copy)
@@ -63,7 +63,7 @@
 struct hacky_sigframe {
    UChar              lower_guardzone[1024];  // put nothing here
    VexGuestPPC64State gst;
-   VexGuestPPC64State gshadow;
+   VexGuestPPC64State gshadow[MAX_SHADOW_GUEST_MULTIPLIER];
    UInt               magicPI;
    UInt               sigNo_private;
    UInt               tramp[2];
@@ -85,14 +85,14 @@
 }
 
 #define SET_SIGNAL_LR(zztst, zzval)                          \
-   do { tst->arch.vex.guest_LR = (zzval);                    \
+   do { tst->arch->vex.guest_LR = (zzval);                    \
       VG_TRACK( post_reg_write, Vg_CoreSignal, tst->tid,     \
                 offsetof(VexGuestPPC64State,guest_LR),       \
                 sizeof(UWord) );                             \
    } while (0)
 
 #define SET_SIGNAL_GPR(zztst, zzn, zzval)                    \
-   do { tst->arch.vex.guest_GPR##zzn = (zzval);              \
+   do { tst->arch->vex.guest_GPR##zzn = (zzval);              \
       VG_TRACK( post_reg_write, Vg_CoreSignal, tst->tid,     \
                 offsetof(VexGuestPPC64State,guest_GPR##zzn), \
                 sizeof(UWord) );                             \
@@ -130,11 +130,14 @@
    /* clear it (very conservatively) */
    VG_(memset)(&frame->lower_guardzone, 0, 1024);
    VG_(memset)(&frame->gst,     0, sizeof(VexGuestPPC64State));
-   VG_(memset)(&frame->gshadow, 0, sizeof(VexGuestPPC64State));
+   VG_(memset)(&frame->gshadow, 0, (MAX_SHADOW_GUEST_MULTIPLIER *
+				    sizeof(VexGuestPPC64State)));
 
    /* save stuff in frame */
-   frame->gst           = tst->arch.vex;
-   frame->gshadow       = tst->arch.vex_shadow;
+   frame->gst           = tst->arch->vex;
+   VG_(memcpy)(frame->gshadow, tst->arch->vex_shadow,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestPPC64State));
    frame->sigNo_private = sigNo;
    frame->magicPI       = 0x31415927;
 
@@ -143,7 +146,7 @@
    vg_assert(sp == (Addr)&frame->lower_guardzone[512]);
    VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame",
              sp, sizeof(UWord) );
-   *(Addr*)sp = tst->arch.vex.guest_GPR1;
+   *(Addr*)sp = tst->arch->vex.guest_GPR1;
    VG_TRACK( post_mem_write, Vg_CoreSignal, tid,
              sp, sizeof(UWord) );
 
@@ -153,7 +156,7 @@
    SET_SIGNAL_GPR(tid, 3, sigNo);
    SET_SIGNAL_GPR(tid, 4, 0); /* XXX: the siginfo* */
    SET_SIGNAL_GPR(tid, 5, 0); /* XXX: the ucontext* */
-   tst->arch.vex.guest_CIA = ((UWord*)handler)[0];
+   tst->arch->vex.guest_CIA = ((UWord*)handler)[0];
 
    /* set up return trampoline */
    vg_assert(__NR_FAKE_SIGRETURN >= 10000);
@@ -175,7 +178,7 @@
       VG_(printf)("pushed signal frame for sig %d; R1 now = %p, "
                   "next %%CIA = %p, status=%d\n", 
                   sigNo,
-	          sp, tst->arch.vex.guest_CIA, tst->status);
+	          sp, tst->arch->vex.guest_CIA, tst->status);
       VG_(printf)("trampoline is at %p\n",  &frame->tramp[0]);
    }
 }
@@ -197,7 +200,7 @@
    tst = VG_(get_ThreadState)(tid);
 
    /* Check that the stack frame looks valid */
-   sp = tst->arch.vex.guest_GPR1;
+   sp = tst->arch->vex.guest_GPR1;
    vg_assert(VG_IS_16_ALIGNED(sp));
 
    /* If the frame is being cleared by some mechanism other than our
@@ -237,14 +240,16 @@
    /* restore the entire guest state, and shadow, from the
       frame.  Note, as per comments above, this is a kludge - should
       restore it from saved ucontext.  Oh well. */
-   tst->arch.vex = frame->gst;
-   tst->arch.vex_shadow = frame->gshadow;
+   tst->arch->vex = frame->gst;
+   VG_(memcpy)(tst->arch->vex_shadow, frame->gshadow,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestPPC64State));
    sigNo = frame->sigNo_private;
 
    if (VG_(clo_trace_signals))
       VG_(message)(Vg_DebugMsg,
                    "vg_pop_signal_frame (thread %d): valid magic; CIA=%p",
-                   tid, tst->arch.vex.guest_CIA);
+                   tid, tst->arch->vex.guest_CIA);
 
    VG_TRACK( die_mem_stack_signal, 
              (Addr)frame, 
Index: coregrind/m_sigframe/sigframe-x86-linux.c
===================================================================
--- coregrind/m_sigframe/sigframe-x86-linux.c	(revision 6776)
+++ coregrind/m_sigframe/sigframe-x86-linux.c	(working copy)
@@ -99,7 +99,7 @@
 
    /* XXX This is wrong.  Surely we should store the shadow values
       into the shadow memory behind the actual values? */
-   VexGuestX86State vex_shadow;
+   VexGuestX86State vex_shadow[MAX_SHADOW_GUEST_MULTIPLIER];
 
    /* HACK ALERT */
    VexGuestX86State vex;
@@ -361,7 +361,7 @@
 
    // FIXME: save_i387(&tst->arch, fpstate);
 
-#  define SC2(reg,REG)  sc->reg = tst->arch.vex.guest_##REG
+#  define SC2(reg,REG)  sc->reg = tst->arch->vex.guest_##REG
    SC2(gs,GS);
    SC2(fs,FS);
    SC2(es,ES);
@@ -378,7 +378,7 @@
 
    SC2(eip,EIP);
    SC2(cs,CS);
-   sc->eflags = LibVEX_GuestX86_get_eflags(&tst->arch.vex);
+   sc->eflags = LibVEX_GuestX86_get_eflags(&tst->arch->vex);
    SC2(ss,SS);
    /* XXX esp_at_signal */
    /* XXX trapno */
@@ -443,9 +443,10 @@
 {
    frame->sigNo_private = sigNo;
    frame->magicPI       = 0x31415927;
-   frame->vex_shadow    = tst->arch.vex_shadow;
+   VG_(memcpy)(frame->vex_shadow, tst->arch->vex_shadow,
+	       VG_(details).shadow_guest_multiplier*sizeof(VexGuestArchState));
    /* HACK ALERT */
-   frame->vex           = tst->arch.vex;
+   frame->vex           = tst->arch->vex;
    /* end HACK ALERT */
    frame->mask          = tst->sig_mask;
    frame->handlerflags  = flags;
@@ -538,7 +539,7 @@
    /* SIGILL defines addr to be the faulting address */
    if (sigNo == VKI_SIGILL && siginfo->si_code > 0)
       frame->sigInfo._sifields._sigfault._addr 
-         = (void*)tst->arch.vex.guest_EIP;
+         = (void*)tst->arch->vex.guest_EIP;
 
    synth_ucontext(tst->tid, siginfo, mask, &frame->uContext, &frame->fpstate);
 
@@ -576,14 +577,14 @@
    VG_TRACK( post_reg_write, Vg_CoreSignal, tid, VG_O_STACK_PTR, sizeof(Addr));
 
    //VG_(printf)("handler = %p\n", handler);
-   tst->arch.vex.guest_EIP = (Addr) handler;
+   tst->arch->vex.guest_EIP = (Addr) handler;
    /* This thread needs to be marked runnable, but we leave that the
       caller to do. */
 
    if (0)
       VG_(printf)("pushed signal frame; %%ESP now = %p, "
                   "next %%EIP = %p, status=%d\n", 
-		  esp, tst->arch.vex.guest_EIP, tst->status);
+		  esp, tst->arch->vex.guest_EIP, tst->status);
 }
 
 
@@ -609,9 +610,10 @@
    }
    tst->sig_mask        = frame->mask;
    tst->tmp_sig_mask    = frame->mask;
-   tst->arch.vex_shadow = frame->vex_shadow;
+   VG_(memcpy)(tst->arch->vex_shadow, frame->vex_shadow,
+	       VG_(details).shadow_guest_multiplier*sizeof(VexGuestArchState));
    /* HACK ALERT */
-   tst->arch.vex        = frame->vex;
+   tst->arch->vex        = frame->vex;
    /* end HACK ALERT */
    *sigNo               = frame->sigNo_private;
    return True;
@@ -622,22 +624,22 @@
                          struct vki_sigcontext *sc, 
                          struct _vki_fpstate *fpstate )
 {
-   tst->arch.vex.guest_EAX     = sc->eax;
-   tst->arch.vex.guest_ECX     = sc->ecx;
-   tst->arch.vex.guest_EDX     = sc->edx;
-   tst->arch.vex.guest_EBX     = sc->ebx;
-   tst->arch.vex.guest_EBP     = sc->ebp; 
-   tst->arch.vex.guest_ESP     = sc->esp;
-   tst->arch.vex.guest_ESI     = sc->esi;
-   tst->arch.vex.guest_EDI     = sc->edi;
-//::    tst->arch.vex.guest_eflags  = sc->eflags;
-   tst->arch.vex.guest_EIP     = sc->eip;
-   tst->arch.vex.guest_CS      = sc->cs; 
-   tst->arch.vex.guest_SS      = sc->ss;
-   tst->arch.vex.guest_DS      = sc->ds;
-   tst->arch.vex.guest_ES      = sc->es;
-   tst->arch.vex.guest_FS      = sc->fs;
-   tst->arch.vex.guest_GS      = sc->gs;
+   tst->arch->vex.guest_EAX     = sc->eax;
+   tst->arch->vex.guest_ECX     = sc->ecx;
+   tst->arch->vex.guest_EDX     = sc->edx;
+   tst->arch->vex.guest_EBX     = sc->ebx;
+   tst->arch->vex.guest_EBP     = sc->ebp; 
+   tst->arch->vex.guest_ESP     = sc->esp;
+   tst->arch->vex.guest_ESI     = sc->esi;
+   tst->arch->vex.guest_EDI     = sc->edi;
+//::    tst->arch->vex.guest_eflags  = sc->eflags;
+   tst->arch->vex.guest_EIP     = sc->eip;
+   tst->arch->vex.guest_CS      = sc->cs; 
+   tst->arch->vex.guest_SS      = sc->ss;
+   tst->arch->vex.guest_DS      = sc->ds;
+   tst->arch->vex.guest_ES      = sc->es;
+   tst->arch->vex.guest_FS      = sc->fs;
+   tst->arch->vex.guest_GS      = sc->gs;
 
 //::    restore_i387(&tst->arch, fpstate);
 }
@@ -675,7 +677,7 @@
    tst = VG_(get_ThreadState)(tid);
 
    /* Correctly reestablish the frame base address. */
-   esp   = tst->arch.vex.guest_ESP;
+   esp   = tst->arch->vex.guest_ESP;
 
    if (!isRT)
       size = restore_sigframe(tst, (struct sigframe *)esp, &sigNo);
@@ -689,7 +691,7 @@
       VG_(message)(
          Vg_DebugMsg, 
          "VG_(signal_return) (thread %d): isRT=%d valid magic; EIP=%p", 
-         tid, isRT, tst->arch.vex.guest_EIP);
+         tid, isRT, tst->arch->vex.guest_EIP);
 
    /* tell the tools */
    VG_TRACK( post_deliver_signal, tid, sigNo );
Index: coregrind/m_syswrap/syswrap-ppc64-linux.c
===================================================================
--- coregrind/m_syswrap/syswrap-ppc64-linux.c	(revision 6776)
+++ coregrind/m_syswrap/syswrap-ppc64-linux.c	(working copy)
@@ -302,19 +302,19 @@
       The child's TLS register (r2) gets set to the tlsaddr argument
       if the CLONE_SETTLS flag is set.
    */
-   setup_child( &ctst->arch, &ptst->arch );
+   setup_child( ctst->arch, ptst->arch );
 
    /* Make sys_clone appear to have returned Success(0) in the
       child. */
-   { UInt old_cr = LibVEX_GuestPPC64_get_CR( &ctst->arch.vex );
+   { UInt old_cr = LibVEX_GuestPPC64_get_CR( &ctst->arch->vex );
      /* %r3 = 0 */
-     ctst->arch.vex.guest_GPR3 = 0;
+     ctst->arch->vex.guest_GPR3 = 0;
      /* %cr0.so = 0 */
-     LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), &ctst->arch.vex );
+     LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), &ctst->arch->vex );
    }
 
    if (sp != 0)
-      ctst->arch.vex.guest_GPR1 = sp;
+      ctst->arch->vex.guest_GPR1 = sp;
 
    ctst->os_state.parent = ptid;
 
@@ -344,7 +344,7 @@
    if (flags & VKI_CLONE_SETTLS) {
       if (debug)
          VG_(printf)("clone child has SETTLS: tls at %p\n", child_tls);
-      ctst->arch.vex.guest_GPR13 = child_tls;
+      ctst->arch->vex.guest_GPR13 = child_tls;
    }
 
    flags &= ~VKI_CLONE_SETTLS;
@@ -372,7 +372,7 @@
   out:
    if (res.isError) {
       /* clone failed */
-      VG_(cleanup_thread)(&ctst->arch);
+      VG_(cleanup_thread)(ctst->arch);
       ctst->status = VgTs_Empty;
    }
 
@@ -1015,12 +1015,12 @@
    ///* Adjust esp to point to start of frame; skip back up over handler
    //   ret addr */
    tst = VG_(get_ThreadState)(tid);
-   //tst->arch.vex.guest_ESP -= sizeof(Addr);
+   //tst->arch->vex.guest_ESP -= sizeof(Addr);
    // Should we do something equivalent on ppc64-linux?  Who knows.
 
    ///* This is only so that the EIP is (might be) useful to report if
    //   something goes wrong in the sigreturn */
-   //ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
+   //ML_(fixup_guest_state_to_restart_syscall)(tst->arch);
    // Should we do something equivalent on ppc64?  Who knows.
 
    /* Restore register state from frame and remove it */
Index: coregrind/m_syswrap/syswrap-ppc32-aix5.c
===================================================================
--- coregrind/m_syswrap/syswrap-ppc32-aix5.c	(revision 6776)
+++ coregrind/m_syswrap/syswrap-ppc32-aix5.c	(working copy)
@@ -605,49 +605,49 @@
 
          /* The guest thread is to start running whatever context
             this syscall showed up with. */
-         dst_ts->arch.vex.guest_GPR0  = ats_new->mst.gpr[0];
-         dst_ts->arch.vex.guest_GPR1  = ats_new->mst.gpr[1]; /* sp */
-         dst_ts->arch.vex.guest_GPR2  = ats_new->mst.gpr[2]; /* toc */
-         dst_ts->arch.vex.guest_GPR3  = ats_new->mst.gpr[3]; /* initarg */
-         dst_ts->arch.vex.guest_GPR4  = ats_new->mst.gpr[4];
-         dst_ts->arch.vex.guest_GPR5  = ats_new->mst.gpr[5];
-         dst_ts->arch.vex.guest_GPR6  = ats_new->mst.gpr[6];
-         dst_ts->arch.vex.guest_GPR7  = ats_new->mst.gpr[7];
-         dst_ts->arch.vex.guest_GPR8  = ats_new->mst.gpr[8];
-         dst_ts->arch.vex.guest_GPR9  = ats_new->mst.gpr[9];
-         dst_ts->arch.vex.guest_GPR10 = ats_new->mst.gpr[10];
-         dst_ts->arch.vex.guest_GPR11 = ats_new->mst.gpr[11]; /* ?? */
-         dst_ts->arch.vex.guest_GPR12 = ats_new->mst.gpr[12];
-         dst_ts->arch.vex.guest_GPR13 = ats_new->mst.gpr[13];
-         dst_ts->arch.vex.guest_GPR14 = ats_new->mst.gpr[14];
-         dst_ts->arch.vex.guest_GPR15 = ats_new->mst.gpr[15];
-         dst_ts->arch.vex.guest_GPR16 = ats_new->mst.gpr[16];
-         dst_ts->arch.vex.guest_GPR17 = ats_new->mst.gpr[17];
-         dst_ts->arch.vex.guest_GPR18 = ats_new->mst.gpr[18];
-         dst_ts->arch.vex.guest_GPR19 = ats_new->mst.gpr[19];
-         dst_ts->arch.vex.guest_GPR20 = ats_new->mst.gpr[20];
-         dst_ts->arch.vex.guest_GPR21 = ats_new->mst.gpr[21];
-         dst_ts->arch.vex.guest_GPR22 = ats_new->mst.gpr[22];
-         dst_ts->arch.vex.guest_GPR23 = ats_new->mst.gpr[23];
-         dst_ts->arch.vex.guest_GPR24 = ats_new->mst.gpr[24];
-         dst_ts->arch.vex.guest_GPR25 = ats_new->mst.gpr[25];
-         dst_ts->arch.vex.guest_GPR26 = ats_new->mst.gpr[26];
-         dst_ts->arch.vex.guest_GPR27 = ats_new->mst.gpr[27];
-         dst_ts->arch.vex.guest_GPR28 = ats_new->mst.gpr[28];
-         dst_ts->arch.vex.guest_GPR29 = ats_new->mst.gpr[29];
-         dst_ts->arch.vex.guest_GPR30 = ats_new->mst.gpr[30];
-         dst_ts->arch.vex.guest_GPR31 = ats_new->mst.gpr[31];
-         dst_ts->arch.vex.guest_CIA   = ats_new->mst.iar; /* pc */
-         dst_ts->arch.vex.guest_LR    = ats_new->mst.lr;
-         dst_ts->arch.vex.guest_CTR   = ats_new->mst.ctr;
-         LibVEX_GuestPPC32_put_CR( ats_new->mst.cr, &dst_ts->arch.vex );
-         LibVEX_GuestPPC32_put_XER( ats_new->mst.xer, &dst_ts->arch.vex );
+         dst_ts->arch->vex.guest_GPR0  = ats_new->mst.gpr[0];
+         dst_ts->arch->vex.guest_GPR1  = ats_new->mst.gpr[1]; /* sp */
+         dst_ts->arch->vex.guest_GPR2  = ats_new->mst.gpr[2]; /* toc */
+         dst_ts->arch->vex.guest_GPR3  = ats_new->mst.gpr[3]; /* initarg */
+         dst_ts->arch->vex.guest_GPR4  = ats_new->mst.gpr[4];
+         dst_ts->arch->vex.guest_GPR5  = ats_new->mst.gpr[5];
+         dst_ts->arch->vex.guest_GPR6  = ats_new->mst.gpr[6];
+         dst_ts->arch->vex.guest_GPR7  = ats_new->mst.gpr[7];
+         dst_ts->arch->vex.guest_GPR8  = ats_new->mst.gpr[8];
+         dst_ts->arch->vex.guest_GPR9  = ats_new->mst.gpr[9];
+         dst_ts->arch->vex.guest_GPR10 = ats_new->mst.gpr[10];
+         dst_ts->arch->vex.guest_GPR11 = ats_new->mst.gpr[11]; /* ?? */
+         dst_ts->arch->vex.guest_GPR12 = ats_new->mst.gpr[12];
+         dst_ts->arch->vex.guest_GPR13 = ats_new->mst.gpr[13];
+         dst_ts->arch->vex.guest_GPR14 = ats_new->mst.gpr[14];
+         dst_ts->arch->vex.guest_GPR15 = ats_new->mst.gpr[15];
+         dst_ts->arch->vex.guest_GPR16 = ats_new->mst.gpr[16];
+         dst_ts->arch->vex.guest_GPR17 = ats_new->mst.gpr[17];
+         dst_ts->arch->vex.guest_GPR18 = ats_new->mst.gpr[18];
+         dst_ts->arch->vex.guest_GPR19 = ats_new->mst.gpr[19];
+         dst_ts->arch->vex.guest_GPR20 = ats_new->mst.gpr[20];
+         dst_ts->arch->vex.guest_GPR21 = ats_new->mst.gpr[21];
+         dst_ts->arch->vex.guest_GPR22 = ats_new->mst.gpr[22];
+         dst_ts->arch->vex.guest_GPR23 = ats_new->mst.gpr[23];
+         dst_ts->arch->vex.guest_GPR24 = ats_new->mst.gpr[24];
+         dst_ts->arch->vex.guest_GPR25 = ats_new->mst.gpr[25];
+         dst_ts->arch->vex.guest_GPR26 = ats_new->mst.gpr[26];
+         dst_ts->arch->vex.guest_GPR27 = ats_new->mst.gpr[27];
+         dst_ts->arch->vex.guest_GPR28 = ats_new->mst.gpr[28];
+         dst_ts->arch->vex.guest_GPR29 = ats_new->mst.gpr[29];
+         dst_ts->arch->vex.guest_GPR30 = ats_new->mst.gpr[30];
+         dst_ts->arch->vex.guest_GPR31 = ats_new->mst.gpr[31];
+         dst_ts->arch->vex.guest_CIA   = ats_new->mst.iar; /* pc */
+         dst_ts->arch->vex.guest_LR    = ats_new->mst.lr;
+         dst_ts->arch->vex.guest_CTR   = ats_new->mst.ctr;
+         LibVEX_GuestPPC32_put_CR( ats_new->mst.cr, &dst_ts->arch->vex );
+         LibVEX_GuestPPC32_put_XER( ats_new->mst.xer, &dst_ts->arch->vex );
 
          /* Record what seems like the highest legitimate stack
             address for this thread, so that the stack unwinder works
             properly.  It seems reasonable to use the R1 value
             supplied here. */
-         dst_ts->client_stack_highest_word = dst_ts->arch.vex.guest_GPR1;
+         dst_ts->client_stack_highest_word = dst_ts->arch->vex.guest_GPR1;
 
          /* The host thread is to start running
             start_thread_NORETURN */
Index: coregrind/m_syswrap/syswrap-ppc64-aix5.c
===================================================================
--- coregrind/m_syswrap/syswrap-ppc64-aix5.c	(revision 6776)
+++ coregrind/m_syswrap/syswrap-ppc64-aix5.c	(working copy)
@@ -515,49 +515,49 @@
 
          /* The guest thread is to start running whatever context
             this syscall showed up with. */
-         dst_ts->arch.vex.guest_GPR0  = ats_new->mst.gpr[0];
-         dst_ts->arch.vex.guest_GPR1  = ats_new->mst.gpr[1]; /* sp */
-         dst_ts->arch.vex.guest_GPR2  = ats_new->mst.gpr[2]; /* toc */
-         dst_ts->arch.vex.guest_GPR3  = ats_new->mst.gpr[3]; /* initarg */
-         dst_ts->arch.vex.guest_GPR4  = ats_new->mst.gpr[4];
-         dst_ts->arch.vex.guest_GPR5  = ats_new->mst.gpr[5];
-         dst_ts->arch.vex.guest_GPR6  = ats_new->mst.gpr[6];
-         dst_ts->arch.vex.guest_GPR7  = ats_new->mst.gpr[7];
-         dst_ts->arch.vex.guest_GPR8  = ats_new->mst.gpr[8];
-         dst_ts->arch.vex.guest_GPR9  = ats_new->mst.gpr[9];
-         dst_ts->arch.vex.guest_GPR10 = ats_new->mst.gpr[10];
-         dst_ts->arch.vex.guest_GPR11 = ats_new->mst.gpr[11]; /* ?? */
-         dst_ts->arch.vex.guest_GPR12 = ats_new->mst.gpr[12];
-         dst_ts->arch.vex.guest_GPR13 = ats_new->mst.gpr[13];
-         dst_ts->arch.vex.guest_GPR14 = ats_new->mst.gpr[14];
-         dst_ts->arch.vex.guest_GPR15 = ats_new->mst.gpr[15];
-         dst_ts->arch.vex.guest_GPR16 = ats_new->mst.gpr[16];
-         dst_ts->arch.vex.guest_GPR17 = ats_new->mst.gpr[17];
-         dst_ts->arch.vex.guest_GPR18 = ats_new->mst.gpr[18];
-         dst_ts->arch.vex.guest_GPR19 = ats_new->mst.gpr[19];
-         dst_ts->arch.vex.guest_GPR20 = ats_new->mst.gpr[20];
-         dst_ts->arch.vex.guest_GPR21 = ats_new->mst.gpr[21];
-         dst_ts->arch.vex.guest_GPR22 = ats_new->mst.gpr[22];
-         dst_ts->arch.vex.guest_GPR23 = ats_new->mst.gpr[23];
-         dst_ts->arch.vex.guest_GPR24 = ats_new->mst.gpr[24];
-         dst_ts->arch.vex.guest_GPR25 = ats_new->mst.gpr[25];
-         dst_ts->arch.vex.guest_GPR26 = ats_new->mst.gpr[26];
-         dst_ts->arch.vex.guest_GPR27 = ats_new->mst.gpr[27];
-         dst_ts->arch.vex.guest_GPR28 = ats_new->mst.gpr[28];
-         dst_ts->arch.vex.guest_GPR29 = ats_new->mst.gpr[29];
-         dst_ts->arch.vex.guest_GPR30 = ats_new->mst.gpr[30];
-         dst_ts->arch.vex.guest_GPR31 = ats_new->mst.gpr[31];
-         dst_ts->arch.vex.guest_CIA   = ats_new->mst.iar; /* pc */
-         dst_ts->arch.vex.guest_LR    = ats_new->mst.lr;
-         dst_ts->arch.vex.guest_CTR   = ats_new->mst.ctr;
-         LibVEX_GuestPPC64_put_CR( ats_new->mst.cr, &dst_ts->arch.vex );
-         LibVEX_GuestPPC64_put_XER( ats_new->mst.xer, &dst_ts->arch.vex );
+         dst_ts->arch->vex.guest_GPR0  = ats_new->mst.gpr[0];
+         dst_ts->arch->vex.guest_GPR1  = ats_new->mst.gpr[1]; /* sp */
+         dst_ts->arch->vex.guest_GPR2  = ats_new->mst.gpr[2]; /* toc */
+         dst_ts->arch->vex.guest_GPR3  = ats_new->mst.gpr[3]; /* initarg */
+         dst_ts->arch->vex.guest_GPR4  = ats_new->mst.gpr[4];
+         dst_ts->arch->vex.guest_GPR5  = ats_new->mst.gpr[5];
+         dst_ts->arch->vex.guest_GPR6  = ats_new->mst.gpr[6];
+         dst_ts->arch->vex.guest_GPR7  = ats_new->mst.gpr[7];
+         dst_ts->arch->vex.guest_GPR8  = ats_new->mst.gpr[8];
+         dst_ts->arch->vex.guest_GPR9  = ats_new->mst.gpr[9];
+         dst_ts->arch->vex.guest_GPR10 = ats_new->mst.gpr[10];
+         dst_ts->arch->vex.guest_GPR11 = ats_new->mst.gpr[11]; /* ?? */
+         dst_ts->arch->vex.guest_GPR12 = ats_new->mst.gpr[12];
+         dst_ts->arch->vex.guest_GPR13 = ats_new->mst.gpr[13];
+         dst_ts->arch->vex.guest_GPR14 = ats_new->mst.gpr[14];
+         dst_ts->arch->vex.guest_GPR15 = ats_new->mst.gpr[15];
+         dst_ts->arch->vex.guest_GPR16 = ats_new->mst.gpr[16];
+         dst_ts->arch->vex.guest_GPR17 = ats_new->mst.gpr[17];
+         dst_ts->arch->vex.guest_GPR18 = ats_new->mst.gpr[18];
+         dst_ts->arch->vex.guest_GPR19 = ats_new->mst.gpr[19];
+         dst_ts->arch->vex.guest_GPR20 = ats_new->mst.gpr[20];
+         dst_ts->arch->vex.guest_GPR21 = ats_new->mst.gpr[21];
+         dst_ts->arch->vex.guest_GPR22 = ats_new->mst.gpr[22];
+         dst_ts->arch->vex.guest_GPR23 = ats_new->mst.gpr[23];
+         dst_ts->arch->vex.guest_GPR24 = ats_new->mst.gpr[24];
+         dst_ts->arch->vex.guest_GPR25 = ats_new->mst.gpr[25];
+         dst_ts->arch->vex.guest_GPR26 = ats_new->mst.gpr[26];
+         dst_ts->arch->vex.guest_GPR27 = ats_new->mst.gpr[27];
+         dst_ts->arch->vex.guest_GPR28 = ats_new->mst.gpr[28];
+         dst_ts->arch->vex.guest_GPR29 = ats_new->mst.gpr[29];
+         dst_ts->arch->vex.guest_GPR30 = ats_new->mst.gpr[30];
+         dst_ts->arch->vex.guest_GPR31 = ats_new->mst.gpr[31];
+         dst_ts->arch->vex.guest_CIA   = ats_new->mst.iar; /* pc */
+         dst_ts->arch->vex.guest_LR    = ats_new->mst.lr;
+         dst_ts->arch->vex.guest_CTR   = ats_new->mst.ctr;
+         LibVEX_GuestPPC64_put_CR( ats_new->mst.cr, &dst_ts->arch->vex );
+         LibVEX_GuestPPC64_put_XER( ats_new->mst.xer, &dst_ts->arch->vex );
 
          /* Record what seems like the highest legitimate stack
             address for this thread, so that the stack unwinder works
             properly.  It seems reasonable to use the R1 value
             supplied here. */
-         dst_ts->client_stack_highest_word = dst_ts->arch.vex.guest_GPR1;
+         dst_ts->client_stack_highest_word = dst_ts->arch->vex.guest_GPR1;
 
          /* The host thread is to start running
             start_thread_NORETURN */
Index: coregrind/m_syswrap/syswrap-aix5.c
===================================================================
--- coregrind/m_syswrap/syswrap-aix5.c	(revision 6776)
+++ coregrind/m_syswrap/syswrap-aix5.c	(working copy)
@@ -185,9 +185,9 @@
    if (found) {
       if (0) 
          VG_(printf)("THREAD CANCELED, new cia,toc = %p,%p\n", ent, toc);
-      tst->arch.vex.guest_CIA  = ent;
-      tst->arch.vex.guest_GPR2 = toc;
-      tst->arch.vex.guest_GPR3 = (Word)(-1); /* == PTHREAD_CANCELED */
+      tst->arch->vex.guest_CIA  = ent;
+      tst->arch->vex.guest_GPR2 = toc;
+      tst->arch->vex.guest_GPR3 = (Word)(-1); /* == PTHREAD_CANCELED */
       /* If the thread is blocked in a syscall, we better bop it on
          the head with SIGVGKILL in order to get it out of said
          syscall. */
Index: coregrind/m_syswrap/syswrap-amd64-linux.c
===================================================================
--- coregrind/m_syswrap/syswrap-amd64-linux.c	(revision 6776)
+++ coregrind/m_syswrap/syswrap-amd64-linux.c	(working copy)
@@ -233,14 +233,14 @@
       If the clone call specifies a NULL rsp for the new thread, then
       it actually gets a copy of the parent's rsp.
    */
-   setup_child( &ctst->arch, &ptst->arch );
+   setup_child( ctst->arch, ptst->arch );
 
    /* Make sys_clone appear to have returned Success(0) in the
       child. */
-   ctst->arch.vex.guest_RAX = 0;
+   ctst->arch->vex.guest_RAX = 0;
 
    if (rsp != 0)
-      ctst->arch.vex.guest_RSP = rsp;
+      ctst->arch->vex.guest_RSP = rsp;
 
    ctst->os_state.parent = ptid;
 
@@ -270,7 +270,7 @@
    if (flags & VKI_CLONE_SETTLS) {
       if (debug)
 	 VG_(printf)("clone child has SETTLS: tls at %p\n", tlsaddr);
-      ctst->arch.vex.guest_FS_ZERO = tlsaddr;
+      ctst->arch->vex.guest_FS_ZERO = tlsaddr;
    }
 
    flags &= ~VKI_CLONE_SETTLS;
@@ -290,7 +290,7 @@
   out:
    if (res.isError) {
       /* clone failed */
-      VG_(cleanup_thread)(&ctst->arch);
+      VG_(cleanup_thread)(ctst->arch);
       ctst->status = VgTs_Empty;
    }
 
@@ -471,12 +471,12 @@
    /* Adjust RSP to point to start of frame; skip back up over handler
       ret addr */
    tst = VG_(get_ThreadState)(tid);
-   tst->arch.vex.guest_RSP -= sizeof(Addr);
+   tst->arch->vex.guest_RSP -= sizeof(Addr);
 
    /* This is only so that the RIP is (might be) useful to report if
       something goes wrong in the sigreturn.  JRS 20070318: no idea
       what this is for */
-   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
+   ML_(fixup_guest_state_to_restart_syscall)(tst->arch);
 
    /* Restore register state from frame and remove it, as 
       described above */
@@ -509,12 +509,12 @@
    /* "do" the syscall ourselves; the kernel never sees it */
    if (ARG1 == VKI_ARCH_SET_FS) {
       tst = VG_(get_ThreadState)(tid);
-      tst->arch.vex.guest_FS_ZERO = ARG2;
+      tst->arch->vex.guest_FS_ZERO = ARG2;
    }
    else if (ARG1 == VKI_ARCH_GET_FS) {
       PRE_MEM_WRITE("arch_prctl(addr)", ARG2, sizeof(unsigned long));
       tst = VG_(get_ThreadState)(tid);
-      *(unsigned long *)ARG2 = tst->arch.vex.guest_FS_ZERO;
+      *(unsigned long *)ARG2 = tst->arch->vex.guest_FS_ZERO;
       POST_MEM_WRITE(ARG2, sizeof(unsigned long));
    }
    else {
Index: coregrind/m_syswrap/syswrap-x86-linux.c
===================================================================
--- coregrind/m_syswrap/syswrap-x86-linux.c	(revision 6776)
+++ coregrind/m_syswrap/syswrap-x86-linux.c	(working copy)
@@ -245,14 +245,14 @@
       clone flags of 0xF00, and it seems to rely on the assumption
       that the child inherits a copy of the parent's GDT.  
       setup_child takes care of setting that up. */
-   setup_child( &ctst->arch, &ptst->arch, True );
+   setup_child( ctst->arch, ptst->arch, True );
 
    /* Make sys_clone appear to have returned Success(0) in the
       child. */
-   ctst->arch.vex.guest_EAX = 0;
+   ctst->arch->vex.guest_EAX = 0;
 
    if (esp != 0)
-      ctst->arch.vex.guest_ESP = esp;
+      ctst->arch->vex.guest_ESP = esp;
 
    ctst->os_state.parent = ptid;
 
@@ -285,8 +285,8 @@
                      "base=%p limit=%x; esp=%p fs=%x gs=%x\n",
 		     tlsinfo, tlsinfo->entry_number, 
                      tlsinfo->base_addr, tlsinfo->limit,
-		     ptst->arch.vex.guest_ESP,
-		     ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
+		     ptst->arch->vex.guest_ESP,
+		     ctst->arch->vex.guest_FS, ctst->arch->vex.guest_GS);
       res = sys_set_thread_area(ctid, tlsinfo);
       if (res.isError)
 	 goto out;
@@ -309,7 +309,7 @@
   out:
    if (res.isError) {
       /* clone failed */
-      VG_(cleanup_thread)(&ctst->arch);
+      VG_(cleanup_thread)(ctst->arch);
       ctst->status = VgTs_Empty;
    }
 
@@ -519,7 +519,7 @@
    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    vg_assert(8 == sizeof(VexGuestX86SegDescr));
 
-   ldt = (Char*)(VG_(threads)[tid].arch.vex.guest_LDT);
+   ldt = (Char*)(VG_(threads)[tid].arch->vex.guest_LDT);
    res = VG_(mk_SysRes_Success)( 0 );
    if (ldt == NULL)
       /* LDT not allocated, meaning all entries are null */
@@ -553,7 +553,7 @@
    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
 
-   ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
+   ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch->vex.guest_LDT;
    ldt_info = (vki_modify_ldt_t*)ptr;
 
    res = VG_(mk_SysRes_Error)( VKI_EINVAL );
@@ -574,7 +574,7 @@
       now. */
    if (ldt == (HWord)NULL) {
       ldt = alloc_zeroed_x86_LDT();
-      VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
+      VG_(threads)[tid].arch->vex.guest_LDT = (HWord)ldt;
    }
 
    /* Install the new entry ...  */
@@ -623,12 +623,12 @@
    if (info == NULL)
       return VG_(mk_SysRes_Error)( VKI_EFAULT );
 
-   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
+   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch->vex.guest_GDT;
 
    /* If the thread doesn't have a GDT, allocate it now. */
    if (!gdt) {
       gdt = alloc_zeroed_x86_GDT();
-      VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
+      VG_(threads)[tid].arch->vex.guest_GDT = (HWord)gdt;
    }
 
    idx = info->entry_number;
@@ -676,12 +676,12 @@
    if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
       return VG_(mk_SysRes_Error)( VKI_EINVAL );
 
-   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
+   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch->vex.guest_GDT;
 
    /* If the thread doesn't have a GDT, allocate it now. */
    if (!gdt) {
       gdt = alloc_zeroed_x86_GDT();
-      VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
+      VG_(threads)[tid].arch->vex.guest_GDT = (HWord)gdt;
    }
 
    info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
@@ -718,7 +718,8 @@
 {
    /* We inherit our parent's guest state. */
    child->vex = parent->vex;
-   child->vex_shadow = parent->vex_shadow;
+   VG_(memcpy)(child->vex_shadow, parent->vex_shadow,
+	       VG_(details).shadow_guest_multiplier*sizeof(VexGuestX86State));
 
    /* We inherit our parent's LDT. */
    if (parent->vex.guest_LDT == (HWord)NULL) {
@@ -948,12 +949,12 @@
    /* Adjust esp to point to start of frame; skip back up over
       sigreturn sequence's "popl %eax" and handler ret addr */
    tst = VG_(get_ThreadState)(tid);
-   tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
+   tst->arch->vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
    /* XXX why does ESP change differ from rt_sigreturn case below? */
 
    /* This is only so that the EIP is (might be) useful to report if
       something goes wrong in the sigreturn */
-   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
+   ML_(fixup_guest_state_to_restart_syscall)(tst->arch);
 
    /* Restore register state from frame and remove it */
    VG_(sigframe_destroy)(tid, False);
@@ -982,12 +983,12 @@
    /* Adjust esp to point to start of frame; skip back up over handler
       ret addr */
    tst = VG_(get_ThreadState)(tid);
-   tst->arch.vex.guest_ESP -= sizeof(Addr);
+   tst->arch->vex.guest_ESP -= sizeof(Addr);
    /* XXX why does ESP change differ from sigreturn case above? */
 
    /* This is only so that the EIP is (might be) useful to report if
       something goes wrong in the sigreturn */
-   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
+   ML_(fixup_guest_state_to_restart_syscall)(tst->arch);
 
    /* Restore register state from frame and remove it */
    VG_(sigframe_destroy)(tid, True);
Index: coregrind/m_syswrap/syswrap-ppc32-linux.c
===================================================================
--- coregrind/m_syswrap/syswrap-ppc32-linux.c	(revision 6776)
+++ coregrind/m_syswrap/syswrap-ppc32-linux.c	(working copy)
@@ -274,19 +274,19 @@
       The child's TLS register (r2) gets set to the tlsaddr argument
       if the CLONE_SETTLS flag is set.
    */
-   setup_child( &ctst->arch, &ptst->arch );
+   setup_child( ctst->arch, ptst->arch );
 
    /* Make sys_clone appear to have returned Success(0) in the
       child. */
-   { UInt old_cr = LibVEX_GuestPPC32_get_CR( &ctst->arch.vex );
+   { UInt old_cr = LibVEX_GuestPPC32_get_CR( &ctst->arch->vex );
      /* %r3 = 0 */
-     ctst->arch.vex.guest_GPR3 = 0;
+     ctst->arch->vex.guest_GPR3 = 0;
      /* %cr0.so = 0 */
-     LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), &ctst->arch.vex );
+     LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), &ctst->arch->vex );
    }
 
    if (sp != 0)
-      ctst->arch.vex.guest_GPR1 = sp;
+      ctst->arch->vex.guest_GPR1 = sp;
 
    ctst->os_state.parent = ptid;
 
@@ -316,7 +316,7 @@
    if (flags & VKI_CLONE_SETTLS) {
       if (debug)
          VG_(printf)("clone child has SETTLS: tls at %p\n", child_tls);
-      ctst->arch.vex.guest_GPR2 = child_tls;
+      ctst->arch->vex.guest_GPR2 = child_tls;
    }
 
    flags &= ~VKI_CLONE_SETTLS;
@@ -342,7 +342,7 @@
   out:
    if (res.isError) {
       /* clone failed */
-      VG_(cleanup_thread)(&ctst->arch);
+      VG_(cleanup_thread)(ctst->arch);
       ctst->status = VgTs_Empty;
    }
 
@@ -1037,12 +1037,12 @@
    ///* Adjust esp to point to start of frame; skip back up over
    //   sigreturn sequence's "popl %eax" and handler ret addr */
    tst = VG_(get_ThreadState)(tid);
-   //tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
+   //tst->arch->vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
    // Should we do something equivalent on ppc32?  Who knows.
 
    ///* This is only so that the EIP is (might be) useful to report if
    //   something goes wrong in the sigreturn */
-   //ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
+   //ML_(fixup_guest_state_to_restart_syscall)(tst->arch);
    // Should we do something equivalent on ppc32?  Who knows.
 
    /* Restore register state from frame and remove it */
@@ -1072,12 +1072,12 @@
    ///* Adjust esp to point to start of frame; skip back up over handler
    //   ret addr */
    tst = VG_(get_ThreadState)(tid);
-   //tst->arch.vex.guest_ESP -= sizeof(Addr);
+   //tst->arch->vex.guest_ESP -= sizeof(Addr);
    // Should we do something equivalent on ppc32?  Who knows.
 
    ///* This is only so that the EIP is (might be) useful to report if
    //   something goes wrong in the sigreturn */
-   //ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
+   //ML_(fixup_guest_state_to_restart_syscall)(tst->arch);
    // Should we do something equivalent on ppc32?  Who knows.
 
    /* Restore register state from frame and remove it */
Index: coregrind/m_syswrap/syswrap-main.c
===================================================================
--- coregrind/m_syswrap/syswrap-main.c	(revision 6776)
+++ coregrind/m_syswrap/syswrap-main.c	(working copy)
@@ -247,7 +247,7 @@
    vki_sigset_t saved;
    UWord err 
       = ML_(do_syscall_for_client_WRK)(
-           syscallno, &tst->arch.vex, 
+           syscallno, &tst->arch->vex, 
            syscall_mask, &saved, _VKI_NSIG_WORDS * sizeof(UWord)
 #          if defined(VGO_aix5)
            , __NR_rt_sigprocmask
@@ -795,7 +795,7 @@
    sci = & syscallInfo[tid];
    vg_assert(sci->status.what == SsIdle);
 
-   getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex );
+   getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch->vex );
 
    /* Copy .orig_args to .args.  The pre-handler may modify .args, but
       we want to keep the originals too, just in case. */
@@ -926,7 +926,7 @@
          /* Gack.  More impedance matching.  Copy the possibly
             modified syscall args back into the guest state. */
          vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
-         putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
+         putSyscallArgsIntoGuestState( &sci->args, &tst->arch->vex );
 
          /* Drop the lock */
          VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
@@ -948,7 +948,7 @@
 
          /* Even more impedance matching.  Extract the syscall status
             from the guest state. */
-         getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
+         getSyscallStatusFromGuestState( &sci->status, &tst->arch->vex );
          vg_assert(sci->status.what == SsComplete);
 
          PRINT("SYSCALL[%d,%d](%3d) ... [async] --> %s(0x%llx)\n",
@@ -986,7 +986,7 @@
    /* Dump the syscall result back in the guest state.  This is
       a platform-specific action. */
    if (!(sci->flags & SfNoWriteResult))
-      putSyscallStatusIntoGuestState( &sci->status, &tst->arch.vex );
+      putSyscallStatusIntoGuestState( &sci->status, &tst->arch->vex );
 
    /* Situation now:
       - the guest state is now correctly modified following the syscall
@@ -1041,7 +1041,7 @@
       previously written the result into the guest state. */
    vg_assert(sci->status.what == SsComplete);
 
-   getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
+   getSyscallStatusFromGuestState( &test_status, &tst->arch->vex );
    if (!(sci->flags & SfNoWriteResult))
       vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
    /* Ok, looks sane */
@@ -1079,7 +1079,7 @@
       failure if the kernel supplied a fd that it doesn't like), once
       again dump the syscall result back in the guest state.*/
    if (!(sci->flags & SfNoWriteResult))
-      putSyscallStatusIntoGuestState( &sci->status, &tst->arch.vex );
+      putSyscallStatusIntoGuestState( &sci->status, &tst->arch->vex );
 
    /* Do any post-syscall actions required by the tool. */
    if (VG_(needs).syscall_wrapper)
@@ -1294,7 +1294,7 @@
    vg_assert(VG_(is_running_thread)(tid));
 
    tst     = VG_(get_ThreadState)(tid);
-   th_regs = &tst->arch;
+   th_regs = tst->arch;
    sci     = & syscallInfo[tid];
 
    /* Figure out what the state of the syscall was by examining the
Index: coregrind/m_machine.c
===================================================================
--- coregrind/m_machine.c	(revision 6776)
+++ coregrind/m_machine.c	(working copy)
@@ -38,9 +38,9 @@
 #include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL
 
 
-#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
-#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
-#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)
+#define INSTR_PTR(regs)    ((regs)->vex.VG_INSTR_PTR)
+#define STACK_PTR(regs)    ((regs)->vex.VG_STACK_PTR)
+#define FRAME_PTR(regs)    ((regs)->vex.VG_FRAME_PTR)
 
 Addr VG_(get_SP) ( ThreadId tid )
 {
@@ -60,7 +60,7 @@
 Addr VG_(get_LR) ( ThreadId tid )
 {
 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
-   return VG_(threads)[tid].arch.vex.guest_LR;
+   return VG_(threads)[tid].arch->vex.guest_LR;
 #  elif defined(VGA_x86) || defined(VGA_amd64)
    return 0;
 #  else
@@ -91,7 +91,7 @@
    vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
    vg_assert(offset + size <= sizeof(VexGuestArchState));
 
-   VG_(memcpy)( area, (void*)(((Addr)&(tst->arch.vex_shadow)) + offset), size);
+   VG_(memcpy)(area, (void*)(((Addr)&(tst->arch->vex_shadow)) + offset), size);
 }
 
 void VG_(set_shadow_regs_area) ( ThreadId tid, OffT offset, SizeT size,
@@ -106,7 +106,7 @@
    vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
    vg_assert(offset + size <= sizeof(VexGuestArchState));
 
-   VG_(memcpy)( (void*)(((Addr)(&tst->arch.vex_shadow)) + offset), area, size);
+   VG_(memcpy)((void*)(((Addr)(&tst->arch->vex_shadow)) + offset), area, size);
 }
 
 
@@ -188,7 +188,7 @@
    for (tid = 1; tid < VG_N_THREADS; tid++) {
       if (VG_(is_valid_tid)(tid)) {
          ThreadState* tst = VG_(get_ThreadState)(tid);
-         apply_to_GPs_of_tid(&(tst->arch.vex), f);
+         apply_to_GPs_of_tid(&(tst->arch->vex), f);
       }
    }
 }
Index: coregrind/m_tooliface.c
===================================================================
--- coregrind/m_tooliface.c	(revision 6776)
+++ coregrind/m_tooliface.c	(working copy)
@@ -56,12 +56,13 @@
 
 /* Init with default values. */
 VgDetails VG_(details) = {
-   .name                  = NULL,
-   .version               = NULL,
-   .description           = NULL,
-   .copyright_author      = NULL,
-   .bug_reports_to        = NULL,
-   .avg_translation_sizeB = VG_DEFAULT_TRANS_SIZEB,
+   .name                    = NULL,
+   .version                 = NULL,
+   .description             = NULL,
+   .copyright_author        = NULL,
+   .bug_reports_to          = NULL,
+   .avg_translation_sizeB   = VG_DEFAULT_TRANS_SIZEB,
+   .shadow_guest_multiplier = 0
 };
 
 /* Use macro because they're so repetitive */
@@ -77,6 +78,7 @@
 DETAILS(Char*, copyright_author)
 DETAILS(Char*, bug_reports_to)
 DETAILS(UInt,  avg_translation_sizeB)
+DETAILS(Int,   shadow_guest_multiplier)
 
 
 /*--------------------------------------------------------------------*/
Index: coregrind/pub_core_tooliface.h
===================================================================
--- coregrind/pub_core_tooliface.h	(revision 6776)
+++ coregrind/pub_core_tooliface.h	(working copy)
@@ -69,11 +69,17 @@
       Char* copyright_author;
       Char* bug_reports_to;
       UInt  avg_translation_sizeB;
+      Int   shadow_guest_multiplier;
    }
    VgDetails;
 
 extern VgDetails VG_(details);
 
+/* Until we can eliminate all the places that need to store shadow guest
+   state in a fixed sized structure, we need to have a static limit on
+   how big it can be. */
+#define MAX_SHADOW_GUEST_MULTIPLIER 10
+
 /* ---------------------------------------------------------------------
    'Needs'
    ------------------------------------------------------------------ */
Index: coregrind/m_initimg/initimg-aix5.c
===================================================================
--- coregrind/m_initimg/initimg-aix5.c	(revision 6776)
+++ coregrind/m_initimg/initimg-aix5.c	(working copy)
@@ -299,7 +299,9 @@
    LibVEX_GuestPPC32_initialise(&arch->vex);
 
    /* Zero out the shadow area. */
-   VG_(memset)(&arch->vex_shadow, 0, sizeof(VexGuestPPC32State));
+   VG_(memset)(&arch->vex_shadow, 0,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestPPC32State));
 
 #  else /* defined(VGP_ppc64_aix5) */
 
@@ -310,7 +312,9 @@
    LibVEX_GuestPPC64_initialise(&arch->vex);
 
    /* Zero out the shadow area. */
-   VG_(memset)(&arch->vex_shadow, 0, sizeof(VexGuestPPC64State));
+   VG_(memset)(&arch->vex_shadow, 0,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestPPC64State));
 
 #  endif
 
Index: coregrind/m_initimg/initimg-linux.c
===================================================================
--- coregrind/m_initimg/initimg-linux.c	(revision 6776)
+++ coregrind/m_initimg/initimg-linux.c	(working copy)
@@ -917,7 +917,7 @@
 */
 void VG_(ii_finalise_image)( IIFinaliseImageInfo iifii )
 {
-   ThreadArchState* arch = &VG_(threads)[1].arch;
+   ThreadArchState* arch = VG_(threads)[1].arch;
 
    /* On Linux we get client_{ip/sp/toc}, and start the client with
       all other registers zeroed. */
@@ -930,7 +930,9 @@
    LibVEX_GuestX86_initialise(&arch->vex);
 
    /* Zero out the shadow area. */
-   VG_(memset)(&arch->vex_shadow, 0, sizeof(VexGuestX86State));
+   VG_(memset)(&arch->vex_shadow, 0,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestX86State));
 
    /* Put essential stuff into the new state. */
    arch->vex.guest_ESP = iifii.initial_client_SP;
@@ -950,7 +952,9 @@
    LibVEX_GuestAMD64_initialise(&arch->vex);
 
    /* Zero out the shadow area. */
-   VG_(memset)(&arch->vex_shadow, 0, sizeof(VexGuestAMD64State));
+   VG_(memset)(&arch->vex_shadow, 0,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestAMD64State));
 
    /* Put essential stuff into the new state. */
    arch->vex.guest_RSP = iifii.initial_client_SP;
@@ -964,7 +968,9 @@
    LibVEX_GuestPPC32_initialise(&arch->vex);
 
    /* Zero out the shadow area. */
-   VG_(memset)(&arch->vex_shadow, 0, sizeof(VexGuestPPC32State));
+   VG_(memset)(&arch->vex_shadow, 0,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestPPC32State));
 
    /* Put essential stuff into the new state. */
    arch->vex.guest_GPR1 = iifii.initial_client_SP;
@@ -978,7 +984,9 @@
    LibVEX_GuestPPC64_initialise(&arch->vex);
 
    /* Zero out the shadow area. */
-   VG_(memset)(&arch->vex_shadow, 0, sizeof(VexGuestPPC64State));
+   VG_(memset)(&arch->vex_shadow, 0,
+	       VG_(details).shadow_guest_multiplier
+	       * sizeof(VexGuestPPC64State));
 
    /* Put essential stuff into the new state. */
    arch->vex.guest_GPR1 = iifii.initial_client_SP;
Index: coregrind/m_debugger.c
===================================================================
--- coregrind/m_debugger.c	(revision 6776)
+++ coregrind/m_debugger.c	(working copy)
@@ -167,7 +167,7 @@
 
       if ((res = VG_(waitpid)(pid, &status, 0)) == pid &&
           WIFSTOPPED(status) && WSTOPSIG(status) == VKI_SIGSTOP &&
-          ptrace_setregs(pid, &(VG_(threads)[tid].arch.vex)) == 0 &&
+          ptrace_setregs(pid, &(VG_(threads)[tid].arch->vex)) == 0 &&
           VG_(kill)(pid, VKI_SIGSTOP) == 0 &&
           VG_(ptrace)(VKI_PTRACE_DETACH, pid, NULL, 0) == 0)
       {
Index: coregrind/m_coredump/coredump-elf.c
===================================================================
--- coregrind/m_coredump/coredump-elf.c	(revision 6776)
+++ coregrind/m_coredump/coredump-elf.c	(working copy)
@@ -268,18 +268,18 @@
 
    vg_assert(sizeof(*regs) == sizeof(prs->pr_reg));
 
-   ML_(fill_elfregs_from_tst)(regs, &tst->arch);
+   ML_(fill_elfregs_from_tst)(regs, tst->arch);
 }
 
 static void fill_fpu(const ThreadState *tst, vki_elf_fpregset_t *fpu)
 {
-   ML_(fill_elffpregs_from_tst)(fpu, &tst->arch);
+   ML_(fill_elffpregs_from_tst)(fpu, tst->arch);
 }
 
 #if defined(VGP_x86_linux)
 static void fill_xfpu(const ThreadState *tst, vki_elf_fpxregset_t *xfpu)
 {
-   ML_(fill_elffpxregs_from_tst)(xfpu, &tst->arch);
+   ML_(fill_elffpxregs_from_tst)(xfpu, tst->arch);
 }
 #endif
 
Index: coregrind/m_stacktrace.c
===================================================================
--- coregrind/m_stacktrace.c	(revision 6776)
+++ coregrind/m_stacktrace.c	(working copy)
@@ -266,11 +266,11 @@
       similar code a few lines further down. */
    if (ULong_to_Ptr(lr) == (void*)&VG_(ppctoc_magic_redirect_return_stub)
        && VG_(is_valid_tid)(tid_if_known)) {
-      Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
+      Word hsp = VG_(threads)[tid_if_known].arch->vex.guest_REDIR_SP;
       redirs_used++;
       if (hsp >= 1 && hsp < redir_stack_size)
          lr = VG_(threads)[tid_if_known]
-                 .arch.vex.guest_REDIR_STACK[hsp-1];
+                 .arch->vex.guest_REDIR_STACK[hsp-1];
    }
 #  endif
 
@@ -340,12 +340,12 @@
                used by the unwinding so far with 'redirs_used'. */
             if (ip == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
                 && VG_(is_valid_tid)(tid_if_known)) {
-               Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
+               Word hsp = VG_(threads)[tid_if_known].arch->vex.guest_REDIR_SP;
                hsp -= 2 * redirs_used;
                redirs_used ++;
                if (hsp >= 1 && hsp < redir_stack_size)
                   ip = VG_(threads)[tid_if_known]
-                          .arch.vex.guest_REDIR_STACK[hsp-1];
+                          .arch->vex.guest_REDIR_STACK[hsp-1];
             }
 #           endif
 
Index: coregrind/m_main.c
===================================================================
--- coregrind/m_main.c	(revision 6776)
+++ coregrind/m_main.c	(working copy)
@@ -2119,7 +2119,7 @@
       appropriately. */
    VG_(set_IP)(tid, __libc_freeres_wrapper);
 #  if defined(VGP_ppc64_linux)
-   VG_(threads)[tid].arch.vex.guest_GPR2 = r2;
+   VG_(threads)[tid].arch->vex.guest_GPR2 = r2;
 #  endif
 
    /* Block all blockable signals by copying the real block state into
Index: coregrind/m_scheduler/scheduler.c
===================================================================
--- coregrind/m_scheduler/scheduler.c	(revision 6776)
+++ coregrind/m_scheduler/scheduler.c	(working copy)
@@ -374,7 +374,7 @@
    vki_sigset_t savedmask;
 
    vg_assert(tid >= 0 && tid < VG_N_THREADS);
-   VG_(cleanup_thread)(&VG_(threads)[tid].arch);
+   VG_(cleanup_thread)(VG_(threads)[tid].arch);
    VG_(threads)[tid].tid = tid;
 
    /* Leave the thread in Zombie, so that it doesn't get reallocated
@@ -446,6 +446,7 @@
 {
    Int i;
    ThreadId tid_main;
+   SizeT arch_size;
 
    VG_(debugLog)(1,"sched","sched_init: cls_end=0x%lx, cls_sz=%ld\n",
                    clstack_end, clstack_size);
@@ -455,6 +456,12 @@
 
    ML_(sema_init)(&the_BigLock);
 
+   /* Validate before sizing: a negative multiplier would wrap arch_size. */
+   vg_assert(VG_(details).shadow_guest_multiplier >= 0);
+   vg_assert(VG_(details).shadow_guest_multiplier <= MAX_SHADOW_GUEST_MULTIPLIER);
+   arch_size = sizeof(ThreadArchState)
+      + VG_(details).shadow_guest_multiplier * sizeof(VexGuestArchState);
+
    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
       /* Paranoia .. completely zero it out. */
       VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
@@ -462,6 +469,9 @@
       VG_(threads)[i].sig_queue = NULL;
 
       os_state_init(&VG_(threads)[i]);
+      /* arch now lives on the heap, so the memset above no longer clears it. */
+      VG_(threads)[i].arch = VG_(arena_malloc)(VG_AR_CORE, arch_size);
+      VG_(memset)( VG_(threads)[i].arch, 0, arch_size );
       mostly_clear_thread_record(i);
 
       VG_(threads)[i].status                    = VgTs_Empty;
@@ -509,37 +519,35 @@
    layout requirements. */
 static void do_pre_run_checks ( volatile ThreadState* tst )
 {
-   Addr a_vex    = (Addr) & tst->arch.vex;
-   Addr a_vexsh  = (Addr) & tst->arch.vex_shadow;
-   Addr a_spill  = (Addr) & tst->arch.vex_spill;
-   UInt sz_vex   = (UInt) sizeof tst->arch.vex;
-   UInt sz_vexsh = (UInt) sizeof tst->arch.vex_shadow;
-   UInt sz_spill = (UInt) sizeof tst->arch.vex_spill;
+   Addr a_vex    = (Addr) & tst->arch->vex;
+   Addr a_vexsh  = (Addr) & tst->arch->vex_shadow;
+   Addr a_spill  = (Addr) & tst->arch->vex_spill;
+   UInt sz_vex   = (UInt) sizeof tst->arch->vex;
+   UInt sz_spill = (UInt) sizeof tst->arch->vex_spill;
 
-   if (0)
-   VG_(printf)("%p %d %p %d %p %d\n",
-               (void*)a_vex, sz_vex, (void*)a_vexsh, sz_vexsh,
-               (void*)a_spill, sz_spill );
+   /* The shadow area is tool-sized, so only its address is printed. */
+   if (0)
+   VG_(printf)("%p %d %p %d %p\n",
+               (void*)a_vex, sz_vex, (void*)a_spill, sz_spill,
+               (void*)a_vexsh);
 
    vg_assert(VG_IS_8_ALIGNED(sz_vex));
-   vg_assert(VG_IS_8_ALIGNED(sz_vexsh));
    vg_assert(VG_IS_16_ALIGNED(sz_spill));
 
    vg_assert(VG_IS_4_ALIGNED(a_vex));
    vg_assert(VG_IS_4_ALIGNED(a_vexsh));
    vg_assert(VG_IS_4_ALIGNED(a_spill));
 
-   vg_assert(sz_vex == sz_vexsh);
-   vg_assert(a_vex + sz_vex == a_vexsh);
+   vg_assert(a_vex + sz_vex == a_spill);
 
    vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
-   vg_assert(a_vex + 2 * sz_vex == a_spill);
+   vg_assert(a_vex + sz_vex + sz_spill == a_vexsh);
 
 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
    /* ppc guest_state vector regs must be 16 byte aligned for
       loads/stores */
-   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VR0));
-   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow.guest_VR0));
+   vg_assert(VG_IS_16_ALIGNED(& tst->arch->vex.guest_VR0));
+   vg_assert(VG_IS_16_ALIGNED(& tst->arch->vex_shadow[0].guest_VR0));
 #  endif   
 }
 
@@ -579,7 +587,7 @@
    */
    /* Clear any existing reservation that this thread might have made
       last time it was running. */
-   VG_(threads)[tid].arch.vex.guest_RESVN = 0;
+   VG_(threads)[tid].arch->vex.guest_RESVN = 0;
 #  endif   
 
 #  if defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
@@ -593,8 +601,8 @@
       Oh well. */
    { UWord host_sprg3;
      __asm__ __volatile__( "mfspr %0,259\n" : "=b"(host_sprg3) );
-    VG_(threads)[tid].arch.vex.guest_SPRG3_RO = host_sprg3;
-    vg_assert(sizeof(VG_(threads)[tid].arch.vex.guest_SPRG3_RO) == sizeof(void*));
+    VG_(threads)[tid].arch->vex.guest_SPRG3_RO = host_sprg3;
+    vg_assert(sizeof(VG_(threads)[tid].arch->vex.guest_SPRG3_RO) == sizeof(void*));
    }
 #  endif
 
@@ -621,7 +629,7 @@
    SCHEDSETJMP(
       tid, 
       jumped, 
-      trc = (UInt)VG_(run_innerloop)( (void*)&tst->arch.vex,
+      trc = (UInt)VG_(run_innerloop)( (void*)&tst->arch->vex,
                                       VG_(clo_profile_flags) > 0 ? 1 : 0 )
    );
 
@@ -685,7 +693,7 @@
         TRC_ value.
    */
    argblock[0] = (UWord)hcode;
-   argblock[1] = (UWord)&VG_(threads)[tid].arch.vex;
+   argblock[1] = (UWord)&VG_(threads)[tid].arch->vex;
    argblock[2] = 0; /* next guest IP is written here */
    argblock[3] = 0; /* guest state ptr afterwards is written here */
 
@@ -1011,7 +1019,7 @@
             for (q = 0; q < EmWarn_NUMBER; q++)
                counts[q] = 0;
          }
-         ew   = (VexEmWarn)VG_(threads)[tid].arch.vex.guest_EMWARN;
+         ew   = (VexEmWarn)VG_(threads)[tid].arch->vex.guest_EMWARN;
          what = (ew < 0 || ew >= EmWarn_NUMBER)
                    ? "unknown (?!)"
                    : LibVEX_EmWarn_string(ew);
@@ -1030,7 +1038,7 @@
       case VEX_TRC_JMP_EMFAIL: {
          VexEmWarn ew;
          HChar*    what;
-         ew   = (VexEmWarn)VG_(threads)[tid].arch.vex.guest_EMWARN;
+         ew   = (VexEmWarn)VG_(threads)[tid].arch->vex.guest_EMWARN;
          what = (ew < 0 || ew >= EmWarn_NUMBER)
                    ? "unknown (?!)"
                    : LibVEX_EmWarn_string(ew);
@@ -1069,8 +1077,8 @@
 
       case VEX_TRC_JMP_TINVAL:
          VG_(discard_translations)(
-            (Addr64)VG_(threads)[tid].arch.vex.guest_TISTART,
-            VG_(threads)[tid].arch.vex.guest_TILEN,
+            (Addr64)VG_(threads)[tid].arch->vex.guest_TISTART,
+            VG_(threads)[tid].arch->vex.guest_TILEN,
             "scheduler(VEX_TRC_JMP_TINVAL)"
          );
          if (0)
@@ -1098,7 +1106,7 @@
             address to zero, so if you don't guest_EIP, the thread will
             jump to zero afterwards and probably die as a result. */
 #        if defined(VGA_x86)
-         //FIXME: VG_(threads)[tid].arch.vex.guest_EIP = ....
+         //FIXME: VG_(threads)[tid].arch->vex.guest_EIP = ....
          //handle_sysenter_x86(tid);
          vg_assert2(0, "VG_(scheduler), phase 3: "
                        "sysenter_x86 on not yet implemented");
@@ -1169,8 +1177,8 @@
 #  error Unknown arch
 #endif
 
-#define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
-#define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
+#define CLREQ_ARGS(regs)   ((regs)->vex.VG_CLREQ_ARGS)
+#define CLREQ_RET(regs)    ((regs)->vex.VG_CLREQ_RET)
 #define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
 
 // These macros write a value to a client's thread register, and tell the
Index: coregrind/pub_core_threadstate.h
===================================================================
--- coregrind/pub_core_threadstate.h	(revision 6776)
+++ coregrind/pub_core_threadstate.h	(working copy)
@@ -96,12 +96,13 @@
       /* Saved machine context. */
       VexGuestArchState vex;
 
-      /* Saved shadow context. */
-      VexGuestArchState vex_shadow;
-
-      /* Spill area. */
+      /* Spill area for vex's register allocator */
       UChar vex_spill[LibVEX_N_SPILL_BYTES];
 
+      /* Saved shadow context. Part of the guest state vex works with,
+         but its size and layout is tool-dependent. */
+      VexGuestArchState vex_shadow[0];
+
       /* --- END vex-mandated guest state --- */
    } 
    ThreadArchState;
@@ -161,7 +162,7 @@
    VgSchedReturnCode exitreason;
 
    /* Architecture-specific thread state. */
-   ThreadArchState arch;
+   ThreadArchState *arch;
 
    /* This thread's blocked-signals mask.  Semantics is that for a
       signal to be delivered to this thread, the signal must not be
