My dyngen for Plan 9 is now capable of emitting the right kind of code for a
Plan 9 QEMU.  In addition to some tables and ancillary functions, the core of
dyngen's output is an enormous switch/case statement for copying individual
micro-op functions.  Here are examples of the output for both Dyngen/UNIX and
Dyngen/Plan 9:

Dyngen/UNIX:

case INDEX_op_sqrtps: {
    long param1, param2;
    extern void op_sqrtps();
extern char float32_sqrt;
extern char float32_sqrt;
extern char float32_sqrt;
extern char float32_sqrt;
    memcpy(gen_code_ptr, (void *)((char *)&op_sqrtps+0), 129);
    param1 = *opparam_ptr++;
    param2 = *opparam_ptr++;
    *(uint32_t *)(gen_code_ptr + 24) = (int32_t)param2 + 0;
    *(uint32_t *)(gen_code_ptr + 35) = (int32_t)param1 + 0;
    *(uint32_t *)(gen_code_ptr + 40) = (long)(&float32_sqrt) - (long)(gen_code_ptr
    + 40) + -4;
    *(uint32_t *)(gen_code_ptr + 62) = (long)(&float32_sqrt) - (long)(gen_code_ptr
    + 62) + -4;
    *(uint32_t *)(gen_code_ptr + 84) = (long)(&float32_sqrt) - (long)(gen_code_ptr
    + 84) + -4;
    *(uint32_t *)(gen_code_ptr + 106) = (long)(&float32_sqrt) -
    (long)(gen_code_ptr + 106) + -4;
    gen_code_ptr += 129;
}
break;

Dyngen/Plan 9:

	case INDEX_sqrtps:
	{
	    extern uchar __op_p9_push[];
	    memcpy(gen_code_ptr, __op_p9_push, 5);
	    dyngen_itab[51]->addr = (ulong)(gen_code_ptr + 178);
	    dynreloc(gen_code_ptr - 0, 1, 1, dyngen_itab, dyngen_nimport);
	    gen_code_ptr += 5;
	} {
	    extern uchar op_sqrtps[];
	    memcpy(gen_code_ptr, op_sqrtps, 173);
	    ulong param1 = *opparam_ptr++;
	    ulong param2 = *opparam_ptr++;
	    dynreloc(gen_code_ptr - 69183, 69341, 2, dyngen_itab, dyngen_nimport);
	    /* ri=27, ro=0xfff6239f */
	    dynreloc(gen_code_ptr - 69183, 69327, 1, dyngen_itab, dyngen_nimport);
	    /* ri=7, ro=0x0 */
	    dynreloc(gen_code_ptr - 69183, 69304, 2, dyngen_itab, dyngen_nimport);
	    /* ri=27, ro=0xfff623c4 */
	    dynreloc(gen_code_ptr - 69183, 69290, 1, dyngen_itab, dyngen_nimport);
	    /* ri=7, ro=0x0 */
	    dynreloc(gen_code_ptr - 69183, 69267, 2, dyngen_itab, dyngen_nimport);
	    /* ri=27, ro=0xfff623e9 */
	    dynreloc(gen_code_ptr - 69183, 69253, 1, dyngen_itab, dyngen_nimport);
	    /* ri=7, ro=0x0 */
	    dynreloc(gen_code_ptr - 69183, 69231, 2, dyngen_itab, dyngen_nimport);
	    /* ri=27, ro=0xfff6240d */
	    dyngen_itab[52]->addr = param2;
	    dynreloc(gen_code_ptr - 69183, 69206, 1, dyngen_itab, dyngen_nimport);
	    /* ri=52, ro=0x0 */
	    dyngen_itab[51]->addr = param1;
	    dynreloc(gen_code_ptr - 69183, 69195, 1, dyngen_itab, dyngen_nimport);
	    /* ri=51, ro=0x0 */
	    dynreloc(gen_code_ptr - 69183, 69188, 1, dyngen_itab, dyngen_nimport);
	    /* ri=7, ro=0x0 */
	    gen_code_ptr += 173;
	}
	break;


The first set of { }'s is the machinery for the push/op/push/op layout.  The
second set is the code to relocate our example micro-op, sqrtps, which, as you
can see, takes two parameters.  The ri=... value provides (for debugging) the
index into the import table.  The ro=... value is the offset from that symbol;
the 0xfff... values are used for PC-indirect references.  The relevant import
indices are:

/* Index 7: env */
/* Index 27: float32_sqrt */
/* Index 51: __op_param1 [cfolder 1] */
/* Index 52: __op_param2 [cfolder 2] */

The Plan 9 code accesses the environment (index 7, env) explicitly because we
don't have explicit register allocation.  Otherwise, it's a 1-for-1 match.  Oh
yeah, about the somewhat funky 69183 and friends: op_sqrtps starts 69183 bytes
into the dlm.  Since dynreloc() assumes that it is operating on a full dlm, we
just back its pointer up to pretend that we're still relocating the whole dlm
at once.  Hoorah.  In fairness, we could probably leave the base alone and just
fiddle with the offset; this is just the first thing that I thought of about a
month ago, and I haven't taken the time to ensure that it works "the other
way."