My dyngen for Plan 9 is now capable of emitting the right kind of code for a

Plan 9 QEMU.  In addition to some tables and ancillary functions, the core of

dyngen's output is an enormous switch/case statement for copying individual

micro-op functions.  Here are examples of the output for both Dyngen/UNIX and

Dyngen/Plan 9:



Dyngen/UNIX:



case INDEX_op_sqrtps: {

    long param1, param2;

    extern void op_sqrtps();

extern char float32_sqrt;

extern char float32_sqrt;

extern char float32_sqrt;

extern char float32_sqrt;

    memcpy(gen_code_ptr, (void *)((char *)&op_sqrtps+0), 129);

    param1 = *opparam_ptr++;

    param2 = *opparam_ptr++;

    *(uint32_t *)(gen_code_ptr + 24) = (int32_t)param2 + 0;

    *(uint32_t *)(gen_code_ptr + 35) = (int32_t)param1 + 0;

    *(uint32_t *)(gen_code_ptr + 40) = (long)(&float32_sqrt) - (long)(gen_code_ptr
    + 40) + -4;

    *(uint32_t *)(gen_code_ptr + 62) = (long)(&float32_sqrt) - (long)(gen_code_ptr
    + 62) + -4;

    *(uint32_t *)(gen_code_ptr + 84) = (long)(&float32_sqrt) - (long)(gen_code_ptr
    + 84) + -4;

    *(uint32_t *)(gen_code_ptr + 106) = (long)(&float32_sqrt) -
    (long)(gen_code_ptr + 106) + -4;

    gen_code_ptr += 129;

}

break;



Dyngen/Plan 9:



	case INDEX_sqrtps:

	{

	    extern uchar __op_p9_push[];

	    memcpy(gen_code_ptr, __op_p9_push, 5);

	    dyngen_itab[51]->addr = (ulong)(gen_code_ptr + 178);

	    dynreloc(gen_code_ptr - 0, 1, 1, dyngen_itab, dyngen_nimport);

	    gen_code_ptr += 5;

	} {

	    extern uchar op_sqrtps[];

	    memcpy(gen_code_ptr, op_sqrtps, 173);

	    ulong param1 = *opparam_ptr++;

	    ulong param2 = *opparam_ptr++;

	    dynreloc(gen_code_ptr - 69183, 69341, 2, dyngen_itab, dyngen_nimport);
	    /* ri=27, ro=0xfff6239f */

	    dynreloc(gen_code_ptr - 69183, 69327, 1, dyngen_itab, dyngen_nimport);
	    /* ri=7, ro=0x0 */

	    dynreloc(gen_code_ptr - 69183, 69304, 2, dyngen_itab, dyngen_nimport);
	    /* ri=27, ro=0xfff623c4 */

	    dynreloc(gen_code_ptr - 69183, 69290, 1, dyngen_itab, dyngen_nimport);
	    /* ri=7, ro=0x0 */

	    dynreloc(gen_code_ptr - 69183, 69267, 2, dyngen_itab, dyngen_nimport);
	    /* ri=27, ro=0xfff623e9 */

	    dynreloc(gen_code_ptr - 69183, 69253, 1, dyngen_itab, dyngen_nimport);
	    /* ri=7, ro=0x0 */

	    dynreloc(gen_code_ptr - 69183, 69231, 2, dyngen_itab, dyngen_nimport);
	    /* ri=27, ro=0xfff6240d */

	    dyngen_itab[52]->addr = param2;

	    dynreloc(gen_code_ptr - 69183, 69206, 1, dyngen_itab, dyngen_nimport);
	    /* ri=52, ro=0x0 */

	    dyngen_itab[51]->addr = param1;

	    dynreloc(gen_code_ptr - 69183, 69195, 1, dyngen_itab, dyngen_nimport);
	    /* ri=51, ro=0x0 */

	    dynreloc(gen_code_ptr - 69183, 69188, 1, dyngen_itab, dyngen_nimport);
	    /* ri=7, ro=0x0 */

	    gen_code_ptr += 173;

	}

	break;





The first set of { }'s is the machinery for the push/op/push/op layout.  The

second set is the code to relocate our example micro-op, sqrtps, which, as you

can see, takes two parameters.  The ri=...  value provides (for debugging) the

index into the import table.  The ro=...  is the offset from that symbol; the

0xfff...  values are used for PC-indirect references.  Relevant import indices

are:



/* Index 7: env */

/* Index 27: float32_sqrt */

/* Index 51: __op_param1 [cfolder 1] */

/* Index 52: __op_param2 [cfolder 2] */



The accesses to the environment (env, import index 7) are there because we don't

have explicit register allocation.  Otherwise, it's a 1-for-1 match.  Oh yeah...  the somewhat funky

69183 and friends...  op_sqrtps starts 69183 bytes into the dlm.  Since

dynreloc() operates assuming a full dlm, we just back its pointer up to pretend

that we're still doing the dlm-at-once.  Hoorah.  In fairness, we could probably

leave the base alone and just adjust the offset...  this is just the first

thing that I thought of about a month ago, and I haven't taken the time to ensure

that it works "the other way."