changelog shortlog tags branches changeset file revisions annotate raw help

Mercurial > hg > plan9front / sys/src/cmd/7l/asmout.c

revision 7144: d0b9ab522e8b
child 7145: 84d616f1d180
     1.1new file mode 100644
     1.2--- /dev/null
     1.3+++ b/sys/src/cmd/7l/asmout.c
     1.4@@ -0,0 +1,1709 @@
     1.5+#include	"l.h"
     1.6+
     /* Operand-size flag in bit 31: S32 leaves it clear, S64 sets it. */
     #define	S32	(0U<<31)
     #define	S64	(1U<<31)
     /* Register fields: the low five bits of X placed at bit 16, 5 or 0. */
     #define	Rm(X)	(((X)&31)<<16)
     #define	Rn(X)	(((X)&31)<<5)
     #define	Rd(X)	(((X)&31)<<0)
     /* Bit 29; asmout tests it before accepting an add/sub with no destination. */
     #define	Sbit	(1U<<29)

     /* Opcode templates: each argument is shifted into its field and ORed in. */
     #define	OPDP2(x)		(0<<30 | 0 << 29 | 0xd6<<21 | (x)<<10)
     #define	OPDP3(sf,op54,op31,o0)	((sf)<<31 | (op54)<<29 | 0x1B<<24 | (op31)<<21 | (o0)<<15)
     #define	OPBcc(x)		(0x2A<<25 | 0<<24 | 0<<4 | ((x)&15))
     #define	OPBLR(x)		(0x6B<<25 | 0<<23 | (x)<<21 | 0x1F<<16 | 0<<10)	/* x=0, JMP; 1, CALL; 2, RET */
     #define	SYSOP(l,op0,op1,crn,crm,op2,rt)	(0x354<<22 | (l)<<21 | (op0)<<19 | (op1)<<16 | (crn)<<12 | (crm)<<8 | (op2)<<5 | (rt))
     #define	SYSHINT(x)	SYSOP(0,0,3,2,0,(x),0x1F)

     /*
      * Load/store templates.  LDSTR12U and LDSTR9S differ only in bit 24
      * (set for the former, clear for the latter).  LD2STR clears bits
      * 22-23 of an opcode; asmout applies it to the value from opldrpp
      * when encoding the store form.
      */
     #define	LDSTR12U(sz,v,opc)	((sz)<<30 | 7<<27 | (v)<<26 | 1<<24 | (opc)<<22)
     #define	LDSTR9S(sz,v,opc)	((sz)<<30 | 7<<27 | (v)<<26 | 0<<24 | (opc)<<22)
     #define	LD2STR(o)	((o) & ~(3<<22))

     #define	LDSTX(sz,o2,l,o1,o0)	((sz)<<30 | 0x8<<24 | (o2)<<23 | (l)<<22 | (o1)<<21 | (o0)<<15)

     /* Floating-point templates; all share 0x1E in bits 24-28. */
     #define	FPCMP(m,s,type,op,op2)	((m)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 1<<21 | (op)<<14 | 8<<10 | (op2))
     #define	FPCCMP(m,s,type,op)	((m)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 1<<21 | 1<<10 | (op)<<4)
     #define	FPOP1S(m,s,type,op)	((m)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 1<<21 | (op)<<15 | 0x10<<10)
     #define	FPOP2S(m,s,type,op)	((m)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 1<<21 | (op)<<12 | 2<<10)
     #define	FPCVTI(sf,s,type,rmode,op)	((sf)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 1<<21 | (rmode)<<19 | (op)<<16 | 0<<10)
     #define	FPCVTF(sf,s,type,rmode,op,scale)	((sf)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 0<<21 | (rmode)<<19 | (op)<<16 | (scale)<<10)
     /*
      * ADR(p,o,rt): p goes in bit 31, the low two bits of offset o in
      * bits 29-30, the next 19 bits of o in bits 5-23, and rt is ORed
      * into the low bits.  o is parenthesized before the shift so that
      * an argument built with a low-precedence operator (e.g. a|b) is
      * shifted as a whole.
      */
     #define	ADR(p,o,rt)	((p)<<31 | ((o)&3)<<29 | (0x10<<24) | ((((o)>>2)&0x7FFFF)<<5) | (rt))

     /* Values 2 and 3 in the field at bit 13; asmout ORs LSL0_64 into opxrrr encodings. */
     #define	LSL0_32	(2<<13)
     #define	LSL0_64	(3<<13)
    1.37+
    1.38+static long	opbrr(int);
    1.39+static long	opbra(int);
    1.40+static long	oshrr(int, int, int);
    1.41+static long	olhrr(int, int, int);
    1.42+static long	olsr12u(long, long, int, int);
    1.43+static long	olsr9s(long, long, int, int);
    1.44+static long	opimm(int);
    1.45+static vlong	brdist(Prog*, int, int, int);
    1.46+static long	opbfm(int, int, int, int, int);
    1.47+static long	opextr(int, long, int, int, int);
    1.48+static long	opbit(int);
    1.49+static long	op0(int);
    1.50+static long	opstr12(int);
    1.51+static long	opstr9(int);
    1.52+static long	opldr9(int);
    1.53+static long	opxrrr(int);
    1.54+static long	olsxrr(int, int, int, int);
    1.55+static long	oprrr(int);
    1.56+static long	opirr(int);
    1.57+static long	opldr12(int);
    1.58+static long	opldrpp(int);
    1.59+static long	opload(int);
    1.60+static long	opstore(int);
    1.61+static long	omovlit(int, Prog*, Adr*, int);
    1.62+static int	movesize(int);
    1.63+static long	oaddi(long, long, int, int);
    1.64+
    1.65+/*
    1.66+ * valid pstate field values, and value to use in instruction
    1.67+ */
    1.68+static struct{
    1.69+	ulong	a;
    1.70+	ulong	b;
    1.71+} pstatefield[] = {
    1.72+D_SPSel,		(0<<16) | (4<<12) | (5<<5),
    1.73+D_DAIFSet,	(3<<16) | (4<<12) | (6<<5),
    1.74+D_DAIFClr,	(3<<16) | (4<<12) | (7<<5),
    1.75+};
    1.76+
    1.77+void
    1.78+asmout(Prog *p, Optab *o)
    1.79+{
         	/*
         	 * Build the machine words for instruction p as directed by
         	 * optab entry o.  The first switch dispatches on o->type and
         	 * fills in o1..o5 (all start as 0).  The second switch
         	 * dispatches on o->size (4, 8, ... 20, i.e. four per word) and
         	 * writes that many words in order with lputl, after printing
         	 * a listing line when debug['a'] is set.  Problems found while
         	 * encoding are reported through diag.
         	 */
    1.80+	long o1, o2, o3, o4, o5, v, hi;
    1.81+	ulong u;
    1.82+	vlong d;
    1.83+	int r, s, rf, rt, ra, nzcv, cond, i, as;
    1.84+	Mask *mask;
         	/* most recent type-62 (case) instruction; type-63 (bcase) entries are encoded relative to its pc */
    1.85+	static Prog *lastcase;
    1.86+
    1.87+	o1 = 0;
    1.88+	o2 = 0;
    1.89+	o3 = 0;
    1.90+	o4 = 0;
    1.91+	o5 = 0;
    1.92+	switch(o->type) {
    1.93+	default:
    1.94+		diag("unknown asm %d", o->type);
    1.95+		prasm(p);
    1.96+		break;
    1.97+
    1.98+	case 0:		/* pseudo ops */
    1.99+		break;
   1.100+
   1.101+	case 1:		/* op Rm,[Rn],Rd; default Rn=Rd -> op Rm<<0,[Rn,]Rd (shifted register) */
   1.102+		o1 = oprrr(p->as);
   1.103+		rf = p->from.reg;
   1.104+		rt = p->to.reg;
   1.105+		r = p->reg;
   1.106+		if(p->to.type == D_NONE)
   1.107+			rt = REGZERO;
   1.108+		if(r == NREG)
   1.109+			r = rt;
   1.110+		o1 |= (rf<<16) | (r<<5) | rt;
   1.111+		break;
   1.112+
   1.113+	case 2:		/* add/sub $(uimm12|uimm24)[,R],R; cmp $(uimm12|uimm24),R */
   1.114+		o1 = opirr(p->as);
   1.115+		rt = p->to.reg;
   1.116+		if(p->to.type == D_NONE){
   1.117+			if((o1 & Sbit) == 0)
   1.118+				diag("ineffective ZR destination\n%P", p);
   1.119+			rt = REGZERO;
   1.120+		}
   1.121+		r = p->reg;
   1.122+		if(r == NREG)
   1.123+			r = rt;
   1.124+		v = regoff(&p->from);
   1.125+		o1 = oaddi(o1, v, r, rt);
   1.126+		break;
   1.127+
   1.128+	case 3:		/* op R<<n[,R],R (shifted register) */
   1.129+		o1 = oprrr(p->as);
   1.130+		o1 |= p->from.offset;	/* includes reg, op, etc */
   1.131+		rt = p->to.reg;
   1.132+		if(p->to.type == D_NONE)
   1.133+			rt = REGZERO;
   1.134+		r = p->reg;
   1.135+		if(p->as == AMVN || p->as == AMVNW)
   1.136+			r = REGZERO;
   1.137+		else if(r == NREG)
   1.138+			r = rt;
   1.139+		o1 |= (r<<5) | rt;
   1.140+		break;
   1.141+
   1.142+	case 4:		/* mov $addcon, R; mov $recon, R; mov $racon, R */
   1.143+		o1 = opirr(p->as);
   1.144+		rt = p->to.reg;
   1.145+		r = o->param;
   1.146+		if(r == 0)
   1.147+			r = REGZERO;
   1.148+		v = regoff(&p->from);
   1.149+		if((v & 0xFFF000) != 0){
   1.150+			v >>= 12;
   1.151+			o1 |= 1<<22;	/* shift, by 12 */
   1.152+		}
   1.153+		o1 |= ((v& 0xFFF) << 10) | (r<<5) | rt;
   1.154+		break;
   1.155+
   1.156+	case 5:		/* b s; bl s */
   1.157+		o1 = opbra(p->as);
   1.158+		o1 |= brdist(p, 0, 26, 2);
   1.159+		break;
   1.160+
   1.161+	case 6:		/* b ,O(R); bl ,O(R) */
   1.162+		o1 = opbrr(p->as);
   1.163+		o1 |= p->to.reg << 5;
   1.164+		break;
   1.165+
   1.166+	case 7:		/* beq s */
   1.167+		o1 = opbra(p->as);
   1.168+		o1 |= brdist(p, 0, 19, 2)<<5;
   1.169+		break;
   1.170+
   1.171+	case 8:		/* lsl $c,[R],R -> ubfm $(W-1)-c,$(-c MOD (W-1)),Rn,Rd */
   1.172+		rt = p->to.reg;
   1.173+		rf = p->reg;
   1.174+		if(rf == NREG)
   1.175+			rf = rt;
   1.176+		v = p->from.offset;
   1.177+		switch(p->as){
   1.178+		case AASR:	o1 = opbfm(ASBFM, v, 63, rf, rt); break;
   1.179+		case AASRW:	o1 = opbfm(ASBFMW, v, 31, rf, rt); break;
   1.180+		case ALSL:	o1 = opbfm(AUBFM, (64-v)&63, 63-v, rf, rt); break;
   1.181+		case ALSLW:	o1 = opbfm(AUBFMW, (32-v)&31, 31-v, rf, rt); break;
   1.182+		case ALSR:	o1 = opbfm(AUBFM, v, 63, rf, rt); break;
   1.183+		case ALSRW:	o1 = opbfm(AUBFMW, v, 31, rf, rt); break;
   1.184+		case AROR:	o1 = opextr(AEXTR, v, rf, rf, rt); break;
   1.185+		case ARORW:	o1 = opextr(AEXTRW, v, rf, rf, rt); break;
   1.186+		default:
   1.187+			diag("bad shift $con\n%P", curp);
   1.188+			break;
   1.189+		}
   1.190+		break;
   1.191+
   1.192+	case 9:		/* lsl Rm,[Rn],Rd -> lslv Rm, Rn, Rd */
   1.193+		o1 = oprrr(p->as);
   1.194+		r = p->reg;
   1.195+		if(r == NREG)
   1.196+			r = p->to.reg;
   1.197+		o1 |= (p->from.reg << 16) | (r<<5) | p->to.reg;
   1.198+		break;
   1.199+
   1.200+	case 10:	/* brk/hvc/.../svc [$con] */
   1.201+		o1 = opimm(p->as);
   1.202+		if(p->to.type != D_NONE)
   1.203+			o1 |= (p->to.offset & 0xffff)<<5;
   1.204+		break;
   1.205+
   1.206+	case 11:	/* dword */
   1.207+		switch(aclass(&p->to)) {
   1.208+		case C_VCON:
   1.209+		case C_ZCON:
   1.210+		case C_LCON:
   1.211+			if(!dlm)
   1.212+				break;
   1.213+			if(p->to.name != D_EXTERN && p->to.name != D_STATIC)
   1.214+				break;
         			/* fall through: with dlm set, an extern/static constant is handled like C_ADDR */
   1.215+		case C_ADDR:
   1.216+			if(p->to.sym->type == SUNDEF)
   1.217+				ckoff(p->to.sym, p->to.offset);
   1.218+			dynreloc(p->to.sym, p->pc, 1);
   1.219+		}
         		/* low half in o1, high half in o2 */
   1.220+		o1 = instoffset;
   1.221+		o2 = instoffset >> 32;
   1.222+		break;
   1.223+
   1.224+	case 12:	/* movT $lcon, reg */
   1.225+		o1 = omovlit(p->as, p, &p->from, p->to.reg);
   1.226+		break;
   1.227+
   1.228+	case 13:	/* addop $lcon, [R], R (64 bit literal); cmp $lcon,R -> addop $lcon,R, ZR */
   1.229+		o1 = omovlit(AMOV, p, &p->from, REGTMP);
   1.230+		if(!o1)
   1.231+			break;
   1.232+		rt = p->to.reg;
   1.233+		if(p->to.type == D_NONE)
   1.234+			rt = REGZERO;
   1.235+		r = p->reg;
   1.236+		if(r == NREG)
   1.237+			r = rt;
         		/* when SP is a real operand, use the opxrrr (extended register) encoding instead of oprrr */
   1.238+		if(p->to.type != D_NONE && (p->to.reg == REGSP || r == REGSP)){
   1.239+			o2 = opxrrr(p->as);
   1.240+			o2 |= REGTMP<<16;
   1.241+			o2 |= LSL0_64;
   1.242+		}else{
   1.243+			o2 = oprrr(p->as);
   1.244+			o2 |= REGTMP << 16;	/* shift is 0 */
   1.245+		}
   1.246+		o2 |= r << 5;
   1.247+		o2 |= rt;
   1.248+		break;
   1.249+
   1.250+	case 14:	/* word */
   1.251+		if(aclass(&p->to) == C_ADDR)
   1.252+			diag("address constant needs DWORD\n%P", p);
   1.253+		o1 = instoffset;
   1.254+		break;
   1.255+
   1.256+	case 15:	/* mul/mneg/umulh/umull r,[r,]r; madd/msub Rm,Rn,Ra,Rd */
   1.257+		o1 = oprrr(p->as);
   1.258+		rf = p->from.reg;
   1.259+		rt = p->to.reg;
   1.260+		if(p->from3.type == D_REG){
   1.261+			r = p->from3.reg;
   1.262+			ra = p->reg;
   1.263+			if(ra == NREG)
   1.264+				ra = REGZERO;
   1.265+		}else{
   1.266+			r = p->reg;
   1.267+			if(r == NREG)
   1.268+				r = rt;
   1.269+			ra = REGZERO;
   1.270+		}
   1.271+		o1 |= (rf<<16) | (ra<<10) | (r<<5) | rt;
   1.272+		break;
   1.273+
   1.274+	case 16:	/* XremY R[,R],R -> XdivY; XmsubY */
   1.275+		o1 = oprrr(p->as);
   1.276+		rf = p->from.reg;
   1.277+		rt = p->to.reg;
   1.278+		r = p->reg;
   1.279+		if(r == NREG)
   1.280+			r = rt;
         		/* first word writes its result to REGTMP; second word combines REGTMP, rf and r into rt */
   1.281+		o1 |= (rf<<16) | (r<<5) | REGTMP;
   1.282+		o2 = oprrr(AMSUBW);
   1.283+		o2 |= o1 & (1<<31);	/* same size */
   1.284+		o2 |= (rf<<16) | (r<<10) | (REGTMP<<5) | rt;
   1.285+		break;
   1.286+
   1.287+	case 17:		/* op Rm,[Rn],Rd; default Rn=ZR */
   1.288+		o1 = oprrr(p->as);
   1.289+		rf = p->from.reg;
   1.290+		rt = p->to.reg;
   1.291+		r = p->reg;
   1.292+		if(p->to.type == D_NONE)
   1.293+			rt = REGZERO;
   1.294+		if(r == NREG)
   1.295+			r = REGZERO;
   1.296+		o1 |= (rf<<16) | (r<<5) | rt;
   1.297+		break;
   1.298+
   1.299+	case 18:	/* csel cond,Rn,Rm,Rd; cinc/cinv/cneg cond,Rn,Rd; cset cond,Rd */
   1.300+		o1 = oprrr(p->as);
   1.301+		cond = p->from.reg;
   1.302+		r = p->reg;
   1.303+		if(r != NREG){
   1.304+			if(p->from3.type == D_NONE){
   1.305+				/* CINC/CINV/CNEG */
   1.306+				rf = r;
   1.307+				cond ^= 1;
   1.308+			}else
   1.309+				rf = p->from3.reg;	/* CSEL */
   1.310+		}else{
   1.311+			/* CSET */
   1.312+			if(p->from3.type != D_NONE)
   1.313+				diag("invalid combination\n%P", p);
   1.314+			r = rf = REGZERO;
   1.315+			cond ^= 1;
   1.316+		}
   1.317+		rt = p->to.reg;
   1.318+		o1 |= (r<<16) | (cond<<12) | (rf<<5) | rt;
   1.319+		break;
   1.320+
   1.321+	case 19:	/* CCMN cond, (Rm|uimm5),Rn, uimm4 -> ccmn Rn,Rm,uimm4,cond */
   1.322+		nzcv = p->to.offset;
   1.323+		cond = p->from.reg;
   1.324+		if(p->from3.type == D_REG){
   1.325+			o1 = oprrr(p->as);
   1.326+			rf = p->from3.reg;	/* Rm */
   1.327+		}else{
   1.328+			o1 = opirr(p->as);
   1.329+			rf = p->from3.offset & 0x1F;
   1.330+		}
   1.331+		o1 |= (rf<<16) | (cond<<12) | (p->reg<<5) | nzcv;
   1.332+		break;
   1.333+
   1.334+	case 20:	/* movT R,O(R) -> strT */
   1.335+		v = regoff(&p->to);
   1.336+		r = p->to.reg;
   1.337+		if(r == NREG)
   1.338+			r = o->param;
   1.339+		if(v < 0){	/* unscaled 9-bit signed */
   1.340+			o1 = olsr9s(opstr9(p->as), v, r, p->from.reg);
   1.341+		}else{
   1.342+			v = offsetshift(v, o->a3);
   1.343+			o1 = olsr12u(opstr12(p->as), v, r, p->from.reg);
   1.344+		}
   1.345+		break;
   1.346+
   1.347+	case 21:	/* movT O(R),R -> ldrT */
   1.348+		v = regoff(&p->from);
   1.349+		r = p->from.reg;
   1.350+		if(r == NREG)
   1.351+			r = o->param;
   1.352+		if(v < 0){	/* unscaled 9-bit signed */
   1.353+			o1 = olsr9s(opldr9(p->as), v, r, p->to.reg);
   1.354+		}else{
   1.355+			v = offsetshift(v, o->a1);
   1.356+			//print("offset=%lld v=%ld a1=%d\n", instoffset, v, o->a1);
   1.357+			o1 = olsr12u(opldr12(p->as), v, r, p->to.reg);
   1.358+		}
   1.359+		break;
   1.360+
   1.361+	case 22:	/* movT (R)O!,R; movT O(R)!, R -> ldrT */
   1.362+		v = p->from.offset;
   1.363+		if(v < -256 || v > 255)
   1.364+			diag("offset out of range\n%P", p);
   1.365+		o1 = opldrpp(p->as);
         		/* bits 10-11 are 1 for D_XPOST operands and 3 for the other form */
   1.366+		if(p->from.type == D_XPOST)
   1.367+			o1 |= 1<<10;
   1.368+		else
   1.369+			o1 |= 3<<10;
   1.370+		o1 |= ((v&0x1FF)<<12) | (p->from.reg<<5) | p->to.reg;
   1.371+		break;
   1.372+
   1.373+	case 23:	/* movT R,(R)O!; movT O(R)!, R -> strT */
   1.374+		v = p->to.offset;
   1.375+		if(v < -256 || v > 255)
   1.376+			diag("offset out of range\n%P", p);
         		/* start from the load opcode and clear bits 22-23 with LD2STR */
   1.377+		o1 = LD2STR(opldrpp(p->as));
   1.378+		if(p->to.type == D_XPOST)
   1.379+			o1 |= 1<<10;
   1.380+		else
   1.381+			o1 |= 3<<10;
   1.382+		o1 |= ((v&0x1FF)<<12) | (p->to.reg<<5) | p->from.reg;
   1.383+		break;
   1.384+
   1.385+	case 24:		/* mov/mvn Rs,Rd -> add $0,Rs,Rd or orr Rs,ZR,Rd */
   1.386+		rf = p->from.reg;
   1.387+		rt = p->to.reg;
         		/* s is nonzero when either operand is REGSP */
   1.388+		s = rf == REGSP || rt == REGSP;
   1.389+		if(p->as == AMVN || p->as == AMVNW){
   1.390+			if(s)
   1.391+				diag("illegal SP reference\n%P", p);
   1.392+			o1 = oprrr(p->as);
   1.393+			o1 |= (rf<<16) | (REGZERO<<5) | rt;
   1.394+		}else if(s){
   1.395+			o1 = opirr(p->as);
   1.396+			o1 |= (rf<<5) | rt;
   1.397+		}else{
   1.398+			o1 = oprrr(p->as);
   1.399+			o1 |= (rf<<16) | (REGZERO<<5) | rt;
   1.400+		}
   1.401+		break;
   1.402+
   1.403+	case 25: /* negX Rs, Rd -> subX Rs<<0, ZR, Rd */
   1.404+		o1 = oprrr(p->as);
   1.405+		rf = p->from.reg;
   1.406+		rt = p->to.reg;
   1.407+		o1 |= (rf<<16) | (REGZERO<<5) | rt;
   1.408+		break;
   1.409+
   1.410+	case 26: /* negX Rm<<s, Rd -> subX Rm<<s, ZR, Rd */
   1.411+		o1 = oprrr(p->as);
   1.412+		o1 |= p->from.offset;	/* includes reg, op, etc */
   1.413+		rt = p->to.reg;
   1.414+		o1 |= (REGZERO<<5) | rt;
   1.415+		break;
   1.416+
   1.417+	case 27:		/* op Rm<<n[,Rn],Rd (extended register) */
   1.418+		o1 = opxrrr(p->as);
   1.419+		if(p->from.type == D_EXTREG)
   1.420+			o1 |= p->from.offset;	/* includes reg, op, etc */
   1.421+		else
   1.422+			o1 |= p->from.reg << 16;
   1.423+		rt = p->to.reg;
   1.424+		if(p->to.type == D_NONE)
   1.425+			rt = REGZERO;
   1.426+		r = p->reg;
   1.427+		if(r == NREG)
   1.428+			r = rt;
   1.429+		o1 |= (r<<5) | rt;
   1.430+		break;
   1.431+
   1.432+	case 28:	/* logop $lcon, [R], R (64 bit literal) */
   1.433+		o1 = omovlit(AMOV, p, &p->from, REGTMP);
   1.434+		if(!o1)
   1.435+			break;
   1.436+		r = p->reg;
   1.437+		if(r == NREG)
   1.438+			r = p->to.reg;
   1.439+		o2 = oprrr(p->as);
   1.440+		o2 |= REGTMP << 16;	/* shift is 0 */
   1.441+		o2 |= r << 5;
   1.442+		o2 |= p->to.reg;
   1.443+		break;
   1.444+
   1.445+	case 29:	/* op Rn, Rd */
   1.446+		o1 = oprrr(p->as);
   1.447+		o1 |= p->from.reg<<5 | p->to.reg;
   1.448+		break;
   1.449+
   1.450+	case 30:	/* movT R,L(R) -> strT */
   1.451+		s = movesize(o->as);
   1.452+		if(s < 0)
   1.453+			diag("unexpected long move, op %A tab %A\n%P", p->as, o->as, p);
   1.454+		v = regoff(&p->to);
   1.455+		if(v < 0)
   1.456+			diag("negative large offset\n%P", p);
   1.457+		if((v & ((1<<s)-1)) != 0)
   1.458+			diag("misaligned offset\n%P", p);
         		/*
         		 * hi is v with the 12-bit field scaled by s removed.  o1 hands
         		 * hi, r and REGTMP to oaddi with the AADD opcode (presumably
         		 * REGTMP = r+hi); o2 then stores through REGTMP using the
         		 * remaining offset (v-hi)>>s.
         		 */
   1.459+		hi = v - (v & (0xFFF<<s));
   1.460+		if((hi & 0xFFF) != 0)
   1.461+			diag("internal: miscalculated offset %ld [%d]\n%P", v, s, p);
   1.462+		//fprint(2, "v=%ld (%#lux) s=%d hi=%ld (%#lux) v'=%ld (%#lux)\n", v, v, s, hi, hi, ((v-hi)>>s)&0xFFF, ((v-hi)>>s)&0xFFF);
   1.463+		r = p->to.reg;
   1.464+		if(r == NREG)
   1.465+			r = o->param;
   1.466+		o1 = oaddi(opirr(AADD), hi, r, REGTMP);
   1.467+		o2 = olsr12u(opstr12(p->as), ((v-hi)>>s)&0xFFF, REGTMP, p->from.reg);
   1.468+		break;
   1.469+
   1.470+	case 31:	/* movT L(R), R -> ldrT */
   1.471+		s = movesize(o->as);
   1.472+		if(s < 0)
   1.473+			diag("unexpected long move, op %A tab %A\n%P", p->as, o->as, p);
   1.474+		v = regoff(&p->from);
   1.475+		if(v < 0)
   1.476+			diag("negative large offset\n%P", p);
   1.477+		if((v & ((1<<s)-1)) != 0)
   1.478+			diag("misaligned offset\n%P", p);
         		/* same hi / remainder split as type 30, here for the load form */
   1.479+		hi = v - (v & (0xFFF<<s));
   1.480+		if((hi & 0xFFF) != 0)
   1.481+			diag("internal: miscalculated offset %ld [%d]\n%P", v, s, p);
   1.482+		//fprint(2, "v=%ld (%#lux) s=%d hi=%ld (%#lux) v'=%ld (%#lux)\n", v, v, s, hi, hi, ((v-hi)>>s)&0xFFF, ((v-hi)>>s)&0xFFF);
   1.483+		r = p->from.reg;
   1.484+		if(r == NREG)
   1.485+			r = o->param;
   1.486+		o1 = oaddi(opirr(AADD), hi, r, REGTMP);
   1.487+		o2 = olsr12u(opldr12(p->as), ((v-hi)>>s)&0xFFF, REGTMP, p->to.reg);
   1.488+		break;
   1.489+
   1.490+	case 32:	/* mov $con, R -> movz/movn */
   1.491+		r = 32;
   1.492+		if(p->as == AMOV)
   1.493+			r = 64;
   1.494+		d = p->from.offset;
         		/*
         		 * If movcon rejects d (s < 0 or s >= r), retry with the
         		 * complement and use AMOVN/AMOVNW instead of AMOVZ/AMOVZW.
         		 */
   1.495+		s = movcon(d);
   1.496+		if(s < 0 || s >= r){
   1.497+			d = ~d;
   1.498+			s = movcon(d);
   1.499+			if(s < 0 || s >= r)
   1.500+				diag("impossible move wide: %#llux\n%P", p->from.offset, p);
   1.501+			if(p->as == AMOV)
   1.502+				o1 = opirr(AMOVN);
   1.503+			else
   1.504+				o1 = opirr(AMOVNW);
   1.505+		}else{
   1.506+			if(p->as == AMOV)
   1.507+				o1 = opirr(AMOVZ);
   1.508+			else
   1.509+				o1 = opirr(AMOVZW);
   1.510+		}
   1.511+		rt = p->to.reg;
   1.512+		o1 |= (((d>>(s*16))& 0xFFFF) << 5) | ((s&3)<<21) | rt;
   1.513+		break;
   1.514+
   1.515+	case 33:	/* movk $uimm16 << pos */
   1.516+		o1 = opirr(p->as);
   1.517+		d = p->from.offset;
   1.518+		if((d>>16) != 0)
   1.519+			diag("requires uimm16\n%P", p);
   1.520+		s = 0;
   1.521+		if(p->from3.type != D_NONE){
   1.522+			if(p->from3.type != D_CONST)
   1.523+				diag("missing bit position\n%P", p);
   1.524+			s = p->from3.offset;
         			/* s must be a multiple of 16; the test also divides it by 16 in place, leaving an index 0..3 (0..1 without S64) */
   1.525+			if((s&0xF) != 0 || (s /= 16) >= 4 || (o1&S64) == 0 && s >= 2)
   1.526+				diag("illegal bit position\n%P", p);
   1.527+		}
   1.528+		rt = p->to.reg;
   1.529+		o1 |= ((d & 0xFFFF) << 5) | ((s&3)<<21) | rt;
   1.530+		break;
   1.531+		
   1.532+	case 34:	/* mov $lacon,R */
   1.533+		o1 = omovlit(AMOV, p, &p->from, REGTMP);
   1.534+		if(!o1)
   1.535+			break;
   1.536+
   1.537+		o2 = opxrrr(AADD);
   1.538+		o2 |= REGTMP << 16;
   1.539+		o2 |= LSL0_64;
   1.540+		r = p->from.reg;
   1.541+		if(r == NREG)
   1.542+			r = o->param;
   1.543+		o2 |= r << 5;
   1.544+		o2 |= p->to.reg;
   1.545+		break;
   1.546+
   1.547+	case 35:	/* mov SPR,R -> mrs */
   1.548+		o1 = oprrr(AMRS);
   1.549+		v = p->from.offset;
         		/* apart from bits 19-20, the register value must not overlap bits already set in the opcode */
   1.550+		if((o1 & (v & ~(3<<19))) != 0)
   1.551+			diag("MRS register value overlap\n%P", p);
   1.552+		o1 |= v;
   1.553+		o1 |= p->to.reg;
   1.554+		break;
   1.555+
   1.556+	case 36:	/* mov R,SPR */
   1.557+		o1 = oprrr(AMSR);
   1.558+		v = p->to.offset;
   1.559+		if((o1 & (v & ~(3<<19))) != 0)
   1.560+			diag("MSR register value overlap\n%P", p);
   1.561+		o1 |= v;
   1.562+		o1 |= p->from.reg;
   1.563+		break;
   1.564+
   1.565+	case 37:	/* mov $con,PSTATEfield -> MSR [immediate] */
   1.566+		if((p->from.offset&~(uvlong)0xF) != 0)
   1.567+			diag("illegal immediate for PSTATE field\n%P", p);
   1.568+		o1 = opirr(AMSR);
   1.569+		o1 |= (p->from.offset&0xF) << 8;	/* Crm */
         		/* look the field up in pstatefield; v stays 0 when it is not listed */
   1.570+		v = 0;
   1.571+		for(i = 0; i < nelem(pstatefield); i++)
   1.572+			if(pstatefield[i].a == p->to.offset){
   1.573+				v = pstatefield[i].b;
   1.574+				break;
   1.575+			}
   1.576+		if(v == 0)
   1.577+			diag("illegal PSTATE field for immediate move\n%P", p);
   1.578+		o1 |= v;
   1.579+		break;
   1.580+
   1.581+	case 38:	/* clrex [$imm] */
   1.582+		o1 = opimm(p->as);
   1.583+		if(p->to.type == D_NONE)
   1.584+			o1 |= 0xF<<8;
   1.585+		else
   1.586+			o1 |= (p->to.offset & 0xF)<<8;
   1.587+		break;
   1.588+
   1.589+	case 39:	/* cbz R, rel */
   1.590+		o1 = opirr(p->as);
   1.591+		o1 |= p->from.reg;
   1.592+		o1 |= brdist(p, 0, 19, 2) << 5;
   1.593+		break;
   1.594+
   1.595+	case 40:	/* tbz */
   1.596+		o1 = opirr(p->as);
   1.597+		v = p->from.offset;
   1.598+		if(v < 0 || v > 63)
   1.599+			diag("illegal bit number\n%P", p);
         		/* bit 5 of the bit number goes to bit 31, the low five bits to bits 19-23 */
   1.600+		o1 |= ((v&0x20)<<(31-5)) | ((v&0x1F)<<19);
   1.601+		o1 |= brdist(p, 0, 14, 2)<<5;
   1.602+		o1 |= p->reg;
   1.603+		break;
   1.604+
   1.605+	case 41:	/* eret, nop, others with no operands */
   1.606+		o1 = op0(p->as);
   1.607+		break;
   1.608+
   1.609+	case 42:	/* bfm R,r,s,R */
   1.610+		o1 = opbfm(p->as, p->from.offset, p->from3.offset, p->reg, p->to.reg);
   1.611+		break;
   1.612+
   1.613+	case 43:	/* bfm aliases */
   1.614+		r = p->from.offset;
   1.615+		s = p->from3.offset;
   1.616+		rf = p->reg;
   1.617+		rt = p->to.reg;
   1.618+		if(rf == NREG)
   1.619+			rf = rt;
   1.620+		switch(p->as){
   1.621+		case ABFI:		o1 = opbfm(ABFM, 64-r, s-1, rf, rt); break;
   1.622+		case ABFIW:	o1 = opbfm(ABFMW, 32-r, s-1, rf, rt); break;
   1.623+		case ABFXIL:	o1 = opbfm(ABFM, r, r+s-1, rf, rt); break;
   1.624+		case ABFXILW:	o1 = opbfm(ABFMW, r, r+s-1, rf, rt); break;
   1.625+		case ASBFIZ:	o1 = opbfm(ASBFM, 64-r, s-1, rf, rt); break;
   1.626+		case ASBFIZW:	o1 = opbfm(ASBFMW, 32-r, s-1, rf, rt); break;
   1.627+		case ASBFX:	o1 = opbfm(ASBFM, r, r+s-1, rf, rt); break;
   1.628+		case ASBFXW:	o1 = opbfm(ASBFMW, r, r+s-1, rf, rt); break;
   1.629+		case AUBFIZ:	o1 = opbfm(AUBFM, 64-r, s-1, rf, rt); break;
   1.630+		case AUBFIZW:	o1 = opbfm(AUBFMW, 32-r, s-1, rf, rt); break;
   1.631+		case AUBFX:	o1 = opbfm(AUBFM, r, r+s-1, rf, rt); break;
   1.632+		case AUBFXW:	o1 = opbfm(AUBFMW, r, r+s-1, rf, rt); break;
   1.633+		default:
   1.634+			diag("bad bfm alias\n%P", curp);
   1.635+			break;
   1.636+		}
   1.637+		break;
   1.638+
   1.639+	case 44:	/* extr $b, Rn, Rm, Rd */
   1.640+		o1 = opextr(p->as, p->from.offset, p->from3.reg, p->reg, p->to.reg);
   1.641+		break;
   1.642+
   1.643+	case 45:	/* sxt/uxt[bhw] R,R; movT R,R -> sxtT R,R */
   1.644+		rf = p->from.reg;
   1.645+		rt = p->to.reg;
   1.646+		as = p->as;
   1.647+		if(rf == REGZERO)
   1.648+			as = AMOVWU;	/* clearer in disassembly */
   1.649+		switch(as){
   1.650+		case AMOVB:
   1.651+		case ASXTB:	o1 = opbfm(ASBFM, 0, 7, rf, rt); break;
   1.652+		case AMOVH:
   1.653+		case ASXTH:	o1 = opbfm(ASBFM, 0, 15, rf, rt); break;
   1.654+		case AMOVW:
   1.655+		case ASXTW:	o1 = opbfm(ASBFM, 0, 31, rf, rt); break;
   1.656+		case AMOVBU:
   1.657+		case AUXTB:	o1 = opbfm(AUBFM, 0, 7, rf, rt); break;
   1.658+		case AMOVHU:
   1.659+		case AUXTH:	o1 = opbfm(AUBFM, 0, 15, rf, rt); break;
   1.660+		case AMOVWU:	o1 = oprrr(as) | (rf<<16) | (REGZERO<<5) | rt; break;
   1.661+		case AUXTW:	o1 = opbfm(AUBFM, 0, 31, rf, rt); break;
   1.662+		case ASXTBW:	o1 = opbfm(ASBFMW, 0, 7, rf, rt); break;
   1.663+		case ASXTHW:	o1 = opbfm(ASBFMW, 0, 15, rf, rt); break;
   1.664+		case AUXTBW:	o1 = opbfm(AUBFMW, 0, 7, rf, rt); break;
   1.665+		case AUXTHW:	o1 = opbfm(AUBFMW, 0, 15, rf, rt); break;
   1.666+		default:	diag("bad sxt %A", as); break;
   1.667+		}
   1.668+		break;
   1.669+
   1.670+	case 46:	/* cls */
   1.671+		o1 = opbit(p->as);
   1.672+		o1 |= p->from.reg<<5;
   1.673+		o1 |= p->to.reg;
   1.674+		break;
   1.675+
   1.676+	case 47:	/* movT R,V(R) -> strT (huge offset) */
   1.677+		o1 = omovlit(AMOVW, p, &p->to, REGTMP);
   1.678+		if(!o1)
   1.679+			break;
   1.680+		r = p->to.reg;
   1.681+		if(r == NREG)
   1.682+			r = o->param;
   1.683+		o2 = olsxrr(p->as, REGTMP,r, p->from.reg);
   1.684+		break;
   1.685+
   1.686+	case 48:	/* movT V(R), R -> ldrT (huge offset) */
   1.687+		o1 = omovlit(AMOVW, p, &p->from, REGTMP);
   1.688+		if(!o1)
   1.689+			break;
   1.690+		r = p->from.reg;
   1.691+		if(r == NREG)
   1.692+			r = o->param;
   1.693+		o2 = olsxrr(p->as, REGTMP,r, p->to.reg);
   1.694+		break;
   1.695+
   1.696+	case 50:	/* sys/sysl */
   1.697+		o1 = opirr(p->as);
   1.698+		if((p->from.offset & ~SYSARG4(0x7, 0xF, 0xF, 0x7)) != 0)
   1.699+			diag("illegal SYS argument\n%P", p);
   1.700+		o1 |= p->from.offset;
         		/* register field: p->to.reg if given, else p->reg, else 0x1F */
   1.701+		if(p->to.type == D_REG)
   1.702+			o1 |= p->to.reg;
   1.703+		else if(p->reg != NREG)
   1.704+			o1 |= p->reg;
   1.705+		else
   1.706+			o1 |= 0x1F;
   1.707+		break;
   1.708+
   1.709+	case 51:	/* dmb */
   1.710+		o1 = opirr(p->as);
   1.711+		if(p->from.type == D_CONST)
   1.712+			o1 |= (p->from.offset&0xF)<<8;
   1.713+		break;
   1.714+
   1.715+	case 52:	/* hint */
   1.716+		o1 = opirr(p->as);
   1.717+		o1 |= (p->from.offset&0x7F)<<5;
   1.718+		break;
   1.719+
   1.720+	case 53:	/* and/or/eor/bic/... $bimmN, Rn, Rd -> op (N,r,s), Rn, Rd */
   1.721+		as = p->as;
   1.722+		rt = p->to.reg;
   1.723+		r = p->reg;
   1.724+		if(r == NREG)
   1.725+			r = rt;
         		/* AMOV/AMOVW with a mask immediate are encoded as AORR/AORRW from REGZERO */
   1.726+		if(as == AMOV){
   1.727+			as = AORR;
   1.728+			r = REGZERO;
   1.729+		}else if(as == AMOVW){
   1.730+			as = AORRW;
   1.731+			r = REGZERO;
   1.732+		}
   1.733+		o1 = opirr(as);
   1.734+		s = o1 & S64? 64: 32;
   1.735+		mask = findmask(p->from.offset);
         		/* not found as given: retry with the low half duplicated into the high half */
   1.736+		if(mask == nil)
   1.737+			mask = findmask(p->from.offset | (p->from.offset<<32));
   1.738+		if(mask != nil){
   1.739+			o1 |= ((mask->r&(s-1))<<16) | (((mask->s-1)&(s-1))<<10);
   1.740+			if(s == 64){
   1.741+				if(mask->e == 64 && ((uvlong)p->from.offset>>32) != 0)
   1.742+					o1 |= 1<<22;
   1.743+			}else{
         				/* 32-bit op: the high half of the constant must be all zeros or all ones */
   1.744+				u = (uvlong)p->from.offset >> 32;
   1.745+				if(u != 0 && u != 0xFFFFFFFF)
   1.746+					diag("mask needs 64 bits %#llux\n%P", p->from.offset, p);
   1.747+			}
   1.748+		}else
   1.749+			diag("invalid mask %#llux\n%P", p->from.offset, p);	/* probably shouldn't happen */
   1.750+		o1 |= (r<<5) | rt;
   1.751+		break;
   1.752+
   1.753+	case 54:	/* floating point arith */
   1.754+		o1 = oprrr(p->as);
   1.755+		if(p->from.type == D_FCONST) {
   1.756+			rf = chipfloat(p->from.ieee);
         			/*
         			 * NOTE(review): "|| 1" makes this test always true, so every
         			 * D_FCONST operand is diagnosed and rf is reset to 0 whatever
         			 * chipfloat returned.  This looks like a deliberate way of
         			 * disabling floating-point immediates here -- confirm before
         			 * changing.
         			 */
   1.757+			if(rf < 0 || 1){
   1.758+				diag("invalid floating-point immediate\n%P", p);
   1.759+				rf = 0;
   1.760+			}
   1.761+			rf |= (1<<3);
   1.762+		} else
   1.763+			rf = p->from.reg;
   1.764+		rt = p->to.reg;
   1.765+		r = p->reg;
   1.766+		if((o1 & (0x1F<<24)) == (0x1E<<24) && (o1 & (1<<11)) == 0){	/* monadic */
   1.767+			r = rf;
   1.768+			rf = 0;
   1.769+		}else if(r == NREG)
   1.770+			r = rt;
   1.771+		o1 |= (rf << 16) | (r<<5) | rt;
   1.772+		break;
   1.773+
   1.774+	case 56:	/* floating point compare */
   1.775+		o1 = oprrr(p->as);
   1.776+		if(p->from.type == D_FCONST) {
         			/* the only constant accepted is one whose h and l words are both zero */
   1.777+			if(p->from.ieee->h != 0 || p->from.ieee->l != 0)
   1.778+				diag("invalid floating-point immediate\n%P", p);
   1.779+			o1 |= 8;	/* zero */
   1.780+			rf = 0;
   1.781+		}else
   1.782+			rf = p->from.reg;
   1.783+		rt = p->reg;
   1.784+		o1 |= rf<<16  | rt<<5;
   1.785+		break;
   1.786+
   1.787+	case 57:	/* floating point conditional compare */
   1.788+		o1 = oprrr(p->as);
   1.789+		cond = p->from.reg;
   1.790+		nzcv = p->to.offset;
   1.791+		if(nzcv & ~0xF)
   1.792+			diag("implausible condition\n%P", p);
   1.793+		rf = p->reg;
   1.794+		if(p->from3.type != D_FREG)
   1.795+			diag("illegal FCCMP\n%P", p);
   1.796+		rt = p->from3.reg;
   1.797+		o1 |= rf<<16 | cond<<12  | rt<<5 | nzcv;
   1.798+		break;
   1.799+
   1.800+	case 58:	/* ldxr */
   1.801+		o1 = opload(p->as);
   1.802+		o1 |= 0x1F<<16;
   1.803+		o1 |= p->from.reg<<5;
         		/* bits 10-14 take p->reg when present, otherwise 0x1F */
   1.804+		if(p->reg != NREG)
   1.805+			o1 |= p->reg<<10;
   1.806+		else
   1.807+			o1 |= 0x1F<<10;
   1.808+		o1 |= p->to.reg;
   1.809+		break;
   1.810+
   1.811+	case 59:	/* stxr */
   1.812+		o1 = opstore(p->as);
   1.813+		o1 |= p->reg << 16;
   1.814+		if(p->from3.type != D_NONE)
   1.815+			o1 |= p->from3.reg<<10;
   1.816+		else
   1.817+			o1 |= 0x1F<<10;
   1.818+		o1 |= p->to.reg<<5;
   1.819+		o1 |= p->from.reg;
   1.820+		break;
   1.821+
   1.822+	case 60:	/* adrp label,r */
   1.823+		d = brdist(p, 12, 21, 0);
   1.824+		o1 = ADR(1, d, p->to.reg);
   1.825+		break;
   1.826+
   1.827+	case 61:	/* adr label, r */
   1.828+		d = brdist(p, 0, 21, 0);
   1.829+		o1 = ADR(0, d, p->to.reg);
   1.830+		break;
   1.831+
   1.832+	case 62:	/* case Rv, Rt -> adr tab, Rt; movw Rt[R<<2], Rl; add Rt, Rl; br (Rl) */
   1.833+		o1 = ADR(0, 4*4, p->to.reg);	/* adr 4(pc), Rt */
   1.834+		o2 = (2<<30)|(7<<27)|(2<<22)|(1<<21)|(3<<13)|(1<<12)|(2<<10)|(p->from.reg<<16)|(p->to.reg<<5)|REGTMP;	/* movw Rt[Rv<<2], REGTMP */
   1.835+		o3 = oprrr(AADD) | (p->to.reg<<16) | (REGTMP<<5) | REGTMP;	/* add Rt, REGTMP */
   1.836+		o4 = (0x6b<<25)|(0x1F<<16)|(REGTMP<<5);	/* br (REGTMP) */
   1.837+		lastcase = p;
   1.838+		break;
   1.839+
   1.840+	case 63:	/* bcase */
   1.841+		if(lastcase == nil){
   1.842+			diag("missing CASE\n%P", p);
   1.843+			break;
   1.844+		}
         		/* table entry: target pc relative to the address 4*4 past the last case instruction */
   1.845+		if(p->cond != P) {
   1.846+			o1 = p->cond->pc - (lastcase->pc + 4*4);
   1.847+			if(dlm)
   1.848+				dynreloc(S, p->pc, 1);
   1.849+		}
   1.850+		break;
   1.851+
   1.852+	/* reloc ops */
   1.853+	case 64:	/* movT R,addr */
   1.854+		o1 = omovlit(AMOV, p, &p->to, REGTMP);
   1.855+		if(!o1)
   1.856+			break;
   1.857+		o2 = olsr12u(opstr12(p->as), 0, REGTMP, p->from.reg);
   1.858+		break;
   1.859+
   1.860+	case 65:	/* movT addr,R */
   1.861+		o1 = omovlit(AMOV, p, &p->from, REGTMP);
   1.862+		if(!o1)
   1.863+			break;
   1.864+		o2 = olsr12u(opldr12(p->as), 0, REGTMP, p->to.reg);
   1.865+		break;
   1.866+	}
   1.867+
   1.868+	if(debug['a'] > 1)
   1.869+		Bprint(&bso, "%2d ", o->type);
   1.870+
         	/* v is reused from here on as the instruction's pc for the listing */
   1.871+	v = p->pc;
   1.872+	switch(o->size) {
   1.873+	default:
         		/* any other size: listing line only, nothing is written */
   1.874+		if(debug['a'])
   1.875+			Bprint(&bso, " %.8lux:\t\t%P\n", v, p);
   1.876+		break;
   1.877+	case 4:
   1.878+		if(debug['a'])
   1.879+			Bprint(&bso, " %.8lux: %.8lux\t%P\n", v, o1, p);
   1.880+		lputl(o1);
   1.881+		break;
   1.882+	case 8:
   1.883+		if(debug['a'])
   1.884+			Bprint(&bso, " %.8lux: %.8lux %.8lux%P\n", v, o1, o2, p);
   1.885+		lputl(o1);
   1.886+		lputl(o2);
   1.887+		break;
   1.888+	case 12:
   1.889+		if(debug['a'])
   1.890+			Bprint(&bso, " %.8lux: %.8lux %.8lux %.8lux%P\n", v, o1, o2, o3, p);
   1.891+		lputl(o1);
   1.892+		lputl(o2);
   1.893+		lputl(o3);
   1.894+		break;
   1.895+	case 16:
   1.896+		if(debug['a'])
   1.897+			Bprint(&bso, " %.8lux: %.8lux %.8lux %.8lux %.8lux%P\n",
   1.898+				v, o1, o2, o3, o4, p);
   1.899+		lputl(o1);
   1.900+		lputl(o2);
   1.901+		lputl(o3);
   1.902+		lputl(o4);
   1.903+		break;
   1.904+	case 20:
   1.905+		if(debug['a'])
   1.906+			Bprint(&bso, " %.8lux: %.8lux %.8lux %.8lux %.8lux %.8lux%P\n",
   1.907+				v, o1, o2, o3, o4, o5, p);
   1.908+		lputl(o1);
   1.909+		lputl(o2);
   1.910+		lputl(o3);
   1.911+		lputl(o4);
   1.912+		lputl(o5);
   1.913+		break;
   1.914+	}
   1.915+}
   1.916+
/*
 * basic Rm op Rn -> Rd (using shifted register with 0)
 * also op Rn -> Rt
 * also Rm*Rn op Ra -> Rd
 *
 * Maps assembler opcode a to the constant bits of its
 * register-operand encoding.  Every value returned here is
 * built from constants only; the operand fields are
 * presumably ORed in by the caller (not visible here).
 * An opcode with no entry is reported through diag and
 * prasm(curp), and 0 is returned.
 */
static long
oprrr(int a)
{
	switch(a) {
	/* add/subtract with carry; the NGC* opcodes share the SBC* values */
	case AADC:	return S64 | 0<<30 | 0<<29 | 0xd0<<21 | 0<<10;
	case AADCW:	return S32 | 0<<30 | 0<<29 | 0xd0<<21 | 0<<10;
	case AADCS:	return S64 | 0<<30 | 1<<29 | 0xd0<<21 | 0<<10;
	case AADCSW:	return S32 | 0<<30 | 1<<29 | 0xd0<<21 | 0<<10;

	case ANGC:
	case ASBC:	return S64 | 1<<30 | 0<<29 | 0xd0<<21 | 0<<10;
	case ANGCS:
	case ASBCS:	return S64 | 1<<30 | 1<<29 | 0xd0<<21 | 0<<10;
	case ANGCW:
	case ASBCW:	return S32 | 1<<30 | 0<<29 | 0xd0<<21 | 0<<10;
	case ANGCSW:
	case ASBCSW:	return S32 | 1<<30 | 1<<29 | 0xd0<<21 | 0<<10;

	/* add/subtract; CMN* and CMP* share the values of ADDS* and SUBS* (bit 29 set) */
	case AADD:	return S64 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10;
	case AADDW:	return S32 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10;
	case ACMN:
	case AADDS:	return S64 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10;
	case ACMNW:
	case AADDSW:	return S32 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10;

	case ASUB:	return S64 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10;
	case ASUBW:	return S32 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10;
	case ACMP:
	case ASUBS:	return S64 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10;
	case ACMPW:
	case ASUBSW:	return S32 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10;

	/* logical operations; AMOV shares ORR and AMOVWU shares ORRW */
	case AAND:	return S64 | 0<<29 | 0xA<<24;
	case AANDW:	return S32 | 0<<29 | 0xA<<24;
	case AMOV:
	case AORR:	return S64 | 1<<29 | 0xA<<24;
//	case AMOVW:	(disabled in the original; AMOVW has no entry in this table)
	case AMOVWU:
	case AORRW:	return S32 | 1<<29 | 0xA<<24;
	case AEOR:	return S64 | 2<<29 | 0xA<<24;
	case AEORW:	return S32 | 2<<29 | 0xA<<24;
	case AANDS:	return S64 | 3<<29 | 0xA<<24;
	case AANDSW:	return S32 | 3<<29 | 0xA<<24;

	/* same logical group with bit 21 set; AMVN* share the ORN* values */
	case ABIC:	return S64 | 0<<29 | 0xA<<24 | 1<<21;
	case ABICW:	return S32 | 0<<29 | 0xA<<24 | 1<<21;
	case ABICS:	return S64 | 3<<29 | 0xA<<24 | 1<<21;
	case ABICSW:	return S32 | 3<<29 | 0xA<<24 | 1<<21;
	case AEON:	return S64 | 2<<29 | 0xA<<24 | 1<<21;
	case AEONW:	return S32 | 2<<29 | 0xA<<24 | 1<<21;
	case AMVN:
	case AORN:	return S64 | 1<<29 | 0xA<<24 | 1<<21;
	case AMVNW:
	case AORNW:	return S32 | 1<<29 | 0xA<<24 | 1<<21;

	/* shifts by register, built with OPDP2 */
	case AASR:	return S64 | OPDP2(10);	/* also ASRV */
	case AASRW:	return S32 | OPDP2(10);
	case ALSL:	return S64 | OPDP2(8);
	case ALSLW:	return S32 | OPDP2(8);
	case ALSR:	return S64 | OPDP2(9);
	case ALSRW:	return S32 | OPDP2(9);
	case AROR:	return S64 | OPDP2(11);
	case ARORW:	return S32 | OPDP2(11);

	/* conditional compare, register form (bit 11 clear; opirr has the bit-11-set variants) */
	case ACCMN:	return S64 | 0<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4;	/* cond<<12 | nzcv<<0 */
	case ACCMNW:	return S32 | 0<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4;
	case ACCMP:	return S64 | 1<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4;	/* imm5<<16 | cond<<12 | nzcv<<0 */
	case ACCMPW:	return S32 | 1<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4;

	/* CRC32; only the X variants use S64 */
	case ACRC32B:		return S32 | OPDP2(16);
	case ACRC32H:		return S32 | OPDP2(17);
	case ACRC32W:	return S32 | OPDP2(18);
	case ACRC32X:		return S64 | OPDP2(19);
	case ACRC32CB:	return S32 | OPDP2(20);
	case ACRC32CH:	return S32 | OPDP2(21);
	case ACRC32CW:	return S32 | OPDP2(22);
	case ACRC32CX:	return S64 | OPDP2(23);

	/*
	 * conditional select family; CSET/CINC share the CSINC value,
	 * CSETM/CINV share CSINV, and CNEG shares CSNEG
	 */
	case ACSEL:	return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10;
	case ACSELW:	return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10;
	case ACSET:	return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10;
	case ACSETW:	return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10;
	case ACSETM:	return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10;
	case ACSETMW:	return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10;
	case ACINC:
	case ACSINC:	return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10;
	case ACINCW:
	case ACSINCW:	return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10;
	case ACINV:
	case ACSINV:	return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10;
	case ACINVW:
	case ACSINVW:	return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10;
	case ACNEG:
	case ACSNEG:	return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10;
	case ACNEGW:
	case ACSNEGW:	return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10;

	/* multiply-add/subtract; MUL* share MADD*, MNEG* share MSUB* */
	case AMUL:
	case AMADD:	return S64 | 0<<29 | 0x1B<<24 | 0<<21 | 0<<15;
	case AMULW:
	case AMADDW:	return S32 | 0<<29 | 0x1B<<24 | 0<<21 | 0<<15;
	case AMNEG:
	case AMSUB:	return S64 | 0<<29 | 0x1B<<24 | 0<<21 | 1<<15;
	case AMNEGW:
	case AMSUBW:	return S32 | 0<<29 | 0x1B<<24 | 0<<21 | 1<<15;

	/* system register moves; the two differ only in the first SYSOP argument */
	case AMRS:	return SYSOP(1,2,0,0,0,0,0);
	case AMSR:	return SYSOP(0,2,0,0,0,0,0);

	/* negate: same constant bits as the SUB/SUBS entries above */
	case ANEG:	return S64 | 1<<30 | 0<<29 | 0xB<<24 | 0<<21;
	case ANEGW:	return S32 | 1<<30 | 0<<29 | 0xB<<24 | 0<<21;
	case ANEGS:	return S64 | 1<<30 | 1<<29 | 0xB<<24 | 0<<21;
	case ANEGSW:	return S32 | 1<<30 | 1<<29 | 0xB<<24 | 0<<21;

	/* signed divide; REM* return the same value as SDIV* here */
	case AREM:
	case ASDIV:	return S64 | OPDP2(3);
	case AREMW:
	case ASDIVW:	return S32 | OPDP2(3);

	/* long multiplies, built with OPDP3 (first argument 1 in every case) */
	case ASMULL:
	case ASMADDL:	return OPDP3(1, 0, 1, 0);
	case ASMNEGL:
	case ASMSUBL:	return OPDP3(1, 0, 1, 1);
	case ASMULH:	return OPDP3(1, 0, 2, 0);
	case AUMULL:
	case AUMADDL:	return OPDP3(1, 0, 5, 0);
	case AUMNEGL:
	case AUMSUBL:	return OPDP3(1, 0, 5, 1);
	case AUMULH:	return OPDP3(1, 0, 6, 0);

	/* unsigned divide; UREM* return the same value as UDIV* here */
	case AUREM:
	case AUDIV:	return S64 | OPDP2(2);
	case AUREMW:
	case AUDIVW:	return S32 | OPDP2(2);

	/* AES: entries differ only in the field at bit 12 */
	case AAESE:	return 0x4E<<24 | 2<<20 | 8<<16 | 4<<12 | 2<<10;
	case AAESD:	return 0x4E<<24 | 2<<20 | 8<<16 | 5<<12 | 2<<10;
	case AAESMC:	return 0x4E<<24 | 2<<20 | 8<<16 | 6<<12 | 2<<10;
	case AAESIMC:	return 0x4E<<24 | 2<<20 | 8<<16 | 7<<12 | 2<<10;

	/* SHA, first group: 0x5E<<24 plus a selector at bit 12 */
	case ASHA1C:	return 0x5E<<24 | 0<<12;
	case ASHA1P:	return 0x5E<<24 | 1<<12;
	case ASHA1M:	return 0x5E<<24 | 2<<12;
	case ASHA1SU0:	return 0x5E<<24 | 3<<12;
	case ASHA256H:	return 0x5E<<24 | 4<<12;
	case ASHA256H2:	return 0x5E<<24 | 5<<12;
	case ASHA256SU1:	return 0x5E<<24 | 6<<12;

	/* SHA, second group: same layout as the AES entries with 0x5E<<24 */
	case ASHA1H:	return 0x5E<<24 | 2<<20 | 8<<16 | 0<<12 | 2<<10;
	case ASHA1SU1:	return 0x5E<<24 | 2<<20 | 8<<16 | 1<<12 | 2<<10;
	case ASHA256SU0:	return 0x5E<<24 | 2<<20 | 8<<16 | 2<<12 | 2<<10;

	/* floating point <-> integer conversions, built with FPCVTI(sf,s,type,rmode,op) */
	case AFCVTZSD:	return FPCVTI(1, 0, 1, 3, 0);
	case AFCVTZSDW:	return FPCVTI(0, 0, 1, 3, 0);
	case AFCVTZSS:	return FPCVTI(1, 0, 0, 3, 0);
	case AFCVTZSSW:	return FPCVTI(0, 0, 0, 3, 0);

	case AFCVTZUD:	return FPCVTI(1, 0, 1, 3, 1);
	case AFCVTZUDW:	return FPCVTI(0, 0, 1, 3, 1);
	case AFCVTZUS:	return FPCVTI(1, 0, 0, 3, 1);
	case AFCVTZUSW:	return FPCVTI(0, 0, 0, 3, 1);

	case ASCVTFD:		return FPCVTI(1, 0, 1, 0, 2);
	case ASCVTFS:		return FPCVTI(1, 0, 0, 0, 2);
	case ASCVTFWD:	return FPCVTI(0, 0, 1, 0, 2);
	case ASCVTFWS:	return FPCVTI(0, 0, 0, 0, 2);

	case AUCVTFD:		return FPCVTI(1, 0, 1, 0, 3);
	case AUCVTFS:		return FPCVTI(1, 0, 0, 0, 3);
	case AUCVTFWD:	return FPCVTI(0, 0, 1, 0, 3);
	case AUCVTFWS:	return FPCVTI(0, 0, 0, 0, 3);

	/* two-operand floating point, FPOP2S(m,s,type,op): type 0 for the S forms, 1 for the D forms */
	case AFADDS:	return FPOP2S(0, 0, 0, 2);
	case AFADDD:	return FPOP2S(0, 0, 1, 2);
	case AFSUBS:	return FPOP2S(0, 0, 0, 3);
	case AFSUBD:	return FPOP2S(0, 0, 1, 3);
	case AFMULS:	return FPOP2S(0, 0, 0, 0);
	case AFMULD:	return FPOP2S(0, 0, 1, 0);
	case AFDIVS:	return FPOP2S(0, 0, 0, 1);
	case AFDIVD:	return FPOP2S(0, 0, 1, 1);
	case AFMAXS:	return FPOP2S(0, 0, 0, 4);
	case AFMINS:	return FPOP2S(0, 0, 0, 5);
	case AFMAXD:	return FPOP2S(0, 0, 1, 4);
	case AFMIND:	return FPOP2S(0, 0, 1, 5);
	case AFMAXNMS:	return FPOP2S(0, 0, 0, 6);
	case AFMAXNMD:	return FPOP2S(0, 0, 1, 6);
	case AFMINNMS:	return FPOP2S(0, 0, 0, 7);
	case AFMINNMD:	return FPOP2S(0, 0, 1, 7);
	case AFNMULS:		return FPOP2S(0, 0, 0, 8);
	case AFNMULD:	return FPOP2S(0, 0, 1, 8);

	/* floating point compare; the E variants pass 16 as the last FPCMP argument */
	case AFCMPS:	return FPCMP(0, 0, 0, 0, 0);
	case AFCMPD:	return FPCMP(0, 0, 1, 0, 0);
	case AFCMPES:	return FPCMP(0, 0, 0, 0, 16);
	case AFCMPED:	return FPCMP(0, 0, 1, 0, 16);

	/* floating point conditional compare; the E variants pass op=1 */
	case AFCCMPS:		return FPCCMP(0, 0, 0, 0);
	case AFCCMPD:	return FPCCMP(0, 0, 1, 0);
	case AFCCMPES:	return FPCCMP(0, 0, 0, 1);
	case AFCCMPED:	return FPCCMP(0, 0, 1, 1);

	/* floating point conditional select; bit 22 distinguishes S from D */
	case AFCSELS:	return 0x1E<<24 | 0<<22 | 1<<21 | 3<<10;
	case AFCSELD:	return 0x1E<<24 | 1<<22 | 1<<21 | 3<<10;

	/* one-operand floating point, FPOP1S(m,s,type,op) with type 0 */
	case AFMOVS:	return FPOP1S(0, 0, 0, 0);
	case AFABSS:	return FPOP1S(0, 0, 0, 1);
	case AFNEGS:	return FPOP1S(0, 0, 0, 2);
	case AFSQRTS:	return FPOP1S(0, 0, 0, 3);
	case AFCVTSD:	return FPOP1S(0, 0, 0, 5);
	case AFCVTSH:	return FPOP1S(0, 0, 0, 7);
	case AFRINTNS: return FPOP1S(0, 0, 0, 8);
	case AFRINTPS:	return FPOP1S(0, 0, 0, 9);
	case AFRINTMS:	return FPOP1S(0, 0, 0, 10);
	case AFRINTZS:	return FPOP1S(0, 0, 0, 11);
	case AFRINTAS:	return FPOP1S(0, 0, 0, 12);
	case AFRINTXS:	return FPOP1S(0, 0, 0, 14);
	case AFRINTIS:	return FPOP1S(0, 0, 0, 15);

	/* one-operand floating point with type 1, then the two type-3 conversions */
	case AFMOVD:	return FPOP1S(0, 0, 1, 0);
	case AFABSD:	return FPOP1S(0, 0, 1, 1);
	case AFNEGD:	return FPOP1S(0, 0, 1, 2);
	case AFSQRTD:	return FPOP1S(0, 0, 1, 3);
	case AFCVTDS:	return FPOP1S(0, 0, 1, 4);
	case AFCVTDH:	return FPOP1S(0, 0, 1, 7);
	case AFRINTND:	return FPOP1S(0, 0, 1, 8);
	case AFRINTPD:	return FPOP1S(0, 0, 1, 9);
	case AFRINTMD:	return FPOP1S(0, 0, 1, 10);
	case AFRINTZD:	return FPOP1S(0, 0, 1, 11);
	case AFRINTAD:	return FPOP1S(0, 0, 1, 12);
	case AFRINTXD:	return FPOP1S(0, 0, 1, 14);
	case AFRINTID:	return FPOP1S(0, 0, 1, 15);
	case AFCVTHS:	return FPOP1S(0, 0, 3, 4);
	case AFCVTHD:	return FPOP1S(0, 0, 3, 5);

	}
	/* no table entry: report the opcode and the current instruction */
	diag("bad rrr %d %A", a, a);
	prasm(curp);
	return 0;
}
  1.1161+
/*
 * imm -> Rd
 * imm op Rn -> Rd
 *
 * Maps assembler opcode a to the constant bits of its
 * immediate-operand encoding.  The immediate and register
 * fields are not part of the values returned here.
 * An opcode with no entry is reported through diag and
 * prasm(curp), and 0 is returned.
 */
static long
opirr(int a)
{
	switch(a){

	/* op $addcon, Rn, Rd */
	/* AMOV/AMOVW share ADD/ADDW; CMN* and CMP* share the flag-setting ADDS* and SUBS* */
	case AMOV:
	case AADD:	return S64 | 0<<30 | 0<<29 | 0x11<<24;
	case ACMN:
	case AADDS:	return S64 | 0<<30 | 1<<29 | 0x11<<24;
	case AMOVW:
	case AADDW:	return S32 | 0<<30 | 0<<29 | 0x11<<24;
	case ACMNW:
	case AADDSW:	return S32 | 0<<30 | 1<<29 | 0x11<<24;
	case ASUB:	return S64 | 1<<30 | 0<<29 | 0x11<<24;
	case ACMP:
	case ASUBS:	return S64 | 1<<30 | 1<<29 | 0x11<<24;
	case ASUBW:	return S32 | 1<<30 | 0<<29 | 0x11<<24;
	case ACMPW:
	case ASUBSW:	return S32 | 1<<30 | 1<<29 | 0x11<<24;

	/* op $imm(SB), Rd; op label, Rd */
	case AADR:		return 0<<31 | 0x10<<24;
	case AADRP:	return 1<<31 | 0x10<<24;

	/* op $bimm, Rn, Rd */
	case AAND:	return S64 | 0<<29 | 0x24<<23;
	case AANDW:	return S32 | 0<<29 | 0x24<<23 | 0<<22;
	case AORR:	return S64 | 1<<29 | 0x24<<23;
	case AORRW:	return S32 | 1<<29 | 0x24<<23 | 0<<22;
	case AEOR:	return S64 | 2<<29 | 0x24<<23;
	case AEORW:	return S32 | 2<<29 | 0x24<<23 | 0<<22;
	case AANDS:	return S64 | 3<<29 | 0x24<<23;
	case AANDSW:	return S32 | 3<<29 | 0x24<<23 | 0<<22;

	/*
	 * NOTE(review): unlike ASBFM below, the 64-bit AASR value does
	 * not include 1<<22 -- confirm whether the caller supplies it.
	 */
	case AASR:	return S64 | 0<<29 | 0x26<<23;	/* alias of SBFM */
	case AASRW:	return S32 | 0<<29 | 0x26<<23 | 0<<22;

	/* op $width, $lsb, Rn, Rd */
	/*
	 * NOTE(review): ABFI uses 2<<29, the same value as AUBFM below,
	 * while ABFM itself uses 1<<29 -- confirm which is intended.
	 */
	case ABFI:		return S64 | 2<<29 | 0x26<<23 | 1<<22;	/* alias of BFM */
	case ABFIW:	return S32 | 2<<29 | 0x26<<23 | 0<<22;

	/* op $imms, $immr, Rn, Rd */
	case ABFM:	return S64 | 1<<29 | 0x26<<23 | 1<<22;
	case ABFMW:	return S32 | 1<<29 | 0x26<<23 | 0<<22;
	case ASBFM:	return S64 | 0<<29 | 0x26<<23 | 1<<22;
	case ASBFMW:	return S32 | 0<<29 | 0x26<<23 | 0<<22;
	case AUBFM:	return S64 | 2<<29 | 0x26<<23 | 1<<22;
	case AUBFMW:	return S32 | 2<<29 | 0x26<<23 | 0<<22;

	case ABFXIL:	return S64 | 1<<29 | 0x26<<23 | 1<<22;	/* alias of BFM */
	case ABFXILW:	return S32 | 1<<29 | 0x26<<23 | 0<<22;

	case AEXTR:	return S64 | 0<<29 | 0x27<<23 | 1<<22 | 0<<21;
	case AEXTRW:	return S32 | 0<<29 | 0x27<<23 | 0<<22 | 0<<21;

	/* compare and branch; bit 24 separates CBNZ* from CBZ* */
	case ACBNZ:	return S64 | 0x1A<<25 | 1<<24;
	case ACBNZW:	return S32 | 0x1A<<25 | 1<<24;
	case ACBZ:	return S64 | 0x1A<<25 | 0<<24;
	case ACBZW:	return S32 | 0x1A<<25 | 0<<24;

	/* conditional compare, immediate form (bit 11 set; oprrr has the bit-11-clear variants) */
	case ACCMN:	return S64 | 0<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4;	/* imm5<<16 | cond<<12 | nzcv<<0 */
	case ACCMNW:	return S32 | 0<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4;
	case ACCMP:	return S64 | 1<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4;	/* imm5<<16 | cond<<12 | nzcv<<0 */
	case ACCMPW:	return S32 | 1<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4;

	/* move wide: MOVN, MOVZ and MOVK differ in the field at bit 29 */
	case AMOVK:	return S64 | 3<<29 | 0x25<<23;
	case AMOVKW:	return S32 | 3<<29 | 0x25<<23;

	case AMOVN:	return S64 | 0<<29 | 0x25<<23;
	case AMOVNW:	return S32 | 0<<29 | 0x25<<23;
	case AMOVZ:	return S64 | 2<<29 | 0x25<<23;
	case AMOVZW:	return S32 | 2<<29 | 0x25<<23;

	case AMSR:	return SYSOP(0,0,0,4,0,0,0x1F);	/* MSR (immediate) */

	/* AT, DC, IC and TLBI all return the SYS value */
	case AAT:
	case ADC:
	case AIC:
	case ATLBI:
	case ASYS:	return SYSOP(0,1,0,0,0,0,0);
	case ASYSL:	return SYSOP(1,1,0,0,0,0,0);

	/* test bit and branch */
	case ATBZ:	return 0x36<<24;
	case ATBNZ:	return 0x37<<24;

	/* barriers differ in the sixth SYSOP argument; AHINT has the same value as SYSHINT(0) */
	case ADSB:	return SYSOP(0,0,3,3,0,4,0x1F);
	case ADMB:	return SYSOP(0,0,3,3,0,5,0x1F);
	case AISB:		return SYSOP(0,0,3,3,0,6,0x1F);
	case AHINT:	return SYSOP(0,0,3,2,0,0,0x1F);

	}
	/* no table entry: report the opcode and the current instruction */
	diag("bad irr %A", a);
	prasm(curp);
	return 0;
}
  1.1262+
  1.1263+/*
  1.1264+ * bit operations
  1.1265+ */
  1.1266+#define	OPBIT(x)	(1<<30 | 0<<29 | 0xD6<<21 | 0<<16 | (x)<<10)
  1.1267+
  1.1268+static long
  1.1269+opbit(int a)
  1.1270+{
  1.1271+	switch(a){
  1.1272+	case ACLS:	return S64 | OPBIT(5);
  1.1273+	case ACLSW:	return S32 | OPBIT(5);
  1.1274+	case ACLZ:	return S64 | OPBIT(4);
  1.1275+	case ACLZW:	return S32 | OPBIT(4);
  1.1276+	case ARBIT:	return S64 | OPBIT(0);
  1.1277+	case ARBITW:	return S32 | OPBIT(0);
  1.1278+	case AREV:	return S64 | OPBIT(3);
  1.1279+	case AREVW:	return S32 | OPBIT(2);
  1.1280+	case AREV16:	return S64 | OPBIT(1);
  1.1281+	case AREV16W:	return S32 | OPBIT(1);
  1.1282+	case AREV32:	return S64 | OPBIT(2);
  1.1283+	default:
  1.1284+		diag("bad bit op\n%P", curp);
  1.1285+		return 0;
  1.1286+	}
  1.1287+}
  1.1288+
  1.1289+/*
  1.1290+ * add/subtract extended register
  1.1291+ */
  1.1292+static long
  1.1293+opxrrr(int a)
  1.1294+{
  1.1295+	switch(a) {
  1.1296+	case AADD:	return S64 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_64;
  1.1297+	case AADDW:	return S32 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_32;
  1.1298+	case ACMN:
  1.1299+	case AADDS:	return S64 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_64;
  1.1300+	case ACMNW:
  1.1301+	case AADDSW:	return S32 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_32;
  1.1302+
  1.1303+	case ASUB:	return S64 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_64;
  1.1304+	case ASUBW:	return S32 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_32;
  1.1305+	case ACMP:
  1.1306+	case ASUBS:	return S64 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_64;
  1.1307+	case ACMPW:
  1.1308+	case ASUBSW:	return S32 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_32;
  1.1309+
  1.1310+	}
  1.1311+	diag("bad opxrrr %A\n%P", a, curp);
  1.1312+	return 0;
  1.1313+}
  1.1314+
  1.1315+static long
  1.1316+opimm(int a)
  1.1317+{
  1.1318+	switch(a){
  1.1319+	case ASVC:	return 0xD4<<24 | 0<<21 | 1;	/* imm16<<5 */
  1.1320+	case AHVC:	return 0xD4<<24 | 0<<21 | 2;
  1.1321+	case ASMC:	return 0xD4<<24 | 0<<21 | 3;
  1.1322+	case ABRK:	return 0xD4<<24 | 1<<21 | 0;
  1.1323+	case AHLT:	return 0xD4<<24 | 2<<21 | 0;
  1.1324+	case ADCPS1:	return 0xD4<<24 | 5<<21 | 1;
  1.1325+	case ADCPS2:	return 0xD4<<24 | 5<<21 | 2;
  1.1326+	case ADCPS3:	return 0xD4<<24 | 5<<21 | 3;
  1.1327+
  1.1328+	case ACLREX:	return SYSOP(0,0,3,3,0,2,0x1F);
  1.1329+	}
  1.1330+	diag("bad imm %A", a);
  1.1331+	prasm(curp);
  1.1332+	return 0;
  1.1333+}
  1.1334+
  1.1335+static vlong
  1.1336+brdist(Prog *p, int preshift, int flen, int shift)
  1.1337+{
  1.1338+	vlong v, t;
  1.1339+	Sym *s;
  1.1340+
  1.1341+	v = 0;
  1.1342+	if(p->cond == UP) {
  1.1343+		s = p->to.sym;
  1.1344+		if(s->type != SUNDEF)
  1.1345+			diag("bad branch sym type");
  1.1346+		v = (uvlong)s->value >> (Roffset-2);
  1.1347+		dynreloc(s, p->pc, 0);	/* TO DO */
  1.1348+	}
  1.1349+	else if(p->cond != P)
  1.1350+		v = (p->cond->pc>>preshift) - (pc>>preshift);
  1.1351+	if((v & ((1<<shift)-1)) != 0)
  1.1352+		diag("misaligned label\n%P", p);
  1.1353+	v >>= shift;
  1.1354+	t = (vlong)1 << (flen-1);
  1.1355+	if(v < -t || v >= t)
  1.1356+		diag("branch too far\n%P", p);
  1.1357+	return v & ((t<<1)-1);
  1.1358+}
  1.1359+
  1.1360+/*
  1.1361+ * pc-relative branches
  1.1362+ */
  1.1363+static long
  1.1364+opbra(int a)
  1.1365+{
  1.1366+	switch(a) {
  1.1367+	case ABEQ:	return OPBcc(0x0);
  1.1368+	case ABNE:	return OPBcc(0x1);
  1.1369+	case ABCS:	return OPBcc(0x2);
  1.1370+	case ABHS:	return OPBcc(0x2);
  1.1371+	case ABCC:	return OPBcc(0x3);
  1.1372+	case ABLO:	return OPBcc(0x3);
  1.1373+	case ABMI:	return OPBcc(0x4);
  1.1374+	case ABPL:	return OPBcc(0x5);
  1.1375+	case ABVS:	return OPBcc(0x6);
  1.1376+	case ABVC:	return OPBcc(0x7);
  1.1377+	case ABHI:	return OPBcc(0x8);
  1.1378+	case ABLS:	return OPBcc(0x9);
  1.1379+	case ABGE:	return OPBcc(0xa);
  1.1380+	case ABLT:	return OPBcc(0xb);
  1.1381+	case ABGT:	return OPBcc(0xc);
  1.1382+	case ABLE:	return OPBcc(0xd);		/* imm19<<5 | cond */
  1.1383+	case AB:		return 0<<31 | 5<<26;	/* imm26 */
  1.1384+	case ABL:		return 1<<31 | 5<<26;
  1.1385+	}
  1.1386+	diag("bad bra %A", a);
  1.1387+	prasm(curp);
  1.1388+	return 0;
  1.1389+}
  1.1390+
  1.1391+static long
  1.1392+opbrr(int a)
  1.1393+{
  1.1394+	switch(a){
  1.1395+	case ABL:		return OPBLR(1);		/* BLR */
  1.1396+	case AB:		return OPBLR(0);		/* BR */
  1.1397+	case ARET:	return OPBLR(2);		/* RET */
  1.1398+	}
  1.1399+	diag("bad brr %A", a);
  1.1400+	prasm(curp);
  1.1401+	return 0;
  1.1402+}
  1.1403+
  1.1404+static long
  1.1405+op0(int a)
  1.1406+{
  1.1407+	switch(a){
  1.1408+	case ADRPS:	return 0x6B<<25 | 5<<21 | 0x1F<<16 | 0x1F<<5;
  1.1409+	case AERET:	return 0x6B<<25 | 4<<21 | 0x1F<<16 | 0<<10 | 0x1F<<5;
  1.1410+	case ANOP:	return SYSHINT(0);
  1.1411+	case AYIELD:	return SYSHINT(1);
  1.1412+	case AWFE:	return SYSHINT(2);
  1.1413+	case AWFI:	return SYSHINT(3);
  1.1414+	case ASEV:	return SYSHINT(4);
  1.1415+	case ASEVL:	return SYSHINT(5);
  1.1416+	}
  1.1417+	diag("bad op0 %A", a);
  1.1418+	prasm(curp);
  1.1419+	return 0;
  1.1420+}
  1.1421+
  1.1422+/*
  1.1423+ * register offset
  1.1424+ */
  1.1425+static long
  1.1426+opload(int a)
  1.1427+{
  1.1428+	switch(a){
  1.1429+	case ALDAR:	return LDSTX(3,1,1,0,1) | 0x1F<<10;
  1.1430+	case ALDARW:	return LDSTX(2,1,1,0,1) | 0x1F<<10;
  1.1431+	case ALDARB:	return LDSTX(0,1,1,0,1) | 0x1F<<10;
  1.1432+	case ALDARH:	return LDSTX(1,1,1,0,1) | 0x1F<<10;
  1.1433+	case ALDAXP:	return LDSTX(3,0,1,1,1);
  1.1434+	case ALDAXPW:	return LDSTX(2,0,1,1,1);
  1.1435+	case ALDAXR:	return LDSTX(3,0,1,0,1) | 0x1F<<10;
  1.1436+	case ALDAXRW:	return LDSTX(2,1,1,0,1) | 0x1F<<10;
  1.1437+	case ALDAXRB:	return LDSTX(0,0,1,0,1) | 0x1F<<10;
  1.1438+	case ALDAXRH:	return LDSTX(1,0,1,0,1) | 0x1F<<10;
  1.1439+	case ALDXR:		return LDSTX(3,0,1,0,0) | 0x1F<<10;
  1.1440+	case ALDXRB:		return LDSTX(0,0,1,0,0) | 0x1F<<10;
  1.1441+	case ALDXRH:		return LDSTX(1,0,1,0,0) | 0x1F<<10;
  1.1442+	case ALDXRW:		return LDSTX(2,0,1,0,0) | 0x1F<<10;
  1.1443+	case ALDXP:		return LDSTX(3,0,1,1,0);
  1.1444+	case ALDXPW:		return LDSTX(2,0,1,1,0);
  1.1445+	case AMOVNP:	return S64 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22;
  1.1446+	case AMOVNPW:	return S32 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22;
  1.1447+	}
  1.1448+	diag("bad opload %A\n%P", a, curp);
  1.1449+	return 0;
  1.1450+}
  1.1451+
  1.1452+static long
  1.1453+opstore(int a)
  1.1454+{
  1.1455+	switch(a){
  1.1456+	case ASTLR:		return LDSTX(3,1,0,0,1) | 0x1F<<10;
  1.1457+	case ASTLRB:		return LDSTX(0,1,0,0,1) | 0x1F<<10;
  1.1458+	case ASTLRH:		return LDSTX(1,1,0,0,1) | 0x1F<<10;
  1.1459+	case ASTLP:		return LDSTX(3,0,0,1,1);
  1.1460+	case ASTLPW:		return LDSTX(2,0,0,1,1);
  1.1461+	case ASTLRW:		return LDSTX(2,1,0,0,1) | 0x1F<<10;
  1.1462+	case ASTLXP:		return LDSTX(2,0,0,1,1);
  1.1463+	case ASTLXPW:		return LDSTX(3,0,0,1,1);
  1.1464+	case ASTLXR:		return LDSTX(3,0,0,0,1) | 0x1F<<10;
  1.1465+	case ASTLXRB:		return LDSTX(0,0,0,0,1) | 0x1F<<10;
  1.1466+	case ASTLXRH:		return LDSTX(1,0,0,0,1) | 0x1F<<10;
  1.1467+	case ASTLXRW:		return LDSTX(2,0,0,0,1) | 0x1F<<10;
  1.1468+	case ASTXR:		return LDSTX(3,0,0,0,0) | 0x1F<<10;
  1.1469+	case ASTXRB:		return LDSTX(0,0,0,0,0) | 0x1F<<10;
  1.1470+	case ASTXRH:		return LDSTX(1,0,0,0,0) | 0x1F<<10;
  1.1471+	case ASTXP:		return LDSTX(3,0,0,1,0);
  1.1472+	case ASTXPW:		return LDSTX(2,0,0,1,0);
  1.1473+	case ASTXRW:		return LDSTX(2,0,0,0,0) | 0x1F<<10;
  1.1474+	case AMOVNP:	return S64 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22;
  1.1475+	case AMOVNPW:	return S32 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22;
  1.1476+	}
  1.1477+	diag("bad opstore %A\n%P", a, curp);
  1.1478+	return 0;
  1.1479+}
  1.1480+
  1.1481+/*
  1.1482+ * load/store register (unsigned immediate) C3.3.13
  1.1483+ *	these produce 64-bit values (when there's an option)
  1.1484+ */
  1.1485+
  1.1486+static long
  1.1487+olsr12u(long o, long v, int b, int r)
  1.1488+{
  1.1489+	if(v < 0 || v >= (1<<12))
  1.1490+		diag("offset out of range: %ld\n%P", v, curp);
  1.1491+	o |= (v&0xFFF)<<10;
  1.1492+	o |= b << 5;
  1.1493+	o |= r;
  1.1494+	return o;
  1.1495+}
  1.1496+
  1.1497+static long
  1.1498+opldr12(int a)
  1.1499+{
  1.1500+	switch(a){
  1.1501+	case AMOV:	return LDSTR12U(3, 0, 1);	/* imm12<<10 | Rn<<5 | Rt */
  1.1502+	case AMOVW:	return LDSTR12U(2, 0, 2);
  1.1503+	case AMOVWU:	return LDSTR12U(2, 0, 1);
  1.1504+	case AMOVH:	return LDSTR12U(1, 0, 2);
  1.1505+	case AMOVHU:	return LDSTR12U(1, 0, 1);
  1.1506+	case AMOVB:	return LDSTR12U(0, 0, 2);
  1.1507+	case AMOVBU:	return LDSTR12U(0, 0, 1);
  1.1508+	case AFMOVS:	return LDSTR12U(2, 1, 1);
  1.1509+	case AFMOVD:	return LDSTR12U(3, 1, 1);
  1.1510+	}
  1.1511+	diag("bad opldr12 %A\n%P", a, curp);
  1.1512+	return 0;
  1.1513+}
  1.1514+
  1.1515+static long
  1.1516+opstr12(int a)
  1.1517+{
  1.1518+	return LD2STR(opldr12(a));
  1.1519+}
  1.1520+
  1.1521+/* 
  1.1522+ * load/store register (unscaled immediate) C3.3.12
  1.1523+ */
  1.1524+
  1.1525+static long
  1.1526+olsr9s(long o, long v, int b, int r)
  1.1527+{
  1.1528+	if(v < -256 || v > 255)
  1.1529+		diag("offset out of range: %ld\n%P", v, curp);
  1.1530+	o |= (v&0x1FF)<<12;
  1.1531+	o |= b << 5;
  1.1532+	o |= r;
  1.1533+	return o;
  1.1534+}
  1.1535+
  1.1536+static long
  1.1537+opldr9(int a)
  1.1538+{
  1.1539+	switch(a){
  1.1540+	case AMOV:	return LDSTR9S(3, 0, 1);	/* simm9<<12 | Rn<<5 | Rt */
  1.1541+	case AMOVW:	return LDSTR9S(2, 0, 2);
  1.1542+	case AMOVWU:	return LDSTR9S(2, 0, 1);
  1.1543+	case AMOVH:	return LDSTR9S(1, 0, 2);
  1.1544+	case AMOVHU:	return LDSTR9S(1, 0, 1);
  1.1545+	case AMOVB:	return LDSTR9S(0, 0, 2);
  1.1546+	case AMOVBU:	return LDSTR9S(0, 0, 1);
  1.1547+	case AFMOVS:	return LDSTR9S(2, 1, 1);
  1.1548+	case AFMOVD:	return LDSTR9S(3, 1, 1);
  1.1549+	}
  1.1550+	diag("bad opldr9 %A\n%P", a, curp);
  1.1551+	return 0;
  1.1552+}
  1.1553+
  1.1554+static long
  1.1555+opstr9(int a)
  1.1556+{
  1.1557+	return LD2STR(opldr9(a));
  1.1558+}
  1.1559+
  1.1560+static long
  1.1561+opldrpp(int a)
  1.1562+{
  1.1563+	switch(a){
  1.1564+	case AMOV:	return 3<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22;	/* simm9<<12 | Rn<<5 | Rt */
  1.1565+	case AMOVW:	return 2<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22;
  1.1566+	case AMOVWU:	return 2<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22;
  1.1567+	case AMOVH:	return 1<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22;
  1.1568+	case AMOVHU:	return 1<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22;
  1.1569+	case AMOVB:	return 0<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22;
  1.1570+	case AMOVBU:	return 0<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22;
  1.1571+	}
  1.1572+	diag("bad opldr %A\n%P", a, curp);
  1.1573+	return 0;
  1.1574+}
  1.1575+
  1.1576+/*
  1.1577+ * load/store register (extended register)
  1.1578+ */
  1.1579+static long
  1.1580+olsxrr(int a, int b, int c, int d)
  1.1581+{
  1.1582+	diag("need load/store extended register\n%P", curp);
  1.1583+	return -1;
  1.1584+}
  1.1585+
  1.1586+static long
  1.1587+oaddi(long o1, long v, int r, int rt)
  1.1588+{
  1.1589+	if((v & 0xFFF000) != 0){
  1.1590+		v >>= 12;
  1.1591+		o1 |= 1<<22;
  1.1592+	}
  1.1593+	o1 |= ((v & 0xFFF) << 10) | (r<<5) | rt;
  1.1594+	return o1;
  1.1595+}
  1.1596+
  1.1597+/*
  1.1598+ * load a a literal value into dr
  1.1599+ */
  1.1600+static long
  1.1601+omovlit(int as, Prog *p, Adr *a, int dr)
  1.1602+{	
  1.1603+	long v, o1;
  1.1604+	int w, fp;
  1.1605+
  1.1606+	if(p->cond == nil){	/* not in literal pool */
  1.1607+		aclass(a);
  1.1608+fprint(2, "omovlit add %lld (%#llux)\n", instoffset, instoffset);
  1.1609+		/* TO DO: could be clever, and use general constant builder */
  1.1610+		o1 = opirr(AADD);
  1.1611+		v = instoffset;
  1.1612+		if(v != 0 && (v & 0xFFF) == 0){
  1.1613+			v >>= 12;
  1.1614+			o1 |= 1<<22;	/* shift, by 12 */
  1.1615+		}
  1.1616+		o1 |= ((v& 0xFFF) << 10) | (REGZERO<<5) | dr;
  1.1617+	}else{
  1.1618+		fp = 0;
  1.1619+		w = 0;	/* default: 32 bit, unsigned */
  1.1620+		switch(as){
  1.1621+		case AFMOVS:
  1.1622+			fp = 1;
  1.1623+			break;
  1.1624+		case AFMOVD:
  1.1625+			fp = 1;
  1.1626+			w = 1;	/* 64 bit simd&fp */
  1.1627+			break;
  1.1628+		case AMOV:
  1.1629+			if(p->cond->as == ADWORD)
  1.1630+				w = 1;	/* 64 bit */
  1.1631+			else if(p->cond->to.offset < 0)
  1.1632+				w = 2;	/* sign extend */
  1.1633+			break;
  1.1634+		case AMOVB:
  1.1635+		case AMOVH:
  1.1636+		case AMOVW:
  1.1637+			w = 2;	/* 32 bit, sign-extended to 64 */
  1.1638+			break;
  1.1639+		}
  1.1640+		v = brdist(p, 0, 19, 2);
  1.1641+		o1 = (w<<30)|(fp<<26)|(3<<27);
  1.1642+		o1 |= (v&0x7FFFF)<<5;
  1.1643+		o1 |= dr;
  1.1644+	}
  1.1645+	return o1;
  1.1646+}
  1.1647+
  1.1648+static long
  1.1649+opbfm(int a, int r, int s, int rf, int rt)
  1.1650+{
  1.1651+	long o, c;
  1.1652+
  1.1653+	o = opirr(a);
  1.1654+	if((o & (1<<31)) == 0)
  1.1655+		c = 32;
  1.1656+	else
  1.1657+		c = 64;
  1.1658+	if(r < 0 || r >= c)
  1.1659+		diag("illegal bit number\n%P", curp);
  1.1660+	o |= (r&0x3F)<<16;
  1.1661+	if(s < 0 || s >= c)
  1.1662+		diag("illegal bit number\n%P", curp);
  1.1663+	o |= (s&0x3F)<<10;
  1.1664+	o |= (rf<<5) | rt;
  1.1665+	return o;
  1.1666+}
  1.1667+
  1.1668+static long
  1.1669+opextr(int a, long v, int rn, int rm, int rt)
  1.1670+{
  1.1671+	long o, c;
  1.1672+
  1.1673+	o = opirr(a);
  1.1674+	c = (o & (1<<31)) != 0? 63: 31;
  1.1675+	if(v < 0 || v > c)
  1.1676+		diag("illegal bit number\n%P", curp);
  1.1677+	o |= v<<10;
  1.1678+	o |= rn << 5;
  1.1679+	o |= rm << 16;
  1.1680+	o |= rt;
  1.1681+	return o;
  1.1682+}
  1.1683+
  1.1684+/*
  1.1685+ * size in log2(bytes)
  1.1686+ */
  1.1687+static int
  1.1688+movesize(int a)
  1.1689+{
  1.1690+	switch(a){
  1.1691+	case AMOV:
  1.1692+		return 3;
  1.1693+	case AMOVW:
  1.1694+	case AMOVWU:
  1.1695+		return 2;
  1.1696+	case AMOVH:
  1.1697+	case AMOVHU:
  1.1698+		return 1;
  1.1699+	case AMOVB:
  1.1700+	case AMOVBU:
  1.1701+		return 0;
  1.1702+	case AFMOVS:
  1.1703+		return 2;
  1.1704+	case AFMOVD:
  1.1705+		return 3;
  1.1706+	default:
  1.1707+		return -1;
  1.1708+	}
  1.1709+}
  1.1710+
  1.1711+/*
  1.1712+ * SIMD
  1.1713+ */