changelog shortlog tags branches changeset files revisions annotate raw help

Mercurial > hg > plan9front / sys/src/cmd/7c/reg.c

changeset 7169: 94fca26a23d8
parent: 4e09a9199097
author: cinap_lenrek@felloff.net
date: Wed, 17 Apr 2019 23:38:00 +0200
permissions: -rw-r--r--
description: 7c: fix long to vlong/pointer conversion, avoid negative immediate offsets

we have to explicitely convert to vlong by sign or
zero extending as not every operation leaves a proper
zero/sign extended result in the register. for example
NEGW will zero extend, breaking negative int offsets
on pointers.

we explicitely insert SXTW or MOVWU instructions which
the peephole optimizer takes out again when it is safe
todo so.

when promoting constant offsets to immediate offsets,
make sure the offset will be in range. otherwise the
linker will produce not so optimal pointer arithmetic
instructions to calculate the offset.
1 #include "gc.h"
2 
3 void addsplits(void);
4 
5 Reg*
6 rega(void)
7 {
8  Reg *r;
9 
10  r = freer;
11  if(r == R) {
12  r = alloc(sizeof(*r));
13  } else
14  freer = r->link;
15 
16  *r = zreg;
17  return r;
18 }
19 
20 int
21 rcmp(void *a1, void *a2)
22 {
23  Rgn *p1, *p2;
24  int c1, c2;
25 
26  p1 = (Rgn*)a1;
27  p2 = (Rgn*)a2;
28  c1 = p2->cost;
29  c2 = p1->cost;
30  if(c1 -= c2)
31  return c1;
32  return p2->varno - p1->varno;
33 }
34 
35 void
36 regopt(Prog *p)
37 {
38  Reg *r, *r1, *r2;
39  Prog *p1;
40  int i, z;
41  long initpc, val, npc;
42  ulong vreg;
43  Bits bit;
44  struct
45  {
46  long m;
47  long c;
48  Reg* p;
49  } log5[6], *lp;
50 
51  firstr = R;
52  lastr = R;
53  nvar = 0;
54  regbits = 0;
55  for(z=0; z<BITS; z++) {
56  externs.b[z] = 0;
57  params.b[z] = 0;
58  consts.b[z] = 0;
59  addrs.b[z] = 0;
60  }
61 
62  /*
63  * pass 1
64  * build aux data structure
65  * allocate pcs
66  * find use and set of variables
67  */
68  val = 5L * 5L * 5L * 5L * 5L;
69  lp = log5;
70  for(i=0; i<5; i++) {
71  lp->m = val;
72  lp->c = 0;
73  lp->p = R;
74  val /= 5L;
75  lp++;
76  }
77  val = 0;
78  for(; p != P; p = p->link) {
79  switch(p->as) {
80  case ADATA:
81  case AGLOBL:
82  case ANAME:
83  case ASIGNAME:
84  continue;
85  }
86  r = rega();
87  if(firstr == R) {
88  firstr = r;
89  lastr = r;
90  } else {
91  lastr->link = r;
92  r->p1 = lastr;
93  lastr->s1 = r;
94  lastr = r;
95  }
96  r->prog = p;
97  r->pc = val;
98  val++;
99 
100  lp = log5;
101  for(i=0; i<5; i++) {
102  lp->c--;
103  if(lp->c <= 0) {
104  lp->c = lp->m;
105  if(lp->p != R)
106  lp->p->log5 = r;
107  lp->p = r;
108  (lp+1)->c = 0;
109  break;
110  }
111  lp++;
112  }
113 
114  r1 = r->p1;
115  if(r1 != R)
116  switch(r1->prog->as) {
117  case ARETURN:
118  case ARET:
119  case AB:
120  case AERET:
121  r->p1 = R;
122  r1->s1 = R;
123  }
124 
125  /*
126  * left side always read
127  */
128  bit = mkvar(&p->from, p->as==AMOVW || p->as == AMOVWU || p->as == AMOV);
129  for(z=0; z<BITS; z++)
130  r->use1.b[z] |= bit.b[z];
131 
132  /*
133  * right side depends on opcode
134  */
135  bit = mkvar(&p->to, 0);
136  if(bany(&bit))
137  switch(p->as) {
138  default:
139  diag(Z, "reg: unknown asop: %A", p->as);
140  break;
141 
142  /*
143  * right side write
144  */
145  case ANOP:
146  case AMOV:
147  case AMOVB:
148  case AMOVBU:
149  case AMOVH:
150  case AMOVHU:
151  case AMOVW:
152  case AMOVWU:
153  case ASXTW:
154  case AFMOVS:
155  case AFCVTSD:
156  case AFMOVD:
157  case AFCVTDS:
158  for(z=0; z<BITS; z++)
159  r->set.b[z] |= bit.b[z];
160  break;
161 
162  /*
163  * funny
164  */
165  case ABL:
166  for(z=0; z<BITS; z++)
167  addrs.b[z] |= bit.b[z];
168  break;
169  }
170 
171  }
172  if(firstr == R)
173  return;
174  initpc = pc - val;
175  npc = val;
176 
177  /*
178  * pass 2
179  * turn branch references to pointers
180  * build back pointers
181  */
182  for(r = firstr; r != R; r = r->link) {
183  p = r->prog;
184  if(p->to.type == D_BRANCH) {
185  val = p->to.offset - initpc;
186  r1 = firstr;
187  while(r1 != R) {
188  r2 = r1->log5;
189  if(r2 != R && val >= r2->pc) {
190  r1 = r2;
191  continue;
192  }
193  if(r1->pc == val)
194  break;
195  r1 = r1->link;
196  }
197  if(r1 == R) {
198  nearln = p->lineno;
199  diag(Z, "ref not found\n%P", p);
200  continue;
201  }
202  if(r1 == r) {
203  nearln = p->lineno;
204  diag(Z, "ref to self\n%P", p);
205  continue;
206  }
207  r->s2 = r1;
208  r->p2link = r1->p2;
209  r1->p2 = r;
210  }
211  }
212  if(debug['R']) {
213  p = firstr->prog;
214  print("\n%L %D\n", p->lineno, &p->from);
215  }
216 
217  /*
218  * pass 2.5
219  * find looping structure
220  */
221  for(r = firstr; r != R; r = r->link)
222  r->active = 0;
223  change = 0;
224  loopit(firstr, npc);
225 
226  /*
227  * pass 3
228  * iterate propagating usage
229  * back until flow graph is complete
230  */
231 loop1:
232  change = 0;
233  for(r = firstr; r != R; r = r->link)
234  r->active = 0;
235  for(r = firstr; r != R; r = r->link)
236  if(r->prog->as == ARET || r->prog->as == ARETURN)
237  prop(r, zbits, zbits);
238 loop11:
239  /* pick up unreachable code */
240  i = 0;
241  for(r = firstr; r != R; r = r1) {
242  r1 = r->link;
243  if(r1 && r1->active && !r->active) {
244  prop(r, zbits, zbits);
245  i = 1;
246  }
247  }
248  if(i)
249  goto loop11;
250  if(change)
251  goto loop1;
252 
253 
254  /*
255  * pass 4
256  * iterate propagating register/variable synchrony
257  * forward until graph is complete
258  */
259 loop2:
260  change = 0;
261  for(r = firstr; r != R; r = r->link)
262  r->active = 0;
263  synch(firstr, zbits);
264  if(change)
265  goto loop2;
266 
267  addsplits();
268 
269  if(debug['R'] && debug['v']) {
270  print("\nprop structure:\n");
271  for(r = firstr; r != R; r = r->link) {
272  print("%ld:%P", r->loop, r->prog);
273  for(z=0; z<BITS; z++)
274  bit.b[z] = r->set.b[z] |
275  r->refahead.b[z] | r->calahead.b[z] |
276  r->refbehind.b[z] | r->calbehind.b[z] |
277  r->use1.b[z] | r->use2.b[z];
278  if(bany(&bit)) {
279  print("\t");
280  if(bany(&r->use1))
281  print(" u1=%B", r->use1);
282  if(bany(&r->use2))
283  print(" u2=%B", r->use2);
284  if(bany(&r->set))
285  print(" st=%B", r->set);
286  if(bany(&r->refahead))
287  print(" ra=%B", r->refahead);
288  if(bany(&r->calahead))
289  print(" ca=%B", r->calahead);
290  if(bany(&r->refbehind))
291  print(" rb=%B", r->refbehind);
292  if(bany(&r->calbehind))
293  print(" cb=%B", r->calbehind);
294  }
295  print("\n");
296  }
297  }
298 
299  /*
300  * pass 5
301  * isolate regions
302  * calculate costs (paint1)
303  */
304  r = firstr;
305  if(r) {
306  for(z=0; z<BITS; z++)
307  bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
308  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
309  if(bany(&bit)) {
310  nearln = r->prog->lineno;
311  warn(Z, "used and not set: %B", bit);
312  if(debug['R'] && !debug['w'])
313  print("used and not set: %B\n", bit);
314  }
315  }
316 
317  for(r = firstr; r != R; r = r->link)
318  r->act = zbits;
319  rgp = region;
320  nregion = 0;
321  for(r = firstr; r != R; r = r->link) {
322  for(z=0; z<BITS; z++)
323  bit.b[z] = r->set.b[z] &
324  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
325  if(bany(&bit)) {
326  nearln = r->prog->lineno;
327  warn(Z, "set and not used: %B", bit);
328  if(debug['R'])
329  print("set and not used: %B\n", bit);
330  excise(r);
331  }
332  for(z=0; z<BITS; z++)
333  bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
334  while(bany(&bit)) {
335  i = bnum(bit);
336  rgp->enter = r;
337  rgp->varno = i;
338  change = 0;
339  if(debug['R'] && debug['v'])
340  print("\n");
341  paint1(r, i);
342  bit.b[i/32] &= ~(1L<<(i%32));
343  if(change <= 0) {
344  if(debug['R'])
345  print("%L $%d: %B\n",
346  r->prog->lineno, change, blsh(i));
347  continue;
348  }
349  rgp->cost = change;
350  nregion++;
351  if(nregion >= NRGN) {
352  warn(Z, "too many regions");
353  goto brk;
354  }
355  rgp++;
356  }
357  }
358 brk:
359  qsort(region, nregion, sizeof(region[0]), rcmp);
360 
361  /*
362  * pass 6
363  * determine used registers (paint2)
364  * replace code (paint3)
365  */
366  rgp = region;
367  for(i=0; i<nregion; i++) {
368  bit = blsh(rgp->varno);
369  vreg = paint2(rgp->enter, rgp->varno);
370  vreg = allreg(vreg, rgp);
371  if(debug['R']) {
372  if(rgp->regno >= NREG)
373  print("%L $%d F%d: %B\n",
374  rgp->enter->prog->lineno,
375  rgp->cost,
376  rgp->regno-NREG,
377  bit);
378  else
379  print("%L $%d R%d: %B\n",
380  rgp->enter->prog->lineno,
381  rgp->cost,
382  rgp->regno,
383  bit);
384  }
385  if(rgp->regno != 0)
386  paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
387  rgp++;
388  }
389  /*
390  * pass 7
391  * peep-hole on basic block
392  */
393  if(!debug['R'] || debug['P'])
394  peep();
395 
396  /*
397  * pass 8
398  * recalculate pc
399  */
400  val = initpc;
401  for(r = firstr; r != R; r = r1) {
402  r->pc = val;
403  p = r->prog;
404  p1 = P;
405  r1 = r->link;
406  if(r1 != R)
407  p1 = r1->prog;
408  for(; p != p1; p = p->link) {
409  switch(p->as) {
410  default:
411  val++;
412  break;
413 
414  case ANOP:
415  case ADATA:
416  case AGLOBL:
417  case ANAME:
418  case ASIGNAME:
419  break;
420  }
421  }
422  }
423  pc = val;
424 
425  /*
426  * fix up branches
427  */
428  if(debug['R'])
429  if(bany(&addrs))
430  print("addrs: %B\n", addrs);
431 
432  r1 = 0; /* set */
433  for(r = firstr; r != R; r = r->link) {
434  p = r->prog;
435  if(p->to.type == D_BRANCH)
436  p->to.offset = r->s2->pc;
437  r1 = r;
438  }
439 
440  /*
441  * last pass
442  * eliminate nops
443  * free aux structures
444  */
445  for(p = firstr->prog; p != P; p = p->link){
446  while(p->link && p->link->as == ANOP)
447  p->link = p->link->link;
448  }
449  if(r1 != R) {
450  r1->link = freer;
451  freer = firstr;
452  }
453 }
454 
455 void
456 addsplits(void)
457 {
458  Reg *r, *r1;
459  int z, i;
460  Bits bit;
461 
462  for(r = firstr; r != R; r = r->link) {
463  if(r->loop > 1)
464  continue;
465  if(r->prog->as == ABL)
466  continue;
467  for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
468  if(r1->loop <= 1)
469  continue;
470  for(z=0; z<BITS; z++)
471  bit.b[z] = r1->calbehind.b[z] &
472  (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
473  ~(r->calahead.b[z] & addrs.b[z]);
474  while(bany(&bit)) {
475  i = bnum(bit);
476  bit.b[i/32] &= ~(1L << (i%32));
477  }
478  }
479  }
480 }
481 
482 /*
483  * add mov b,rn
484  * just after r
485  */
486 void
487 addmove(Reg *r, int bn, int rn, int f)
488 {
489  Prog *p, *p1;
490  Adr *a;
491  Var *v;
492 
493  p1 = alloc(sizeof(*p1));
494  *p1 = zprog;
495  p = r->prog;
496 
497  p1->link = p->link;
498  p->link = p1;
499  p1->lineno = p->lineno;
500 
501  v = var + bn;
502 
503  a = &p1->to;
504  a->sym = v->sym;
505  a->name = v->name;
506  a->offset = v->offset;
507  a->etype = v->etype;
508  a->type = D_OREG;
509  if(a->etype == TARRAY || a->sym == S)
510  a->type = D_CONST;
511 
512  p1->as = AMOVW;
513  if(v->etype == TCHAR || v->etype == TUCHAR)
514  p1->as = AMOVB;
515  if(v->etype == TSHORT || v->etype == TUSHORT)
516  p1->as = AMOVH;
517  if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
518  p1->as = AMOV;
519  if(v->etype == TFLOAT)
520  p1->as = AFMOVS;
521  if(v->etype == TDOUBLE)
522  p1->as = AFMOVD;
523 
524  p1->from.type = D_REG;
525  p1->from.reg = rn;
526  if(rn >= NREG) {
527  p1->from.type = D_FREG;
528  p1->from.reg = rn-NREG;
529  }
530  if(!f) {
531  p1->from = *a;
532  *a = zprog.from;
533  a->type = D_REG;
534  a->reg = rn;
535  if(rn >= NREG) {
536  a->type = D_FREG;
537  a->reg = rn-NREG;
538  }
539  if(v->etype == TUCHAR)
540  p1->as = AMOVBU;
541  if(v->etype == TUSHORT)
542  p1->as = AMOVHU;
543  if(v->etype == TUINT || v->etype == TULONG)
544  p1->as = AMOVWU;
545  }
546  if(debug['R'])
547  print("%P\t.a%P\n", p, p1);
548 }
549 
550 Bits
551 mkvar(Adr *a, int docon)
552 {
553  Var *v;
554  int i, t, n, et, z;
555  vlong o;
556  Bits bit;
557  Sym *s;
558 
559  t = a->type;
560  if(t == D_REG && a->reg != NREG)
561  regbits |= RtoB(a->reg);
562  if(t == D_FREG && a->reg != NREG)
563  regbits |= FtoB(a->reg);
564  s = a->sym;
565  o = a->offset;
566  et = a->etype;
567  if(s == S) {
568  if(t != D_CONST || !docon || a->reg != NREG)
569  goto none;
570  // et = TLONG;
571  }
572  if(t == D_CONST) {
573  if(s == S && sval(o))
574  goto none;
575  }
576 
577  n = a->name;
578  v = var;
579  for(i=0; i<nvar; i++) {
580  if(s == v->sym)
581  if(n == v->name)
582  if(o == v->offset)
583  goto out;
584  v++;
585  }
586  if(s)
587  if(s->name[0] == '.')
588  goto none;
589  if(nvar >= NVAR) {
590  if(debug['w'] > 1 && s)
591  warn(Z, "variable not optimized: %s", s->name);
592  goto none;
593  }
594  i = nvar;
595  nvar++;
596  v = &var[i];
597  v->sym = s;
598  v->offset = o;
599  v->etype = et;
600  v->name = n;
601  if(debug['R'])
602  print("bit=%2d et=%2d %D\n", i, et, a);
603 out:
604  bit = blsh(i);
605  if(n == D_EXTERN || n == D_STATIC)
606  for(z=0; z<BITS; z++)
607  externs.b[z] |= bit.b[z];
608  if(n == D_PARAM)
609  for(z=0; z<BITS; z++)
610  params.b[z] |= bit.b[z];
611  if(v->etype != et || !(typechlpfd[et] || typev[et])) /* funny punning */
612  for(z=0; z<BITS; z++)
613  addrs.b[z] |= bit.b[z];
614  if(t == D_CONST) {
615  if(s == S) {
616  for(z=0; z<BITS; z++)
617  consts.b[z] |= bit.b[z];
618  return bit;
619  }
620  if(et != TARRAY)
621  for(z=0; z<BITS; z++)
622  addrs.b[z] |= bit.b[z];
623  for(z=0; z<BITS; z++)
624  params.b[z] |= bit.b[z];
625  return bit;
626  }
627  if(t == D_OREG)
628  return bit;
629 
630 none:
631  return zbits;
632 }
633 
634 void
635 prop(Reg *r, Bits ref, Bits cal)
636 {
637  Reg *r1, *r2;
638  int z;
639 
640  for(r1 = r; r1 != R; r1 = r1->p1) {
641  for(z=0; z<BITS; z++) {
642  ref.b[z] |= r1->refahead.b[z];
643  if(ref.b[z] != r1->refahead.b[z]) {
644  r1->refahead.b[z] = ref.b[z];
645  change++;
646  }
647  cal.b[z] |= r1->calahead.b[z];
648  if(cal.b[z] != r1->calahead.b[z]) {
649  r1->calahead.b[z] = cal.b[z];
650  change++;
651  }
652  }
653  switch(r1->prog->as) {
654  case ABL:
655  for(z=0; z<BITS; z++) {
656  cal.b[z] |= ref.b[z] | externs.b[z];
657  ref.b[z] = 0;
658  }
659  break;
660 
661  case ATEXT:
662  for(z=0; z<BITS; z++) {
663  cal.b[z] = 0;
664  ref.b[z] = 0;
665  }
666  break;
667 
668  case ARET:
669  case ARETURN:
670  for(z=0; z<BITS; z++) {
671  cal.b[z] = externs.b[z];
672  ref.b[z] = 0;
673  }
674  }
675  for(z=0; z<BITS; z++) {
676  ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
677  r1->use1.b[z] | r1->use2.b[z];
678  cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
679  r1->refbehind.b[z] = ref.b[z];
680  r1->calbehind.b[z] = cal.b[z];
681  }
682  if(r1->active)
683  break;
684  r1->active = 1;
685  }
686  for(; r != r1; r = r->p1)
687  for(r2 = r->p2; r2 != R; r2 = r2->p2link)
688  prop(r2, r->refbehind, r->calbehind);
689 }
690 
691 /*
692  * find looping structure
693  *
694  * 1) find reverse postordering
695  * 2) find approximate dominators,
696  * the actual dominators if the flow graph is reducible
697  * otherwise, dominators plus some other non-dominators.
698  * See Matthew S. Hecht and Jeffrey D. Ullman,
699  * "Analysis of a Simple Algorithm for Global Data Flow Problems",
700  * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
701  * Oct. 1-3, 1973, pp. 207-217.
702  * 3) find all nodes with a predecessor dominated by the current node.
703  * such a node is a loop head.
704  * recursively, all preds with a greater rpo number are in the loop
705  */
706 long
707 postorder(Reg *r, Reg **rpo2r, long n)
708 {
709  Reg *r1;
710 
711  r->rpo = 1;
712  r1 = r->s1;
713  if(r1 && !r1->rpo)
714  n = postorder(r1, rpo2r, n);
715  r1 = r->s2;
716  if(r1 && !r1->rpo)
717  n = postorder(r1, rpo2r, n);
718  rpo2r[n] = r;
719  n++;
720  return n;
721 }
722 
723 long
724 rpolca(long *idom, long rpo1, long rpo2)
725 {
726  long t;
727 
728  if(rpo1 == -1)
729  return rpo2;
730  while(rpo1 != rpo2){
731  if(rpo1 > rpo2){
732  t = rpo2;
733  rpo2 = rpo1;
734  rpo1 = t;
735  }
736  while(rpo1 < rpo2){
737  t = idom[rpo2];
738  if(t >= rpo2)
739  fatal(Z, "bad idom");
740  rpo2 = t;
741  }
742  }
743  return rpo1;
744 }
745 
746 int
747 doms(long *idom, long r, long s)
748 {
749  while(s > r)
750  s = idom[s];
751  return s == r;
752 }
753 
754 int
755 loophead(long *idom, Reg *r)
756 {
757  long src;
758 
759  src = r->rpo;
760  if(r->p1 != R && doms(idom, src, r->p1->rpo))
761  return 1;
762  for(r = r->p2; r != R; r = r->p2link)
763  if(doms(idom, src, r->rpo))
764  return 1;
765  return 0;
766 }
767 
768 void
769 loopmark(Reg **rpo2r, long head, Reg *r)
770 {
771  if(r->rpo < head || r->active == head)
772  return;
773  r->active = head;
774  r->loop += LOOP;
775  if(r->p1 != R)
776  loopmark(rpo2r, head, r->p1);
777  for(r = r->p2; r != R; r = r->p2link)
778  loopmark(rpo2r, head, r);
779 }
780 
781 void
782 loopit(Reg *r, long nr)
783 {
784  Reg *r1;
785  long i, d, me;
786 
787  if(nr > maxnr) {
788  rpo2r = alloc(nr * sizeof(Reg*));
789  idom = alloc(nr * sizeof(long));
790  maxnr = nr;
791  }
792 
793  d = postorder(r, rpo2r, 0);
794  if(d > nr)
795  fatal(Z, "too many reg nodes");
796  nr = d;
797  for(i = 0; i < nr / 2; i++){
798  r1 = rpo2r[i];
799  rpo2r[i] = rpo2r[nr - 1 - i];
800  rpo2r[nr - 1 - i] = r1;
801  }
802  for(i = 0; i < nr; i++)
803  rpo2r[i]->rpo = i;
804 
805  idom[0] = 0;
806  for(i = 0; i < nr; i++){
807  r1 = rpo2r[i];
808  me = r1->rpo;
809  d = -1;
810  if(r1->p1 != R && r1->p1->rpo < me)
811  d = r1->p1->rpo;
812  for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
813  if(r1->rpo < me)
814  d = rpolca(idom, d, r1->rpo);
815  idom[i] = d;
816  }
817 
818  for(i = 0; i < nr; i++){
819  r1 = rpo2r[i];
820  r1->loop++;
821  if(r1->p2 != R && loophead(idom, r1))
822  loopmark(rpo2r, i, r1);
823  }
824 }
825 
826 void
827 synch(Reg *r, Bits dif)
828 {
829  Reg *r1;
830  int z;
831 
832  for(r1 = r; r1 != R; r1 = r1->s1) {
833  for(z=0; z<BITS; z++) {
834  dif.b[z] = (dif.b[z] &
835  ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
836  r1->set.b[z] | r1->regdiff.b[z];
837  if(dif.b[z] != r1->regdiff.b[z]) {
838  r1->regdiff.b[z] = dif.b[z];
839  change++;
840  }
841  }
842  if(r1->active)
843  break;
844  r1->active = 1;
845  for(z=0; z<BITS; z++)
846  dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
847  if(r1->s2 != R)
848  synch(r1->s2, dif);
849  }
850 }
851 
852 ulong
853 allreg(ulong b, Rgn *r)
854 {
855  Var *v;
856  int i;
857 
858  v = var + r->varno;
859  r->regno = 0;
860  switch(v->etype) {
861 
862  default:
863  diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
864  break;
865 
866  case TCHAR:
867  case TUCHAR:
868  case TSHORT:
869  case TUSHORT:
870  case TINT:
871  case TUINT:
872  case TLONG:
873  case TULONG:
874  case TIND:
875  case TVLONG:
876  case TUVLONG:
877  case TARRAY:
878  i = BtoR(~b);
879  if(i && r->cost > 0) {
880  r->regno = i;
881  return RtoB(i);
882  }
883  break;
884 
885  case TDOUBLE:
886  case TFLOAT:
887  i = BtoF(~b);
888  if(i && r->cost > 0) {
889  r->regno = i+NREG;
890  return FtoB(i);
891  }
892  break;
893  }
894  return 0;
895 }
896 
897 void
898 paint1(Reg *r, int bn)
899 {
900  Reg *r1;
901  Prog *p;
902  int z;
903  ulong bb;
904 
905  z = bn/32;
906  bb = 1L<<(bn%32);
907  if(r->act.b[z] & bb)
908  return;
909  for(;;) {
910  if(!(r->refbehind.b[z] & bb))
911  break;
912  r1 = r->p1;
913  if(r1 == R)
914  break;
915  if(!(r1->refahead.b[z] & bb))
916  break;
917  if(r1->act.b[z] & bb)
918  break;
919  r = r1;
920  }
921 
922  if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
923  change -= CLOAD * r->loop;
924  if(debug['R'] && debug['v'])
925  print("%ld%P\tld %B $%d\n", r->loop,
926  r->prog, blsh(bn), change);
927  }
928  for(;;) {
929  r->act.b[z] |= bb;
930  p = r->prog;
931 
932  if(r->use1.b[z] & bb) {
933  change += CREF * r->loop;
934  if(p->to.type == D_FREG && (p->as == AMOVW || p->as == AMOV))
935  change = -CINF; /* cant go Rreg to Freg */
936  if(debug['R'] && debug['v'])
937  print("%ld%P\tu1 %B $%d\n", r->loop,
938  p, blsh(bn), change);
939  }
940 
941  if((r->use2.b[z]|r->set.b[z]) & bb) {
942  change += CREF * r->loop;
943  if(p->from.type == D_FREG && (p->as == AMOVW || p->as == AMOV))
944  change = -CINF; /* cant go Rreg to Freg */
945  if(debug['R'] && debug['v'])
946  print("%ld%P\tu2 %B $%d\n", r->loop,
947  p, blsh(bn), change);
948  }
949 
950  if(STORE(r) & r->regdiff.b[z] & bb) {
951  change -= CLOAD * r->loop;
952  if(debug['R'] && debug['v'])
953  print("%ld%P\tst %B $%d\n", r->loop,
954  p, blsh(bn), change);
955  }
956 
957  if(r->refbehind.b[z] & bb)
958  for(r1 = r->p2; r1 != R; r1 = r1->p2link)
959  if(r1->refahead.b[z] & bb)
960  paint1(r1, bn);
961 
962  if(!(r->refahead.b[z] & bb))
963  break;
964  r1 = r->s2;
965  if(r1 != R)
966  if(r1->refbehind.b[z] & bb)
967  paint1(r1, bn);
968  r = r->s1;
969  if(r == R)
970  break;
971  if(r->act.b[z] & bb)
972  break;
973  if(!(r->refbehind.b[z] & bb))
974  break;
975  }
976 }
977 
978 ulong
979 paint2(Reg *r, int bn)
980 {
981  Reg *r1;
982  int z;
983  ulong bb, vreg;
984 
985  z = bn/32;
986  bb = 1L << (bn%32);
987  vreg = regbits;
988  if(!(r->act.b[z] & bb))
989  return vreg;
990  for(;;) {
991  if(!(r->refbehind.b[z] & bb))
992  break;
993  r1 = r->p1;
994  if(r1 == R)
995  break;
996  if(!(r1->refahead.b[z] & bb))
997  break;
998  if(!(r1->act.b[z] & bb))
999  break;
1000  r = r1;
1001  }
1002  for(;;) {
1003  r->act.b[z] &= ~bb;
1004 
1005  vreg |= r->regu;
1006 
1007  if(r->refbehind.b[z] & bb)
1008  for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1009  if(r1->refahead.b[z] & bb)
1010  vreg |= paint2(r1, bn);
1011 
1012  if(!(r->refahead.b[z] & bb))
1013  break;
1014  r1 = r->s2;
1015  if(r1 != R)
1016  if(r1->refbehind.b[z] & bb)
1017  vreg |= paint2(r1, bn);
1018  r = r->s1;
1019  if(r == R)
1020  break;
1021  if(!(r->act.b[z] & bb))
1022  break;
1023  if(!(r->refbehind.b[z] & bb))
1024  break;
1025  }
1026  return vreg;
1027 }
1028 
1029 void
1030 paint3(Reg *r, int bn, long rb, int rn)
1031 {
1032  Reg *r1;
1033  Prog *p;
1034  int z;
1035  ulong bb;
1036 
1037  z = bn/32;
1038  bb = 1L << (bn%32);
1039  if(r->act.b[z] & bb)
1040  return;
1041  for(;;) {
1042  if(!(r->refbehind.b[z] & bb))
1043  break;
1044  r1 = r->p1;
1045  if(r1 == R)
1046  break;
1047  if(!(r1->refahead.b[z] & bb))
1048  break;
1049  if(r1->act.b[z] & bb)
1050  break;
1051  r = r1;
1052  }
1053 
1054  if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1055  addmove(r, bn, rn, 0);
1056  for(;;) {
1057  r->act.b[z] |= bb;
1058  p = r->prog;
1059 
1060  if(r->use1.b[z] & bb) {
1061  if(debug['R'])
1062  print("%P", p);
1063  addreg(&p->from, rn);
1064  if(debug['R'])
1065  print("\t.c%P\n", p);
1066  }
1067  if((r->use2.b[z]|r->set.b[z]) & bb) {
1068  if(debug['R'])
1069  print("%P", p);
1070  addreg(&p->to, rn);
1071  if(debug['R'])
1072  print("\t.c%P\n", p);
1073  }
1074 
1075  if(STORE(r) & r->regdiff.b[z] & bb)
1076  addmove(r, bn, rn, 1);
1077  r->regu |= rb;
1078 
1079  if(r->refbehind.b[z] & bb)
1080  for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1081  if(r1->refahead.b[z] & bb)
1082  paint3(r1, bn, rb, rn);
1083 
1084  if(!(r->refahead.b[z] & bb))
1085  break;
1086  r1 = r->s2;
1087  if(r1 != R)
1088  if(r1->refbehind.b[z] & bb)
1089  paint3(r1, bn, rb, rn);
1090  r = r->s1;
1091  if(r == R)
1092  break;
1093  if(r->act.b[z] & bb)
1094  break;
1095  if(!(r->refbehind.b[z] & bb))
1096  break;
1097  }
1098 }
1099 
1100 void
1101 addreg(Adr *a, int rn)
1102 {
1103 
1104  a->sym = 0;
1105  a->name = D_NONE;
1106  a->type = D_REG;
1107  a->reg = rn;
1108  if(rn >= NREG) {
1109  a->type = D_FREG;
1110  a->reg = rn-NREG;
1111  }
1112 }
1113 
1114 /*
1115  * bit reg
1116  * 0 R9
1117  * 1 R10
1118  * ... ...
1119  * 6 R15
1120  */
1121 long
1122 RtoB(int r)
1123 {
1124  if(r >= REGMIN && r <= REGMAX)
1125  return 1L << (r-REGMIN);
1126  return 0;
1127 }
1128 
1129 int
1130 BtoR(long b)
1131 {
1132  b &= 0x07fL;
1133  if(b == 0)
1134  return 0;
1135  return bitno(b) + REGMIN;
1136 }
1137 
1138 /*
1139  * bit reg
1140  * 22 F7
1141  * 23 F8
1142  * ... ...
1143  * 29 F14
1144  */
1145 long
1146 FtoB(int f)
1147 {
1148  if(f < FREGMIN || f > FREGEXT)
1149  return 0;
1150  return 1L << (f - FREGMIN + 22);
1151 }
1152 
1153 int
1154 BtoF(long b)
1155 {
1156 
1157  b &= 0x3fc00000L;
1158  if(b == 0)
1159  return 0;
1160  return bitno(b) - 22 + FREGMIN;
1161 }