changeset 6868: | 3a2e3a97af40 |
---|---|
parent 6867: | b7b7ff7d6823 |
child 6869: | 194ab9426a0c |
author: | cinap_lenrek@felloff.net |
date: | Wed, 07 Nov 2018 16:48:14 +0100 |
files: | sys/src/9/bcm/coproc.c sys/src/9/bcm/vfp3.c |
description: | bcm: speed up co-processor operations by avoiding i+d cache flush on each operation. coproc.c generated the instructions anew each time, requiring an i+d cache flush for each operation. instead, we can speed this up like this: given that the coprocessor registers are per cpu, we can assume that interrupts have already been disabled by the caller to prevent a process switch to another cpu. we cache the instructions generated in a static append only buffer and maintain separate end pointers for each cpu. the cache flushes only need to be done when new operations have been added to the buffer. |
1.1--- a/sys/src/9/bcm/coproc.c 1.2+++ b/sys/src/9/bcm/coproc.c 1.3@@ -1,1 +1,163 @@ 1.4-#include "../teg2/coproc.c" 1.5+/* 1.6+ * arm co-processors 1.7+ * mainly to cope with arm hard-wiring register numbers into instructions. 1.8+ * 1.9+ * CP15 (system control) is the one that gets used the most in practice. 1.10+ * 1.11+ * these routines must be callable from KZERO. 1.12+ * 1.13+ * on a multiprocessor, process switching to another cpu is assumed 1.14+ * to be inhibited by the caller as these registers are local to the cpu. 1.15+ */ 1.16+#include "u.h" 1.17+#include "../port/lib.h" 1.18+#include "mem.h" 1.19+#include "dat.h" 1.20+#include "fns.h" 1.21+#include "io.h" 1.22+ 1.23+#include "arm.h" 1.24+ 1.25+enum { 1.26+ /* alternates: 0xe12fff1e BX (R14); last e is R14 */ 1.27+ /* 0xe28ef000 B 0(R14); second e is R14 (ken) */ 1.28+ Retinst = 0xe1a0f00e, /* MOV R14, R15 */ 1.29+ 1.30+ Opmask = MASK(3), 1.31+ Regmask = MASK(4), 1.32+}; 1.33+ 1.34+static void* 1.35+mkinstr(ulong wd) 1.36+{ 1.37+ static ulong ib[256], *ep[MAXMACH+1]; 1.38+ static Lock lk; 1.39+ ulong *ip, *ie; 1.40+ 1.41+ ie = ep[m->machno]; 1.42+ for(ip = ib; ip < ie; ip += 2) 1.43+ if(*ip == wd) 1.44+ return ip; 1.45+ 1.46+ ilock(&lk); 1.47+ ie = ep[MAXMACH]; 1.48+ for(; ip < ie; ip += 2) 1.49+ if(*ip == wd) 1.50+ goto Found; 1.51+ if(ip >= &ib[nelem(ib)]) 1.52+ panic("mkinstr: out of instrucuction buffer"); 1.53+ ip[0] = wd; 1.54+ ip[1] = Retinst; 1.55+ ep[MAXMACH] = ie = ip + 2; 1.56+ cachedwbse(ip, 2*sizeof(*ip)); 1.57+Found: 1.58+ iunlock(&lk); 1.59+ cacheiinv(); 1.60+ ep[m->machno] = ie; 1.61+ return ip; 1.62+} 1.63+ 1.64+ 1.65+static void* 1.66+setupcpop(ulong opcode, int cp, int op1, int crn, int crm, 1.67+ int op2) 1.68+{ 1.69+ op1 &= Opmask; 1.70+ op2 &= Opmask; 1.71+ crn &= Regmask; 1.72+ crm &= Regmask; 1.73+ cp &= Regmask; 1.74+ return mkinstr(opcode | op1 << 21 | crn << 16 | cp << 8 | op2 << 5 | crm); 1.75+} 1.76+ 1.77+ulong 1.78+cprd(int cp, int op1, int crn, int crm, int op2) 1.79+{ 
1.80+ /* 1.81+ * MRC. return value will be in R0, which is convenient. 1.82+ * Rt will be R0. 1.83+ */ 1.84+ ulong (*fp)(void) = setupcpop(0xee100010, cp, op1, crn, crm, op2); 1.85+ return fp(); 1.86+} 1.87+ 1.88+void 1.89+cpwr(int cp, int op1, int crn, int crm, int op2, ulong val) 1.90+{ 1.91+ /* MCR, Rt is R0 */ 1.92+ void (*fp)(ulong) = setupcpop(0xee000010, cp, op1, crn, crm, op2); 1.93+ fp(val); 1.94+} 1.95+ 1.96+ulong 1.97+cprdsc(int op1, int crn, int crm, int op2) 1.98+{ 1.99+ return cprd(CpSC, op1, crn, crm, op2); 1.100+} 1.101+ 1.102+void 1.103+cpwrsc(int op1, int crn, int crm, int op2, ulong val) 1.104+{ 1.105+ cpwr(CpSC, op1, crn, crm, op2, val); 1.106+} 1.107+ 1.108+/* floating point */ 1.109+ 1.110+/* fp coproc control */ 1.111+static void* 1.112+setupfpctlop(int opcode, int fpctlreg) 1.113+{ 1.114+ fpctlreg &= Nfpctlregs - 1; 1.115+ return mkinstr(opcode | fpctlreg << 16 | 0 << 12 | CpFP << 8); 1.116+} 1.117+ 1.118+ulong 1.119+fprd(int fpreg) 1.120+{ 1.121+ /* 1.122+ * VMRS. return value will be in R0, which is convenient. 1.123+ * Rt will be R0. 1.124+ */ 1.125+ ulong (*fp)(void) = setupfpctlop(0xeef00010, fpreg); 1.126+ return fp(); 1.127+} 1.128+ 1.129+void 1.130+fpwr(int fpreg, ulong val) 1.131+{ 1.132+ /* 1.133+ * fpu might be off and this VMSR might enable it 1.134+ * VMSR, Rt is R0 1.135+ */ 1.136+ void (*fp)(ulong) = setupfpctlop(0xeee00010, fpreg); 1.137+ fp(val); 1.138+} 1.139+ 1.140+/* fp register access; don't bother with single precision */ 1.141+static void* 1.142+setupfpop(int opcode, int fpreg) 1.143+{ 1.144+ ulong wd = opcode | 0 << 16 | (fpreg & (16 - 1)) << 12; 1.145+ if (fpreg >= 16) 1.146+ wd |= 1 << 22; /* high bit of dfp reg # */ 1.147+ return mkinstr(wd); 1.148+} 1.149+ 1.150+ulong 1.151+fpsavereg(int fpreg, uvlong *fpp) 1.152+{ 1.153+ /* 1.154+ * VSTR. pointer will be in R0, which is convenient. 1.155+ * Rt will be R0. 
1.156+ */ 1.157+ ulong (*fp)(uvlong *) = setupfpop(0xed000000 | CpDFP << 8, fpreg); 1.158+ return fp(fpp); 1.159+} 1.160+ 1.161+void 1.162+fprestreg(int fpreg, uvlong val) 1.163+{ 1.164+ /* VLDR, Rt is R0 */ 1.165+ void (*fp)(uvlong *) = setupfpop(0xed100000 | CpDFP << 8, fpreg); 1.166+ fp(&val); 1.167+}
2.1--- a/sys/src/9/bcm/vfp3.c 2.2+++ b/sys/src/9/bcm/vfp3.c 2.3@@ -338,8 +338,12 @@ fpuprocfork(Proc *p) 2.4 void 2.5 fpusysprocsetup(Proc *p) 2.6 { 2.7+ int s; 2.8+ 2.9+ s = splhi(); 2.10 p->fpstate = FPinit; 2.11 fpoff(); 2.12+ splx(s); 2.13 } 2.14 2.15 static void