changelog shortlog tags branches files raw gz bz2 help

Mercurial > hg > plan9front / changeset: bcm: import changes for raspi2/3 from richard miller

changeset 6832: f80792d28e0e
parent 6831: 98845adbacea
child 6833: 5c8c5fbd94ce
author: cinap_lenrek@felloff.net
date: Sat, 20 Oct 2018 19:56:31 +0200
files: sys/src/9/bcm/archbcm.c sys/src/9/bcm/archbcm2.c sys/src/9/bcm/arm.h sys/src/9/bcm/arm.s sys/src/9/bcm/armv6.s sys/src/9/bcm/armv7.s sys/src/9/bcm/cache.v7.s sys/src/9/bcm/clock.c sys/src/9/bcm/dat.h sys/src/9/bcm/devarch.c sys/src/9/bcm/devgpio.c sys/src/9/bcm/dma.c sys/src/9/bcm/dwcotg.h sys/src/9/bcm/fns.h sys/src/9/bcm/gpio.c sys/src/9/bcm/io.h sys/src/9/bcm/l.s sys/src/9/bcm/lexception.s sys/src/9/bcm/main.c sys/src/9/bcm/mem.h sys/src/9/bcm/mkfile sys/src/9/bcm/mmu.c sys/src/9/bcm/pi sys/src/9/bcm/pi2 sys/src/9/bcm/pif sys/src/9/bcm/rebootcode.s sys/src/9/bcm/trap.c sys/src/9/bcm/uartmini.c sys/src/9/bcm/usbdwc.c sys/src/9/bcm/vcore.c sys/src/9/bcm/vfp3.c
description: bcm: import changes for raspi2/3 from richard miller
     1.1--- a/sys/src/9/bcm/archbcm.c
     1.2+++ b/sys/src/9/bcm/archbcm.c
     1.3@@ -1,5 +1,5 @@
     1.4 /*
     1.5- * bcm2835 (e.g. raspberry pi) architecture-specific stuff
     1.6+ * bcm2835 (e.g. original raspberry pi) architecture-specific stuff
     1.7  */
     1.8 
     1.9 #include "u.h"
    1.10@@ -13,8 +13,19 @@
    1.11 
    1.12 #define	POWERREGS	(VIRTIO+0x100000)
    1.13 
    1.14+Soc soc = {
    1.15+	.dramsize	= 512*MiB,
    1.16+	.physio		= 0x20000000,
    1.17+	.busdram	= 0x40000000,
    1.18+	.busio		= 0x7E000000,
    1.19+	.armlocal	= 0,
    1.20+	.l1ptedramattrs = Cached | Buffered,
    1.21+	.l2ptedramattrs = Cached | Buffered,
    1.22+};
    1.23+
    1.24 enum {
    1.25 	Wdogfreq	= 65536,
    1.26+	Wdogtime	= 10,	/* seconds, ≤ 15 */
    1.27 };
    1.28 
    1.29 /*
    1.30@@ -25,6 +36,7 @@ enum {
    1.31 		Password	= 0x5A<<24,
    1.32 		CfgMask		= 0x03<<4,
    1.33 		CfgReset	= 0x02<<4,
    1.34+	Rsts		= 0x20>>2,
    1.35 	Wdog		= 0x24>>2,
    1.36 };
    1.37 
    1.38@@ -48,13 +60,68 @@ archreboot(void)
    1.39 }
    1.40 
    1.41 void
    1.42+wdogfeed(void)
    1.43+{
    1.44+	u32int *r;
    1.45+
    1.46+	r = (u32int*)POWERREGS;
    1.47+	r[Wdog] = Password | (Wdogtime * Wdogfreq);
    1.48+	r[Rstc] = Password | (r[Rstc] & ~CfgMask) | CfgReset;
    1.49+}
    1.50+
    1.51+void
    1.52+wdogoff(void)
    1.53+{
    1.54+	u32int *r;
    1.55+
    1.56+	r = (u32int*)POWERREGS;
    1.57+	r[Rstc] = Password | (r[Rstc] & ~CfgMask);
    1.58+}
    1.59+	
    1.60+char *
    1.61+cputype2name(char *buf, int size)
    1.62+{
    1.63+	seprint(buf, buf + size, "1176JZF-S");
    1.64+	return buf;
    1.65+}
    1.66+
    1.67+void
    1.68 cpuidprint(void)
    1.69 {
    1.70-	print("cpu%d: %dMHz ARM1176JZF-S\n", m->machno, m->cpumhz);
    1.71+	char name[64];
    1.72+
    1.73+	cputype2name(name, sizeof name);
    1.74+	delay(50);				/* let uart catch up */
    1.75+	print("cpu%d: %dMHz ARM %s\n", m->machno, m->cpumhz, name);
    1.76+}
    1.77+
    1.78+int
    1.79+getncpus(void)
    1.80+{
    1.81+	return 1;
    1.82+}
    1.83+
    1.84+int
    1.85+startcpus(uint)
    1.86+{
    1.87+	return 1;
    1.88 }
    1.89 
    1.90 void
    1.91 archbcmlink(void)
    1.92 {
    1.93+	addclock0link(wdogfeed, HZ);
    1.94 }
    1.95 
    1.96+int
    1.97+l2ap(int ap)
    1.98+{
    1.99+	return (AP(3, (ap))|AP(2, (ap))|AP(1, (ap))|AP(0, (ap)));
   1.100+}
   1.101+
   1.102+int
   1.103+cmpswap(long *addr, long old, long new)
   1.104+{
   1.105+	return cas32(addr, old, new);
   1.106+}
   1.107+
     2.1new file mode 100644
     2.2--- /dev/null
     2.3+++ b/sys/src/9/bcm/archbcm2.c
     2.4@@ -0,0 +1,248 @@
     2.5+/*
     2.6+ * bcm2836 (e.g. raspberry pi 2) architecture-specific stuff
     2.7+ */
     2.8+
     2.9+#include "u.h"
    2.10+#include "../port/lib.h"
    2.11+#include "mem.h"
    2.12+#include "dat.h"
    2.13+#include "fns.h"
    2.14+#include "../port/error.h"
    2.15+#include "io.h"
    2.16+#include "arm.h"
    2.17+
    2.18+#include "../port/netif.h"
    2.19+
    2.20+typedef struct Mbox Mbox;
    2.21+typedef struct Mboxes Mboxes;
    2.22+
    2.23+#define	POWERREGS	(VIRTIO+0x100000)
    2.24+
    2.25+Soc soc = {
    2.26+	.dramsize	= 0x3F000000, 	/* was 1024*MiB, but overlaps with physio */
    2.27+	.physio		= 0x3F000000,
    2.28+	.busdram	= 0xC0000000,
    2.29+	.busio		= 0x7E000000,
    2.30+	.armlocal	= 0x40000000,
    2.31+	.l1ptedramattrs = Cached | Buffered | L1wralloc | L1sharable,
    2.32+	.l2ptedramattrs = Cached | Buffered | L2wralloc | L2sharable,
    2.33+};
    2.34+
    2.35+enum {
    2.36+	Wdogfreq	= 65536,
    2.37+	Wdogtime	= 10,	/* seconds, ≤ 15 */
    2.38+};
    2.39+
    2.40+/*
    2.41+ * Power management / watchdog registers
    2.42+ */
    2.43+enum {
    2.44+	Rstc		= 0x1c>>2,
    2.45+		Password	= 0x5A<<24,
    2.46+		CfgMask		= 0x03<<4,
    2.47+		CfgReset	= 0x02<<4,
    2.48+	Rsts		= 0x20>>2,
    2.49+	Wdog		= 0x24>>2,
    2.50+};
    2.51+
    2.52+/*
    2.53+ * Arm local regs for smp
    2.54+ */
    2.55+struct Mbox {
    2.56+	u32int	doorbell;
    2.57+	u32int	mbox1;
    2.58+	u32int	mbox2;
    2.59+	u32int	startcpu;
    2.60+};
    2.61+struct Mboxes {
    2.62+	Mbox	set[4];
    2.63+	Mbox	clr[4];
    2.64+};
    2.65+
    2.66+enum {
    2.67+	Mboxregs	= 0x80
    2.68+};
    2.69+
    2.70+static Lock startlock[MAXMACH + 1];
    2.71+
    2.72+void
    2.73+archreset(void)
    2.74+{
    2.75+	fpon();
    2.76+}
    2.77+
    2.78+void
    2.79+archreboot(void)
    2.80+{
    2.81+	u32int *r;
    2.82+
    2.83+	r = (u32int*)POWERREGS;
    2.84+	r[Wdog] = Password | 1;
    2.85+	r[Rstc] = Password | (r[Rstc] & ~CfgMask) | CfgReset;
    2.86+	coherence();
    2.87+	for(;;)
    2.88+		;
    2.89+}
    2.90+
    2.91+void
    2.92+wdogfeed(void)
    2.93+{
    2.94+	u32int *r;
    2.95+
    2.96+	r = (u32int*)POWERREGS;
    2.97+	r[Wdog] = Password | (Wdogtime * Wdogfreq);
    2.98+	r[Rstc] = Password | (r[Rstc] & ~CfgMask) | CfgReset;
    2.99+}
   2.100+
   2.101+void
   2.102+wdogoff(void)
   2.103+{
   2.104+	u32int *r;
   2.105+
   2.106+	r = (u32int*)POWERREGS;
   2.107+	r[Rstc] = Password | (r[Rstc] & ~CfgMask);
   2.108+}
   2.109+
   2.110+
   2.111+char *
   2.112+cputype2name(char *buf, int size)
   2.113+{
   2.114+	u32int r;
   2.115+	uint part;
   2.116+	char *p;
   2.117+
   2.118+	r = cpidget();			/* main id register */
   2.119+	assert((r >> 24) == 'A');
   2.120+	part = (r >> 4) & MASK(12);
   2.121+	switch(part){
   2.122+	case 0xc07:
   2.123+		p = seprint(buf, buf + size, "Cortex-A7");
   2.124+		break;
   2.125+	case 0xd03:
   2.126+		p = seprint(buf, buf + size, "Cortex-A53");
   2.127+		break;
   2.128+	default:
   2.129+		p = seprint(buf, buf + size, "Unknown-%#x", part);
   2.130+		break;
   2.131+	}
   2.132+	seprint(p, buf + size, " r%ldp%ld",
   2.133+		(r >> 20) & MASK(4), r & MASK(4));
   2.134+	return buf;
   2.135+}
   2.136+
   2.137+void
   2.138+cpuidprint(void)
   2.139+{
   2.140+	char name[64];
   2.141+
   2.142+	cputype2name(name, sizeof name);
   2.143+	delay(50);				/* let uart catch up */
   2.144+	print("cpu%d: %dMHz ARM %s\n", m->machno, m->cpumhz, name);
   2.145+}
   2.146+
   2.147+int
   2.148+getncpus(void)
   2.149+{
   2.150+	int n, max;
   2.151+	char *p;
   2.152+
   2.153+	n = 4;
   2.154+	if(n > MAXMACH)
   2.155+		n = MAXMACH;
   2.156+	p = getconf("*ncpu");
   2.157+	if(p && (max = atoi(p)) > 0 && n > max)
   2.158+		n = max;
   2.159+	return n;
   2.160+}
   2.161+
   2.162+static int
   2.163+startcpu(uint cpu)
   2.164+{
   2.165+	Mboxes *mb;
   2.166+	int i;
   2.167+	void cpureset();
   2.168+
   2.169+	mb = (Mboxes*)(ARMLOCAL + Mboxregs);
   2.170+	if(mb->clr[cpu].startcpu)
   2.171+		return -1;
   2.172+	mb->set[cpu].startcpu = PADDR(cpureset);
   2.173+	coherence();
   2.174+	sev();
   2.175+	for(i = 0; i < 1000; i++)
   2.176+		if(mb->clr[cpu].startcpu == 0)
   2.177+			return 0;
   2.178+	mb->clr[cpu].startcpu = PADDR(cpureset);
   2.179+	mb->set[cpu].doorbell = 1;
   2.180+	return 0;
   2.181+}
   2.182+
   2.183+void
   2.184+mboxclear(uint cpu)
   2.185+{
   2.186+	Mboxes *mb;
   2.187+
   2.188+	mb = (Mboxes*)(ARMLOCAL + Mboxregs);
   2.189+	mb->clr[cpu].mbox1 = 1;
   2.190+}
   2.191+
   2.192+void
   2.193+wakecpu(uint cpu)
   2.194+{
   2.195+	Mboxes *mb;
   2.196+
   2.197+	mb = (Mboxes*)(ARMLOCAL + Mboxregs);
   2.198+	mb->set[cpu].mbox1 = 1;
   2.199+}
   2.200+
   2.201+int
   2.202+startcpus(uint ncpu)
   2.203+{
   2.204+	int i, timeout;
   2.205+
   2.206+	for(i = 0; i < ncpu; i++)
   2.207+		lock(&startlock[i]);
   2.208+	cachedwbse(startlock, sizeof startlock);
   2.209+	for(i = 1; i < ncpu; i++){
   2.210+		if(startcpu(i) < 0)
   2.211+			return i;
   2.212+		timeout = 10000000;
   2.213+		while(!canlock(&startlock[i]))
   2.214+			if(--timeout == 0)
   2.215+				return i;
   2.216+		unlock(&startlock[i]);
   2.217+	}
   2.218+	return ncpu;
   2.219+}
   2.220+
   2.221+void
   2.222+archbcm2link(void)
   2.223+{
   2.224+	addclock0link(wdogfeed, HZ);
   2.225+}
   2.226+
   2.227+int
   2.228+l2ap(int ap)
   2.229+{
   2.230+	return (AP(0, (ap)));
   2.231+}
   2.232+
   2.233+void
   2.234+cpustart(int cpu)
   2.235+{
   2.236+	Mboxes *mb;
   2.237+
   2.238+	up = nil;
   2.239+	machinit();
   2.240+	mb = (Mboxes*)(ARMLOCAL + Mboxregs);
   2.241+	mb->clr[cpu].doorbell = 1;
   2.242+	trapinit();
   2.243+	clockinit();
   2.244+	mmuinit1();
   2.245+	timersinit();
   2.246+	cpuidprint();
   2.247+	archreset();
   2.248+	active.machs[m->machno] = 1;
   2.249+	unlock(&startlock[cpu]);
   2.250+	schedinit();
   2.251+	panic("schedinit returned");
   2.252+}
     3.1--- a/sys/src/9/bcm/arm.h
     3.2+++ b/sys/src/9/bcm/arm.h
     3.3@@ -1,5 +1,5 @@
     3.4 /*
     3.5- * arm-specific definitions for armv6
     3.6+ * arm-specific definitions for armv6 (arm11), armv7 (cortex-a8 and -a7)
     3.7  * these are used in C and assembler
     3.8  */
     3.9 
    3.10@@ -12,6 +12,7 @@
    3.11 #define PsrMsvc		0x00000013	/* `protected mode for OS' */
    3.12 #define PsrMmon		0x00000016	/* `secure monitor' (trustzone hyper) */
    3.13 #define PsrMabt		0x00000017
    3.14+#define PsrMhyp		0x0000001A
    3.15 #define PsrMund		0x0000001B
    3.16 #define PsrMsys		0x0000001F	/* `privileged user mode for OS' (trustzone) */
    3.17 #define PsrMask		0x0000001F
    3.18@@ -52,9 +53,19 @@
    3.19 #define CpTLD		10			/* TLB Lockdown, with op2 */
    3.20 #define CpVECS		12			/* vector bases, op1==0, Crm==0, op2s (cortex) */
    3.21 #define	CpPID		13			/* Process ID */
    3.22+#define	CpTIMER		14			/* Generic timer (cortex-a7) */
    3.23 #define CpSPM		15			/* system performance monitor (arm1176) */
    3.24 
    3.25 /*
    3.26+ * CpTIMER op1==0 Crm and opcode2 registers (cortex-a7)
    3.27+ */
    3.28+#define	CpTIMERcntfrq	0
    3.29+#define CpTIMERphys		2
    3.30+
    3.31+#define CpTIMERphysval	0
    3.32+#define CpTIMERphysctl	1
    3.33+
    3.34+/*
    3.35  * CpTTB op1==0, Crm==0 opcode2 values.
    3.36  */
    3.37 #define CpTTB0		0
    3.38@@ -71,6 +82,7 @@
    3.39  * CpID Secondary (CRm) registers.
    3.40  */
    3.41 #define CpIDidct	0
    3.42+#define	CpIDfeat	1
    3.43 
    3.44 /*
    3.45  * CpID op1==0 opcode2 fields.
    3.46@@ -80,6 +92,7 @@
    3.47 #define CpIDct		1			/* cache type */
    3.48 #define CpIDtlb		3			/* tlb type (cortex) */
    3.49 #define CpIDmpid	5			/* multiprocessor id (cortex) */
    3.50+#define	CpIDrevid	6			/* extra revision ID */
    3.51 
    3.52 /* CpIDid op1 values */
    3.53 #define CpIDcsize	1			/* cache size (cortex) */
    3.54@@ -133,6 +146,10 @@
    3.55 #define CpACasa			(1<<4)	/* enable speculative accesses */
    3.56 #define CpACl1pe		(1<<3)	/* l1 cache parity enable */
    3.57 #define CpACl2en		(1<<1)	/* l2 cache enable; default 1 */
    3.58+
    3.59+/* cortex-a7 and cortex-a9 */
    3.60+#define CpACsmp			(1<<6)	/* SMP l1 caches coherence; needed for ldrex/strex */
    3.61+#define CpACl1pctl		(3<<13)	/* l1 prefetch control */
    3.62 /*
    3.63  * CpCONTROL Secondary (CRm) registers and opcode2 fields.
    3.64  */
    3.65@@ -151,9 +168,9 @@
    3.66 #define CpCACHEinvd	6			/* data or unified */
    3.67 #define CpCACHEinvu	7			/* unified (not on cortex) */
    3.68 #define CpCACHEva2pa	8			/* va -> pa translation (cortex) */
    3.69-#define CpCACHEwb	10			/* writeback */
    3.70-#define CpCACHEinvdse	11			/* data or unified by mva */
    3.71-#define CpCACHEwbi	14			/* writeback+invalidate */
    3.72+#define CpCACHEwb	10			/* writeback to PoC */
    3.73+#define CpCACHEwbu	11			/* writeback to PoU */
    3.74+#define CpCACHEwbi	14			/* writeback+invalidate (to PoC) */
    3.75 
    3.76 #define CpCACHEall	0			/* entire (not for invd nor wb(i) on cortex) */
    3.77 #define CpCACHEse	1			/* single entry */
    3.78@@ -223,7 +240,7 @@
    3.79 #define CpVECSmon	1			/* secure monitor base addr */
    3.80 
    3.81 /*
    3.82- * CpSPM Secondary (CRm) registers and opcode2 fields.
    3.83+ * CpSPM Secondary (CRm) registers and opcode2 fields (armv6)
    3.84  */
    3.85 #define CpSPMperf	12			/* various counters */
    3.86 
    3.87@@ -239,6 +256,21 @@
    3.88 #define CpCACHERANGEdwbi	14		/* writeback+invalidate */
    3.89 
    3.90 /*
    3.91+ * CpTTB cache control bits
    3.92+ */
    3.93+#define CpTTBnos	(1<<5)	/* only Inner cache shareable */
    3.94+#define CpTTBinc	(0<<0|0<<6)	/* inner non-cacheable */
    3.95+#define CpTTBiwba	(0<<0|1<<6)	/* inner write-back write-allocate */
    3.96+#define CpTTBiwt	(1<<0|0<<6)	/* inner write-through */
    3.97+#define CpTTBiwb	(1<<0|1<<6)	/* inner write-back no write-allocate */
    3.98+#define CpTTBonc	(0<<3)	/* outer non-cacheable */
    3.99+#define CpTTBowba	(1<<3)	/* outer write-back write-allocate */
   3.100+#define CpTTBowt	(2<<3)	/* outer write-through */
   3.101+#define CpTTBowb	(3<<3)	/* outer write-back no write-allocate */
   3.102+#define CpTTBs	(1<<1)	/* page table in shareable memory */
   3.103+#define CpTTBbase	~0x7F		/* mask off control bits */
   3.104+
   3.105+/*
   3.106  * MMU page table entries.
   3.107  * Mbz (0x10) bit is implementation-defined and must be 0 on the cortex.
   3.108  */
   3.109@@ -256,6 +288,15 @@
   3.110 #define Cached		0x00000008		/* L[12] */
   3.111 #define Dom0		0
   3.112 
   3.113+#define L1wralloc	(1<<12)			/* L1 TEX */
   3.114+#define L1sharable	(1<<16)
   3.115+#define L2wralloc	(1<<6)			/* L2 TEX (small pages) */
   3.116+#define L2sharable	(1<<10)
   3.117+
   3.118+/* attributes for memory containing locks -- differs between armv6 and armv7 */
   3.119+//#define L1ptedramattrs	(Cached | Buffered | L1wralloc | L1sharable)
   3.120+//#define L2ptedramattrs	(Cached | Buffered | L2wralloc | L2sharable)
   3.121+
   3.122 #define Noaccess	0			/* AP, DAC */
   3.123 #define Krw		1			/* AP */
   3.124 /* armv7 deprecates AP[2] == 1 & AP[1:0] == 2 (Uro), prefers 3 (new in v7) */
   3.125@@ -267,7 +308,7 @@
   3.126 #define F(v, o, w)	(((v) & ((1<<(w))-1))<<(o))
   3.127 #define AP(n, v)	F((v), ((n)*2)+4, 2)
   3.128 #define L1AP(ap)	(AP(3, (ap)))
   3.129-#define L2AP(ap) (AP(3, (ap))|AP(2, (ap))|AP(1, (ap))|AP(0, (ap))) /* pre-armv7 */
   3.130+/* L2AP differs between armv6 and armv7 -- see l2ap in arch*.c */
   3.131 #define DAC(n, v)	F((v), (n)*2, 2)
   3.132 
   3.133 #define HVECTORS	0xffff0000
     4.1--- a/sys/src/9/bcm/arm.s
     4.2+++ b/sys/src/9/bcm/arm.s
     4.3@@ -1,5 +1,5 @@
     4.4 /*
     4.5- * armv6 machine assist, definitions
     4.6+ * armv6/v7 machine assist, definitions
     4.7  *
     4.8  * loader uses R11 as scratch.
     4.9  */
    4.10@@ -11,8 +11,6 @@
    4.11 
    4.12 #define L1X(va)		(((((va))>>20) & 0x0fff)<<2)
    4.13 
    4.14-#define PTEDRAM		(Dom0|L1AP(Krw)|Section|Cached|Buffered)
    4.15-
    4.16 /*
    4.17  * new instructions
    4.18  */
    4.19@@ -25,12 +23,32 @@
    4.20 	MOVW	$0, R0; \
    4.21 	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
    4.22 
    4.23-#define	BARRIERS	ISB; DSB
    4.24+#define	BARRIERS	DSB; ISB
    4.25 
    4.26 #define MCRR(coproc, op, rd, rn, crm) \
    4.27 	WORD $(0xec400000|(rn)<<16|(rd)<<12|(coproc)<<8|(op)<<4|(crm))
    4.28+#define MRRC(coproc, op, rd, rn, crm) \
    4.29+	WORD $(0xec500000|(rn)<<16|(rd)<<12|(coproc)<<8|(op)<<4|(crm))
    4.30+#define MSR(R, rn, m, m1) \
    4.31+	WORD $(0xe120f200|(R)<<22|(m1)<<16|(m)<<8|(rn))
    4.32+
    4.33+#define CPSIE	WORD	$0xf1080080	/* intr enable: zeroes I bit */
    4.34+#define CPSID	WORD	$0xf10c0080	/* intr disable: sets I bit */
    4.35 
    4.36 #define OKAY \
    4.37 	MOVW	$0x7E200028,R2; \
    4.38 	MOVW	$0x10000,R3; \
    4.39 	MOVW	R3,(R2)
    4.40+
    4.41+#define PUTC(s)
    4.42+
    4.43+/*
    4.44+ * get cpu id, or zero if armv6
    4.45+ */
    4.46+#define CPUID(r) \
    4.47+	MRC	CpSC, 0, r, C(CpID), C(CpIDfeat), 7; \
    4.48+	CMP	$0, r; \
    4.49+	B.EQ	2(PC); \
    4.50+	MRC	CpSC, 0, r, C(CpID), C(CpIDidct), CpIDmpid; \
    4.51+	AND.S	$(MAXMACH-1), r
    4.52+
     5.1new file mode 100644
     5.2--- /dev/null
     5.3+++ b/sys/src/9/bcm/armv6.s
     5.4@@ -0,0 +1,324 @@
     5.5+/*
     5.6+ * Broadcom bcm2835 SoC, as used in Raspberry Pi
     5.7+ * arm1176jzf-s processor (armv6)
     5.8+ */
     5.9+
    5.10+#include "arm.s"
    5.11+
    5.12+#define CACHELINESZ 32
    5.13+
    5.14+TEXT armstart(SB), 1, $-4
    5.15+
    5.16+	/*
    5.17+	 * SVC mode, interrupts disabled
    5.18+	 */
    5.19+	MOVW	$(PsrDirq|PsrDfiq|PsrMsvc), R1
    5.20+	MOVW	R1, CPSR
    5.21+
    5.22+	/*
    5.23+	 * disable the mmu and L1 caches
    5.24+	 * invalidate caches and tlb
    5.25+	 */
    5.26+	MRC	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
    5.27+	BIC	$(CpCdcache|CpCicache|CpCpredict|CpCmmu), R1
    5.28+	MCR	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
    5.29+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvu), CpCACHEall
    5.30+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
    5.31+	ISB
    5.32+
    5.33+	/*
    5.34+	 * clear mach and page tables
    5.35+	 */
    5.36+	MOVW	$PADDR(MACHADDR), R1
    5.37+	MOVW	$PADDR(KTZERO), R2
    5.38+_ramZ:
    5.39+	MOVW	R0, (R1)
    5.40+	ADD	$4, R1
    5.41+	CMP	R1, R2
    5.42+	BNE	_ramZ
    5.43+
    5.44+	/*
    5.45+	 * start stack at top of mach (physical addr)
    5.46+	 * set up page tables for kernel
    5.47+	 */
    5.48+	MOVW	$PADDR(MACHADDR+MACHSIZE-4), R13
    5.49+	MOVW	$PADDR(L1), R0
    5.50+	BL	,mmuinit(SB)
    5.51+
    5.52+	/*
    5.53+	 * set up domain access control and page table base
    5.54+	 */
    5.55+	MOVW	$Client, R1
    5.56+	MCR	CpSC, 0, R1, C(CpDAC), C(0)
    5.57+	MOVW	$PADDR(L1), R1
    5.58+	MCR	CpSC, 0, R1, C(CpTTB), C(0)
    5.59+
    5.60+	/*
    5.61+	 * enable caches, mmu, and high vectors
    5.62+	 */
    5.63+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
    5.64+	ORR	$(CpChv|CpCdcache|CpCicache|CpCpredict|CpCmmu), R0
    5.65+	MCR	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
    5.66+	ISB
    5.67+
    5.68+	/*
    5.69+	 * switch SB, SP, and PC into KZERO space
    5.70+	 */
    5.71+	MOVW	$setR12(SB), R12
    5.72+	MOVW	$(MACHADDR+MACHSIZE-4), R13
    5.73+	MOVW	$_startpg(SB), R15
    5.74+
    5.75+TEXT _startpg(SB), 1, $-4
    5.76+
    5.77+	/*
    5.78+	 * enable cycle counter
    5.79+	 */
    5.80+	MOVW	$1, R1
    5.81+	MCR	CpSC, 0, R1, C(CpSPM), C(CpSPMperf), CpSPMctl
    5.82+
    5.83+	/*
    5.84+	 * call main and loop forever if it returns
    5.85+	 */
    5.86+	BL	,main(SB)
    5.87+	B	,0(PC)
    5.88+
    5.89+	BL	_div(SB)		/* hack to load _div, etc. */
    5.90+
    5.91+TEXT cpidget(SB), 1, $-4			/* main ID */
    5.92+	MRC	CpSC, 0, R0, C(CpID), C(0), CpIDid
    5.93+	RET
    5.94+
    5.95+TEXT fsrget(SB), 1, $-4				/* data fault status */
    5.96+	MRC	CpSC, 0, R0, C(CpFSR), C(0), CpFSRdata
    5.97+	RET
    5.98+
    5.99+TEXT ifsrget(SB), 1, $-4			/* instruction fault status */
   5.100+	MRC	CpSC, 0, R0, C(CpFSR), C(0), CpFSRinst
   5.101+	RET
   5.102+
   5.103+TEXT farget(SB), 1, $-4				/* fault address */
   5.104+	MRC	CpSC, 0, R0, C(CpFAR), C(0x0)
   5.105+	RET
   5.106+
   5.107+TEXT lcycles(SB), 1, $-4
   5.108+	MRC	CpSC, 0, R0, C(CpSPM), C(CpSPMperf), CpSPMcyc
   5.109+	RET
   5.110+
   5.111+TEXT splhi(SB), 1, $-4
   5.112+	MOVW	$(MACHADDR+4), R2		/* save caller pc in Mach */
   5.113+	MOVW	R14, 0(R2)
   5.114+
   5.115+	MOVW	CPSR, R0			/* turn off irqs (but not fiqs) */
   5.116+	ORR	$(PsrDirq), R0, R1
   5.117+	MOVW	R1, CPSR
   5.118+	RET
   5.119+
   5.120+TEXT splfhi(SB), 1, $-4
   5.121+	MOVW	$(MACHADDR+4), R2		/* save caller pc in Mach */
   5.122+	MOVW	R14, 0(R2)
   5.123+
   5.124+	MOVW	CPSR, R0			/* turn off irqs and fiqs */
   5.125+	ORR	$(PsrDirq|PsrDfiq), R0, R1
   5.126+	MOVW	R1, CPSR
   5.127+	RET
   5.128+
   5.129+TEXT splflo(SB), 1, $-4
   5.130+	MOVW	CPSR, R0			/* turn on fiqs */
   5.131+	BIC	$(PsrDfiq), R0, R1
   5.132+	MOVW	R1, CPSR
   5.133+	RET
   5.134+
   5.135+TEXT spllo(SB), 1, $-4
   5.136+	MOVW	CPSR, R0			/* turn on irqs and fiqs */
   5.137+	BIC	$(PsrDirq|PsrDfiq), R0, R1
   5.138+	MOVW	R1, CPSR
   5.139+	RET
   5.140+
   5.141+TEXT splx(SB), 1, $-4
   5.142+	MOVW	$(MACHADDR+0x04), R2		/* save caller pc in Mach */
   5.143+	MOVW	R14, 0(R2)
   5.144+
   5.145+	MOVW	R0, R1				/* reset interrupt level */
   5.146+	MOVW	CPSR, R0
   5.147+	MOVW	R1, CPSR
   5.148+	RET
   5.149+
   5.150+TEXT spldone(SB), 1, $0				/* end marker for devkprof.c */
   5.151+	RET
   5.152+
   5.153+TEXT islo(SB), 1, $-4
   5.154+	MOVW	CPSR, R0
   5.155+	AND	$(PsrDirq), R0
   5.156+	EOR	$(PsrDirq), R0
   5.157+	RET
   5.158+
   5.159+TEXT	tas(SB), $-4
   5.160+TEXT	_tas(SB), $-4
   5.161+	MOVW	R0,R1
   5.162+	MOVW	$1,R0
   5.163+	SWPW	R0,(R1)			/* fix: deprecated in armv6 */
   5.164+	RET
   5.165+
   5.166+TEXT setlabel(SB), 1, $-4
   5.167+	MOVW	R13, 0(R0)		/* sp */
   5.168+	MOVW	R14, 4(R0)		/* pc */
   5.169+	MOVW	$0, R0
   5.170+	RET
   5.171+
   5.172+TEXT gotolabel(SB), 1, $-4
   5.173+	MOVW	0(R0), R13		/* sp */
   5.174+	MOVW	4(R0), R14		/* pc */
   5.175+	MOVW	$1, R0
   5.176+	RET
   5.177+
   5.178+TEXT getcallerpc(SB), 1, $-4
   5.179+	MOVW	0(R13), R0
   5.180+	RET
   5.181+
   5.182+TEXT idlehands(SB), $-4
   5.183+	MOVW	CPSR, R3
   5.184+	ORR	$(PsrDirq|PsrDfiq), R3, R1		/* splfhi */
   5.185+	MOVW	R1, CPSR
   5.186+
   5.187+	DSB
   5.188+	MOVW	nrdy(SB), R0
   5.189+	CMP	$0, R0
   5.190+	MCR.EQ	CpSC, 0, R0, C(CpCACHE), C(CpCACHEintr), CpCACHEwait
   5.191+	DSB
   5.192+
   5.193+	MOVW	R3, CPSR			/* splx */
   5.194+	RET
   5.195+
   5.196+
   5.197+TEXT coherence(SB), $-4
   5.198+	BARRIERS
   5.199+	RET
   5.200+
   5.201+/*
   5.202+ * invalidate tlb
   5.203+ */
   5.204+TEXT mmuinvalidate(SB), 1, $-4
   5.205+	MOVW	$0, R0
   5.206+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
   5.207+	BARRIERS
   5.208+	MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtc
   5.209+	RET
   5.210+
   5.211+/*
   5.212+ * mmuinvalidateaddr(va)
   5.213+ *   invalidate tlb entry for virtual page address va, ASID 0
   5.214+ */
   5.215+TEXT mmuinvalidateaddr(SB), 1, $-4
   5.216+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinvse
   5.217+	BARRIERS
   5.218+	RET
   5.219+
   5.220+/*
   5.221+ * drain write buffer
   5.222+ * writeback data cache
   5.223+ */
   5.224+TEXT cachedwb(SB), 1, $-4
   5.225+	DSB
   5.226+	MOVW	$0, R0
   5.227+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEall
   5.228+	RET
   5.229+
   5.230+/*
   5.231+ * drain write buffer
   5.232+ * writeback and invalidate data cache
   5.233+ */
   5.234+TEXT cachedwbinv(SB), 1, $-4
   5.235+	DSB
   5.236+	MOVW	$0, R0
   5.237+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall
   5.238+	RET
   5.239+
   5.240+/*
   5.241+ * cachedwbinvse(va, n)
   5.242+ *   drain write buffer
   5.243+ *   writeback and invalidate data cache range [va, va+n)
   5.244+ */
   5.245+TEXT cachedwbinvse(SB), 1, $-4
   5.246+	MOVW	R0, R1		/* DSB clears R0 */
   5.247+	DSB
   5.248+	MOVW	n+4(FP), R2
   5.249+	ADD	R1, R2
   5.250+	SUB	$1, R2
   5.251+	BIC	$(CACHELINESZ-1), R1
   5.252+	BIC	$(CACHELINESZ-1), R2
   5.253+	MCRR(CpSC, 0, 2, 1, CpCACHERANGEdwbi)
   5.254+	RET
   5.255+
   5.256+/*
   5.257+ * cachedwbse(va, n)
   5.258+ *   drain write buffer
   5.259+ *   writeback data cache range [va, va+n)
   5.260+ */
   5.261+TEXT cachedwbtlb(SB), 1, $-4
   5.262+TEXT cachedwbse(SB), 1, $-4
   5.263+
   5.264+	MOVW	R0, R1		/* DSB clears R0 */
   5.265+	DSB
   5.266+	MOVW	n+4(FP), R2
   5.267+	ADD	R1, R2
   5.268+	BIC	$(CACHELINESZ-1), R1
   5.269+	BIC	$(CACHELINESZ-1), R2
   5.270+	MCRR(CpSC, 0, 2, 1, CpCACHERANGEdwb)
   5.271+	RET
   5.272+
   5.273+/*
   5.274+ * cachedinvse(va, n)
   5.275+ *   drain write buffer
   5.276+ *   invalidate data cache range [va, va+n)
   5.277+ */
   5.278+TEXT cachedinvse(SB), 1, $-4
   5.279+	MOVW	R0, R1		/* DSB clears R0 */
   5.280+	DSB
   5.281+	MOVW	n+4(FP), R2
   5.282+	ADD	R1, R2
   5.283+	SUB	$1, R2
   5.284+	BIC	$(CACHELINESZ-1), R1
   5.285+	BIC	$(CACHELINESZ-1), R2
   5.286+	MCRR(CpSC, 0, 2, 1, CpCACHERANGEinvd)
   5.287+	RET
   5.288+
   5.289+/*
   5.290+ * drain write buffer and prefetch buffer
   5.291+ * writeback and invalidate data cache
   5.292+ * invalidate instruction cache
   5.293+ */
   5.294+TEXT cacheuwbinv(SB), 1, $-4
   5.295+	BARRIERS
   5.296+	MOVW	$0, R0
   5.297+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall
   5.298+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
   5.299+	RET
   5.300+
   5.301+/*
   5.302+ * L2 cache is not enabled
   5.303+ */
   5.304+TEXT l2cacheuwbinv(SB), 1, $-4
   5.305+	RET
   5.306+
   5.307+/*
   5.308+ * invalidate instruction cache
   5.309+ */
   5.310+TEXT cacheiinv(SB), 1, $-4
   5.311+	MOVW	$0, R0
   5.312+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
   5.313+	RET
   5.314+
   5.315+/*
   5.316+ * invalidate range of instruction cache
   5.317+ */
   5.318+TEXT cacheiinvse(SB), 1, $-4
   5.319+	MOVW	R0, R1		/* DSB clears R0 */
   5.320+	DSB
   5.321+	MOVW n+4(FP), R2
   5.322+	ADD	R1, R2
   5.323+	SUB	$1, R2
   5.324+	MCRR(CpSC, 0, 2, 1, CpCACHERANGEinvi)
   5.325+	MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtc
   5.326+	DSB
   5.327+	ISB
   5.328+	RET
     6.1new file mode 100644
     6.2--- /dev/null
     6.3+++ b/sys/src/9/bcm/armv7.s
     6.4@@ -0,0 +1,510 @@
     6.5+/*
     6.6+ * Broadcom bcm2836 SoC, as used in Raspberry Pi 2
     6.7+ * 4 x Cortex-A7 processor (armv7)
     6.8+ */
     6.9+
    6.10+#include "arm.s"
    6.11+
    6.12+#define CACHELINESZ 	64
    6.13+#define ICACHELINESZ	32
    6.14+
    6.15+#undef DSB
    6.16+#undef DMB
    6.17+#undef ISB
    6.18+#define DSB	WORD	$0xf57ff04f	/* data synch. barrier; last f = SY */
    6.19+#define DMB	WORD	$0xf57ff05f	/* data mem. barrier; last f = SY */
    6.20+#define ISB	WORD	$0xf57ff06f	/* instr. sync. barrier; last f = SY */
    6.21+#define WFI	WORD	$0xe320f003	/* wait for interrupt */
    6.22+#define WFI_EQ	WORD	$0x0320f003	/* wait for interrupt if eq */
    6.23+#define ERET	WORD	$0xe160006e	/* exception return from HYP */
    6.24+#define SEV	WORD	$0xe320f004	/* send event */
    6.25+
    6.26+/* tas/cas strex debugging limits; started at 10000 */
    6.27+#define MAXSC 1000000
    6.28+
    6.29+TEXT armstart(SB), 1, $-4
    6.30+
    6.31+	/*
    6.32+	 * if not cpu0, go to secondary startup
    6.33+	 */
    6.34+	CPUID(R1)
    6.35+	BNE	reset
    6.36+
    6.37+	/*
    6.38+	 * go to SVC mode, interrupts disabled
    6.39+	 */
    6.40+	BL	svcmode(SB)
    6.41+
    6.42+	/*
    6.43+	 * disable the mmu and caches
    6.44+	 */
    6.45+	MRC	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
    6.46+	BIC	$(CpCdcache|CpCicache|CpCmmu), R1
    6.47+	ORR	$(CpCsbo|CpCsw), R1
    6.48+	BIC	$CpCsbz, R1
    6.49+	MCR	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
    6.50+	BARRIERS
    6.51+
    6.52+	/*
    6.53+	 * clear mach and page tables
    6.54+	 */
    6.55+	MOVW	$PADDR(MACHADDR), R1
    6.56+	MOVW	$PADDR(KTZERO), R2
    6.57+_ramZ:
    6.58+	MOVW	R0, (R1)
    6.59+	ADD	$4, R1
    6.60+	CMP	R1, R2
    6.61+	BNE	_ramZ
    6.62+
    6.63+	/*
    6.64+	 * turn SMP on
    6.65+	 * invalidate tlb
    6.66+	 */
    6.67+	MRC	CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl
    6.68+	ORR	$CpACsmp, R1		/* turn SMP on */
    6.69+	MCR	CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl
    6.70+	BARRIERS
    6.71+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
    6.72+	BARRIERS
    6.73+
    6.74+	/*
    6.75+	 * start stack at top of mach (physical addr)
    6.76+	 * set up page tables for kernel
    6.77+	 */
    6.78+	MOVW	$PADDR(MACHADDR+MACHSIZE-4), R13
    6.79+	MOVW	$PADDR(L1), R0
    6.80+	BL	mmuinit(SB)
    6.81+
    6.82+	/*
    6.83+	 * set up domain access control and page table base
    6.84+	 */
    6.85+	MOVW	$Client, R1
    6.86+	MCR	CpSC, 0, R1, C(CpDAC), C(0)
    6.87+	MOVW	$PADDR(L1), R1
    6.88+	ORR	$(CpTTBs|CpTTBowba|CpTTBiwba), R1
    6.89+	MCR	CpSC, 0, R1, C(CpTTB), C(0)
    6.90+	MCR	CpSC, 0, R1, C(CpTTB), C(0), CpTTB1	/* cortex has two */
    6.91+
    6.92+	/*
    6.93+	 * invalidate my caches before enabling
    6.94+	 */
    6.95+	BL	cachedinv(SB)
    6.96+	BL	cacheiinv(SB)
    6.97+	BL	l2cacheuinv(SB)
    6.98+	BARRIERS
    6.99+
   6.100+	/*
   6.101+	 * enable caches, mmu, and high vectors
   6.102+	 */
   6.103+
   6.104+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
   6.105+	ORR	$(CpChv|CpCdcache|CpCicache|CpCmmu), R0
   6.106+	MCR	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
   6.107+	BARRIERS
   6.108+
   6.109+	/*
   6.110+	 * switch SB, SP, and PC into KZERO space
   6.111+	 */
   6.112+	MOVW	$setR12(SB), R12
   6.113+	MOVW	$(MACHADDR+MACHSIZE-4), R13
   6.114+	MOVW	$_startpg(SB), R15
   6.115+
   6.116+TEXT _startpg(SB), 1, $-4
   6.117+
   6.118+	/*
   6.119+	 * enable cycle counter
   6.120+	 */
   6.121+	MOVW	$(1<<31), R1
   6.122+	MCR	CpSC, 0, R1, C(CpCLD), C(CpCLDena), CpCLDenacyc
   6.123+	MOVW	$1, R1
   6.124+	MCR	CpSC, 0, R1, C(CpCLD), C(CpCLDena), CpCLDenapmnc
   6.125+
   6.126+	/*
   6.127+	 * call main and loop forever if it returns
   6.128+	 */
   6.129+	BL	,main(SB)
   6.130+	B	,0(PC)
   6.131+
   6.132+	BL	_div(SB)		/* hack to load _div, etc. */
   6.133+
   6.134+/*
   6.135+ * startup entry for cpu(s) other than 0
   6.136+ */
   6.137+TEXT cpureset(SB), 1, $-4		/* entry for cpus other than 0: set up Mach, stack and mmu, then jump to _startpg2 */
   6.138+reset:
   6.139+	/*
   6.140+	 * load physical base for SB addressing while mmu is off
   6.141+	 * keep a handy zero in R0 until first function call
   6.142+	 */
   6.143+	MOVW	$setR12(SB), R12
   6.144+	SUB	$KZERO, R12		/* remove kernel virtual base */
   6.145+	ADD	$PHYSDRAM, R12		/* add physical dram base */
   6.146+	MOVW	$0, R0
   6.147+
   6.148+	/*
   6.149+	 * SVC mode, interrupts disabled
   6.150+	 */
   6.151+	BL	svcmode(SB)		/* svcmode touches only R1 and R2 */
   6.152+
   6.153+	/*
   6.154+	 * disable the mmu and caches
   6.155+	 */
   6.156+	MRC	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
   6.157+	BIC	$(CpCdcache|CpCicache|CpCmmu), R1	/* clear the cache and mmu enable bits */
   6.158+	ORR	$(CpCsbo|CpCsw), R1
   6.159+	BIC	$CpCsbz, R1
   6.160+	MCR	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
   6.161+	BARRIERS
   6.162+
   6.163+	/*
   6.164+	 * turn SMP on
   6.165+	 * invalidate tlb
   6.166+	 */
   6.167+	MRC	CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl
   6.168+	ORR	$CpACsmp, R1		/* turn SMP on */
   6.169+	MCR	CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl
   6.170+	BARRIERS
   6.171+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv	/* R0 = 0 */
   6.172+	BARRIERS
   6.173+
   6.174+	/*
   6.175+	 * find Mach for this cpu
   6.176+	 */
   6.177+	MRC	CpSC, 0, R2, C(CpID), C(CpIDidct), CpIDmpid
   6.178+	AND	$(MAXMACH-1), R2	/* mask out non-cpu-id bits */
   6.179+	SLL	$2, R2			/* convert to word index */
   6.180+	MOVW	$machaddr(SB), R0
   6.181+	ADD	R2, R0			/* R0 = &machaddr[cpuid] */
   6.182+	MOVW	(R0), R0		/* R0 = machaddr[cpuid] */
   6.183+	CMP	$0, R0
   6.184+	BEQ	0(PC)			/* must not be zero: branch to self (hang) if it is */
   6.185+	SUB	$KZERO, R0, R(MACH)	/* m = PADDR(machaddr[cpuid]) */
   6.186+
   6.187+	/*
   6.188+	 * start stack at top of local Mach
   6.189+	 */
   6.190+	ADD	$(MACHSIZE-4), R(MACH), R13	/* still a physical address; KZERO is added below */
   6.191+
   6.192+	/*
   6.193+	 * set up domain access control and page table base
   6.194+	 */
   6.195+	MOVW	$Client, R1
   6.196+	MCR	CpSC, 0, R1, C(CpDAC), C(0)	/* domain access control */
   6.197+	MOVW	12(R(MACH)), R1	/* m->mmul1 */
   6.198+	SUB	$KZERO, R1		/* phys addr */
   6.199+	ORR	$(CpTTBs|CpTTBowba|CpTTBiwba), R1
   6.200+	MCR	CpSC, 0, R1, C(CpTTB), C(0)	/* page table base */
   6.201+	MCR	CpSC, 0, R1, C(CpTTB), C(0), CpTTB1	/* cortex has two */
   6.202+
   6.203+	/*
   6.204+	 * invalidate my caches before enabling
   6.205+	 */
   6.206+	BL	cachedinv(SB)
   6.207+	BL	cacheiinv(SB)
   6.208+	BARRIERS
   6.209+
   6.210+	/*
   6.211+	 * enable caches, mmu, and high vectors
   6.212+	 */
   6.213+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
   6.214+	ORR	$(CpChv|CpCdcache|CpCicache|CpCmmu), R0
   6.215+	MCR	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
   6.216+	BARRIERS
   6.217+
   6.218+	/*
   6.219+	 * switch MACH, SB, SP, and PC into KZERO space
   6.220+	 */
   6.221+	ADD	$KZERO, R(MACH)
   6.222+	MOVW	$setR12(SB), R12	/* static base, this time without the physical adjustment */
   6.223+	ADD	$KZERO, R13
   6.224+	MOVW	$_startpg2(SB), R15	/* load PC: continue at _startpg2 */
   6.225+
   6.226+TEXT _startpg2(SB), 1, $-4		/* reached from cpureset once PC, SP, SB and MACH are in KZERO space */
   6.227+
   6.228+	/*
   6.229+	 * enable cycle counter
   6.230+	 */
   6.231+	MOVW	$(1<<31), R1
   6.232+	MCR	CpSC, 0, R1, C(CpCLD), C(CpCLDena), CpCLDenacyc
   6.233+	MOVW	$1, R1
   6.234+	MCR	CpSC, 0, R1, C(CpCLD), C(CpCLDena), CpCLDenapmnc
   6.235+
   6.236+	/*
   6.237+	 * call cpustart and loop forever if it returns
   6.238+	 */
   6.239+	MRC	CpSC, 0, R0, C(CpID), C(CpIDidct), CpIDmpid	/* R0 (argument to cpustart) = cpu id register */
   6.240+	AND	$(MAXMACH-1), R0		/* mask out non-cpu-id bits */
   6.241+	BL	,cpustart(SB)
   6.242+	B	,0(PC)				/* branch to self */
   6.243+
   6.244+/*
   6.245+ * get into SVC mode with interrupts disabled
   6.246+ * raspberry pi firmware since 29 Sept 2015 starts in HYP mode
   6.247+ */
   6.248+TEXT svcmode(SB), 1, $-4		/* uses R1 and R2 only */
   6.249+	MOVW	CPSR, R1
   6.250+	AND	$PsrMask, R1			/* keep the bits compared against PsrMhyp */
   6.251+	MOVW	$PsrMhyp, R2
   6.252+	CMP	R2, R1				/* currently in HYP mode? */
   6.253+	MOVW	$(PsrDirq|PsrDfiq|PsrMsvc), R1	/* target status: SVC mode, irqs and fiqs disabled */
   6.254+	BNE	nothyp				/* not HYP: CPSR can be written directly */
   6.255+	MSR(1, 1, 1, 0xe)	/* MOVW	R1, SPSR_HYP */
   6.256+	MSR(0, 14, 1, 0xe)	/* MOVW	R14, ELR_HYP */
   6.257+	ERET				/* leave HYP using the SPSR_HYP and ELR_HYP values set above */
   6.258+nothyp:
   6.259+	MOVW	R1, CPSR
   6.260+	RET
   6.261+
   6.262+TEXT cpidget(SB), 1, $-4			/* main ID */
   6.263+	MRC	CpSC, 0, R0, C(CpID), C(0), CpIDid	/* value returned in R0 */
   6.264+	RET
   6.265+
   6.266+TEXT fsrget(SB), 1, $-4				/* data fault status */
   6.267+	MRC	CpSC, 0, R0, C(CpFSR), C(0), CpFSRdata	/* value returned in R0 */
   6.268+	RET
   6.269+
   6.270+TEXT ifsrget(SB), 1, $-4			/* instruction fault status */
   6.271+	MRC	CpSC, 0, R0, C(CpFSR), C(0), CpFSRinst	/* value returned in R0 */
   6.272+	RET
   6.273+
   6.274+TEXT farget(SB), 1, $-4				/* fault address */
   6.275+	MRC	CpSC, 0, R0, C(CpFAR), C(0x0)	/* value returned in R0 */
   6.276+	RET
   6.277+
   6.278+TEXT cpctget(SB), 1, $-4			/* cache type */
   6.279+	MRC	CpSC, 0, R0, C(CpID), C(CpIDidct), CpIDct	/* value returned in R0 */
   6.280+	RET
   6.281+
   6.282+TEXT lcycles(SB), 1, $-4			/* presumably the cycle counter enabled at startup -- confirm CpCLDcyc in arm.h */
   6.283+	MRC	CpSC, 0, R0, C(CpCLD), C(CpCLDcyc), 0	/* value returned in R0 */
   6.284+	RET
   6.285+
   6.286+TEXT splhi(SB), 1, $-4			/* returns the previous CPSR in R0 */
   6.287+	MOVW	R14, 4(R(MACH))		/* save caller pc in m->splpc */
   6.288+
   6.289+	MOVW	CPSR, R0			/* turn off irqs (but not fiqs) */
   6.290+	ORR	$(PsrDirq), R0, R1		/* R1 = old CPSR with irq-disable bit set */
   6.291+	MOVW	R1, CPSR
   6.292+	RET
   6.293+
   6.294+TEXT splfhi(SB), 1, $-4			/* returns the previous CPSR in R0 */
   6.295+	MOVW	R14, 4(R(MACH))		/* save caller pc in m->splpc */
   6.296+
   6.297+	MOVW	CPSR, R0			/* turn off irqs and fiqs */
   6.298+	ORR	$(PsrDirq|PsrDfiq), R0, R1	/* R1 = old CPSR with both disable bits set */
   6.299+	MOVW	R1, CPSR
   6.300+	RET
   6.301+
   6.302+TEXT splflo(SB), 1, $-4			/* returns the previous CPSR in R0 */
   6.303+	MOVW	CPSR, R0			/* turn on fiqs */
   6.304+	BIC	$(PsrDfiq), R0, R1		/* R1 = old CPSR with fiq-disable bit cleared */
   6.305+	MOVW	R1, CPSR
   6.306+	RET
   6.307+
   6.308+TEXT spllo(SB), 1, $-4			/* returns the previous CPSR in R0 */
   6.309+	MOVW	CPSR, R0			/* turn on irqs and fiqs */
   6.310+	MOVW	$0, R1
   6.311+	CMP.S	R1, R(MACH)			/* is m zero? if so skip the store below */
   6.312+	MOVW.NE	R1, 4(R(MACH))			/* clear m->splpc */
   6.313+	BIC	$(PsrDirq|PsrDfiq), R0, R1	/* R1 = old CPSR with both disable bits cleared */
   6.314+	MOVW	R1, CPSR
   6.315+	RET
   6.316+
   6.317+TEXT splx(SB), 1, $-4			/* argument in R0 becomes the new CPSR */
   6.318+	MOVW	R14, 4(R(MACH))		/* save caller pc in m->splpc */
   6.319+
   6.320+	MOVW	R0, R1				/* reset interrupt level */
   6.321+	MOVW	CPSR, R0			/* return value: CPSR before the change */
   6.322+	MOVW	R1, CPSR
   6.323+	RET
   6.324+
   6.325+TEXT spldone(SB), 1, $0				/* end marker for devkprof.c */
   6.326+	RET				/* no work: the symbol only marks the end of the spl routines above */
   6.327+
   6.328+TEXT islo(SB), 1, $-4			/* returns non-zero in R0 when the irq-disable bit is clear */
   6.329+	MOVW	CPSR, R0
   6.330+	AND	$(PsrDirq), R0			/* isolate the irq-disable bit */
   6.331+	EOR	$(PsrDirq), R0			/* invert it */
   6.332+	RET
   6.333+
   6.334+TEXT cas(SB), $0			/* same code as cmpswap */
   6.335+TEXT cmpswap(SB), $0			/* (addr in R0, ov, nv): store nv at addr if it holds ov; returns 1 if stored, else 0 */
   6.336+	MOVW	ov+4(FP), R1
   6.337+	MOVW	nv+8(FP), R2
   6.338+spincas:
   6.339+	LDREX	(R0), R3		/* R3 = current value, exclusive load */
   6.340+	CMP.S	R3, R1
   6.341+	BNE	fail			/* current value is not ov */
   6.342+	STREX	R2, (R0), R4		/* try to store nv; R4 = status */
   6.343+	CMP.S	$0, R4
   6.344+	BNE	spincas			/* non-zero status: store did not happen, retry */
   6.345+	MOVW	$1, R0
   6.346+	DMB
   6.347+	RET
   6.348+fail:
   6.349+	CLREX
   6.350+	MOVW	$0, R0
   6.351+	RET
   6.352+
   6.353+TEXT	tas(SB), $-4			/* same code as _tas */
   6.354+TEXT	_tas(SB), $-4			/* _tas(ulong *) */
   6.355+	/* returns old (R0) after modifying (R0) */
   6.356+	MOVW	R0,R5			/* R5 = address of the lock word */
   6.357+	DMB
   6.358+
   6.359+	MOVW	$1,R2		/* new value of (R0) */
   6.360+	MOVW	$MAXSC, R8		/* limit on store attempts */
   6.361+tas1:
   6.362+	LDREX (R5), R7
   6.363+	CMP.S	$0, R7		/* old value non-zero (lock taken)? */
   6.364+	BNE	lockbusy	/* we lose */
   6.365+	SUB.S	$1, R8			/* count this attempt */
   6.366+	BEQ	lockloop2		/* limit reached */
   6.367+	STREX R2,(R5),R4
   6.368+	CMP.S	$0, R4
   6.369+	BNE	tas1		/* strex failed? try again */
   6.370+	DMB
   6.371+	B	tas0
   6.372+lockloop2:
   6.373+	BL	abort(SB)		/* MAXSC attempts used up */
   6.374+lockbusy:
   6.375+	CLREX
   6.376+tas0:
   6.377+	MOVW	R7, R0		/* return old value */
   6.378+	RET
   6.379+
   6.380+TEXT setlabel(SB), 1, $-4		/* store sp and return pc at the address in R0; returns 0 */
   6.381+	MOVW	R13, 0(R0)		/* sp */
   6.382+	MOVW	R14, 4(R0)		/* pc */
   6.383+	MOVW	$0, R0
   6.384+	RET
   6.385+
   6.386+TEXT gotolabel(SB), 1, $-4		/* reload sp and pc from the address in R0 (layout as written by setlabel) */
   6.387+	MOVW	0(R0), R13		/* sp */
   6.388+	MOVW	4(R0), R14		/* pc */
   6.389+	MOVW	$1, R0			/* value seen in R0 at the restored pc */
   6.390+	RET
   6.391+
   6.392+TEXT getcallerpc(SB), 1, $-4
   6.393+	MOVW	0(R13), R0		/* R0 = word at top of stack; presumably the caller's saved return pc -- confirm against the calling convention */
   6.394+	RET
   6.395+
   6.396+TEXT idlehands(SB), $-4
   6.397+	MOVW	CPSR, R3			/* R3 = CPSR on entry, restored before RET */
   6.398+	ORR	$(PsrDirq|PsrDfiq), R3, R1		/* splfhi */
   6.399+	MOVW	R1, CPSR
   6.400+
   6.401+	DSB
   6.402+	MOVW	nrdy(SB), R0
   6.403+	CMP	$0, R0				/* nrdy == 0? */
   6.404+	WFI_EQ					/* going by the name, WFI only when the compare was equal */
   6.405+	DSB
   6.406+
   6.407+	MOVW	R3, CPSR			/* splx */
   6.408+	RET
   6.409+
   6.410+
   6.411+TEXT coherence(SB), $-4			/* callable wrapper around the BARRIERS macro */
   6.412+	BARRIERS
   6.413+	RET
   6.414+
   6.415+TEXT sev(SB), $-4			/* callable wrapper around the SEV instruction */
   6.416+	SEV
   6.417+	RET
   6.418+
   6.419+/*
   6.420+ * invalidate tlb
   6.421+ */
   6.422+TEXT mmuinvalidate(SB), 1, $-4
   6.423+	DSB
   6.424+	MOVW	$0, R0				/* value written by the MCR below */
   6.425+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
   6.426+	BARRIERS
   6.427+	RET
   6.428+
   6.429+/*
   6.430+ * mmuinvalidateaddr(va)
   6.431+ *   invalidate tlb entry for virtual page address va, ASID 0
   6.432+ */
   6.433+TEXT mmuinvalidateaddr(SB), 1, $-4
   6.434+	DSB
   6.435+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinvse	/* R0 = va */
   6.436+	BARRIERS
   6.437+	RET
   6.438+
   6.439+/*
   6.440+ * `single-element' cache operations.
   6.441+ * in arm arch v7, if effective to PoC, they operate on all cache levels, so separate
   6.442+ * l2 functions are unnecessary.
   6.443+ */
   6.444+
   6.445+TEXT cachedwbse(SB), $-4			/* D writeback SE */
   6.446+	MOVW	R0, R2				/* keep start address (first argument) across BARRIERS */
   6.447+
   6.448+	MOVW	CPSR, R3			/* R3 = saved CPSR, restored at _wait */
   6.449+	CPSID					/* splhi */
   6.450+
   6.451+	BARRIERS			/* force outstanding stores to cache */
   6.452+	MOVW	R2, R0
   6.453+	MOVW	4(FP), R1			/* second argument: length */
   6.454+	ADD	R0, R1				/* R1 is end address */
   6.455+	BIC	$(CACHELINESZ-1), R0		/* cache line start */
   6.456+_dwbse:
   6.457+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEse
   6.458+	/* can't have a BARRIER here since it zeroes R0 */
   6.459+	ADD	$CACHELINESZ, R0		/* next line */
   6.460+	CMP.S	R0, R1
   6.461+	BGT	_dwbse				/* loop while end address > line address */
   6.462+	B	_wait				/* shared tail inside cachedwbinvse */
   6.463+
   6.464+/*
   6.465+ * TLB on armv7 loads from cache, so no need for writeback
   6.466+ */
   6.467+TEXT cachedwbtlb(SB), $-4		/* arguments are not used: only barriers are issued */
   6.468+	DSB
   6.469+	ISB
   6.470+	RET
   6.471+
   6.472+TEXT cachedwbinvse(SB), $-4			/* D writeback+invalidate SE */
   6.473+	MOVW	R0, R2				/* keep start address (first argument) across BARRIERS */
   6.474+
   6.475+	MOVW	CPSR, R3			/* R3 = saved CPSR, restored at _wait */
   6.476+	CPSID					/* splhi */
   6.477+
   6.478+	BARRIERS			/* force outstanding stores to cache */
   6.479+	MOVW	R2, R0
   6.480+	MOVW	4(FP), R1			/* second argument: length */
   6.481+	ADD	R0, R1				/* R1 is end address */
   6.482+	BIC	$(CACHELINESZ-1), R0		/* cache line start */
   6.483+_dwbinvse:
   6.484+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEse
   6.485+	/* can't have a BARRIER here since it zeroes R0 */
   6.486+	ADD	$CACHELINESZ, R0		/* next line */
   6.487+	CMP.S	R0, R1
   6.488+	BGT	_dwbinvse			/* loop while end address > line address */
   6.489+_wait:						/* drain write buffer; also the exit path of cachedwbse and cachedinvse */
   6.490+	BARRIERS
   6.491+
   6.492+	MOVW	R3, CPSR			/* splx */
   6.493+	RET
   6.494+
   6.495+TEXT cachedinvse(SB), $-4			/* D invalidate SE */
   6.496+	MOVW	R0, R2				/* keep start address (first argument) across BARRIERS */
   6.497+
   6.498+	MOVW	CPSR, R3			/* R3 = saved CPSR, restored at _wait */
   6.499+	CPSID					/* splhi */
   6.500+
   6.501+	BARRIERS			/* force outstanding stores to cache */
   6.502+	MOVW	R2, R0
   6.503+	MOVW	4(FP), R1			/* second argument: length */
   6.504+	ADD	R0, R1				/* R1 is end address */
   6.505+	BIC	$(CACHELINESZ-1), R0		/* cache line start */
   6.506+_dinvse:
   6.507+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEse
   6.508+	/* can't have a BARRIER here since it zeroes R0 */
   6.509+	ADD	$CACHELINESZ, R0		/* next line */
   6.510+	CMP.S	R0, R1
   6.511+	BGT	_dinvse				/* loop while end address > line address */
   6.512+	B	_wait				/* shared tail inside cachedwbinvse */
   6.513+
   6.514+#include "cache.v7.s"
     7.1new file mode 100644
     7.2--- /dev/null
     7.3+++ b/sys/src/9/bcm/cache.v7.s
     7.4@@ -0,0 +1,220 @@
     7.5+/*
     7.6+ * cortex arm arch v7 cache flushing and invalidation
     7.7+ * shared by l.s and rebootcode.s
     7.8+ */
     7.9+
    7.10+#define	BPIALL	MCR CpSC, 0, R0, C(CpCACHE), C(5), 6	/* branch predictor invalidate all */
    7.11+
    7.12+TEXT cacheiinv(SB), $-4				/* I invalidate */
    7.13+	DSB
    7.14+	MOVW	$0, R0				/* R0 is the register written by the MCR and by BPIALL below */
    7.15+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall /* ok on cortex */
    7.16+	BPIALL	/* redundant? */
    7.17+	DSB
    7.18+	ISB
    7.19+	RET
    7.20+
    7.21+TEXT cacheiinvse(SB), $0			/* I invalidate SE */
    7.22+	MOVW 4(FP), R1			/* second argument: length */
    7.23+	ADD	R0, R1			/* R1 = end address (R0 = start, first argument) */
    7.24+	BIC $(ICACHELINESZ - 1), R0	/* round start down to a line boundary */
    7.25+	DSB
    7.26+_iinvse:
    7.27+	MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEse
    7.28+	ADD $ICACHELINESZ, R0		/* next line */
    7.29+	CMP.S R0, R1
    7.30+	BGT _iinvse			/* loop while end address > line address */
    7.31+	BPIALL
    7.32+	DSB
    7.33+	ISB
    7.34+	RET
    7.35+
    7.36+/*
    7.37+ * set/way operators, passed a suitable set/way value in R0.
    7.38+ */
    7.39+TEXT cachedwb_sw(SB), $-4			/* writeback op (CpCACHEwb) on the set/way value in R0 */
    7.40+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEsi
    7.41+	RET
    7.42+
    7.43+TEXT cachedwbinv_sw(SB), $-4			/* writeback+invalidate op (CpCACHEwbi) on the set/way value in R0 */
    7.44+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEsi
    7.45+	RET
    7.46+
    7.47+TEXT cachedinv_sw(SB), $-4			/* invalidate op (CpCACHEinvd) on the set/way value in R0 */
    7.48+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEsi
    7.49+	RET
    7.50+
    7.51+	/* set cache size select */
    7.52+TEXT setcachelvl(SB), $-4
    7.53+	MCR	CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0	/* R0 = selector value to write */
    7.54+	ISB
    7.55+	RET
    7.56+
    7.57+	/* return cache sizes */
    7.58+TEXT getwayssets(SB), $-4
    7.59+	MRC	CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0	/* raw size register returned in R0 */
    7.60+	RET
    7.61+
    7.62+/*
    7.63+ * l1 cache operations.
    7.64+ * l1 and l2 ops are intended to be called from C, thus need save no
    7.65+ * caller's regs, only those we need to preserve across calls.
    7.66+ */
    7.67+
    7.68+TEXT cachedwb(SB), $-4			/* apply cachedwb_sw to every set/way of cache level 1 */
    7.69+	MOVW.W	R14, -8(R13)		/* push return address */
    7.70+	MOVW	$cachedwb_sw(SB), R0	/* inner-loop function for wholecache */
    7.71+	MOVW	$1, R8			/* cache level, one-origin */
    7.72+	BL	wholecache(SB)
    7.73+	MOVW.P	8(R13), R15		/* pop return address into PC */
    7.74+
    7.75+TEXT cachedwbinv(SB), $-4			/* apply cachedwbinv_sw to every set/way of cache level 1 */
    7.76+	MOVW.W	R14, -8(R13)		/* push return address */
    7.77+	MOVW	$cachedwbinv_sw(SB), R0	/* inner-loop function for wholecache */
    7.78+	MOVW	$1, R8			/* cache level, one-origin */
    7.79+	BL	wholecache(SB)
    7.80+	MOVW.P	8(R13), R15		/* pop return address into PC */
    7.81+
    7.82+TEXT cachedinv(SB), $-4			/* apply cachedinv_sw to every set/way of cache level 1 */
    7.83+	MOVW.W	R14, -8(R13)		/* push return address */
    7.84+	MOVW	$cachedinv_sw(SB), R0	/* inner-loop function for wholecache */
    7.85+	MOVW	$1, R8			/* cache level, one-origin */
    7.86+	BL	wholecache(SB)
    7.87+	MOVW.P	8(R13), R15		/* pop return address into PC */
    7.88+
    7.89+TEXT cacheuwbinv(SB), $-4		/* cachedwbinv followed by cacheiinv, run at splhi */
    7.90+	MOVM.DB.W [R14], (R13)	/* save lr on stack */
    7.91+	MOVW	CPSR, R1
    7.92+	CPSID			/* splhi */
    7.93+
    7.94+	MOVM.DB.W [R1], (R13)	/* save R1 on stack */
    7.95+
    7.96+	BL	cachedwbinv(SB)		/* overwrites R1 (via wholecache), hence the save above */
    7.97+	BL	cacheiinv(SB)
    7.98+
    7.99+	MOVM.IA.W (R13), [R1]	/* restore R1 (saved CPSR) */
   7.100+	MOVW	R1, CPSR
   7.101+	MOVM.IA.W (R13), [R14]	/* restore lr */
   7.102+	RET
   7.103+
   7.104+/*
   7.105+ * l2 cache operations
   7.106+ */
   7.107+
   7.108+TEXT l2cacheuwb(SB), $-4			/* apply cachedwb_sw to every set/way of cache level 2 */
   7.109+	MOVW.W	R14, -8(R13)		/* push return address */
   7.110+	MOVW	$cachedwb_sw(SB), R0	/* inner-loop function for wholecache */
   7.111+	MOVW	$2, R8			/* cache level, one-origin */
   7.112+	BL	wholecache(SB)
   7.113+	MOVW.P	8(R13), R15		/* pop return address into PC */
   7.114+
   7.115+TEXT l2cacheuwbinv(SB), $-4		/* level 2: cachedwbinv_sw over the whole cache, then l2cacheuinv, at splhi */
   7.116+	MOVW.W	R14, -8(R13)		/* push return address */
   7.117+	MOVW	CPSR, R1
   7.118+	CPSID			/* splhi */
   7.119+
   7.120+	MOVM.DB.W [R1], (R13)	/* save R1 on stack */
   7.121+
   7.122+	MOVW	$cachedwbinv_sw(SB), R0	/* inner-loop function for wholecache */
   7.123+	MOVW	$2, R8			/* cache level, one-origin */
   7.124+	BL	wholecache(SB)
   7.125+	BL	l2cacheuinv(SB)
   7.126+
   7.127+	MOVM.IA.W (R13), [R1]	/* restore R1 (saved CPSR) */
   7.128+	MOVW	R1, CPSR
   7.129+	MOVW.P	8(R13), R15		/* pop return address into PC */
   7.130+
   7.131+TEXT l2cacheuinv(SB), $-4		/* apply cachedinv_sw to every set/way of cache level 2 */
   7.132+	MOVW.W	R14, -8(R13)		/* push return address */
   7.133+	MOVW	$cachedinv_sw(SB), R0	/* inner-loop function for wholecache */
   7.134+	MOVW	$2, R8			/* cache level, one-origin */
   7.135+	BL	wholecache(SB)
   7.136+	MOVW.P	8(R13), R15		/* pop return address into PC */
   7.137+
   7.138+/*
   7.139+ * these shift values are for the Cortex-A8 L1 cache (A=2, L=6) and
   7.140+ * the Cortex-A8 L2 cache (A=3, L=6).
   7.141+ * A = log2(# of ways), L = log2(bytes per cache line).
   7.142+ * see armv7 arch ref p. 1403.
   7.143+ */
   7.144+#define L1WAYSH 30
   7.145+#define L1SETSH 6
   7.146+#define L2WAYSH 29
   7.147+#define L2SETSH 6
   7.148+
   7.149+/*
   7.150+ * callers are assumed to be the above l1 and l2 ops.
   7.151+ * R0 is the function to call in the innermost loop.
   7.152+ * R8 is the cache level (one-origin: 1 or 2).
   7.153+ *
   7.154+ * initial translation by 5c, then massaged by hand.
   7.155+ */
   7.156+TEXT wholecache+0(SB), $-4
   7.157+	MOVW	R0, R1		/* save argument for inner loop in R1 */
   7.158+	SUB	$1, R8		/* convert cache level to zero origin */
   7.159+
   7.160+	/* we may not have the MMU on yet, so map R1 to PC's space */
   7.161+	BIC	$KSEGM,	R1	/* strip segment from address */
   7.162+	MOVW	PC, R2		/* get PC's segment ... */
   7.163+	AND	$KSEGM, R2
   7.164+	ORR	R2, R1		/* combine them */
   7.165+
   7.166+	/* drain write buffers */
   7.167+	BARRIERS
   7.168+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
   7.169+	ISB
   7.170+
   7.171+	MOVW	CPSR, R2
   7.172+	MOVM.DB.W [R2,R14], (SP) /* save regs on stack */
   7.173+	CPSID			/* splhi to make entire op atomic */
   7.174+
   7.175+	/* get cache sizes */
   7.176+	SLL	$1, R8, R0	/* R0 = (cache - 1) << 1 */
   7.177+	MCR	CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0 /* set cache size select */
   7.178+	ISB
   7.179+	MRC	CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0 /* get cache sizes */
   7.180+
   7.181+	/* compute # of ways and sets for this cache level */
   7.182+	SRA	$3, R0, R5	/* R5 (ways) = R0 >> 3 */
   7.183+	AND	$1023, R5	/* R5 = (R0 >> 3) & MASK(10) */
   7.184+	ADD	$1, R5		/* R5 (ways) = ((R0 >> 3) & MASK(10)) + 1 */
   7.185+
   7.186+	SRA	$13, R0, R2	/* R2 = R0 >> 13 */
   7.187+	AND	$32767, R2	/* R2 = (R0 >> 13) & MASK(15) */
   7.188+	ADD	$1, R2		/* R2 (sets) = ((R0 >> 13) & MASK(15)) + 1 */
   7.189+
   7.190+	/* precompute set/way shifts for inner loop */
   7.191+	CMP	$0, R8		/* cache == 1? (R8 is zero-origin by now) */
   7.192+	MOVW.EQ	$L1WAYSH, R3 	/* yes */
   7.193+	MOVW.EQ	$L1SETSH, R4
   7.194+	MOVW.NE	$L2WAYSH, R3	/* no */
   7.195+	MOVW.NE	$L2SETSH, R4
   7.196+
   7.197+	/* iterate over ways */
   7.198+	MOVW	$0, R7		/* R7: way */
   7.199+outer:
   7.200+	/* iterate over sets */
   7.201+	MOVW	$0, R6		/* R6: set */
   7.202+inner:
   7.203+	/* compute set/way register contents */
   7.204+	SLL	R3, R7, R0 	/* R0 = way << R3 (L?WAYSH) */
   7.205+	ORR	R8<<1, R0	/* R0 = way << L?WAYSH | (cache - 1) << 1 */
   7.206+	ORR	R6<<R4, R0 	/* R0 = way<<L?WAYSH | (cache-1)<<1 |set<<R4 */
   7.207+
   7.208+	BL	(R1)		/* call set/way operation with R0 */
   7.209+
   7.210+	ADD	$1, R6		/* set++ */
   7.211+	CMP	R2, R6		/* set >= sets? */
   7.212+	BLT	inner		/* no, do next set */
   7.213+
   7.214+	ADD	$1, R7		/* way++ */
   7.215+	CMP	R5, R7		/* way >= ways? */
   7.216+	BLT	outer		/* no, do next way */
   7.217+
   7.218+	MOVM.IA.W (SP), [R2,R14] /* restore regs */
   7.219+	MOVW	R2, CPSR	/* splx */
   7.220+
   7.221+	/* drain write buffers */
   7.222+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
   7.223+	ISB
   7.224+	RET
     8.1--- a/sys/src/9/bcm/clock.c
     8.2+++ b/sys/src/9/bcm/clock.c
     8.3@@ -1,11 +1,13 @@
     8.4 /*
     8.5- * bcm2835 timers
     8.6+ * bcm283[56] timers
     8.7  *	System timers run at 1MHz (timers 1 and 2 are used by GPU)
     8.8  *	ARM timer usually runs at 250MHz (may be slower in low power modes)
     8.9  *	Cycle counter runs at 700MHz (unless overclocked)
    8.10  *    All are free-running up-counters
    8.11+ *  Cortex-a7 has local generic timers per cpu (which we run at 1MHz)
    8.12  *
    8.13  * Use system timer 3 (64 bits) for hzclock interrupts and fastticks
    8.14+ *   For smp on bcm2836, use local generic timer for interrupts on cpu1-3
    8.15  * Use ARM timer (32 bits) for perfticks
    8.16  * Use ARM timer to force immediate interrupt
    8.17  * Use cycle counter for cycles()
    8.18@@ -17,14 +19,21 @@
    8.19 #include "dat.h"
    8.20 #include "fns.h"
    8.21 #include "io.h"
    8.22+#include "ureg.h"
    8.23+#include "arm.h"
    8.24 
    8.25 enum {
    8.26 	SYSTIMERS	= VIRTIO+0x3000,
    8.27 	ARMTIMER	= VIRTIO+0xB400,
    8.28 
    8.29+	Localctl	= 0x00,
    8.30+	Prescaler	= 0x08,
    8.31+	Localintpending	= 0x60,
    8.32+
    8.33 	SystimerFreq	= 1*Mhz,
    8.34 	MaxPeriod	= SystimerFreq / HZ,
    8.35-	MinPeriod	= SystimerFreq / (100*HZ),
    8.36+	MinPeriod	= 10,
    8.37+
    8.38 };
    8.39 
    8.40 typedef struct Systimers Systimers;
    8.41@@ -64,6 +73,11 @@ enum {
    8.42 	TmrPrescale256	= 0x02<<2,
    8.43 	CntWidth16	= 0<<1,
    8.44 	CntWidth32	= 1<<1,
    8.45+
    8.46+	/* generic timer (cortex-a7) */
    8.47+	Enable	= 1<<0,
    8.48+	Imask	= 1<<1,
    8.49+	Istatus = 1<<2,
    8.50 };
    8.51 
    8.52 static void
    8.53@@ -71,12 +85,23 @@ clockintr(Ureg *ureg, void *)
    8.54 {
    8.55 	Systimers *tn;
    8.56 
    8.57+	if(m->machno != 0)
    8.58+		panic("cpu%d: unexpected system timer interrupt", m->machno);
    8.59 	tn = (Systimers*)SYSTIMERS;
    8.60 	/* dismiss interrupt */
    8.61 	tn->cs = 1<<3;
    8.62 	timerintr(ureg, 0);
    8.63 }
    8.64 
    8.65+static void
    8.66+localclockintr(Ureg *ureg, void *)	/* handler for the local generic timer interrupt; not expected on cpu0 */
    8.67+{
    8.68+	if(m->machno == 0)
    8.69+		panic("cpu0: Unexpected local generic timer interrupt");
    8.70+	cpwrsc(0, CpTIMER, CpTIMERphys, CpTIMERphysctl, Imask|Enable);	/* set Imask in the timer control; timerset writes Enable alone when it re-arms */
    8.71+	timerintr(ureg, 0);
    8.72+}
    8.73+
    8.74 void
    8.75 clockshutdown(void)
    8.76 {
    8.77@@ -84,6 +109,10 @@ clockshutdown(void)
    8.78 
    8.79 	tm = (Armtimer*)ARMTIMER;
    8.80 	tm->ctl = 0;
    8.81+	if(cpuserver)
    8.82+		wdogfeed();
    8.83+	else
    8.84+		wdogoff();
    8.85 }
    8.86 
    8.87 void
    8.88@@ -93,12 +122,16 @@ clockinit(void)
    8.89 	Armtimer *tm;
    8.90 	u32int t0, t1, tstart, tend;
    8.91 
    8.92+	if(((cprdsc(0, CpID, CpIDfeat, 1) >> 16) & 0xF) != 0) {
    8.93+		/* generic timer supported */
    8.94+		if(m->machno == 0){
    8.95+			*(ulong*)(ARMLOCAL + Localctl) = 0;				/* input clock is 19.2Mhz crystal */
    8.96+			*(ulong*)(ARMLOCAL + Prescaler) = 0x06aaaaab;	/* divide by (2^31/Prescaler) for 1Mhz */
    8.97+		}
    8.98+		cpwrsc(0, CpTIMER, CpTIMERphys, CpTIMERphysctl, Imask);
    8.99+	}
   8.100+
   8.101 	tn = (Systimers*)SYSTIMERS;
   8.102-	tm = (Armtimer*)ARMTIMER;
   8.103-	tm->load = 0;
   8.104-	tm->ctl = TmrPrescale1|CntEnable|CntWidth32;
   8.105-	coherence();
   8.106-
   8.107 	tstart = tn->clo;
   8.108 	do{
   8.109 		t0 = lcycles();
   8.110@@ -111,25 +144,36 @@ clockinit(void)
   8.111 	m->cpuhz = 100 * t1;
   8.112 	m->cpumhz = (m->cpuhz + Mhz/2 - 1) / Mhz;
   8.113 	m->cyclefreq = m->cpuhz;
   8.114-
   8.115-	tn->c3 = tn->clo - 1;
   8.116-	intrenable(IRQtimer3, clockintr, nil, 0, "clock");
   8.117+	if(m->machno == 0){
   8.118+		tn->c3 = tn->clo - 1;
   8.119+		tm = (Armtimer*)ARMTIMER;
   8.120+		tm->load = 0;
   8.121+		tm->ctl = TmrPrescale1|CntEnable|CntWidth32;
   8.122+		intrenable(IRQtimer3, clockintr, nil, 0, "clock");
   8.123+	}else
   8.124+		intrenable(IRQcntpns, localclockintr, nil, 0, "clock");
   8.125 }
   8.126 
   8.127 void
   8.128 timerset(uvlong next)
   8.129 {
   8.130 	Systimers *tn;
   8.131-	vlong now, period;
   8.132+	uvlong now;
   8.133+	long period;
   8.134 
   8.135-	tn = (Systimers*)SYSTIMERS;
   8.136 	now = fastticks(nil);
   8.137-	period = next - fastticks(nil);
   8.138+	period = next - now;
   8.139 	if(period < MinPeriod)
   8.140-		next = now + MinPeriod;
   8.141+		period = MinPeriod;
   8.142 	else if(period > MaxPeriod)
   8.143-		next = now + MaxPeriod;
   8.144-	tn->c3 = (ulong)next;
   8.145+		period = MaxPeriod;
   8.146+	if(m->machno > 0){
   8.147+		cpwrsc(0, CpTIMER, CpTIMERphys, CpTIMERphysval, period);
   8.148+		cpwrsc(0, CpTIMER, CpTIMERphys, CpTIMERphysctl, Enable);
   8.149+	}else{
   8.150+		tn = (Systimers*)SYSTIMERS;
   8.151+		tn->c3 = tn->clo + period;
   8.152+	}
   8.153 }
   8.154 
   8.155 uvlong
   8.156@@ -137,16 +181,17 @@ fastticks(uvlong *hz)
   8.157 {
   8.158 	Systimers *tn;
   8.159 	ulong lo, hi;
   8.160+	uvlong now;
   8.161 
   8.162-	tn = (Systimers*)SYSTIMERS;
   8.163 	if(hz)
   8.164 		*hz = SystimerFreq;
   8.165+	tn = (Systimers*)SYSTIMERS;
   8.166 	do{
   8.167 		hi = tn->chi;
   8.168 		lo = tn->clo;
   8.169 	}while(tn->chi != hi);
   8.170-	m->fastclock = (uvlong)hi<<32 | lo;
   8.171-	return m->fastclock;
   8.172+	now = (uvlong)hi<<32 | lo;
   8.173+	return now;
   8.174 }
   8.175 
   8.176 ulong
   8.177@@ -172,7 +217,6 @@ armtimerset(int n)
   8.178 		tm->ctl &= ~(TmrEnable|TmrIntEnable);
   8.179 		tm->irq = 1;
   8.180 	}
   8.181-	coherence();
   8.182 }
   8.183 
   8.184 ulong
   8.185@@ -180,7 +224,7 @@ ulong
   8.186 {
   8.187 	if(SystimerFreq != 1*Mhz)
   8.188 		return fastticks2us(fastticks(nil));
   8.189-	return fastticks(nil);
   8.190+	return ((Systimers*)SYSTIMERS)->clo;
   8.191 }
   8.192 
   8.193 void
   8.194@@ -189,8 +233,8 @@ microdelay(int n)
   8.195 	Systimers *tn;
   8.196 	u32int now, diff;
   8.197 
   8.198+	diff = n + 1;
   8.199 	tn = (Systimers*)SYSTIMERS;
   8.200-	diff = n + 1;
   8.201 	now = tn->clo;
   8.202 	while(tn->clo - now < diff)
   8.203 		;
     9.1--- a/sys/src/9/bcm/dat.h
     9.2+++ b/sys/src/9/bcm/dat.h
     9.3@@ -27,6 +27,7 @@ typedef struct PhysUart	PhysUart;
     9.4 typedef struct PMMU	PMMU;
     9.5 typedef struct Proc	Proc;
     9.6 typedef u32int		PTE;
     9.7+typedef struct Soc	Soc;
     9.8 typedef struct Uart	Uart;
     9.9 typedef struct Ureg	Ureg;
    9.10 typedef uvlong		Tval;
    9.11@@ -214,7 +215,7 @@ struct Mach
    9.12 typedef void		KMap;
    9.13 #define	VA(k)		((uintptr)(k))
    9.14 #define	kmap(p)		(KMap*)((p)->pa|kseg0)
    9.15-#define	kunmap(k)
    9.16+extern void kunmap(KMap*);
    9.17 
    9.18 struct
    9.19 {
    9.20@@ -279,3 +280,29 @@ struct DevConf
    9.21 	Devport	*ports;			/* The ports themselves */
    9.22 };
    9.23 
    9.24+struct Soc {			/* SoC dependent configuration */
    9.25+	ulong	dramsize;
    9.26+	uintptr	physio;
    9.27+	uintptr	busdram;
    9.28+	uintptr	busio;
    9.29+	uintptr	armlocal;
    9.30+	u32int	l1ptedramattrs;
    9.31+	u32int	l2ptedramattrs;
    9.32+};
    9.33+extern Soc soc;
    9.34+
    9.35+#define BUSUNKNOWN -1
    9.36+
    9.37+/*
    9.38+ * GPIO
    9.39+ */
    9.40+enum {
    9.41+	Input	= 0x0,
    9.42+	Output	= 0x1,
    9.43+	Alt0	= 0x4,
    9.44+	Alt1	= 0x5,
    9.45+	Alt2	= 0x6,
    9.46+	Alt3	= 0x7,
    9.47+	Alt4	= 0x3,
    9.48+	Alt5	= 0x2,
    9.49+};
    10.1--- a/sys/src/9/bcm/devarch.c
    10.2+++ b/sys/src/9/bcm/devarch.c
    10.3@@ -150,17 +150,19 @@ Dev archdevtab = {
    10.4 static long
    10.5 cputyperead(Chan*, void *a, long n, vlong offset)
    10.6 {
    10.7-	char str[128];
    10.8+	char name[64], str[128];
    10.9 
   10.10-	snprint(str, sizeof str, "ARM11 %d\n", m->cpumhz);
   10.11+	cputype2name(name, sizeof name);
   10.12+	snprint(str, sizeof str, "ARM %s %d\n", name, m->cpumhz);
   10.13 	return readstr(offset, a, n, str);
   10.14 }
   10.15 
   10.16 static long
   10.17 cputempread(Chan*, void *a, long n, vlong offset)
   10.18 {
   10.19- 	char str[128];
   10.20- 	snprint(str, sizeof str, "%d±%d\n", gettemp(0) / 1000, 1);
   10.21+	char str[16];
   10.22+
   10.23+	snprint(str, sizeof str, "%ud\n", (getcputemp()+500)/1000);
   10.24 	return readstr(offset, a, n, str);
   10.25 }
   10.26 
    11.1--- a/sys/src/9/bcm/devgpio.c
    11.2+++ b/sys/src/9/bcm/devgpio.c
    11.3@@ -232,109 +232,6 @@ getpintable(void)
    11.4 	}
    11.5 }
    11.6 
    11.7-// stolen from uartmini.c
    11.8-#define GPIOREGS	(VIRTIO+0x200000)
    11.9-/* GPIO regs */
   11.10-enum {
   11.11-	Fsel0	= 0x00>>2,
   11.12-		FuncMask= 0x7,
   11.13-	Set0	= 0x1c>>2,
   11.14-	Clr0	= 0x28>>2,
   11.15-	Lev0	= 0x34>>2,
   11.16-	Evds0	= 0x40>>2,
   11.17-	Redge0	= 0x4C>>2,
   11.18-	Fedge0	= 0x58>>2,
   11.19-	Hpin0	= 0x64>>2,
   11.20-	Lpin0	= 0x70>>2,
   11.21-	ARedge0	= 0x7C>>2,
   11.22-	AFedge0	= 0x88>2,
   11.23-	PUD	= 0x94>>2,
   11.24-	PUDclk0	= 0x98>>2,
   11.25-	PUDclk1	= 0x9c>>2,
   11.26-};
   11.27-
   11.28-static void
   11.29-gpiofuncset(uint pin, int func)
   11.30-{	
   11.31-	u32int *gp, *fsel;
   11.32-	int off;
   11.33-
   11.34-	gp = (u32int*)GPIOREGS;
   11.35-	fsel = &gp[Fsel0 + pin/10];
   11.36-	off = (pin % 10) * 3;
   11.37-	*fsel = (*fsel & ~(FuncMask<<off)) | func<<off;
   11.38-}
   11.39-
   11.40-static int
   11.41-gpiofuncget(uint pin)
   11.42-{	
   11.43-	u32int *gp, *fsel;
   11.44-	int off;
   11.45-
   11.46-	gp = (u32int*)GPIOREGS;
   11.47-	fsel = &gp[Fsel0 + pin/10];
   11.48-	off = (pin % 10) * 3;
   11.49-	return ((*fsel >> off) & FuncMask);
   11.50-}
   11.51-
   11.52-static void
   11.53-gpiopullset(uint pin, int state)
   11.54-{
   11.55-	u32int *gp, *reg;
   11.56-	u32int mask;
   11.57-
   11.58-	gp = (u32int*)GPIOREGS;
   11.59-	reg = &gp[PUDclk0 + pin/32];
   11.60-	mask = 1 << (pin % 32);
   11.61-	gp[PUD] = state;
   11.62-	microdelay(1);
   11.63-	*reg = mask;
   11.64-	microdelay(1);
   11.65-	*reg = 0;
   11.66-}
   11.67-
   11.68-static void
   11.69-gpioout(uint pin, int set)
   11.70-{
   11.71-	u32int *gp;
   11.72-	int v;
   11.73-
   11.74-	gp = (u32int*)GPIOREGS;
   11.75-	v = set? Set0 : Clr0;
   11.76-	gp[v + pin/32] = 1 << (pin % 32);
   11.77-}
   11.78-
   11.79-static int
   11.80-gpioin(uint pin)
   11.81-{
   11.82-	u32int *gp;
   11.83-
   11.84-	gp = (u32int*)GPIOREGS;
   11.85-	return (gp[Lev0 + pin/32] & (1 << (pin % 32))) != 0;
   11.86-}
   11.87-
   11.88-static void
   11.89-gpioevent(uint pin, int event, int enable)
   11.90-{
   11.91-	u32int *gp, *field;
   11.92-	int reg = 0;
   11.93-	
   11.94-	switch(event)
   11.95-	{
   11.96-		case Erising:
   11.97-			reg = Redge0;
   11.98-			break;
   11.99-		case Efalling:
  11.100-			reg = Fedge0;
  11.101-			break;
  11.102-		default:
  11.103-			panic("gpio: unknown event type");
  11.104-	}
  11.105-	gp = (u32int*)GPIOREGS;
  11.106-	field = &gp[reg + pin/32];
  11.107-	SET_BIT(field, pin, enable);
  11.108-}
  11.109-
  11.110 static void
  11.111 mkdeventry(Chan *c, Qid qid, Dirtab *tab, Dir *db)
  11.112 {
  11.113@@ -417,27 +314,16 @@ static void
  11.114 interrupt(Ureg*, void *)
  11.115 {
  11.116 	
  11.117-	u32int *gp, *field;
  11.118-	char pin;
  11.119+	uint pin;
  11.120 	
  11.121-	gp = (u32int*)GPIOREGS;
  11.122-
  11.123-	int set;
  11.124-
  11.125 	coherence();
  11.126 	
  11.127 	eventvalue = 0;
  11.128 	
  11.129 	for(pin = 0; pin < PIN_TABLE_SIZE; pin++)
  11.130 	{
  11.131-		set = (gp[Evds0 + pin/32] & (1 << (pin % 32))) != 0;
  11.132-
  11.133-		if(set)
  11.134-		{
  11.135-			field = &gp[Evds0 + pin/32];
  11.136-			SET_BIT(field, pin, 1);
  11.137+		if(gpiogetevent(pin))
  11.138 			SET_BIT(&eventvalue, pin, 1);
  11.139-		}
  11.140 	}
  11.141 	coherence();
  11.142 
  11.143@@ -447,7 +333,8 @@ interrupt(Ureg*, void *)
  11.144 static void
  11.145 gpioinit(void)
  11.146 {
  11.147-	boardrev = getrevision() & 0xff;
  11.148+	gpiomeminit();
  11.149+	boardrev = getboardrev() & 0xff;
  11.150 	pinscheme = Qboard;
  11.151 	intrenable(49, interrupt, nil, 0, "gpio1");
  11.152 }
  11.153@@ -676,7 +563,7 @@ gpiowrite(Chan *c, void *va, long n, vlo
  11.154 			{
  11.155 				if(strncmp(funcname[i], arg, strlen(funcname[i])) == 0)
  11.156 				{
  11.157-					gpiofuncset(pin, i);
  11.158+					gpiosel(pin, i);
  11.159 					break;
  11.160 				}
  11.161 			}
  11.162@@ -691,7 +578,7 @@ gpiowrite(Chan *c, void *va, long n, vlo
  11.163 			{
  11.164 				if(strncmp(pudname[i], arg, strlen(pudname[i])) == 0)
  11.165 				{
  11.166-					gpiopullset(pin, i);
  11.167+					gpiopull(pin, i);
  11.168 					break;
  11.169 				}
  11.170 			}
  11.171@@ -707,7 +594,7 @@ gpiowrite(Chan *c, void *va, long n, vlo
  11.172 			{
  11.173 				if(strncmp(evtypename[i], arg, strlen(evtypename[i])) == 0)
  11.174 				{
  11.175-					gpioevent(pin, i, (cb->f[2][0] == 'e'));
  11.176+					gpioselevent(pin, i, (cb->f[2][0] == 'e'));
  11.177 					break;
  11.178 				}
  11.179 			}
    12.1--- a/sys/src/9/bcm/dma.c
    12.2+++ b/sys/src/9/bcm/dma.c
    12.3@@ -25,7 +25,7 @@
    12.4 enum {
    12.5 	Nchan		= 7,		/* number of dma channels */
    12.6 	Regsize		= 0x100,	/* size of regs for each chan */
    12.7-	Cbalign		= 32,		/* control block byte alignment */
    12.8+	Cbalign		= 64,		/* control block byte alignment (allow for 64-byte cache lines on bcm2836) */
    12.9 	Dbg		= 0,
   12.10 	
   12.11 	/* registers for each dma controller */
   12.12@@ -97,6 +97,18 @@ struct Cb {
   12.13 static Ctlr dma[Nchan];
   12.14 static u32int *dmaregs = (u32int*)DMAREGS;
   12.15 
   12.16+uintptr
   12.17+dmaaddr(void *va)	/* address to hand to the dma engine for a dram pointer */
   12.18+{
   12.19+	return soc.busdram | (PTR2UINT(va) & ~KSEGM);	/* drop the KSEGM bits, OR in the SoC's dram bus base */
   12.20+}
   12.21+
   12.22+static uintptr
   12.23+dmaioaddr(void *va)	/* address to hand to the dma engine for an i/o register pointer */
   12.24+{
   12.25+	return soc.busio | (PTR2UINT(va) & ~VIRTIO);	/* drop the VIRTIO bits, OR in the SoC's i/o bus base */
   12.26+}
   12.27+
   12.28 static void
   12.29 dump(char *msg, uchar *p, int n)
   12.30 {
   12.31@@ -146,7 +158,7 @@ dmastart(int chan, int dev, int dir, voi
   12.32 		ctlr->regs = (u32int*)(DMAREGS + chan*Regsize);
   12.33 		ctlr->cb = xspanalloc(sizeof(Cb), Cbalign, 0);
   12.34 		assert(ctlr->cb != nil);
   12.35-		dmaregs[Enable] |= 1 << chan;
   12.36+		dmaregs[Enable] |= 1<<chan;
   12.37 		ctlr->regs[Cs] = Reset;
   12.38 		while(ctlr->regs[Cs] & Reset)
   12.39 			;
   12.40@@ -156,33 +168,33 @@ dmastart(int chan, int dev, int dir, voi
   12.41 	ti = 0;
   12.42 	switch(dir){
   12.43 	case DmaD2M:
   12.44-		cachedwbinvse(dst, len);
   12.45+		cachedinvse(dst, len);
   12.46 		ti = Srcdreq | Destinc;
   12.47-		cb->sourcead = DMAIO(src);
   12.48-		cb->destad = DMAADDR(dst);
   12.49+		cb->sourcead = dmaioaddr(src);
   12.50+		cb->destad = dmaaddr(dst);
   12.51 		break;
   12.52 	case DmaM2D:
   12.53 		cachedwbse(src, len);
   12.54 		ti = Destdreq | Srcinc;
   12.55-		cb->sourcead = DMAADDR(src);
   12.56-		cb->destad = DMAIO(dst);
   12.57+		cb->sourcead = dmaaddr(src);
   12.58+		cb->destad = dmaioaddr(dst);
   12.59 		break;
   12.60 	case DmaM2M:
   12.61 		cachedwbse(src, len);
   12.62-		cachedwbinvse(dst, len);
   12.63+		cachedinvse(dst, len);
   12.64 		ti = Srcinc | Destinc;
   12.65-		cb->sourcead = DMAADDR(src);
   12.66-		cb->destad = DMAADDR(dst);
   12.67+		cb->sourcead = dmaaddr(src);
   12.68+		cb->destad = dmaaddr(dst);
   12.69 		break;
   12.70 	}
   12.71-	cb->ti = ti | dev << Permapshift | Inten;
   12.72+	cb->ti = ti | dev<<Permapshift | Inten;
   12.73 	cb->txfrlen = len;
   12.74 	cb->stride = 0;
   12.75 	cb->nextconbk = 0;
   12.76 	cachedwbse(cb, sizeof(Cb));
   12.77 	ctlr->regs[Cs] = 0;
   12.78 	microdelay(1);
   12.79-	ctlr->regs[Conblkad] = DMAADDR(cb);
   12.80+	ctlr->regs[Conblkad] = dmaaddr(cb);
   12.81 	DBG print("dma start: %ux %ux %ux %ux %ux %ux\n",
   12.82 		cb->ti, cb->sourcead, cb->destad, cb->txfrlen,
   12.83 		cb->stride, cb->nextconbk);
    13.1--- a/sys/src/9/bcm/dwcotg.h
    13.2+++ b/sys/src/9/bcm/dwcotg.h
    13.3@@ -434,8 +434,8 @@ enum {
    13.4 		Episo		= 1<<18,
    13.5 		Epbulk		= 2<<18,
    13.6 		Epintr		= 3<<18,
    13.7-	Multicnt	= 0x3<<20,	/* transactions per μframe or retries */
    13.8-					/* per periodic split */
    13.9+	Multicnt	= 0x3<<20,	/* transactions per μframe */
   13.10+					/* or retries per periodic split */
   13.11 		OMulticnt	= 20,
   13.12 	Devaddr		= 0x7f<<22,	/* device address */
   13.13 		ODevaddr	= 22,
    14.1--- a/sys/src/9/bcm/fns.h
    14.2+++ b/sys/src/9/bcm/fns.h
    14.3@@ -10,6 +10,9 @@ extern void cachedwbse(void*, int);
    14.4 extern void cachedwbinvse(void*, int);
    14.5 extern void cacheiinv(void);
    14.6 extern void cacheuwbinv(void);
    14.7+extern void cachedwbtlb(void*, int);
    14.8+extern void cacheiinvse(void*, int);
    14.9+extern void cachedinvse(void*, int);
   14.10 extern uintptr cankaddr(uintptr pa);
   14.11 extern int cas32(void*, u32int, u32int);
   14.12 extern void checkmmu(uintptr, uintptr);
   14.13@@ -20,11 +23,13 @@ extern void coherence(void);
   14.14 extern ulong cprd(int cp, int op1, int crn, int crm, int op2);
   14.15 extern ulong cprdsc(int op1, int crn, int crm, int op2);
   14.16 extern void cpuidprint(void);
   14.17+extern u32int cpidget(void);
   14.18 extern void cpwr(int cp, int op1, int crn, int crm, int op2, ulong val);
   14.19 extern void cpwrsc(int op1, int crn, int crm, int op2, ulong val);
   14.20 #define cycles(ip) *(ip) = lcycles()
   14.21 extern void dmastart(int, int, int, void*, void*, int);
   14.22 extern int dmawait(int);
   14.23+extern uintptr dmaaddr(void *va);
   14.24 extern int fbblank(int);
   14.25 extern void* fbinit(int, int*, int*, int*);
   14.26 extern u32int farget(void);
   14.27@@ -41,13 +46,26 @@ extern char *getethermac(void);
   14.28 extern uint getfirmware(void);
   14.29 extern int getpower(int);
   14.30 extern void getramsize(Confmem*);
   14.31+extern int getncpus(void);
   14.32+extern void gpiosel(uint, int);
   14.33+extern void gpiopull(uint, int);
   14.34+extern void gpiopullup(uint);
   14.35+extern void gpiopulloff(uint);
   14.36+extern void gpiopulldown(uint);
   14.37+extern void gpioout(uint, int);
   14.38+extern int gpioin(uint);
   14.39+extern void gpioselevent(uint, int, int);
   14.40+extern int gpiogetevent(uint);
   14.41+extern void gpiomeminit(void);
   14.42 extern u32int ifsrget(void);
   14.43+extern void intrcpushutdown(void);
   14.44 extern void irqenable(int, void (*)(Ureg*, void*), void*);
   14.45 #define intrenable(i, f, a, b, n) irqenable((i), (f), (a))
   14.46 extern void intrsoff(void);
   14.47 extern int isaconfig(char*, int, ISAConf*);
   14.48+extern void l2cacheuwbinv(void);
   14.49 extern void links(void);
   14.50-extern void mmuinit(void);
   14.51+extern void mmuinit(void*);
   14.52 extern void mmuinit1(void);
   14.53 extern void mmuinvalidate(void);
   14.54 extern void mmuinvalidateaddr(u32int);
   14.55@@ -58,8 +76,10 @@ extern void procsave(Proc*);
   14.56 extern void procfork(Proc*);
   14.57 extern void procsetup(Proc*);
   14.58 extern void screeninit(void);
   14.59+extern void setclkrate(int, ulong);
   14.60 extern void setpower(int, int);
   14.61 extern void setr13(int, u32int*);
   14.62+extern int startcpus(uint);
   14.63 extern int splfhi(void);
   14.64 extern int splflo(void);
   14.65 extern int tas(void *);
   14.66@@ -68,9 +88,17 @@ extern void trapinit(void);
   14.67 extern void uartconsinit(void);
   14.68 extern int userureg(Ureg*);
   14.69 extern void vectors(void);
   14.70+extern void vgpinit(void);
   14.71+extern void vgpset(uint, int);
   14.72 extern void vtable(void);
   14.73-extern uint gettemp(int);
   14.74-extern uint getrevision(void);
   14.75+extern void wdogoff(void);
   14.76+extern void wdogfeed(void);
   14.77+extern void vtable(void);
   14.78+extern int l2ap(int);
   14.79+extern uint getcputemp(void);
   14.80+extern char *cputype2name(char *buf, int size);
   14.81+extern uint getboardrev(void);
   14.82+extern void sev(void);
   14.83 
   14.84 /*
   14.85  * floating point emulation
    15.1new file mode 100644
    15.2--- /dev/null
    15.3+++ b/sys/src/9/bcm/gpio.c
    15.4@@ -0,0 +1,142 @@
    15.5+/*
    15.6+ * Raspberry Pi GPIO support
    15.7+ */
    15.8+
    15.9+#include "u.h"
   15.10+#include "../port/lib.h"
   15.11+#include "../port/error.h"
   15.12+#include "mem.h"
   15.13+#include "dat.h"
   15.14+#include "fns.h"
   15.15+#include "io.h"
   15.16+
   15.17+#define GPIOREGS	(VIRTIO+0x200000)
   15.18+
   15.19+/* GPIO regs */
   15.20+enum {
   15.21+	Fsel0	= 0x00>>2,	/* function select */
   15.22+		FuncMask= 0x7,
   15.23+	Set0	= 0x1c>>2,	/* output set */
   15.24+	Clr0	= 0x28>>2,	/* output clear */
   15.25+	Lev0	= 0x34>>2,	/* pin level */
   15.26+	Evds0	= 0x40>>2,	/* event detect status */
   15.27+	Redge0	= 0x4C>>2,	/* rising edge detect enable */
   15.28+	Fedge0	= 0x58>>2,	/* falling edge detect enable */
   15.29+	Hpin0	= 0x64>>2,	/* high level detect enable */
   15.30+	Lpin0	= 0x70>>2,	/* low level detect enable */
   15.31+	ARedge0	= 0x7C>>2,	/* async rising edge detect enable */
   15.32+	AFedge0	= 0x88>>2,	/* async falling edge detect enable */
   15.33+	PUD	= 0x94>>2,	/* pull-up/down control */
   15.34+		Off	= 0x0,
   15.35+		Pulldown= 0x1,
   15.36+		Pullup	= 0x2,
   15.37+	PUDclk0	= 0x98>>2,	/* pull-up/down clock */
   15.38+	PUDclk1	= 0x9c>>2,
   15.39+};
   15.40+
   15.41+void
   15.42+gpiosel(uint pin, int func)	/* set the function-select field of pin to func */
   15.43+{	
   15.44+	u32int *gp, *fsel;
   15.45+	int off;
   15.46+
   15.47+	gp = (u32int*)GPIOREGS;
   15.48+	fsel = &gp[Fsel0 + pin/10];	/* each Fsel register covers 10 pins */
   15.49+	off = (pin % 10) * 3;		/* 3-bit field per pin */
   15.50+	*fsel = (*fsel & ~(FuncMask<<off)) | func<<off;	/* clear this pin's field, then or in func (func is not masked) */
   15.51+}
   15.52+
   15.53+void
   15.54+gpiopull(uint pin, int func)	/* apply pull setting func (Off, Pulldown or Pullup) to pin */
   15.55+{
   15.56+	u32int *gp, *reg;
   15.57+	u32int mask;
   15.58+
   15.59+	gp = (u32int*)GPIOREGS;
   15.60+	reg = &gp[PUDclk0 + pin/32];	/* 32 pins per PUDclk register */
   15.61+	mask = 1 << (pin % 32);
   15.62+	gp[PUD] = func;			/* write the requested setting to PUD */
   15.63+	microdelay(1);
   15.64+	*reg = mask;			/* set the clock bit for this pin only */
   15.65+	microdelay(1);
   15.66+	*reg = 0;			/* clear the clock bits again */
   15.67+}
   15.68+
   15.69+void
   15.70+gpiopulloff(uint pin)
   15.71+{
   15.72+	gpiopull(pin, Off);	/* neither pull-up nor pull-down */
   15.73+}
   15.74+
   15.75+void
   15.76+gpiopullup(uint pin)
   15.77+{
   15.78+	gpiopull(pin, Pullup);	/* select the Pullup setting for pin */
   15.79+}
   15.80+
   15.81+void
   15.82+gpiopulldown(uint pin)
   15.83+{
   15.84+	gpiopull(pin, Pulldown);	/* select the Pulldown setting for pin */
   15.85+}
   15.86+
   15.87+void
   15.88+gpioout(uint pin, int set)	/* drive pin high if set is non-zero, otherwise low */
   15.89+{
   15.90+	u32int *gp;
   15.91+	int v;
   15.92+
   15.93+	gp = (u32int*)GPIOREGS;
   15.94+	v = set? Set0 : Clr0;	/* separate register banks for setting and clearing */
   15.95+	gp[v + pin/32] = 1 << (pin % 32);	/* write only this pin's bit; 32 pins per register */
   15.96+}
   15.97+
   15.98+int
   15.99+gpioin(uint pin)	/* return 1 if pin's bit in the level register is set, else 0 */
  15.100+{
  15.101+	u32int *gp;
  15.102+
  15.103+	gp = (u32int*)GPIOREGS;
  15.104+	return (gp[Lev0 + pin/32] & (1 << (pin % 32))) != 0;	/* 32 pins per Lev register */
  15.105+}
  15.106+
  15.107+void
  15.108+gpioselevent(uint pin, int falling, int enable)	/* enable or disable edge event detection on pin */
  15.109+{
  15.110+	u32int *gp, *field;
  15.111+	int reg;
  15.112+
  15.113+	enable = enable != 0;		/* normalise to 0 or 1 */
  15.114+	if(falling)
  15.115+		reg = Fedge0;		/* falling edge detect enable */
  15.116+	else
  15.117+		reg = Redge0;		/* rising edge detect enable */
  15.118+	gp = (u32int*)GPIOREGS;
  15.119+	field = &gp[reg + pin/32];	/* 32 pins per register */
  15.120+	*field = (*field & ~(1 << (pin % 32))) | (enable << (pin % 32));	/* always clear pin's bit, set it again only if enabling */
  15.121+}
  15.122+
  15.123+int
  15.124+gpiogetevent(uint pin)	/* return 1 and clear the event if pin has a detected event, else 0 */
  15.125+{
  15.126+	u32int *gp, *reg, val;
  15.127+
  15.128+	gp = (u32int*)GPIOREGS;
  15.129+	reg = &gp[Evds0 + pin/32];	/* 32 pins per event status register */
  15.130+	val = *reg & (1 << (pin % 32));
  15.131+	*reg = val;	/* status bits are write-1-to-clear: write only this pin's bit so other pins' events survive */
  15.132+	return val != 0;
  15.133+}
  15.134+
  15.135+void
  15.136+gpiomeminit(void)	/* register a one-page physical segment named "gpio" */
  15.137+{
  15.138+	Physseg seg;
  15.139+
  15.140+	memset(&seg, 0, sizeof seg);
  15.141+	seg.attr = SG_PHYSICAL;
  15.142+	seg.name = "gpio";
  15.143+	seg.pa = GPIOREGS;	/* NOTE(review): GPIOREGS is VIRTIO-based, i.e. a kernel virtual address; confirm whether pa should be derived from soc.physio instead */
  15.144+	seg.size = BY2PG;
  15.145+	addphysseg(&seg);
  15.146+}
    16.1--- a/sys/src/9/bcm/io.h
    16.2+++ b/sys/src/9/bcm/io.h
    16.3@@ -8,11 +8,23 @@ enum {
    16.4 	IRQdma0		= 16,
    16.5 #define IRQDMA(chan)	(IRQdma0+(chan))
    16.6 	IRQaux		= 29,
    16.7+	IRQi2c		= 53,
    16.8+	IRQspi		= 54,
    16.9+	IRQsdhost	= 56,
   16.10 	IRQmmc		= 62,
   16.11 
   16.12 	IRQbasic	= 64,
   16.13 	IRQtimerArm	= IRQbasic + 0,
   16.14 
   16.15+	IRQlocal	= 96,
   16.16+	IRQcntps	= IRQlocal + 0,
   16.17+	IRQcntpns	= IRQlocal + 1,
   16.18+	IRQmbox0	= IRQlocal + 4,
   16.19+	IRQmbox1	= IRQlocal + 5,
   16.20+	IRQmbox2	= IRQlocal + 6,
   16.21+	IRQmbox3	= IRQlocal + 7,
   16.22+	IRQlocaltmr	= IRQlocal + 11,
   16.23+
   16.24 	IRQfiq		= IRQusb,	/* only one source can be FIQ */
   16.25 
   16.26 	DmaD2M		= 0,		/* device to memory */
   16.27@@ -20,7 +32,14 @@ enum {
   16.28 	DmaM2M		= 2,		/* memory to memory */
   16.29 
   16.30 	DmaChanEmmc	= 4,		/* can only use 2-5, maybe 0 */
   16.31+	DmaChanSdhost	= 5,
   16.32+	DmaChanSpiTx= 2,
   16.33+	DmaChanSpiRx= 0,
   16.34+
   16.35+	DmaDevSpiTx	= 6,
   16.36+	DmaDevSpiRx	= 7,
   16.37 	DmaDevEmmc	= 11,
   16.38+	DmaDevSdhost	= 13,
   16.39 
   16.40 	PowerSd		= 0,
   16.41 	PowerUart0,
   16.42@@ -43,4 +62,3 @@ enum {
   16.43 	ClkPixel,
   16.44 	ClkPwm,
   16.45 };
   16.46-#define BUSUNKNOWN	(-1)
    17.1--- a/sys/src/9/bcm/l.s
    17.2+++ b/sys/src/9/bcm/l.s
    17.3@@ -1,10 +1,14 @@
    17.4 /*
    17.5- * Broadcom bcm2835 SoC, as used in Raspberry Pi
    17.6- * arm1176jzf-s processor (armv6)
    17.7+ * Common startup for armv6 and armv7
    17.8+ * The rest of l.s has been moved to armv[67].s
    17.9  */
   17.10 
   17.11 #include "arm.s"
   17.12 
   17.13+/*
   17.14+ * on bcm2836, only cpu0 starts here
   17.15+ * other cpus enter at cpureset in armv7.s
   17.16+ */
   17.17 TEXT _start(SB), 1, $-4
   17.18 	/*
   17.19 	 * load physical base for SB addressing while mmu is off
   17.20@@ -16,259 +20,14 @@ TEXT _start(SB), 1, $-4
   17.21 	MOVW	$0, R0
   17.22 
   17.23 	/*
   17.24-	 * SVC mode, interrupts disabled
   17.25-	 */
   17.26-	MOVW	$(PsrDirq|PsrDfiq|PsrMsvc), R1
   17.27-	MOVW	R1, CPSR
   17.28-
   17.29-	/*
   17.30-	 * disable the mmu and L1 caches
   17.31-	 * invalidate caches and tlb
   17.32-	 */
   17.33-	MRC	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
   17.34-	BIC	$(CpCdcache|CpCicache|CpCpredict|CpCmmu), R1
   17.35-	MCR	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
   17.36-	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvu), CpCACHEall
   17.37-	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
   17.38-	ISB
   17.39-
   17.40-	/*
   17.41-	 * clear mach and page tables
   17.42-	 */
   17.43-	MOVW	$PADDR(MACHADDR), R1
   17.44-	MOVW	$PADDR(KTZERO), R2
   17.45-_ramZ:
   17.46-	MOVW	R0, (R1)
   17.47-	ADD	$4, R1
   17.48-	CMP	R1, R2
   17.49-	BNE	_ramZ
   17.50-
   17.51-	/*
   17.52 	 * start stack at top of mach (physical addr)
   17.53-	 * set up page tables for kernel
   17.54 	 */
   17.55 	MOVW	$PADDR(MACHADDR+MACHSIZE-4), R13
   17.56-	BL	,mmuinit(SB)
   17.57 
   17.58 	/*
   17.59-	 * set up domain access control and page table base
   17.60-	 */
   17.61-	MOVW	$Client, R1
   17.62-	MCR	CpSC, 0, R1, C(CpDAC), C(0)
   17.63-	MOVW	$PADDR(L1), R1
   17.64-	MCR	CpSC, 0, R1, C(CpTTB), C(0)
   17.65-
   17.66-	/*
   17.67-	 * enable caches, mmu, and high vectors
   17.68+	 * do arch-dependent startup (no return)
   17.69 	 */
   17.70-	MRC	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
   17.71-	ORR	$(CpChv|CpCdcache|CpCicache|CpCmmu), R0
   17.72-	MCR	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
   17.73-	ISB
   17.74-
   17.75-	/*
   17.76-	 * switch SB, SP, and PC into KZERO space
   17.77-	 */
   17.78-	MOVW	$setR12(SB), R12
   17.79-	MOVW	$(MACHADDR+MACHSIZE-4), R13
   17.80-	MOVW	$_startpg(SB), R15
   17.81-
   17.82-TEXT _startpg(SB), 1, $-4
   17.83-
   17.84-	/*
   17.85-	 * enable cycle counter
   17.86-	 */
   17.87-	MOVW	$1, R1
   17.88-	MCR	CpSC, 0, R1, C(CpSPM), C(CpSPMperf), CpSPMctl
   17.89-
   17.90-	/*
   17.91-	 * call main and loop forever if it returns
   17.92-	 */
   17.93-	BL	,main(SB)
   17.94+	BL	,armstart(SB)
   17.95 	B	,0(PC)
   17.96 
   17.97-	BL	_div(SB)		/* hack to load _div, etc. */
   17.98-
   17.99-TEXT fsrget(SB), 1, $-4				/* data fault status */
  17.100-	MRC	CpSC, 0, R0, C(CpFSR), C(0), CpFSRdata
  17.101 	RET
  17.102-
  17.103-TEXT ifsrget(SB), 1, $-4			/* instruction fault status */
  17.104-	MRC	CpSC, 0, R0, C(CpFSR), C(0), CpFSRinst
  17.105-	RET
  17.106-
  17.107-TEXT farget(SB), 1, $-4				/* fault address */
  17.108-	MRC	CpSC, 0, R0, C(CpFAR), C(0x0)
  17.109-	RET
  17.110-
  17.111-TEXT lcycles(SB), 1, $-4
  17.112-	MRC	CpSC, 0, R0, C(CpSPM), C(CpSPMperf), CpSPMcyc
  17.113-	RET
  17.114-
  17.115-TEXT splhi(SB), 1, $-4
  17.116-	MOVW	$(MACHADDR+4), R2		/* save caller pc in Mach */
  17.117-	MOVW	R14, 0(R2)
  17.118-
  17.119-	MOVW	CPSR, R0			/* turn off irqs (but not fiqs) */
  17.120-	ORR	$(PsrDirq), R0, R1
  17.121-	MOVW	R1, CPSR
  17.122-	RET
  17.123-
  17.124-TEXT splfhi(SB), 1, $-4
  17.125-	MOVW	$(MACHADDR+4), R2		/* save caller pc in Mach */
  17.126-	MOVW	R14, 0(R2)
  17.127-
  17.128-	MOVW	CPSR, R0			/* turn off irqs and fiqs */
  17.129-	ORR	$(PsrDirq|PsrDfiq), R0, R1
  17.130-	MOVW	R1, CPSR
  17.131-	RET
  17.132-
  17.133-TEXT splflo(SB), 1, $-4
  17.134-	MOVW	CPSR, R0			/* turn on fiqs */
  17.135-	BIC	$(PsrDfiq), R0, R1
  17.136-	MOVW	R1, CPSR
  17.137-	RET
  17.138-
  17.139-TEXT spllo(SB), 1, $-4
  17.140-	MOVW	CPSR, R0			/* turn on irqs and fiqs */
  17.141-	BIC	$(PsrDirq|PsrDfiq), R0, R1
  17.142-	MOVW	R1, CPSR
  17.143-	RET
  17.144-
  17.145-TEXT splx(SB), 1, $-4
  17.146-	MOVW	$(MACHADDR+0x04), R2		/* save caller pc in Mach */
  17.147-	MOVW	R14, 0(R2)
  17.148-
  17.149-	MOVW	R0, R1				/* reset interrupt level */
  17.150-	MOVW	CPSR, R0
  17.151-	MOVW	R1, CPSR
  17.152-	RET
  17.153-
  17.154-TEXT spldone(SB), 1, $0				/* end marker for devkprof.c */
  17.155-	RET
  17.156-
  17.157-TEXT islo(SB), 1, $-4
  17.158-	MOVW	CPSR, R0
  17.159-	AND	$(PsrDirq), R0
  17.160-	EOR	$(PsrDirq), R0
  17.161-	RET
  17.162-
  17.163-TEXT	tas(SB), $-4
  17.164-TEXT	_tas(SB), $-4
  17.165-	MOVW	R0,R1
  17.166-	MOVW	$1,R0
  17.167-	SWPW	R0,(R1)			/* fix: deprecated in armv6 */
  17.168-	RET
  17.169-
  17.170-TEXT setlabel(SB), 1, $-4
  17.171-	MOVW	R13, 0(R0)		/* sp */
  17.172-	MOVW	R14, 4(R0)		/* pc */
  17.173-	MOVW	$0, R0
  17.174-	RET
  17.175-
  17.176-TEXT gotolabel(SB), 1, $-4
  17.177-	MOVW	0(R0), R13		/* sp */
  17.178-	MOVW	4(R0), R14		/* pc */
  17.179-	MOVW	$1, R0
  17.180-	RET
  17.181-
  17.182-TEXT getcallerpc(SB), 1, $-4
  17.183-	MOVW	0(R13), R0
  17.184-	RET
  17.185-
  17.186-TEXT idlehands(SB), $-4
  17.187-	BARRIERS
  17.188-	MOVW	CPSR, R3
  17.189-	BIC	$(PsrDirq|PsrDfiq), R3, R1		/* spllo */
  17.190-	MOVW	R1, CPSR
  17.191-
  17.192-	MOVW	$0, R0				/* wait for interrupt */
  17.193-	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEintr), CpCACHEwait
  17.194-	ISB
  17.195-
  17.196-	MOVW	R3, CPSR			/* splx */
  17.197-	RET
  17.198-
  17.199-
  17.200-TEXT coherence(SB), $-4
  17.201-	BARRIERS
  17.202-	RET
  17.203-
  17.204-/*
  17.205- * invalidate tlb
  17.206- */
  17.207-TEXT mmuinvalidate(SB), 1, $-4
  17.208-	MOVW	$0, R0
  17.209-	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
  17.210-	BARRIERS
  17.211-	RET
  17.212-
  17.213-/*
  17.214- * mmuinvalidateaddr(va)
  17.215- *   invalidate tlb entry for virtual page address va, ASID 0
  17.216- */
  17.217-TEXT mmuinvalidateaddr(SB), 1, $-4
  17.218-	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinvse
  17.219-	BARRIERS
  17.220-	RET
  17.221-
  17.222-/*
  17.223- * drain write buffer
  17.224- * writeback and invalidate data cache
  17.225- */
  17.226-TEXT cachedwbinv(SB), 1, $-4
  17.227-	DSB
  17.228-	MOVW	$0, R0
  17.229-	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall
  17.230-	RET
  17.231-
  17.232-/*
  17.233- * cachedwbinvse(va, n)
  17.234- *   drain write buffer
  17.235- *   writeback and invalidate data cache range [va, va+n)
  17.236- */
  17.237-TEXT cachedwbinvse(SB), 1, $-4
  17.238-	MOVW	R0, R1		/* DSB clears R0 */
  17.239-	DSB
  17.240-	MOVW	n+4(FP), R2
  17.241-	ADD	R1, R2
  17.242-	SUB	$1, R2
  17.243-	BIC	$(CACHELINESZ-1), R1
  17.244-	BIC	$(CACHELINESZ-1), R2
  17.245-	MCRR(CpSC, 0, 2, 1, CpCACHERANGEdwbi)
  17.246-	RET
  17.247-
  17.248-/*
  17.249- * cachedwbse(va, n)
  17.250- *   drain write buffer
  17.251- *   writeback data cache range [va, va+n)
  17.252- */
  17.253-TEXT cachedwbse(SB), 1, $-4
  17.254-	MOVW	R0, R1		/* DSB clears R0 */
  17.255-	DSB
  17.256-	MOVW	n+4(FP), R2
  17.257-	ADD	R1, R2
  17.258-	BIC	$(CACHELINESZ-1), R1
  17.259-	BIC	$(CACHELINESZ-1), R2
  17.260-	MCRR(CpSC, 0, 2, 1, CpCACHERANGEdwb)
  17.261-	RET
  17.262-
  17.263-/*
  17.264- * drain write buffer and prefetch buffer
  17.265- * writeback and invalidate data cache
  17.266- * invalidate instruction cache
  17.267- */
  17.268-TEXT cacheuwbinv(SB), 1, $-4
  17.269-	BARRIERS
  17.270-	MOVW	$0, R0
  17.271-	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall
  17.272-	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
  17.273-	RET
  17.274-
  17.275-/*
  17.276- * invalidate instruction cache
  17.277- */
  17.278-TEXT cacheiinv(SB), 1, $-4
  17.279-	MOVW	$0, R0
  17.280-	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
  17.281-	RET
    18.1--- a/sys/src/9/bcm/lexception.s
    18.2+++ b/sys/src/9/bcm/lexception.s
    18.3@@ -27,6 +27,7 @@ TEXT vtable(SB), 1, $-4
    18.4 	WORD	$_vfiq(SB)		/* FIQ, switch to svc mode */
    18.5 
    18.6 TEXT _vsvc(SB), 1, $-4			/* SWI */
    18.7+	CLREX
    18.8 	MOVW.W	R14, -4(R13)		/* ureg->pc = interrupted PC */
    18.9 	MOVW	SPSR, R14		/* ureg->psr = SPSR */
   18.10 	MOVW.W	R14, -4(R13)		/* ... */
   18.11@@ -39,9 +40,16 @@ TEXT _vsvc(SB), 1, $-4			/* SWI */
   18.12 
   18.13 	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
   18.14 
   18.15-//	MOVW	$(KSEG0+16*KiB-MACHSIZE), R10	/* m */
   18.16-	MOVW	$(MACHADDR), R10	/* m */
   18.17-	MOVW	8(R10), R9		/* up */
   18.18+	/* get R(MACH) for this cpu */
   18.19+	CPUID(R1)
   18.20+	SLL	$2, R1			/* convert to word index */
   18.21+	MOVW	$machaddr(SB), R2
   18.22+	ADD	R1, R2
   18.23+	MOVW	(R2), R(MACH)		/* m = machaddr[cpuid] */
   18.24+	CMP	$0, R(MACH)
   18.25+	MOVW.EQ	$MACHADDR, R(MACH)		/* paranoia: use MACHADDR if 0 (must load R(MACH), as in _virq/_vfiq, not R0) */
   18.26+
   18.27+	MOVW	8(R(MACH)), R(USER)		/* up */
   18.28 
   18.29 	MOVW	R13, R0			/* first arg is pointer to ureg */
   18.30 	SUB	$8, R13			/* space for argument+link */
   18.31@@ -81,6 +89,7 @@ TEXT _virq(SB), 1, $-4			/* IRQ */
   18.32 	 *  we'll switch to SVC mode and then call trap.
   18.33 	 */
   18.34 _vswitch:
   18.35+	CLREX
   18.36 	MOVW	SPSR, R1		/* save SPSR for ureg */
   18.37 	MOVW	R14, R2			/* save interrupted pc for ureg */
   18.38 	MOVW	R13, R3			/* save pointer to where the original [R0-R4] are */
   18.39@@ -119,7 +128,16 @@ TEXT _virq(SB), 1, $-4			/* IRQ */
   18.40 
   18.41 	BL	trap(SB)
   18.42 
   18.43+	MOVW	$setR12(SB), R12	/* reload kernel's SB (ORLY?) */
   18.44 	ADD	$(4*2+4*15), R13	/* make r13 point to ureg->type */
   18.45+	/*
   18.46+	 * if we interrupted a previous trap's handler and are now
   18.47+	 * returning to it, we need to propagate the current R(MACH) (R10)
   18.48+	 * by overriding the saved one on the stack, since we may have
   18.49+	 * been rescheduled and be on a different processor now than
   18.50+	 * at entry.
   18.51+	 */
   18.52+	MOVW	R(MACH), (-(15-MACH)*4)(R13) /* restore current cpu's MACH */
   18.53 	MOVW	8(R13), R14		/* restore link */
   18.54 	MOVW	4(R13), R0		/* restore SPSR */
   18.55 	MOVW	R0, SPSR		/* ... */
   18.56@@ -140,9 +158,16 @@ TEXT _virq(SB), 1, $-4			/* IRQ */
   18.57 
   18.58 	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
   18.59 
   18.60-//	MOVW	$(KSEG0+16*KiB-MACHSIZE), R10	/* m */
   18.61-	MOVW	$(MACHADDR), R10	/* m */
   18.62-	MOVW	8(R10), R9		/* up */
   18.63+	/* get R(MACH) for this cpu */
   18.64+	CPUID(R1)
   18.65+	SLL	$2, R1			/* convert to word index */
   18.66+	MOVW	$machaddr(SB), R2
   18.67+	ADD	R1, R2
   18.68+	MOVW	(R2), R(MACH)		/* m = machaddr[cpuid] */
   18.69+	CMP	$0, R(MACH)
   18.70+	MOVW.EQ	$MACHADDR, R(MACH)		/* paranoia: use MACHADDR if 0 */
   18.71+
   18.72+	MOVW	8(R(MACH)), R(USER)		/* up */
   18.73 
   18.74 	MOVW	R13, R0			/* first arg is pointer to ureg */
   18.75 	SUB	$(4*2), R13		/* space for argument+link (for debugger) */
   18.76@@ -158,14 +183,24 @@ TEXT _virq(SB), 1, $-4			/* IRQ */
   18.77 	RFE				/* MOVM.IA.S.W (R13), [R15] */
   18.78 
   18.79 TEXT _vfiq(SB), 1, $-4			/* FIQ */
   18.80+	CLREX
   18.81 	MOVW	$PsrMfiq, R8		/* trap type */
   18.82 	MOVW	SPSR, R9		/* interrupted psr */
   18.83 	MOVW	R14, R10		/* interrupted pc */
   18.84 	MOVM.DB.W [R8-R10], (R13)	/* save in ureg */
   18.85-	MOVM.DB.W.S [R0-R14], (R13)	/* save interrupted regs */
   18.86+	MOVM.DB.S [R0-R14], (R13)	/* save interrupted regs */
   18.87+	SUB	$(15*4), R13
   18.88 	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
   18.89-	MOVW	$(MACHADDR), R10	/* m */
   18.90-	MOVW	8(R10), R9		/* up */
   18.91+	/* get R(MACH) for this cpu */
   18.92+	CPUID(R1)
   18.93+	SLL	$2, R1			/* convert to word index */
   18.94+	MOVW	$machaddr(SB), R2
   18.95+	ADD	R1, R2
   18.96+	MOVW	(R2), R(MACH)		/* m = machaddr[cpuid] */
   18.97+	CMP	$0, R(MACH)
   18.98+	MOVW.EQ	$MACHADDR, R(MACH)		/* paranoia: use MACHADDR if 0 */
   18.99+
  18.100+	MOVW	8(R(MACH)), R(USER)		/* up */
  18.101 	MOVW	R13, R0			/* first arg is pointer to ureg */
  18.102 	SUB	$(4*2), R13		/* space for argument+link (for debugger) */
  18.103 
  18.104@@ -187,6 +222,7 @@ TEXT setr13(SB), 1, $-4
  18.105 
  18.106 	MOVW	CPSR, R2
  18.107 	BIC	$PsrMask, R2, R3
  18.108+	ORR	$(PsrDirq|PsrDfiq), R3
  18.109 	ORR	R0, R3
  18.110 	MOVW	R3, CPSR		/* switch to new mode */
  18.111 
    19.1--- a/sys/src/9/bcm/main.c
    19.2+++ b/sys/src/9/bcm/main.c
    19.3@@ -4,6 +4,7 @@
    19.4 #include "mem.h"
    19.5 #include "dat.h"
    19.6 #include "fns.h"
    19.7+#include "io.h"
    19.8 
    19.9 #include "init.h"
   19.10 #include <pool.h>
   19.11@@ -191,14 +192,27 @@ ataginit(Atag *a)
   19.12 void
   19.13 machinit(void)
   19.14 {
   19.15+	Mach *m0;
   19.16+
   19.17+	m->ticks = 1;			/* initial values for the current cpu's Mach */
   19.18+	m->perf.period = 1;
   19.19+	m0 = MACHP(0);			/* cpu 0's Mach */
   19.20+	if (m->machno != 0) {
   19.21+		/* synchronise with cpu 0 */
   19.22+		m->ticks = m0->ticks;	/* overrides the 1 set above with cpu 0's current tick count */
   19.23+	}
   19.24+}
   19.25+
   19.26+void
   19.27+mach0init(void)
   19.28+{
   19.29+	m->mmul1 = (PTE*)L1;
   19.30 	m->machno = 0;
   19.31 	machaddr[m->machno] = m;
   19.32 
   19.33 	m->ticks = 1;
   19.34 	m->perf.period = 1;
   19.35 
   19.36-	conf.nmach = 1;
   19.37-
   19.38 	active.machs[0] = 1;
   19.39 	active.exiting = 0;
   19.40 
   19.41@@ -206,6 +220,32 @@ machinit(void)
   19.42 }
   19.43 
   19.44 static void
   19.45+launchinit(void)	/* set up a Mach and L1 table for each cpu after cpu 0, then start them */
   19.46+{
   19.47+	int mach;
   19.48+	Mach *mm;
   19.49+	PTE *l1;
   19.50+
   19.51+	for(mach = 1; mach < conf.nmach; mach++){	/* cpu 0 is already set up */
   19.52+		machaddr[mach] = mm = mallocalign(MACHSIZE, MACHSIZE, 0, 0);
   19.53+		l1 = mallocalign(L1SIZE, L1SIZE, 0, 0);	/* aligned to its own size */
   19.54+		if(mm == nil || l1 == nil)
   19.55+			panic("launchinit");
   19.56+		memset(mm, 0, MACHSIZE);
   19.57+		mm->machno = mach;
   19.58+
   19.59+		memmove(l1, m->mmul1, L1SIZE);  /* clone cpu0's l1 table */
   19.60+		cachedwbse(l1, L1SIZE);		/* write the new table back from the data cache */
   19.61+		mm->mmul1 = l1;
   19.62+		cachedwbse(mm, MACHSIZE);	/* likewise the new Mach */
   19.63+
   19.64+	}
   19.65+	cachedwbse(machaddr, sizeof machaddr);	/* and the table of Mach pointers */
   19.66+	if((mach = startcpus(conf.nmach)) < conf.nmach)	/* startcpus returns how many cpus are running */
   19.67+			print("only %d cpu%s started\n", mach, mach == 1? "" : "s");
   19.68+}
   19.69+
   19.70+static void
   19.71 optionsinit(char* s)
   19.72 {
   19.73 	strecpy(oargb, oargb+sizeof(oargb), s);
   19.74@@ -216,29 +256,14 @@ optionsinit(char* s)
   19.75 }
   19.76 
   19.77 void
   19.78-gpiomeminit(void)
   19.79-{
   19.80-	Physseg seg;
   19.81-	memset(&seg, 0, sizeof seg);
   19.82-	seg.attr = SG_PHYSICAL;
   19.83-	seg.name = "gpio";
   19.84-	seg.pa = (VIRTIO+0x200000);
   19.85-	seg.size = BY2PG;
   19.86-	addphysseg(&seg);
   19.87-}
   19.88-
   19.89-
   19.90-void
   19.91 main(void)
   19.92 {
   19.93 	extern char edata[], end[];
   19.94-	uint rev;
   19.95+	uint fw, board;
   19.96 
   19.97-	okay(1);
   19.98 	m = (Mach*)MACHADDR;
   19.99 	memset(edata, 0, end - edata);	/* clear bss */
  19.100-	machinit();
  19.101-	mmuinit1();
  19.102+	mach0init();
  19.103 
  19.104 	optionsinit("/boot/boot boot");
  19.105 	quotefmtinstall();
  19.106@@ -250,20 +275,24 @@ main(void)
  19.107 	screeninit();
  19.108 
  19.109 	print("\nPlan 9 from Bell Labs\n");
  19.110-	rev = getfirmware();
  19.111-	print("firmware: rev %d\n", rev);
  19.112-	if(rev < Minfirmrev){
  19.113-		print("Sorry, firmware (start.elf) must be at least rev %d (%s)\n",
  19.114-			Minfirmrev, Minfirmdate);
  19.115+	board = getboardrev();
  19.116+	fw = getfirmware();
  19.117+	print("board rev: %#ux firmware rev: %d\n", board, fw);
  19.118+	if(fw < Minfirmrev){
  19.119+		print("Sorry, firmware (start*.elf) must be at least rev %d"
  19.120+		      " or newer than %s\n", Minfirmrev, Minfirmdate);
  19.121 		for(;;)
  19.122 			;
  19.123 	}
  19.124+	/* set clock rate to arm_freq from config.txt (default pi1:700Mhz pi2:900MHz) */
  19.125+	setclkrate(ClkArm, 0);
  19.126 	trapinit();
  19.127 	clockinit();
  19.128 	printinit();
  19.129 	timersinit();
  19.130 	cpuidprint();
  19.131 	archreset();
  19.132+	vgpinit();
  19.133 
  19.134 	procinit0();
  19.135 	initseg();
  19.136@@ -271,7 +300,8 @@ main(void)
  19.137 	chandevreset();			/* most devices are discovered here */
  19.138 	pageinit();
  19.139 	userinit();
  19.140-	gpiomeminit();
  19.141+	launchinit();
  19.142+	mmuinit1();
  19.143 	schedinit();
  19.144 	assert(0);			/* shouldn't have returned */
  19.145 }
  19.146@@ -484,8 +514,7 @@ confinit(void)
  19.147 	conf.upages = conf.npage - kpages;
  19.148 	conf.ialloc = (kpages/2)*BY2PG;
  19.149 
  19.150-	/* only one processor */
  19.151-	conf.nmach = 1;
  19.152+	conf.nmach = getncpus();
  19.153 
  19.154 	/* set up other configuration parameters */
  19.155 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
  19.156@@ -497,7 +526,7 @@ confinit(void)
  19.157 	conf.nswppo = 4096;
  19.158 	conf.nimage = 200;
  19.159 
  19.160-	conf.copymode = 0;		/* copy on write */
  19.161+	conf.copymode = conf.nmach > 1;
  19.162 
  19.163 	/*
  19.164 	 * Guess how much is taken by the large permanent
  19.165@@ -529,6 +558,14 @@ exit(int)
  19.166 {
  19.167 	cpushutdown();
  19.168 	splfhi();
  19.169+	if(m->machno != 0){
  19.170+		void (*f)(ulong, ulong, ulong) = (void*)REBOOTADDR;
  19.171+		intrsoff();
  19.172+		intrcpushutdown();
  19.173+		cacheuwbinv();
  19.174+		(*f)(0, 0, 0);
  19.175+		for(;;);
  19.176+	}
  19.177 	archreboot();
  19.178 }
  19.179 
  19.180@@ -536,11 +573,9 @@ exit(int)
  19.181  * stub for ../omap/devether.c
  19.182  */
  19.183 int
  19.184-isaconfig(char *class, int ctlrno, ISAConf *isa)
  19.185+isaconfig(char *, int, ISAConf *)
  19.186 {
  19.187-	USED(ctlrno);
  19.188-	USED(isa);
  19.189-	return strcmp(class, "ether") == 0;
  19.190+	return 0;
  19.191 }
  19.192 
  19.193 /*
  19.194@@ -553,37 +588,39 @@ reboot(void *entry, void *code, ulong si
  19.195 	void (*f)(ulong, ulong, ulong);
  19.196 
  19.197 	writeconf();
  19.198+	if (m->machno != 0) {
  19.199+		procwired(up, 0);
  19.200+		sched();
  19.201+	}
  19.202+
  19.203+	/* setup reboot trampoline function */
  19.204+	f = (void*)REBOOTADDR;
  19.205+	memmove(f, rebootcode, sizeof(rebootcode));
  19.206+	cachedwbse(f, sizeof(rebootcode));
  19.207+
  19.208 	cpushutdown();
  19.209+	delay(500);
  19.210+
  19.211+	splfhi();
  19.212 
  19.213 	/* turn off buffered serial console */
  19.214 	serialoq = nil;
  19.215-	kprintoq = nil;
  19.216-	screenputs = nil;
  19.217 
  19.218 	/* shutdown devices */
  19.219 	chandevshutdown();
  19.220 
  19.221 	/* stop the clock (and watchdog if any) */
  19.222 	clockshutdown();
  19.223-
  19.224-	splfhi();
  19.225 	intrsoff();
  19.226+	intrcpushutdown();
  19.227 
  19.228-	/* setup reboot trampoline function */
  19.229-	f = (void*)REBOOTADDR;
  19.230-	memmove(f, rebootcode, sizeof(rebootcode));
  19.231 	cacheuwbinv();
  19.232+	l2cacheuwbinv();
  19.233 
  19.234 	/* off we go - never to return */
  19.235 	(*f)(PADDR(entry), PADDR(code), size);
  19.236 }
  19.237 
  19.238-int
  19.239-cmpswap(long *addr, long old, long new)
  19.240-{
  19.241-	return cas32(addr, old, new);
  19.242-}
  19.243-
  19.244 void
  19.245 setupwatchpts(Proc *, Watchpt *, int n)
  19.246 {
    20.1--- a/sys/src/9/bcm/mem.h
    20.2+++ b/sys/src/9/bcm/mem.h
    20.3@@ -5,27 +5,31 @@
    20.4 #define MiB		1048576u		/* Mebi 0x0000000000100000 */
    20.5 #define GiB		1073741824u		/* Gibi 000000000040000000 */
    20.6 
    20.7-#define HOWMANY(x, y)	(((x)+((y)-1))/(y))
    20.8-#define ROUNDUP(x, y)	(HOWMANY((x), (y))*(y))	/* ceiling */
    20.9-#define ROUNDDN(x, y)	(((x)/(y))*(y))		/* floor */
   20.10-#define MIN(a, b)	((a) < (b)? (a): (b))
   20.11-#define MAX(a, b)	((a) > (b)? (a): (b))
   20.12-
   20.13 /*
   20.14  * Sizes
   20.15  */
   20.16 #define	BY2PG		(4*KiB)			/* bytes per page */
   20.17 #define	PGSHIFT		12			/* log(BY2PG) */
   20.18+#define	HOWMANY(x,y)	(((x)+((y)-1))/(y))
   20.19+#define	ROUNDUP(x,y)	(HOWMANY((x),(y))*(y))
   20.20 #define	PGROUND(s)	ROUNDUP(s, BY2PG)
   20.21 #define	ROUND(s, sz)	(((s)+(sz-1))&~(sz-1))
   20.22 
   20.23-#define	MAXMACH		1			/* max # cpus system can run */
   20.24+#define	MAXMACH		4			/* max # cpus system can run */
   20.25 #define	MACHSIZE	BY2PG
   20.26+#define L1SIZE		(4 * BY2PG)
   20.27 
   20.28 #define KSTKSIZE	(8*KiB)
   20.29 #define STACKALIGN(sp)	((sp) & ~3)		/* bug: assure with alloc */
   20.30 
   20.31 /*
   20.32+ * Magic registers
   20.33+ */
   20.34+
   20.35+#define	USER		9		/* R9 is up-> */
   20.36+#define	MACH		10		/* R10 is m-> */
   20.37+
   20.38+/*
   20.39  * Address spaces.
   20.40  * KTZERO is used by kprof and dumpstack (if any).
   20.41  *
   20.42@@ -36,8 +40,8 @@
   20.43  */
   20.44 
   20.45 #define	KSEG0		0x80000000		/* kernel segment */
   20.46-/* mask to check segment; good for 512MB dram */
   20.47-#define	KSEGM		0xE0000000
   20.48+/* mask to check segment; good for 1GB dram */
   20.49+#define	KSEGM		0xC0000000
   20.50 #define	KZERO		KSEG0			/* kernel address space */
   20.51 #define CONFADDR	(KZERO+0x100)		/* unparsed plan9.ini */
   20.52 #define	MACHADDR	(KZERO+0x2000)		/* Mach structure */
   20.53@@ -47,20 +51,24 @@
   20.54 #define	L1		(KZERO+0x4000)		/* tt ptes: 16KiB aligned */
   20.55 #define	KTZERO		(KZERO+0x8000)		/* kernel text start */
   20.56 #define VIRTIO		0x7E000000		/* i/o registers */
   20.57-#define	FRAMEBUFFER	0xA0000000		/* video framebuffer */
   20.58+#define	ARMLOCAL	(VIRTIO+IOSIZE)		/* armv7 only */
   20.59+#define	VGPIO		(ARMLOCAL+MiB)		/* virtual gpio for pi3 ACT LED */
   20.60+#define	FRAMEBUFFER	0xC0000000		/* video framebuffer */
   20.61 
   20.62 #define	UZERO		0			/* user segment */
   20.63 #define	UTZERO		(UZERO+BY2PG)		/* user text start */
   20.64-#define	USTKTOP		0x20000000		/* user segment end +1 */
   20.65+#define	USTKTOP		0x40000000		/* user segment end +1 */
   20.66 #define	USTKSIZE	(8*1024*1024)		/* user stack size */
   20.67+#define	TSTKTOP		(USTKTOP-USTKSIZE)	/* sysexec temporary stack */
   20.68+#define	TSTKSIZ	 	256
   20.69 
   20.70 /* address at which to copy and execute rebootcode */
   20.71-#define	REBOOTADDR	(KZERO+0x3400)
   20.72+#define	REBOOTADDR	(KZERO+0x1800)
   20.73 
   20.74 /*
   20.75  * Legacy...
   20.76  */
   20.77-#define BLOCKALIGN	32			/* only used in allocb.c */
   20.78+#define BLOCKALIGN	64			/* only used in allocb.c */
   20.79 #define KSTACK		KSTKSIZE
   20.80 
   20.81 /*
   20.82@@ -71,7 +79,6 @@
   20.83 #define BY2WD		4
   20.84 #define BY2V		8			/* only used in xalloc.c */
   20.85 
   20.86-#define CACHELINESZ	32
   20.87 #define	PTEMAPMEM	(1024*1024)
   20.88 #define	PTEPERTAB	(PTEMAPMEM/BY2PG)
   20.89 #define	SEGMAPSIZE	1984
   20.90@@ -93,8 +100,7 @@
   20.91  *	BUS  addresses as seen from the videocore gpu.
   20.92  */
   20.93 #define	PHYSDRAM	0
   20.94-#define BUSDRAM		0x40000000
   20.95-#define	DRAMSIZE	(512*MiB)
   20.96-#define	PHYSIO		0x20000000
   20.97-#define	BUSIO		0x7E000000
   20.98 #define	IOSIZE		(16*MiB)
   20.99+
  20.100+#define MIN(a, b)	((a) < (b)? (a): (b))
  20.101+#define MAX(a, b)	((a) > (b)? (a): (b))
    21.1--- a/sys/src/9/bcm/mkfile
    21.2+++ b/sys/src/9/bcm/mkfile
    21.3@@ -1,7 +1,7 @@
    21.4-CONF=pif
    21.5-CONFLIST=pif picpuf
    21.6+CONF=pi2
    21.7+CONFLIST=pi pi2
    21.8+CRAPLIST=pif picpuf
    21.9 EXTRACOPIES=
   21.10-#EXTRACOPIES=''piestand lookout boundary # bovril
   21.11 
   21.12 loadaddr=0x80008000
   21.13 
   21.14@@ -104,12 +104,13 @@ install:V: /$objtype/$p$CONF
   21.15 arch.$O clock.$O fpiarm.$O main.$O mmu.$O screen.$O syscall.$O trap.$O: \
   21.16 	/$objtype/include/ureg.h
   21.17 
   21.18-archbcm.$O: ../port/flashif.h
   21.19 fpi.$O fpiarm.$O fpimem.$O: fpi.h
   21.20-l.$O lexception.$O lproc.$O mmu.$O: arm.s mem.h
   21.21+l.$O lexception.$O lproc.$O mmu.$O: mem.h
   21.22+l.$O lexception.$O lproc.$O armv6.$O armv7.$O: arm.s
   21.23+armv7.$O: cache.v7.s
   21.24 main.$O: errstr.h init.h reboot.h
   21.25 devmouse.$O mouse.$O screen.$O: screen.h
   21.26-devusb.$O: ../port/usb.h
   21.27+usbdwc.$O: dwcotg.h ../port/usb.h
   21.28 
   21.29 init.h:D:	../port/initcode.c init9.s
   21.30 	$CC ../port/initcode.c
   21.31@@ -123,11 +124,12 @@ init.h:D:	../port/initcode.c init9.s
   21.32 reboot.h:D:	rebootcode.s arm.s arm.h mem.h
   21.33 	$AS rebootcode.s
   21.34 	# -lc is only for memmove.  -T arg is PADDR(REBOOTADDR)
   21.35-	$LD -l -s -T0x3400 -R4 -o reboot.out rebootcode.$O -lc
   21.36+	$LD -l -s -T0x1800 -R4 -o reboot.out rebootcode.$O -lc
   21.37 	{echo 'uchar rebootcode[]={'
   21.38 	 xd -1x reboot.out |
   21.39 		sed -e '1,2d' -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
   21.40 	 echo '};'} > reboot.h
   21.41+
   21.42 errstr.h:D:	../port/mkerrstr ../port/error.h
   21.43 	rc ../port/mkerrstr > errstr.h
   21.44 
    22.1--- a/sys/src/9/bcm/mmu.c
    22.2+++ b/sys/src/9/bcm/mmu.c
    22.3@@ -9,66 +9,75 @@
    22.4 #define FEXT(d, o, w)	(((d)>>(o)) & ((1<<(w))-1))
    22.5 #define L1X(va)		FEXT((va), 20, 12)
    22.6 #define L2X(va)		FEXT((va), 12, 8)
    22.7+#define L2AP(ap)	l2ap(ap)
    22.8+#define L1ptedramattrs	soc.l1ptedramattrs
    22.9+#define L2ptedramattrs	soc.l2ptedramattrs
   22.10 
   22.11 enum {
   22.12 	L1lo		= UZERO/MiB,		/* L1X(UZERO)? */
   22.13 	L1hi		= (USTKTOP+MiB-1)/MiB,	/* L1X(USTKTOP+MiB-1)? */
   22.14+	L2size		= 256*sizeof(PTE),
   22.15 };
   22.16 
   22.17+/*
   22.18+ * Set up initial PTEs for cpu0 (called with mmu off)
   22.19+ */
   22.20 void
   22.21-mmuinit(void)
   22.22+mmuinit(void *a)
   22.23 {
   22.24 	PTE *l1, *l2;
   22.25 	uintptr pa, va;
   22.26 
   22.27-	l1 = (PTE*)PADDR(L1);
   22.28+	l1 = (PTE*)a;
   22.29 	l2 = (PTE*)PADDR(L2);
   22.30 
   22.31 	/*
   22.32 	 * map all of ram at KZERO
   22.33 	 */
   22.34 	va = KZERO;
   22.35-	for(pa = PHYSDRAM; pa < PHYSDRAM+DRAMSIZE; pa += MiB){
   22.36-		l1[L1X(va)] = pa|Dom0|L1AP(Krw)|Section|Cached|Buffered;
   22.37+	for(pa = PHYSDRAM; pa < PHYSDRAM+soc.dramsize; pa += MiB){
   22.38+		l1[L1X(va)] = pa|Dom0|L1AP(Krw)|Section|L1ptedramattrs;
   22.39 		va += MiB;
   22.40 	}
   22.41 
   22.42 	/*
   22.43 	 * identity map first MB of ram so mmu can be enabled
   22.44 	 */
   22.45-	l1[L1X(PHYSDRAM)] = PHYSDRAM|Dom0|L1AP(Krw)|Section|Cached|Buffered;
   22.46+	l1[L1X(PHYSDRAM)] = PHYSDRAM|Dom0|L1AP(Krw)|Section|L1ptedramattrs;
   22.47 
   22.48 	/*
   22.49 	 * map i/o registers 
   22.50 	 */
   22.51 	va = VIRTIO;
   22.52-	for(pa = PHYSIO; pa < PHYSIO+IOSIZE; pa += MiB){
   22.53+	for(pa = soc.physio; pa < soc.physio+IOSIZE; pa += MiB){
   22.54 		l1[L1X(va)] = pa|Dom0|L1AP(Krw)|Section;
   22.55 		va += MiB;
   22.56 	}
   22.57-
   22.58+	pa = soc.armlocal;
   22.59+	if(pa)
   22.60+		l1[L1X(va)] = pa|Dom0|L1AP(Krw)|Section;
   22.61+	
   22.62 	/*
   22.63-	 * double map exception vectors at top of virtual memory
   22.64+	 * double map exception vectors near top of virtual memory
   22.65 	 */
   22.66 	va = HVECTORS;
   22.67 	l1[L1X(va)] = (uintptr)l2|Dom0|Coarse;
   22.68-	l2[L2X(va)] = PHYSDRAM|L2AP(Krw)|Small;
   22.69+	l2[L2X(va)] = PHYSDRAM|L2AP(Krw)|Small|L2ptedramattrs;
   22.70 }
   22.71 
   22.72 void
   22.73-mmuinit1(void)
   22.74+mmuinit1()
   22.75 {
   22.76 	PTE *l1;
   22.77 
   22.78-	l1 = (PTE*)L1;
   22.79-	m->mmul1 = l1;
   22.80+	l1 = m->mmul1;
   22.81 
   22.82 	/*
   22.83 	 * undo identity map of first MB of ram
   22.84 	 */
   22.85 	l1[L1X(PHYSDRAM)] = 0;
   22.86-	cachedwbse(&l1[L1X(PHYSDRAM)], sizeof(PTE));
   22.87-	mmuinvalidate();
   22.88+	cachedwbtlb(&l1[L1X(PHYSDRAM)], sizeof(PTE));
   22.89+	mmuinvalidateaddr(PHYSDRAM);
   22.90 }
   22.91 
   22.92 static void
   22.93@@ -81,10 +90,11 @@ mmul2empty(Proc* proc, int clear)
   22.94 	l2 = &proc->mmul2;
   22.95 	for(page = *l2; page != nil; page = page->next){
   22.96 		if(clear)
   22.97-			memset(UINT2PTR(page->va), 0, BY2PG);
   22.98+			memset(UINT2PTR(page->va), 0, L2size);
   22.99 		l1[page->daddr] = Fault;
  22.100 		l2 = &page->next;
  22.101 	}
  22.102+	coherence();
  22.103 	*l2 = proc->mmul2cache;
  22.104 	proc->mmul2cache = proc->mmul2;
  22.105 	proc->mmul2 = nil;
  22.106@@ -93,29 +103,24 @@ mmul2empty(Proc* proc, int clear)
  22.107 static void
  22.108 mmul1empty(void)
  22.109 {
  22.110-#ifdef notdef
  22.111-/* there's a bug in here */
  22.112 	PTE *l1;
  22.113 
  22.114 	/* clean out any user mappings still in l1 */
  22.115-	if(m->mmul1lo > L1lo){
  22.116+	if(m->mmul1lo > 0){
  22.117 		if(m->mmul1lo == 1)
  22.118 			m->mmul1[L1lo] = Fault;
  22.119 		else
  22.120 			memset(&m->mmul1[L1lo], 0, m->mmul1lo*sizeof(PTE));
  22.121-		m->mmul1lo = L1lo;
  22.122+		m->mmul1lo = 0;
  22.123 	}
  22.124-	if(m->mmul1hi < L1hi){
  22.125-		l1 = &m->mmul1[m->mmul1hi];
  22.126-		if((L1hi - m->mmul1hi) == 1)
  22.127+	if(m->mmul1hi > 0){
  22.128+		l1 = &m->mmul1[L1hi - m->mmul1hi];
  22.129+		if(m->mmul1hi == 1)
  22.130 			*l1 = Fault;
  22.131 		else
  22.132-			memset(l1, 0, (L1hi - m->mmul1hi)*sizeof(PTE));
  22.133-		m->mmul1hi = L1hi;
  22.134+			memset(l1, 0, m->mmul1hi*sizeof(PTE));
  22.135+		m->mmul1hi = 0;
  22.136 	}
  22.137-#else
  22.138-	memset(&m->mmul1[L1lo], 0, (L1hi - L1lo)*sizeof(PTE));
  22.139-#endif /* notdef */
  22.140 }
  22.141 
  22.142 void
  22.143@@ -125,15 +130,7 @@ mmuswitch(Proc* proc)
  22.144 	PTE *l1;
  22.145 	Page *page;
  22.146 
  22.147-	/* do kprocs get here and if so, do they need to? */
  22.148-	if(m->mmupid == proc->pid && !proc->newtlb)
  22.149-		return;
  22.150-	m->mmupid = proc->pid;
  22.151-
  22.152-	/* write back dirty and invalidate l1 caches */
  22.153-	cacheuwbinv();
  22.154-
  22.155-	if(proc->newtlb){
  22.156+	if(proc != nil && proc->newtlb){
  22.157 		mmul2empty(proc, 1);
  22.158 		proc->newtlb = 0;
  22.159 	}
  22.160@@ -142,19 +139,21 @@ mmuswitch(Proc* proc)
  22.161 
  22.162 	/* move in new map */
  22.163 	l1 = m->mmul1;
  22.164+	if(proc != nil)
  22.165 	for(page = proc->mmul2; page != nil; page = page->next){
  22.166 		x = page->daddr;
  22.167 		l1[x] = PPN(page->pa)|Dom0|Coarse;
  22.168-		/* know here that L1lo < x < L1hi */
  22.169-		if(x+1 - m->mmul1lo < m->mmul1hi - x)
  22.170-			m->mmul1lo = x+1;
  22.171-		else
  22.172-			m->mmul1hi = x;
  22.173+		if(x >= L1lo + m->mmul1lo && x < L1hi - m->mmul1hi){
  22.174+			if(x+1 - L1lo < L1hi - x)
  22.175+				m->mmul1lo = x+1 - L1lo;
  22.176+			else
  22.177+				m->mmul1hi = L1hi - x;
  22.178+		}
  22.179 	}
  22.180 
  22.181 	/* make sure map is in memory */
  22.182 	/* could be smarter about how much? */
  22.183-	cachedwbse(&l1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
  22.184+	cachedwbtlb(&l1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
  22.185 
  22.186 	/* lose any possible stale tlb entries */
  22.187 	mmuinvalidate();
  22.188@@ -176,9 +175,6 @@ mmurelease(Proc* proc)
  22.189 {
  22.190 	Page *page, *next;
  22.191 
  22.192-	/* write back dirty and invalidate l1 caches */
  22.193-	cacheuwbinv();
  22.194-
  22.195 	mmul2empty(proc, 0);
  22.196 	for(page = proc->mmul2cache; page != nil; page = next){
  22.197 		next = page->next;
  22.198@@ -194,7 +190,7 @@ mmurelease(Proc* proc)
  22.199 
  22.200 	/* make sure map is in memory */
  22.201 	/* could be smarter about how much? */
  22.202-	cachedwbse(&m->mmul1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
  22.203+	cachedwbtlb(&m->mmul1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
  22.204 
  22.205 	/* lose any possible stale tlb entries */
  22.206 	mmuinvalidate();
  22.207@@ -203,39 +199,45 @@ mmurelease(Proc* proc)
  22.208 void
  22.209 putmmu(uintptr va, uintptr pa, Page* page)
  22.210 {
  22.211-	int x;
  22.212+	int x, s;
  22.213 	Page *pg;
  22.214 	PTE *l1, *pte;
  22.215 
  22.216+	/*
  22.217+	 * disable interrupts to prevent flushmmu (called from hzclock)
  22.218+	 * from clearing page tables while we are setting them
  22.219+	 */
  22.220+	s = splhi();
  22.221 	x = L1X(va);
  22.222 	l1 = &m->mmul1[x];
  22.223 	if(*l1 == Fault){
  22.224-		/* wasteful - l2 pages only have 256 entries - fix */
  22.225+		/* l2 pages only have 256 entries - wastes 3K per 1M of address space */
  22.226 		if(up->mmul2cache == nil){
  22.227-			/* auxpg since we don't need much? memset if so */
  22.228+			spllo();
  22.229 			pg = newpage(1, 0, 0);
  22.230+			splhi();
  22.231+			/* if newpage slept, we might be on a different cpu */
  22.232+			l1 = &m->mmul1[x];
  22.233 			pg->va = VA(kmap(pg));
  22.234-		}
  22.235-		else{
  22.236+		}else{
  22.237 			pg = up->mmul2cache;
  22.238 			up->mmul2cache = pg->next;
  22.239-			memset(UINT2PTR(pg->va), 0, BY2PG);
  22.240 		}
  22.241 		pg->daddr = x;
  22.242 		pg->next = up->mmul2;
  22.243 		up->mmul2 = pg;
  22.244 
  22.245-		/* force l2 page to memory */
  22.246-		cachedwbse((void *)pg->va, BY2PG);
  22.247+		/* force l2 page to memory (armv6) */
  22.248+		cachedwbtlb((void *)pg->va, L2size);
  22.249 
  22.250 		*l1 = PPN(pg->pa)|Dom0|Coarse;
  22.251-		cachedwbse(l1, sizeof *l1);
  22.252+		cachedwbtlb(l1, sizeof *l1);
  22.253 
  22.254-		if(x >= m->mmul1lo && x < m->mmul1hi){
  22.255-			if(x+1 - m->mmul1lo < m->mmul1hi - x)
  22.256-				m->mmul1lo = x+1;
  22.257+		if(x >= L1lo + m->mmul1lo && x < L1hi - m->mmul1hi){
  22.258+			if(x+1 - L1lo < L1hi - x)
  22.259+				m->mmul1lo = x+1 - L1lo;
  22.260 			else
  22.261-				m->mmul1hi = x;
  22.262+				m->mmul1hi = L1hi - x;
  22.263 		}
  22.264 	}
  22.265 	pte = UINT2PTR(KADDR(PPN(*l1)));
  22.266@@ -247,29 +249,51 @@ putmmu(uintptr va, uintptr pa, Page* pag
  22.267 	 */
  22.268 	x = Small;
  22.269 	if(!(pa & PTEUNCACHED))
  22.270-		x |= Cached|Buffered;
  22.271+		x |= L2ptedramattrs;
  22.272 	if(pa & PTEWRITE)
  22.273 		x |= L2AP(Urw);
  22.274 	else
  22.275 		x |= L2AP(Uro);
  22.276 	pte[L2X(va)] = PPN(pa)|x;
  22.277-	cachedwbse(&pte[L2X(va)], sizeof pte[0]);
  22.278+	cachedwbtlb(&pte[L2X(va)], sizeof(PTE));
  22.279 
  22.280 	/* clear out the current entry */
  22.281 	mmuinvalidateaddr(PPN(va));
  22.282 
  22.283-	/*  write back dirty entries - we need this because the pio() in
  22.284-	 *  fault.c is writing via a different virt addr and won't clean
  22.285-	 *  its changes out of the dcache.  Page coloring doesn't work
  22.286-	 *  on this mmu because the virtual cache is set associative
  22.287-	 *  rather than direct mapped.
  22.288+	if((page->txtflush & (1<<m->machno)) != 0){
  22.289+		/* pio() sets PG_TXTFLUSH whenever a text pg has been written */
  22.290+		cachedwbse((void*)(page->pa|KZERO), BY2PG);
  22.291+		cacheiinvse((void*)page->va, BY2PG);
  22.292+		page->txtflush &= ~(1<<m->machno);
  22.293+	}
  22.294+	//checkmmu(va, PPN(pa));
  22.295+	splx(s);
  22.296+}
  22.297+
  22.298+void*
  22.299+mmuuncache(void* v, usize size)
  22.300+{
  22.301+	int x;
  22.302+	PTE *pte;
  22.303+	uintptr va;
  22.304+
  22.305+	/*
  22.306+	 * Simple helper for ucalloc().
  22.307+	 * Uncache a Section, must already be
  22.308+	 * valid in the MMU.
  22.309 	 */
  22.310-	cachedwbinv();
  22.311-	if(page->txtflush){
  22.312-		cacheiinv();
  22.313-		page->txtflush = 0;
  22.314-	}
  22.315-	checkmmu(va, PPN(pa));
  22.316+	va = PTR2UINT(v);
  22.317+	assert(!(va & (1*MiB-1)) && size == 1*MiB);
  22.318+
  22.319+	x = L1X(va);
  22.320+	pte = &m->mmul1[x];
  22.321+	if((*pte & (Fine|Section|Coarse)) != Section)
  22.322+		return nil;
  22.323+	*pte &= ~L1ptedramattrs;
  22.324+	mmuinvalidateaddr(va);
  22.325+	cachedwbinvse(pte, 4);
  22.326+
  22.327+	return v;
  22.328 }
  22.329 
  22.330 /*
  22.331@@ -304,15 +328,31 @@ mmukmap(uintptr va, uintptr pa, usize si
  22.332 		*pte++ = (pa+n)|Dom0|L1AP(Krw)|Section;
  22.333 		mmuinvalidateaddr(va+n);
  22.334 	}
  22.335-	cachedwbse(pte0, (uintptr)pte - (uintptr)pte0);
  22.336+	cachedwbtlb(pte0, (uintptr)pte - (uintptr)pte0);
  22.337 	return va + o;
  22.338 }
  22.339 
  22.340-
  22.341 void
  22.342 checkmmu(uintptr va, uintptr pa)
  22.343 {
  22.344-	USED(va);
  22.345-	USED(pa);
  22.346+	int x;
  22.347+	PTE *l1, *pte;
  22.348+
  22.349+	x = L1X(va);
  22.350+	l1 = &m->mmul1[x];
  22.351+	if(*l1 == Fault){
  22.352+		iprint("checkmmu cpu%d va=%lux l1 %p=%ux\n", m->machno, va, l1, *l1);
  22.353+		return;
  22.354+	}
  22.355+	pte = KADDR(PPN(*l1));
  22.356+	pte += L2X(va);
  22.357+	if(pa == ~0 || (pa != 0 && PPN(*pte) != pa))
  22.358+		iprint("checkmmu va=%lux pa=%lux l1 %p=%ux pte %p=%ux\n", va, pa, l1, *l1, pte, *pte);
  22.359 }
  22.360 
  22.361+void
  22.362+kunmap(KMap *k)
  22.363+{
  22.364+	USED(k);
  22.365+	coherence();
  22.366+}
    23.1new file mode 100644
    23.2--- /dev/null
    23.3+++ b/sys/src/9/bcm/pi
    23.4@@ -0,0 +1,55 @@
    23.5+dev
    23.6+	root
    23.7+	cons
    23.8+	swap
    23.9+	env
   23.10+	pipe
   23.11+	proc
   23.12+	mnt
   23.13+	srv
   23.14+	shr
   23.15+	swap
   23.16+	dup
   23.17+	arch
   23.18+	ssl
   23.19+	tls
   23.20+	cap
   23.21+	fs
   23.22+	ip		arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno
   23.23+	draw	screen swcursor
   23.24+	mouse	mouse
   23.25+	uart	gpio
   23.26+	gpio	gpio
   23.27+	sd
   23.28+	usb
   23.29+
   23.30+link
   23.31+	loopbackmedium
   23.32+	ethermedium
   23.33+	archbcm
   23.34+	usbdwc
   23.35+
   23.36+ip
   23.37+	tcp
   23.38+	udp
   23.39+	ipifc
   23.40+	icmp
   23.41+	icmp6
   23.42+	ipmux
   23.43+
   23.44+misc
   23.45+	armv6
   23.46+	uartmini
   23.47+#	sdmmc	emmc
   23.48+	dma
   23.49+	vcore
   23.50+	vfp3	coproc
   23.51+
   23.52+port
   23.53+	int cpuserver = 0;
   23.54+
   23.55+bootdir
   23.56+	/$objtype/bin/paqfs
   23.57+	/$objtype/bin/auth/factotum
   23.58+	bootfs.paq
   23.59+	boot
    24.1new file mode 100644
    24.2--- /dev/null
    24.3+++ b/sys/src/9/bcm/pi2
    24.4@@ -0,0 +1,55 @@
    24.5+dev
    24.6+	root
    24.7+	cons
    24.8+	swap
    24.9+	env
   24.10+	pipe
   24.11+	proc
   24.12+	mnt
   24.13+	srv
   24.14+	shr
   24.15+	swap
   24.16+	dup
   24.17+	arch
   24.18+	ssl
   24.19+	tls
   24.20+	cap
   24.21+	fs
   24.22+	ip		arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno
   24.23+	draw	screen swcursor
   24.24+	mouse	mouse
   24.25+	uart	gpio
   24.26+	gpio	gpio
   24.27+	sd
   24.28+	usb
   24.29+
   24.30+link
   24.31+	loopbackmedium
   24.32+	ethermedium
   24.33+	archbcm2
   24.34+	usbdwc
   24.35+
   24.36+ip
   24.37+	tcp
   24.38+	udp
   24.39+	ipifc
   24.40+	icmp
   24.41+	icmp6
   24.42+	ipmux
   24.43+
   24.44+misc
   24.45+	armv7
   24.46+	uartmini
   24.47+	sdmmc	emmc
   24.48+	dma
   24.49+	vcore
   24.50+	vfp3	coproc
   24.51+
   24.52+port
   24.53+	int cpuserver = 0;
   24.54+
   24.55+bootdir
   24.56+	/$objtype/bin/paqfs
   24.57+	/$objtype/bin/auth/factotum
   24.58+	bootfs.paq
   24.59+	boot
    25.1deleted file mode 100644
    25.2--- a/sys/src/9/bcm/pif
    25.3+++ /dev/null
    25.4@@ -1,56 +0,0 @@
    25.5-dev
    25.6-	root
    25.7-	cons
    25.8-	swap
    25.9-	env
   25.10-	pipe
   25.11-	proc
   25.12-	mnt
   25.13-	srv
   25.14-	shr
   25.15-	dup
   25.16-	arch
   25.17-	ssl
   25.18-	tls
   25.19-	cap
   25.20-	fs
   25.21-	ip		arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno
   25.22-	draw	screen swcursor
   25.23-	mouse	mouse
   25.24-	uart
   25.25-	gpio
   25.26-	sd
   25.27-	usb
   25.28-
   25.29-link
   25.30-	ethermedium
   25.31-	loopbackmedium
   25.32-	netdevmedium
   25.33-	archbcm
   25.34-	usbdwc
   25.35-
   25.36-ip
   25.37-	tcp
   25.38-	udp
   25.39-	ipifc
   25.40-	icmp
   25.41-	icmp6
   25.42-	ipmux
   25.43-	il
   25.44-
   25.45-misc
   25.46-	uartmini
   25.47-	sdmmc	emmc
   25.48-	dma
   25.49-	vcore
   25.50-	vfp3	coproc
   25.51-
   25.52-port
   25.53-	int cpuserver = 0;
   25.54-
   25.55-bootdir
   25.56-	/$objtype/bin/paqfs
   25.57-	/$objtype/bin/auth/factotum
   25.58-	bootfs.paq
   25.59-	boot
   25.60-
    26.1--- a/sys/src/9/bcm/rebootcode.s
    26.2+++ b/sys/src/9/bcm/rebootcode.s
    26.3@@ -1,8 +1,13 @@
    26.4 /*
    26.5- * armv6 reboot code
    26.6+ * armv6/armv7 reboot code
    26.7  */
    26.8 #include "arm.s"
    26.9 
   26.10+#define PTEDRAM		(Dom0|L1AP(Krw)|Section)
   26.11+
   26.12+#define WFI	WORD	$0xe320f003	/* wait for interrupt */
   26.13+#define WFE	WORD	$0xe320f002	/* wait for event */
   26.14+
   26.15 /*
   26.16  * Turn off MMU, then copy the new kernel to its correct location
   26.17  * in physical memory.  Then jump to the start of the kernel.
   26.18@@ -15,7 +20,7 @@ TEXT	main(SB), 1, $-4
   26.19 	/* copy in arguments before stack gets unmapped */
   26.20 	MOVW	R0, R8			/* entry point */
   26.21 	MOVW	p2+4(FP), R9		/* source */
   26.22-	MOVW	n+8(FP), R10		/* byte count */
   26.23+	MOVW	n+8(FP), R6		/* byte count */
   26.24 
   26.25 	/* SVC mode, interrupts disabled */
   26.26 	MOVW	$(PsrDirq|PsrDfiq|PsrMsvc), R1
   26.27@@ -29,6 +34,28 @@ TEXT	main(SB), 1, $-4
   26.28 	BIC	$CpCmmu, R1
   26.29 	MCR	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
   26.30 
   26.31+	/* continue with reboot only on cpu0 */
   26.32+	CPUID(R2)
   26.33+	BEQ	bootcpu
   26.34+
   26.35+	/* other cpus wait for inter processor interrupt from cpu0 */
   26.36+	/* turn icache back on */
   26.37+	MRC	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
   26.38+	ORR	$(CpCicache), R1
   26.39+	MCR	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
   26.40+	BARRIERS
   26.41+dowfi:
   26.42+	WFI
   26.43+	MOVW	$0x40000060, R1
   26.44+	ADD		R2<<2, R1
   26.45+	MOVW	0(R1), R0
   26.46+	AND		$0x10, R0
   26.47+	BEQ		dowfi
   26.48+	MOVW	$0x8000, R1
   26.49+	BL		(R1)
   26.50+	B		dowfi
   26.51+
   26.52+bootcpu:
   26.53 	/* set up a tiny stack for local vars and memmove args */
   26.54 	MOVW	R8, SP			/* stack top just before kernel dest */
   26.55 	SUB	$20, SP			/* allocate stack frame */
   26.56@@ -37,11 +64,12 @@ TEXT	main(SB), 1, $-4
   26.57 	MOVW	R8, 16(SP)		/* save dest (entry point) */
   26.58 	MOVW	R8, R0			/* first arg is dest */
   26.59 	MOVW	R9, 8(SP)		/* push src */
   26.60-	MOVW	R10, 12(SP)		/* push size */
   26.61+	MOVW	R6, 12(SP)		/* push size */
   26.62 	BL	memmove(SB)
   26.63 	MOVW	16(SP), R8		/* restore entry point */
   26.64 
   26.65 	/* jump to kernel physical entry point */
   26.66+	ORR	R8,R8
   26.67 	B	(R8)
   26.68 	B	0(PC)
   26.69 
   26.70@@ -51,43 +79,40 @@ TEXT	main(SB), 1, $-4
   26.71  * clobbers R0-R2, and returns with SP invalid.
   26.72  */
   26.73 TEXT cachesoff(SB), 1, $-4
   26.74+	MOVM.DB.W [R14,R1-R10], (R13)		/* save regs on stack */
   26.75 
   26.76-	/* write back and invalidate caches */
   26.77-	BARRIERS
   26.78-	MOVW	$0, R0
   26.79-	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall
   26.80-	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
   26.81-
   26.82-	/* turn caches off */
   26.83+	/* turn caches off, invalidate icache */
   26.84 	MRC	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
   26.85 	BIC	$(CpCdcache|CpCicache|CpCpredict), R1
   26.86 	MCR	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
   26.87+	MOVW	$0, R0
   26.88+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
   26.89 
   26.90 	/* invalidate stale TLBs before changing them */
   26.91 	BARRIERS
   26.92-	MOVW	$KZERO, R0			/* some valid virtual address */
   26.93+	MOVW	$0, R0
   26.94 	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
   26.95 	BARRIERS
   26.96 
   26.97-	/* from here on, R0 is base of physical memory */
   26.98-	MOVW	$PHYSDRAM, R0
   26.99-
  26.100 	/* redo double map of first MiB PHYSDRAM = KZERO */
  26.101-	MOVW	$(L1+L1X(PHYSDRAM)), R2		/* address of PHYSDRAM's PTE */
  26.102+	MOVW	12(R(MACH)), R2		/* m->mmul1 (virtual addr) */
  26.103 	MOVW	$PTEDRAM, R1			/* PTE bits */
  26.104-	ORR	R0, R1				/* dram base */
  26.105 	MOVW	R1, (R2)
  26.106+	DSB
  26.107+	MCR	CpSC, 0, R2, C(CpCACHE), C(CpCACHEwb), CpCACHEse
  26.108 
  26.109 	/* invalidate stale TLBs again */
  26.110 	BARRIERS
  26.111+	MOVW	$0, R0
  26.112 	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
  26.113 	BARRIERS
  26.114 
  26.115 	/* relocate SB and return address to PHYSDRAM addressing */
  26.116 	MOVW	$KSEGM, R1		/* clear segment bits */
  26.117 	BIC	R1, R12			/* adjust SB */
  26.118-	ORR	R0, R12
  26.119+	MOVM.IA.W (R13), [R14,R1-R10]		/* restore regs from stack */
  26.120+
  26.121+	MOVW	$KSEGM, R1		/* clear segment bits */
  26.122 	BIC	R1, R14			/* adjust return address */
  26.123-	ORR	R0, R14
  26.124 
  26.125 	RET
    27.1--- a/sys/src/9/bcm/trap.c
    27.2+++ b/sys/src/9/bcm/trap.c
    27.3@@ -13,6 +13,7 @@
    27.4 #include "arm.h"
    27.5 
    27.6 #define INTREGS		(VIRTIO+0xB200)
    27.7+#define	LOCALREGS	(VIRTIO+IOSIZE)
    27.8 
    27.9 typedef struct Intregs Intregs;
   27.10 typedef struct Vctl Vctl;
   27.11@@ -20,6 +21,10 @@ typedef struct Vctl Vctl;
   27.12 enum {
   27.13 	Nvec = 8,		/* # of vectors at start of lexception.s */
   27.14 	Fiqenable = 1<<7,
   27.15+
   27.16+	Localtimerint	= 0x40,
   27.17+	Localmboxint	= 0x50,
   27.18+	Localintpending	= 0x60,
   27.19 };
   27.20 
   27.21 /*
   27.22@@ -46,12 +51,14 @@ struct Intregs {
   27.23 struct Vctl {
   27.24 	Vctl	*next;
   27.25 	int	irq;
   27.26+	int	cpu;
   27.27 	u32int	*reg;
   27.28 	u32int	mask;
   27.29 	void	(*f)(Ureg*, void*);
   27.30 	void	*a;
   27.31 };
   27.32 
   27.33+static Lock vctllock;
   27.34 static Vctl *vctl, *vfiq;
   27.35 
   27.36 static char *trapnames[PsrMask+1] = {
   27.37@@ -75,14 +82,16 @@ trapinit(void)
   27.38 {
   27.39 	Vpage0 *vpage0;
   27.40 
   27.41-	/* disable everything */
   27.42-	intrsoff();
   27.43-
   27.44-	/* set up the exception vectors */
   27.45-	vpage0 = (Vpage0*)HVECTORS;
   27.46-	memmove(vpage0->vectors, vectors, sizeof(vpage0->vectors));
   27.47-	memmove(vpage0->vtable, vtable, sizeof(vpage0->vtable));
   27.48-	cacheuwbinv();
   27.49+	if (m->machno == 0) {
   27.50+		/* disable everything */
   27.51+		intrsoff();
   27.52+		/* set up the exception vectors */
   27.53+		vpage0 = (Vpage0*)HVECTORS;
   27.54+		memmove(vpage0->vectors, vectors, sizeof(vpage0->vectors));
   27.55+		memmove(vpage0->vtable, vtable, sizeof(vpage0->vtable));
   27.56+		cacheuwbinv();
   27.57+		l2cacheuwbinv();
   27.58+	}
   27.59 
   27.60 	/* set up the stacks for the interrupt modes */
   27.61 	setr13(PsrMfiq, (u32int*)(FIQSTKTOP));
   27.62@@ -95,6 +104,21 @@ trapinit(void)
   27.63 }
   27.64 
   27.65 void
   27.66+intrcpushutdown(void)
   27.67+{
   27.68+	u32int *enable;
   27.69+
   27.70+	if(soc.armlocal == 0)
   27.71+		return;
   27.72+	enable = (u32int*)(LOCALREGS + Localtimerint) + m->machno;
   27.73+	*enable = 0;
   27.74+	if(m->machno){
   27.75+		enable = (u32int*)(LOCALREGS + Localmboxint) + m->machno;
   27.76+		*enable = 1;
   27.77+	}
   27.78+}
   27.79+
   27.80+void
   27.81 intrsoff(void)
   27.82 {
   27.83 	Intregs *ip;
   27.84@@ -120,11 +144,11 @@ irq(Ureg* ureg)
   27.85 
   27.86 	clockintr = 0;
   27.87 	for(v = vctl; v; v = v->next)
   27.88-		if(*v->reg & v->mask){
   27.89+		if(v->cpu == m->machno && (*v->reg & v->mask) != 0){
   27.90 			coherence();
   27.91 			v->f(ureg, v->a);
   27.92 			coherence();
   27.93-			if(v->irq == IRQclock)
   27.94+			if(v->irq == IRQclock || v->irq == IRQcntps || v->irq == IRQcntpns)
   27.95 				clockintr = 1;
   27.96 		}
   27.97 	return clockintr;
   27.98@@ -140,7 +164,7 @@ fiq(Ureg *ureg)
   27.99 
  27.100 	v = vfiq;
  27.101 	if(v == nil)
  27.102-		panic("unexpected item in bagging area");
  27.103+		panic("cpu%d: unexpected item in bagging area", m->machno);
  27.104 	m->intr++;
  27.105 	ureg->pc -= 4;
  27.106 	coherence();
  27.107@@ -160,7 +184,16 @@ irqenable(int irq, void (*f)(Ureg*, void
  27.108 	if(v == nil)
  27.109 		panic("irqenable: no mem");
  27.110 	v->irq = irq;
  27.111-	if(irq >= IRQbasic){
  27.112+	v->cpu = 0;
  27.113+	if(irq >= IRQlocal){
  27.114+		v->reg = (u32int*)(LOCALREGS + Localintpending) + m->machno;
  27.115+		if(irq >= IRQmbox0)
  27.116+			enable = (u32int*)(LOCALREGS + Localmboxint) + m->machno;
  27.117+		else
  27.118+			enable = (u32int*)(LOCALREGS + Localtimerint) + m->machno;
  27.119+		v->mask = 1 << (irq - IRQlocal);
  27.120+		v->cpu = m->machno;
  27.121+	}else if(irq >= IRQbasic){
  27.122 		enable = &ip->ARMenable;
  27.123 		v->reg = &ip->ARMpending;
  27.124 		v->mask = 1 << (irq - IRQbasic);
  27.125@@ -171,6 +204,7 @@ irqenable(int irq, void (*f)(Ureg*, void
  27.126 	}
  27.127 	v->f = f;
  27.128 	v->a = a;
  27.129+	lock(&vctllock);
  27.130 	if(irq == IRQfiq){
  27.131 		assert((ip->FIQctl & Fiqenable) == 0);
  27.132 		assert((*enable & v->mask) == 0);
  27.133@@ -179,8 +213,15 @@ irqenable(int irq, void (*f)(Ureg*, void
  27.134 	}else{
  27.135 		v->next = vctl;
  27.136 		vctl = v;
  27.137-		*enable = v->mask;
  27.138+		if(irq >= IRQmbox0){
  27.139+			if(irq <= IRQmbox3)
  27.140+				*enable |= 1 << (irq - IRQmbox0);
  27.141+		}else if(irq >= IRQlocal)
  27.142+			*enable |= 1 << (irq - IRQlocal);
  27.143+		else
  27.144+			*enable = v->mask;
  27.145 	}
  27.146+	unlock(&vctllock);
  27.147 }
  27.148 
  27.149 static char *
    28.1--- a/sys/src/9/bcm/uartmini.c
    28.2+++ b/sys/src/9/bcm/uartmini.c
    28.3@@ -10,35 +10,11 @@
    28.4 #include "fns.h"
    28.5 #include "io.h"
    28.6 
    28.7-#define GPIOREGS	(VIRTIO+0x200000)
    28.8 #define AUXREGS		(VIRTIO+0x215000)
    28.9 #define	OkLed		16
   28.10 #define	TxPin		14
   28.11 #define	RxPin		15
   28.12 
   28.13-/* GPIO regs */
   28.14-enum {
   28.15-	Fsel0	= 0x00>>2,
   28.16-		FuncMask= 0x7,
   28.17-		Input	= 0x0,
   28.18-		Output	= 0x1,
   28.19-		Alt0	= 0x4,
   28.20-		Alt1	= 0x5,
   28.21-		Alt2	= 0x6,
   28.22-		Alt3	= 0x7,
   28.23-		Alt4	= 0x3,
   28.24-		Alt5	= 0x2,
   28.25-	Set0	= 0x1c>>2,
   28.26-	Clr0	= 0x28>>2,
   28.27-	Lev0	= 0x34>>2,
   28.28-	PUD	= 0x94>>2,
   28.29-		Off	= 0x0,
   28.30-		Pulldown= 0x1,
   28.31-		Pullup	= 0x2,
   28.32-	PUDclk0	= 0x98>>2,
   28.33-	PUDclk1	= 0x9c>>2,
   28.34-};
   28.35-
   28.36 /* AUX regs */
   28.37 enum {
   28.38 	Irq	= 0x00>>2,
   28.39@@ -73,56 +49,11 @@ static Uart miniuart = {
   28.40 	.regs	= (u32int*)AUXREGS,
   28.41 	.name	= "uart0",
   28.42 	.freq	= 250000000,
   28.43+	.baud	= 115200,
   28.44 	.phys	= &miniphysuart,
   28.45 };
   28.46 
   28.47-void
   28.48-gpiosel(uint pin, int func)
   28.49-{	
   28.50-	u32int *gp, *fsel;
   28.51-	int off;
   28.52-
   28.53-	gp = (u32int*)GPIOREGS;
   28.54-	fsel = &gp[Fsel0 + pin/10];
   28.55-	off = (pin % 10) * 3;
   28.56-	*fsel = (*fsel & ~(FuncMask << off)) | func << off;
   28.57-}
   28.58-
   28.59-void
   28.60-gpiopulloff(uint pin)
   28.61-{
   28.62-	u32int *gp, *reg;
   28.63-	u32int mask;
   28.64-
   28.65-	gp = (u32int*)GPIOREGS;
   28.66-	reg = &gp[PUDclk0 + pin/32];
   28.67-	mask = 1 << (pin % 32);
   28.68-	gp[PUD] = Off;
   28.69-	microdelay(1);
   28.70-	*reg = mask;
   28.71-	microdelay(1);
   28.72-	*reg = 0;
   28.73-}
   28.74-
   28.75-void
   28.76-gpioout(uint pin, int set)
   28.77-{
   28.78-	u32int *gp;
   28.79-	int v;
   28.80-
   28.81-	gp = (u32int*)GPIOREGS;
   28.82-	v = set? Set0: Clr0;
   28.83-	gp[v + pin/32] = 1 << (pin % 32);
   28.84-}
   28.85-
   28.86-int
   28.87-gpioin(uint pin)
   28.88-{
   28.89-	u32int *gp;
   28.90-
   28.91-	gp = (u32int*)GPIOREGS;
   28.92-	return (gp[Lev0 + pin/32] & (1 << (pin % 32))) != 0;
   28.93-}
   28.94+static int baud(Uart*, int);
   28.95 
   28.96 static void
   28.97 interrupt(Ureg*, void *arg)
   28.98@@ -162,10 +93,12 @@ enable(Uart *uart, int ie)
   28.99 	gpiosel(TxPin, Alt5);
  28.100 	gpiosel(RxPin, Alt5);
  28.101 	gpiopulloff(TxPin);
  28.102-	gpiopulloff(RxPin);
  28.103+	gpiopullup(RxPin);
  28.104 	ap[Enables] |= UartEn;
  28.105 	ap[MuIir] = 6;
  28.106+	ap[MuLcr] = Bits8;
  28.107 	ap[MuCntl] = TxEn|RxEn;
  28.108+	baud(uart, uart->baud);
  28.109 	if(ie){
  28.110 		intrenable(IRQaux, interrupt, uart, 0, "uart");
  28.111 		ap[MuIer] = RxIen|TxIen;
  28.112@@ -370,13 +303,12 @@ uartconsinit(void)
  28.113 		break;
  28.114 	}
  28.115 
  28.116-	uartctl(uart, "b9600 l8 pn s1");
  28.117+	if(!uart->enabled)
  28.118+		(*uart->phys->enable)(uart, 0);
  28.119+	uartctl(uart, "l8 pn s1");
  28.120 	if(*cmd != '\0')
  28.121 		uartctl(uart, cmd);
  28.122 
  28.123-	if(!uart->enabled)
  28.124-		(*uart->phys->enable)(uart, 0);
  28.125-
  28.126 	consuart = uart;
  28.127 	uart->console = 1;
  28.128 }
  28.129@@ -405,8 +337,26 @@ void
  28.130 okay(int on)
  28.131 {
  28.132 	static int first;
  28.133+	static int okled, polarity;
  28.134+	char *p;
  28.135 
  28.136-	if(!first++)
  28.137-		gpiosel(OkLed, Output);
  28.138-	gpioout(OkLed, !on);
  28.139+	if(!first++){
  28.140+		p = getconf("bcm2709.disk_led_gpio");
  28.141+		if(p == nil)
  28.142+			p = getconf("bcm2708.disk_led_gpio");
  28.143+		if(p != nil)
  28.144+			okled = strtol(p, 0, 0);
  28.145+		else
  28.146+			okled = 'v';
  28.147+		p = getconf("bcm2709.disk_led_active_low");
  28.148+		if(p == nil)
  28.149+			p = getconf("bcm2708.disk_led_active_low");
  28.150+		polarity = (p == nil || *p == '1');
  28.151+		if(okled != 'v')
  28.152+			gpiosel(okled, Output);
  28.153+	}
  28.154+	if(okled == 'v')
  28.155+		vgpset(0, on);
  28.156+	else if(okled != 0)
  28.157+		gpioout(okled, on^polarity);
  28.158 }
    29.1--- a/sys/src/9/bcm/usbdwc.c
    29.2+++ b/sys/src/9/bcm/usbdwc.c
    29.3@@ -33,16 +33,29 @@ enum
    29.4 
    29.5 	Read		= 0,
    29.6 	Write		= 1,
    29.7+
    29.8+	/*
    29.9+	 * Workaround for an unexplained glitch where an Ack interrupt
   29.10+	 * is received without Chhltd, whereupon all channels remain
   29.11+	 * permanently busy and can't be halted.  This was only seen
   29.12+	 * when the controller is reading a sequence of bulk input
   29.13+	 * packets in DMA mode.  Setting Slowbulkin=1 will avoid the
   29.14+	 * lockup by reading packets individually with an interrupt
   29.15+	 * after each.  More recent chips don't seem to exhibit the
   29.16+	 * problem, so it's probably safe to leave this off now.
   29.17+	 */
   29.18+	Slowbulkin	= 0,
   29.19 };
   29.20 
   29.21 typedef struct Ctlr Ctlr;
   29.22 typedef struct Epio Epio;
   29.23 
   29.24 struct Ctlr {
   29.25+	Lock;
   29.26 	Dwcregs	*regs;		/* controller registers */
   29.27 	int	nchan;		/* number of host channels */
   29.28 	ulong	chanbusy;	/* bitmap of in-use channels */
   29.29-	QLock	chanlock;	/* serialise access to chanbusy */
   29.30+	Lock	chanlock;	/* serialise access to chanbusy */
   29.31 	QLock	split;		/* serialise split transactions */
   29.32 	int	splitretry;	/* count retries of Nyet */
   29.33 	int	sofchan;	/* bitmap of channels waiting for sof */
   29.34@@ -52,7 +65,11 @@ struct Ctlr {
   29.35 };
   29.36 
   29.37 struct Epio {
   29.38-	QLock;
   29.39+	union {
   29.40+		QLock	rlock;
   29.41+		QLock	ctllock;
   29.42+	};
   29.43+	QLock	wlock;
   29.44 	Block	*cb;
   29.45 	ulong	lastpoll;
   29.46 };
   29.47@@ -61,29 +78,48 @@ static Ctlr dwc;
   29.48 static int debug;
   29.49 
   29.50 static char Ebadlen[] = "bad usb request length";
   29.51-static char Enotconfig[] = "usb endpoint not configured";
   29.52 
   29.53 static void clog(Ep *ep, Hostchan *hc);
   29.54 static void logdump(Ep *ep);
   29.55 
   29.56+static void
   29.57+filock(Lock *l)
   29.58+{
   29.59+	int x;
   29.60+
   29.61+	x = splfhi();
   29.62+	ilock(l);
   29.63+	l->sr = x;
   29.64+}
   29.65+
   29.66+static void
   29.67+fiunlock(Lock *l)
   29.68+{
   29.69+	iunlock(l);
   29.70+}
   29.71+
   29.72 static Hostchan*
   29.73 chanalloc(Ep *ep)
   29.74 {
   29.75 	Ctlr *ctlr;
   29.76 	int bitmap, i;
   29.77+	static int first;
   29.78 
   29.79 	ctlr = ep->hp->aux;
   29.80-	qlock(&ctlr->chanlock);
   29.81+retry:
   29.82+	lock(&ctlr->chanlock);
   29.83 	bitmap = ctlr->chanbusy;
   29.84 	for(i = 0; i < ctlr->nchan; i++)
   29.85 		if((bitmap & (1<<i)) == 0){
   29.86 			ctlr->chanbusy = bitmap | 1<<i;
   29.87-			qunlock(&ctlr->chanlock);
   29.88+			unlock(&ctlr->chanlock);
   29.89 			return &ctlr->regs->hchan[i];
   29.90 		}
   29.91-	qunlock(&ctlr->chanlock);
   29.92-	panic("miller is a lazy git");
   29.93-	return nil;
   29.94+	unlock(&ctlr->chanlock);
   29.95+	if(!first++)
   29.96+		print("usbdwc: all host channels busy - retrying\n");
   29.97+	tsleep(&up->sleep, return0, 0, 1);
   29.98+	goto retry;
   29.99 }
  29.100 
  29.101 static void
  29.102@@ -94,9 +130,9 @@ chanrelease(Ep *ep, Hostchan *chan)
  29.103 
  29.104 	ctlr = ep->hp->aux;
  29.105 	i = chan - ctlr->regs->hchan;
  29.106-	qlock(&ctlr->chanlock);
  29.107+	lock(&ctlr->chanlock);
  29.108 	ctlr->chanbusy &= ~(1<<i);
  29.109-	qunlock(&ctlr->chanlock);
  29.110+	unlock(&ctlr->chanlock);
  29.111 }
  29.112 
  29.113 static void
  29.114@@ -158,23 +194,22 @@ sofdone(void *a)
  29.115 	Dwcregs *r;
  29.116 
  29.117 	r = a;
  29.118-	return r->gintsts & Sofintr;
  29.119+	return (r->gintmsk & Sofintr) == 0;
  29.120 }
  29.121 
  29.122 static void
  29.123 sofwait(Ctlr *ctlr, int n)
  29.124 {
  29.125 	Dwcregs *r;
  29.126-	int x;
  29.127 
  29.128 	r = ctlr->regs;
  29.129 	do{
  29.130+		filock(ctlr);
  29.131 		r->gintsts = Sofintr;
  29.132-		x = splfhi();
  29.133 		ctlr->sofchan |= 1<<n;
  29.134 		r->gintmsk |= Sofintr;
  29.135+		fiunlock(ctlr);
  29.136 		sleep(&ctlr->chanintr[n], sofdone, r);
  29.137-		splx(x);
  29.138 	}while((r->hfnum & 7) == 6);
  29.139 }
  29.140 
  29.141@@ -192,7 +227,7 @@ chandone(void *a)
  29.142 static int
  29.143 chanwait(Ep *ep, Ctlr *ctlr, Hostchan *hc, int mask)
  29.144 {
  29.145-	int intr, n, x, ointr;
  29.146+	int intr, n, ointr;
  29.147 	ulong start, now;
  29.148 	Dwcregs *r;
  29.149 
  29.150@@ -200,13 +235,14 @@ chanwait(Ep *ep, Ctlr *ctlr, Hostchan *h
  29.151 	n = hc - r->hchan;
  29.152 	for(;;){
  29.153 restart:
  29.154-		x = splfhi();
  29.155+		filock(ctlr);
  29.156 		r->haintmsk |= 1<<n;
  29.157 		hc->hcintmsk = mask;
  29.158-		sleep(&ctlr->chanintr[n], chandone, hc);
  29.159+		fiunlock(ctlr);
  29.160+		tsleep(&ctlr->chanintr[n], chandone, hc, 1000);
  29.161+		if((intr = hc->hcint) == 0)
  29.162+			goto restart;
  29.163 		hc->hcintmsk = 0;
  29.164-		splx(x);
  29.165-		intr = hc->hcint;
  29.166 		if(intr & Chhltd)
  29.167 			return intr;
  29.168 		start = fastticks(0);
  29.169@@ -218,13 +254,14 @@ restart:
  29.170 				if((ointr != Ack && ointr != (Ack|Xfercomp)) ||
  29.171 				   intr != (Ack|Chhltd|Xfercomp) ||
  29.172 				   (now - start) > 60)
  29.173-					dprint("await %x after %ld %x -> %x\n",
  29.174+					dprint("await %x after %ldµs %x -> %x\n",
  29.175 						mask, now - start, ointr, intr);
  29.176 				return intr;
  29.177 			}
  29.178 			if((intr & mask) == 0){
  29.179-				dprint("ep%d.%d await %x intr %x -> %x\n",
  29.180-					ep->dev->nb, ep->nb, mask, ointr, intr);
  29.181+				if(intr != Nak)
  29.182+					dprint("ep%d.%d await %x after %ldµs intr %x -> %x\n",
  29.183+						ep->dev->nb, ep->nb, mask, now - start, ointr, intr);
  29.184 				goto restart;
  29.185 			}
  29.186 			now = fastticks(0);
  29.187@@ -254,6 +291,8 @@ chanintr(Ctlr *ctlr, int n)
  29.188 	int i;
  29.189 
  29.190 	hc = &ctlr->regs->hchan[n];
  29.191+	if((hc->hcint & hc->hcintmsk) == 0)
  29.192+		return 1;
  29.193 	if(ctlr->debugchan & (1<<n))
  29.194 		clog(nil, hc);
  29.195 	if((hc->hcsplt & Spltena) == 0)
  29.196@@ -347,7 +386,7 @@ chanio(Ep *ep, Hostchan *hc, int dir, in
  29.197 	else
  29.198 		n = len;
  29.199 	hc->hctsiz = n | npkt<<OPktcnt | pid;
  29.200-	hc->hcdma  = PADDR(a);
  29.201+	hc->hcdma  = dmaaddr(a);
  29.202 
  29.203 	nleft = len;
  29.204 	logstart(ep);
  29.205@@ -378,13 +417,19 @@ chanio(Ep *ep, Hostchan *hc, int dir, in
  29.206 		}
  29.207 		hc->hcchar = (hc->hcchar &~ Chdis) | Chen;
  29.208 		clog(ep, hc);
  29.209+wait:
  29.210 		if(ep->ttype == Tbulk && dir == Epin)
  29.211-			i = chanwait(ep, ctlr, hc, /* Ack| */ Chhltd);
  29.212+			i = chanwait(ep, ctlr, hc, Chhltd);
  29.213 		else if(ep->ttype == Tintr && (hc->hcsplt & Spltena))
  29.214 			i = chanwait(ep, ctlr, hc, Chhltd);
  29.215 		else
  29.216 			i = chanwait(ep, ctlr, hc, Chhltd|Nak);
  29.217 		clog(ep, hc);
  29.218+		if(hc->hcint != i){
  29.219+			dprint("chanwait intr %ux->%ux\n", i, hc->hcint);
  29.220+			if((i = hc->hcint) == 0)
  29.221+				goto wait;
  29.222+		}
  29.223 		hc->hcint = i;
  29.224 
  29.225 		if(hc->hcsplt & Spltena){
  29.226@@ -405,12 +450,12 @@ chanio(Ep *ep, Hostchan *hc, int dir, in
  29.227 				continue;
  29.228 			}
  29.229 			logdump(ep);
  29.230-			print("usbotg: ep%d.%d error intr %8.8ux\n",
  29.231+			print("usbdwc: ep%d.%d error intr %8.8ux\n",
  29.232 				ep->dev->nb, ep->nb, i);
  29.233 			if(i & ~(Chhltd|Ack))
  29.234 				error(Eio);
  29.235 			if(hc->hcdma != hcdma)
  29.236-				print("usbotg: weird hcdma %x->%x intr %x->%x\n",
  29.237+				print("usbdwc: weird hcdma %ux->%ux intr %ux->%ux\n",
  29.238 					hcdma, hc->hcdma, i, hc->hcint);
  29.239 		}
  29.240 		n = hc->hcdma - hcdma;
  29.241@@ -420,13 +465,13 @@ chanio(Ep *ep, Hostchan *hc, int dir, in
  29.242 			else
  29.243 				continue;
  29.244 		}
  29.245-		if(dir == Epin && ep->ttype == Tbulk && n == nleft){
  29.246+		if(dir == Epin && ep->ttype == Tbulk){
  29.247 			nt = (hctsiz & Xfersize) - (hc->hctsiz & Xfersize);
  29.248 			if(nt != n){
  29.249 				if(n == ROUND(nt, 4))
  29.250 					n = nt;
  29.251 				else
  29.252-					print("usbotg: intr %8.8ux "
  29.253+					print("usbdwc: intr %8.8ux "
  29.254 						"dma %8.8ux-%8.8ux "
  29.255 						"hctsiz %8.8ux-%8.ux\n",
  29.256 						i, hcdma, hc->hcdma, hctsiz,
  29.257@@ -491,7 +536,7 @@ eptrans(Ep *ep, int rw, void *a, long n)
  29.258 		nexterror();
  29.259 	}
  29.260 	chansetup(hc, ep);
  29.261-	if(rw == Read && ep->ttype == Tbulk)
  29.262+	if(Slowbulkin && rw == Read && ep->ttype == Tbulk)
  29.263 		n = multitrans(ep, hc, rw, a, n);
  29.264 	else{
  29.265 		n = chanio(ep, hc, rw == Read? Epin : Epout, ep->toggle[rw],
  29.266@@ -524,8 +569,8 @@ ctltrans(Ep *ep, uchar *req, long n)
  29.267 		if(datalen <= 0 || datalen > Maxctllen)
  29.268 			error(Ebadlen);
  29.269 		/* XXX cache madness */
  29.270-		epio->cb = b = allocb(ROUND(datalen, ep->maxpkt) + CACHELINESZ);
  29.271-		b->wp = (uchar*)ROUND((uintptr)b->wp, CACHELINESZ);
  29.272+		epio->cb = b = allocb(ROUND(datalen, ep->maxpkt));
  29.273+		assert(((uintptr)b->wp & (BLOCKALIGN-1)) == 0);
  29.274 		memset(b->wp, 0x55, b->lim - b->wp);
  29.275 		cachedwbinvse(b->wp, b->lim - b->wp);
  29.276 		data = b->wp;
  29.277@@ -550,6 +595,7 @@ ctltrans(Ep *ep, uchar *req, long n)
  29.278 		}else
  29.279 			b->wp += chanio(ep, hc, Epin, DATA1, data, datalen);
  29.280 		chanio(ep, hc, Epout, DATA1, nil, 0);
  29.281+		cachedinvse(b->rp, BLEN(b));
  29.282 		n = Rsetuplen;
  29.283 	}else{
  29.284 		if(datalen > 0)
  29.285@@ -627,7 +673,7 @@ init(Hci *hp)
  29.286 	greset(r, Rxfflsh);
  29.287 	r->grstctl = TXF_ALL;
  29.288 	greset(r, Txfflsh);
  29.289-	dprint("usbotg: FIFO depth %d sizes rx/nptx/ptx %8.8ux %8.8ux %8.8ux\n",
  29.290+	dprint("usbdwc: FIFO depth %d sizes rx/nptx/ptx %8.8ux %8.8ux %8.8ux\n",
  29.291 		n, r->grxfsiz, r->gnptxfsiz, r->hptxfsiz);
  29.292 
  29.293 	r->hport0 = Prtpwr|Prtconndet|Prtenchng|Prtovrcurrchng;
  29.294@@ -654,6 +700,7 @@ fiqintr(Ureg*, void *a)
  29.295 	ctlr = hp->aux;
  29.296 	r = ctlr->regs;
  29.297 	wakechan = 0;
  29.298+	filock(ctlr);
  29.299 	intr = r->gintsts;
  29.300 	if(intr & Hcintr){
  29.301 		haint = r->haint & r->haintmsk;
  29.302@@ -679,6 +726,7 @@ fiqintr(Ureg*, void *a)
  29.303 		ctlr->wakechan |= wakechan;
  29.304 		armtimerset(1);
  29.305 	}
  29.306+	fiunlock(ctlr);
  29.307 }
  29.308 
  29.309 static void
  29.310@@ -686,14 +734,14 @@ irqintr(Ureg*, void *a)
  29.311 {
  29.312 	Ctlr *ctlr;
  29.313 	uint wakechan;
  29.314-	int i, x;
  29.315+	int i;
  29.316 
  29.317 	ctlr = a;
  29.318-	x = splfhi();
  29.319+	filock(ctlr);
  29.320 	armtimerset(0);
  29.321 	wakechan = ctlr->wakechan;
  29.322 	ctlr->wakechan = 0;
  29.323-	splx(x);
  29.324+	fiunlock(ctlr);
  29.325 	for(i = 0; wakechan; i++){
  29.326 		if(wakechan & 1)
  29.327 			wakeup(&ctlr->chanintr[i]);
  29.328@@ -704,11 +752,12 @@ irqintr(Ureg*, void *a)
  29.329 static void
  29.330 epopen(Ep *ep)
  29.331 {
  29.332-	ddprint("usbotg: epopen ep%d.%d ttype %d\n",
  29.333+	ddprint("usbdwc: epopen ep%d.%d ttype %d\n",
  29.334 		ep->dev->nb, ep->nb, ep->ttype);
  29.335 	switch(ep->ttype){
  29.336-	case Tnone:
  29.337-		error(Enotconfig);
  29.338+	default:
  29.339+		error("endpoint type not supported");
  29.340+		return;
  29.341 	case Tintr:
  29.342 		assert(ep->pollival > 0);
  29.343 		/* fall through */
  29.344@@ -717,6 +766,8 @@ epopen(Ep *ep)
  29.345 			ep->toggle[Read] = DATA0;
  29.346 		if(ep->toggle[Write] == 0)
  29.347 			ep->toggle[Write] = DATA0;
  29.348+		/* fall through */
  29.349+	case Tctl:
  29.350 		break;
  29.351 	}
  29.352 	ep->aux = malloc(sizeof(Epio));
  29.353@@ -727,7 +778,7 @@ epopen(Ep *ep)
  29.354 static void
  29.355 epclose(Ep *ep)
  29.356 {
  29.357-	ddprint("usbotg: epclose ep%d.%d ttype %d\n",
  29.358+	ddprint("usbdwc: epclose ep%d.%d ttype %d\n",
  29.359 		ep->dev->nb, ep->nb, ep->ttype);
  29.360 	switch(ep->ttype){
  29.361 	case Tctl:
  29.362@@ -743,6 +794,7 @@ static long
  29.363 epread(Ep *ep, void *a, long n)
  29.364 {
  29.365 	Epio *epio;
  29.366+	QLock *q;
  29.367 	Block *b;
  29.368 	uchar *p;
  29.369 	ulong elapsed;
  29.370@@ -750,10 +802,11 @@ epread(Ep *ep, void *a, long n)
  29.371 
  29.372 	ddprint("epread ep%d.%d %ld\n", ep->dev->nb, ep->nb, n);
  29.373 	epio = ep->aux;
  29.374+	q = ep->ttype == Tctl? &epio->ctllock : &epio->rlock;
  29.375 	b = nil;
  29.376-	qlock(epio);
  29.377+	qlock(q);
  29.378 	if(waserror()){
  29.379-		qunlock(epio);
  29.380+		qunlock(q);
  29.381 		if(b)
  29.382 			freeb(b);
  29.383 		nexterror();
  29.384@@ -763,7 +816,7 @@ epread(Ep *ep, void *a, long n)
  29.385 		error(Egreg);
  29.386 	case Tctl:
  29.387 		nr = ctldata(ep, a, n);
  29.388-		qunlock(epio);
  29.389+		qunlock(q);
  29.390 		poperror();
  29.391 		return nr;
  29.392 	case Tintr:
  29.393@@ -773,13 +826,15 @@ epread(Ep *ep, void *a, long n)
  29.394 		/* fall through */
  29.395 	case Tbulk:
  29.396 		/* XXX cache madness */
  29.397-		b = allocb(ROUND(n, ep->maxpkt) + CACHELINESZ);
  29.398-		p = (uchar*)ROUND((uintptr)b->base, CACHELINESZ);
  29.399-		cachedwbinvse(p, n);
  29.400+		b = allocb(ROUND(n, ep->maxpkt));
  29.401+		p = b->rp;
  29.402+		assert(((uintptr)p & (BLOCKALIGN-1)) == 0);
  29.403+		cachedinvse(p, n);
  29.404 		nr = eptrans(ep, Read, p, n);
  29.405+		cachedinvse(p, nr);
  29.406 		epio->lastpoll = TK2MS(m->ticks);
  29.407 		memmove(a, p, nr);
  29.408-		qunlock(epio);
  29.409+		qunlock(q);
  29.410 		freeb(b);
  29.411 		poperror();
  29.412 		return nr;
  29.413@@ -790,16 +845,18 @@ static long
  29.414 epwrite(Ep *ep, void *a, long n)
  29.415 {
  29.416 	Epio *epio;
  29.417+	QLock *q;
  29.418 	Block *b;
  29.419 	uchar *p;
  29.420 	ulong elapsed;
  29.421 
  29.422 	ddprint("epwrite ep%d.%d %ld\n", ep->dev->nb, ep->nb, n);
  29.423 	epio = ep->aux;
  29.424+	q = ep->ttype == Tctl? &epio->ctllock : &epio->wlock;
  29.425 	b = nil;
  29.426-	qlock(epio);
  29.427+	qlock(q);
  29.428 	if(waserror()){
  29.429-		qunlock(epio);
  29.430+		qunlock(q);
  29.431 		if(b)
  29.432 			freeb(b);
  29.433 		nexterror();
  29.434@@ -815,8 +872,9 @@ epwrite(Ep *ep, void *a, long n)
  29.435 	case Tctl:
  29.436 	case Tbulk:
  29.437 		/* XXX cache madness */
  29.438-		b = allocb(n + CACHELINESZ);
  29.439-		p = (uchar*)ROUND((uintptr)b->base, CACHELINESZ);
  29.440+		b = allocb(n);
  29.441+		p = b->wp;
  29.442+		assert(((uintptr)p & (BLOCKALIGN-1)) == 0);
  29.443 		memmove(p, a, n);
  29.444 		cachedwbse(p, n);
  29.445 		if(ep->ttype == Tctl)
  29.446@@ -825,7 +883,7 @@ epwrite(Ep *ep, void *a, long n)
  29.447 			n = eptrans(ep, Write, p, n);
  29.448 			epio->lastpoll = TK2MS(m->ticks);
  29.449 		}
  29.450-		qunlock(epio);
  29.451+		qunlock(q);
  29.452 		freeb(b);
  29.453 		poperror();
  29.454 		return n;
  29.455@@ -847,11 +905,11 @@ portenable(Hci *hp, int port, int on)
  29.456 	assert(port == 1);
  29.457 	ctlr = hp->aux;
  29.458 	r = ctlr->regs;
  29.459-	dprint("usbotg enable=%d; sts %#x\n", on, r->hport0);
  29.460+	dprint("usbdwc enable=%d; sts %#x\n", on, r->hport0);
  29.461 	if(!on)
  29.462 		r->hport0 = Prtpwr | Prtena;
  29.463 	tsleep(&up->sleep, return0, 0, Enabledelay);
  29.464-	dprint("usbotg enable=%d; sts %#x\n", on, r->hport0);
  29.465+	dprint("usbdwc enable=%d; sts %#x\n", on, r->hport0);
  29.466 	return 0;
  29.467 }
  29.468 
  29.469@@ -865,7 +923,7 @@ portreset(Hci *hp, int port, int on)
  29.470 	assert(port == 1);
  29.471 	ctlr = hp->aux;
  29.472 	r = ctlr->regs;
  29.473-	dprint("usbotg reset=%d; sts %#x\n", on, r->hport0);
  29.474+	dprint("usbdwc reset=%d; sts %#x\n", on, r->hport0);
  29.475 	if(!on)
  29.476 		return 0;
  29.477 	r->hport0 = Prtpwr | Prtrst;
  29.478@@ -876,9 +934,9 @@ portreset(Hci *hp, int port, int on)
  29.479 	b = s & (Prtconndet|Prtenchng|Prtovrcurrchng);
  29.480 	if(b != 0)
  29.481 		r->hport0 = Prtpwr | b;
  29.482-	dprint("usbotg reset=%d; sts %#x\n", on, s);
  29.483+	dprint("usbdwc reset=%d; sts %#x\n", on, s);
  29.484 	if((s & Prtena) == 0)
  29.485-		print("usbotg: host port not enabled after reset");
  29.486+		print("usbdwc: host port not enabled after reset");
  29.487 	return 0;
  29.488 }
  29.489 
  29.490@@ -948,7 +1006,7 @@ reset(Hci *hp)
  29.491 	id = ctlr->regs->gsnpsid;
  29.492 	if((id>>16) != ('O'<<8 | 'T'))
  29.493 		return -1;
  29.494-	dprint("usbotg: rev %d.%3.3x\n", (id>>12)&0xF, id&0xFFF);
  29.495+	dprint("usbdwc: rev %d.%3.3x\n", (id>>12)&0xF, id&0xFFF);
  29.496 
  29.497 	intrenable(IRQtimerArm, irqintr, ctlr, 0, "dwc");
  29.498 
    30.1--- a/sys/src/9/bcm/vcore.c
    30.2+++ b/sys/src/9/bcm/vcore.c
    30.3@@ -12,6 +12,7 @@
    30.4 
    30.5 typedef struct Prophdr Prophdr;
    30.6 typedef struct Fbinfo Fbinfo;
    30.7+typedef struct Vgpio Vgpio;
    30.8 
    30.9 enum {
   30.10 	Read		= 0x00>>2,
   30.11@@ -33,13 +34,16 @@ enum {
   30.12 	TagResp		= 1<<31,
   30.13 
   30.14 	TagGetfwrev	= 0x00000001,
   30.15-	TagGetbrdrev	= 0x00010002,
   30.16+	TagGetrev	= 0x00010002,
   30.17 	TagGetmac	= 0x00010003,
   30.18 	TagGetram	= 0x00010005,
   30.19 	TagGetpower	= 0x00020001,
   30.20 	TagSetpower	= 0x00028001,
   30.21 		Powerwait	= 1<<1,
   30.22 	TagGetclkspd= 0x00030002,
   30.23+	TagGetclkmax= 0x00030004,
   30.24+	TagSetclkspd= 0x00038002,
   30.25+	TagGettemp	= 0x00030006,
   30.26 	TagFballoc	= 0x00040001,
   30.27 	TagFbfree	= 0x00048001,
   30.28 	TagFbblank	= 0x00040002,
   30.29@@ -49,8 +53,11 @@ enum {
   30.30 	TagSetvres	= 0x00048004,
   30.31 	TagGetdepth	= 0x00040005,
   30.32 	TagSetdepth	= 0x00048005,
   30.33-	TagGetrgb	= 0x00044006,
   30.34+	TagGetrgb	= 0x00040006,
   30.35 	TagSetrgb	= 0x00048006,
   30.36+	TagGetGpio	= 0x00040010,
   30.37+
   30.38+	Nvgpio		= 2,
   30.39 };
   30.40 
   30.41 struct Fbinfo {
   30.42@@ -76,6 +83,15 @@ struct Prophdr {
   30.43 	u32int	data[1];
   30.44 };
   30.45 
   30.46+struct Vgpio {
   30.47+	u32int	*counts;
   30.48+	u16int	incs;
   30.49+	u16int	decs;
   30.50+	int	ison;
   30.51+};
   30.52+
   30.53+static Vgpio vgpio;
   30.54+
   30.55 static void
   30.56 vcwrite(uint chan, int val)
   30.57 {
   30.58@@ -115,7 +131,8 @@ vcreq(int tag, void *buf, int vallen, in
   30.59 	uintptr r;
   30.60 	int n;
   30.61 	Prophdr *prop;
   30.62-	static uintptr base = BUSDRAM;
   30.63+	uintptr aprop;
   30.64+	static int busaddr = 1;
   30.65 
   30.66 	if(rsplen < vallen)
   30.67 		rsplen = vallen;
   30.68@@ -132,15 +149,18 @@ vcreq(int tag, void *buf, int vallen, in
   30.69 		memmove(prop->data, buf, vallen);
   30.70 	cachedwbinvse(prop, prop->len);
   30.71 	for(;;){
   30.72-		vcwrite(ChanProps, PADDR(prop) + base);
   30.73+		aprop = busaddr? dmaaddr(prop) : PTR2UINT(prop);
   30.74+		vcwrite(ChanProps, aprop);
   30.75 		r = vcread(ChanProps);
   30.76-		if(r == PADDR(prop) + base)
   30.77+		if(r == aprop)
   30.78 			break;
   30.79-		if(base == 0)
   30.80+		if(!busaddr)
   30.81 			return -1;
   30.82-		base = 0;
   30.83+		busaddr = 0;
   30.84 	}
   30.85-	if(prop->req == RspOk && prop->tag == tag && prop->taglen & TagResp) {
   30.86+	if(prop->req == RspOk &&
   30.87+	   prop->tag == tag &&
   30.88+	   (prop->taglen&TagResp)) {
   30.89 		if((n = prop->taglen & ~TagResp) < rsplen)
   30.90 			rsplen = n;
   30.91 		memmove(buf, prop->data, rsplen);
   30.92@@ -158,13 +178,17 @@ static int
   30.93 fbdefault(int *width, int *height, int *depth)
   30.94 {
   30.95 	u32int buf[3];
   30.96+	char *p;
   30.97 
   30.98 	if(vcreq(TagGetres, &buf[0], 0, 2*4) != 2*4 ||
   30.99 	   vcreq(TagGetdepth, &buf[2], 0, 4) != 4)
  30.100 		return -1;
  30.101 	*width = buf[0];
  30.102 	*height = buf[1];
  30.103-	*depth = buf[2];
  30.104+	if((p = getconf("bcm2708_fb.fbdepth")) != nil)
  30.105+		*depth = atoi(p);
  30.106+	else
  30.107+		*depth = buf[2];
  30.108 	return 0;
  30.109 }
  30.110 
  30.111@@ -184,7 +208,7 @@ fbinit(int set, int *width, int *height,
  30.112 	fi->yres = fi->yresvirtual = *height;
  30.113 	fi->bpp = *depth;
  30.114 	cachedwbinvse(fi, sizeof(*fi));
  30.115-	vcwrite(ChanFb, DMAADDR(fi));
  30.116+	vcwrite(ChanFb, dmaaddr(fi));
  30.117 	if(vcread(ChanFb) != 0)
  30.118 		return 0;
  30.119 	va = mmukmap(FRAMEBUFFER, PADDR(fi->base), fi->screensize);
  30.120@@ -213,7 +237,7 @@ setpower(int dev, int on)
  30.121 	u32int buf[2];
  30.122 
  30.123 	buf[0] = dev;
  30.124-	buf[1] = Powerwait | (on? 1: 0);
  30.125+	buf[1] = Powerwait | (on? 1 : 0);
  30.126 	vcreq(TagSetpower, buf, sizeof buf, sizeof buf);
  30.127 }
  30.128 
  30.129@@ -250,6 +274,19 @@ getethermac(void)
  30.130 }
  30.131 
  30.132 /*
  30.133+ * Get board revision
  30.134+ */
  30.135+uint
  30.136+getboardrev(void)
  30.137+{
  30.138+	u32int buf[1];
  30.139+
  30.140+	if(vcreq(TagGetrev, buf, 0, sizeof buf) != sizeof buf)
  30.141+		return 0;
  30.142+	return buf[0];
  30.143+}
  30.144+
  30.145+/*
  30.146  * Get firmware revision
  30.147  */
  30.148 uint
  30.149@@ -262,15 +299,6 @@ getfirmware(void)
  30.150 	return buf[0];
  30.151 }
  30.152 
  30.153-uint
  30.154-getrevision(void)
  30.155-{
  30.156-	u32int buf[1];
  30.157-	if(vcreq(TagGetbrdrev, buf, 0, sizeof buf) != sizeof buf)
  30.158-		return 0;
  30.159-	return buf[0];
  30.160-}
  30.161-
  30.162 /*
  30.163  * Get ARM ram
  30.164  */
  30.165@@ -299,13 +327,63 @@ getclkrate(int clkid)
  30.166 	return buf[1];
  30.167 }
  30.168 
  30.169+/*
  30.170+ * Set clock rate to hz (or max speed if hz == 0)
  30.171+ */
  30.172+void
  30.173+setclkrate(int clkid, ulong hz)
  30.174+{
  30.175+	u32int buf[2];
  30.176+
  30.177+	buf[0] = clkid;
  30.178+	if(hz != 0)
  30.179+		buf[1] = hz;
  30.180+	else if(vcreq(TagGetclkmax, buf, sizeof(buf[0]), sizeof(buf)) != sizeof buf)
  30.181+		return;
  30.182+	vcreq(TagSetclkspd, buf, sizeof(buf), sizeof(buf));
  30.183+}
  30.184+
  30.185+/*
  30.186+ * Get cpu temperature
  30.187+ */
  30.188 uint
  30.189-gettemp(int tempid)
  30.190+getcputemp(void)
  30.191 {
  30.192 	u32int buf[2];
  30.193-	buf[0] = tempid;
  30.194-	if(vcreq(0x00030006, buf, sizeof(buf[0]), sizeof(buf)) != sizeof buf)
  30.195+
  30.196+	buf[0] = 0;
  30.197+	if(vcreq(TagGettemp, buf, sizeof(buf[0]), sizeof buf) != sizeof buf)
  30.198 		return 0;
  30.199-
  30.200 	return buf[1];
  30.201 }
  30.202+
  30.203+/*
  30.204+ * Virtual GPIO - used for ACT LED on pi3
  30.205+ */
  30.206+void
  30.207+vgpinit(void)
  30.208+{
  30.209+	u32int buf[1];
  30.210+	uintptr va;
  30.211+
  30.212+	buf[0] = 0;
  30.213+	if(vcreq(TagGetGpio, buf, 0, sizeof(buf)) != sizeof buf || buf[0] == 0)
  30.214+		return;
  30.215+	va = mmukmap(VGPIO, buf[0] & ~0xC0000000, BY2PG);
  30.216+	if(va == 0)
  30.217+		return;
  30.218+	vgpio.counts = (u32int*)va;
  30.219+}
  30.220+
  30.221+void
  30.222+vgpset(uint port, int on)
  30.223+{
  30.224+	if(vgpio.counts == nil || port >= Nvgpio || on == vgpio.ison)
  30.225+		return;
  30.226+	if(on)
  30.227+		vgpio.incs++;
  30.228+	else
  30.229+		vgpio.decs++;
  30.230+	vgpio.counts[port] = (vgpio.incs << 16) | vgpio.decs;
  30.231+	vgpio.ison = on;
  30.232+}
    31.1--- a/sys/src/9/bcm/vfp3.c
    31.2+++ b/sys/src/9/bcm/vfp3.c
    31.3@@ -163,7 +163,10 @@ fpcfg(void)
    31.4 	static int printed;
    31.5 
    31.6 	/* clear pending exceptions; no traps in vfp3; all v7 ops are scalar */
    31.7-	m->fpscr = Dn | Fz | FPRNR | (FPINVAL | FPZDIV | FPOVFL) & ~Alltraps;
    31.8+	m->fpscr = Dn | FPRNR | (FPINVAL | FPZDIV | FPOVFL) & ~Alltraps;
    31.9+	/* VFPv2 needs software support for underflows, so force them to zero */
   31.10+	if(m->havefp == VFPv2)
   31.11+		m->fpscr |= Fz;
   31.12 	fpwr(Fpscr, m->fpscr);
   31.13 	m->fpconfiged = 1;
   31.14 
   31.15@@ -278,7 +281,7 @@ fpuprocsave(Proc *p)
   31.16 {
   31.17 	if(p->fpstate == FPactive){
   31.18 		if(p->state == Moribund)
   31.19-			fpclear();
   31.20+			fpoff();
   31.21 		else{
   31.22 			/*
   31.23 			 * Fpsave() stores without handling pending
   31.24@@ -371,8 +374,6 @@ mathnote(void)
   31.25 static void
   31.26 mathemu(Ureg *)
   31.27 {
   31.28-	if(m->havefp == VFPv3 && !(fprd(Fpexc) & (Fpex|Fpdex)))
   31.29-		iprint("mathemu: not an FP exception but an unknown FP opcode\n");
   31.30 	switch(up->fpstate){
   31.31 	case FPemu:
   31.32 		error("illegal instruction: VFP opcode in emulated mode");
   31.33@@ -472,6 +473,7 @@ fpuemu(Ureg* ureg)
   31.34 {
   31.35 	int s, nfp, cop, op;
   31.36 	uintptr pc;
   31.37+	static int already;
   31.38 
   31.39 	if(waserror()){
   31.40 		postnote(up, 1, up->errstr, NDebug);
   31.41@@ -484,16 +486,14 @@ fpuemu(Ureg* ureg)
   31.42 	nfp = 0;
   31.43 	pc = ureg->pc;
   31.44 	validaddr(pc, 4, 0);
   31.45-	if(!condok(ureg->psr, *(ulong*)pc >> 28))
   31.46-		iprint("fpuemu: conditional instr shouldn't have got here\n");
   31.47 	op  = (*(ulong *)pc >> 24) & MASK(4);
   31.48 	cop = (*(ulong *)pc >>  8) & MASK(4);
   31.49 	if(m->fpon)
   31.50 		fpstuck(pc);		/* debugging; could move down 1 line */
   31.51 	if (ISFPAOP(cop, op)) {		/* old arm 7500 fpa opcode? */
   31.52-//		iprint("fpuemu: fpa instr %#8.8lux at %#p\n", *(ulong *)pc, pc);
   31.53-//		error("illegal instruction: old arm 7500 fpa opcode");
   31.54 		s = spllo();
   31.55+		if(!already++)
   31.56+			pprint("warning: emulated arm7500 fpa instr %#8.8lux at %#p\n", *(ulong *)pc, pc);
   31.57 		if(waserror()){
   31.58 			splx(s);
   31.59 			nexterror();
   31.60@@ -503,7 +503,7 @@ fpuemu(Ureg* ureg)
   31.61 			m->fppc = m->fpcnt = 0;
   31.62 		splx(s);
   31.63 		poperror();
   31.64-	} else if (ISVFPOP(cop, op)) {	/* if vfp, fpu must be off */
   31.65+	} else if (ISVFPOP(cop, op)) {	/* if vfp, fpu off or unsupported instruction */
   31.66 		mathemu(ureg);		/* enable fpu & retry */
   31.67 		nfp = 1;
   31.68 	}