changelog shortlog tags branches files raw gz bz2 help

Mercurial > hg > plan9front / changeset: kernel: introduce devswap #¶ to serve /dev/swap and handle swapfile encryption

changeset 6198: f498f01d02f3
parent 6197: 433e72142654
child 6199: 911ce7c3a9b5
author: cinap_lenrek@felloff.net
date: Sun, 29 Oct 2017 23:09:54 +0100
files: lib/namespace sys/man/3/cons sys/man/3/swap sys/man/8/swap sys/src/9/bcm/main.c sys/src/9/bcm/mkfile sys/src/9/bcm/picpuf sys/src/9/bcm/pif sys/src/9/boot/bootrc sys/src/9/kw/main.c sys/src/9/kw/mkfile sys/src/9/kw/plug sys/src/9/mtx/main.c sys/src/9/mtx/mkfile sys/src/9/mtx/mtx sys/src/9/mtx/mtxcpu sys/src/9/omap/beagle sys/src/9/omap/main.c sys/src/9/omap/mkfile sys/src/9/pc/main.c sys/src/9/pc/mkfile sys/src/9/pc/pc sys/src/9/pc64/main.c sys/src/9/pc64/mkfile sys/src/9/pc64/pc64 sys/src/9/port/devcons.c sys/src/9/port/devswap.c sys/src/9/port/portfns.h sys/src/9/port/portmkfile sys/src/9/port/swap.c sys/src/9/ppc/blast sys/src/9/ppc/main.c sys/src/9/ppc/mkfile sys/src/9/sgi/indy sys/src/9/sgi/main.c sys/src/9/sgi/mkfile sys/src/9/teg2/main.c sys/src/9/teg2/mkfile sys/src/9/teg2/ts sys/src/9/xen/main.c sys/src/9/xen/mkfile sys/src/9/xen/xenpcf sys/src/9/zynq/main.c sys/src/9/zynq/mkfile sys/src/9/zynq/zynq
description: kernel: introduce devswap #¶ to serve /dev/swap and handle swapfile encryption
     1.1--- a/lib/namespace
     1.2+++ b/lib/namespace
     1.3@@ -10,6 +10,7 @@ bind #p /proc
     1.4 bind -c #s /srv
     1.5 bind -q #σ /shr
     1.6 bind -a #¤ /dev
     1.7+bind -qa #¶ /dev
     1.8 
     1.9 # authentication
    1.10 mount -b /srv/factotum /mnt
     2.1--- a/sys/man/3/cons
     2.2+++ b/sys/man/3/cons
     2.3@@ -22,7 +22,6 @@ cons \- console, clocks, process/process
     2.4 .B /dev/ppid
     2.5 .B /dev/random
     2.6 .B /dev/reboot
     2.7-.B /dev/swap
     2.8 .B /dev/sysname
     2.9 .B /dev/sysstat
    2.10 .B /dev/time
    2.11@@ -240,41 +239,6 @@ Writing anything to
    2.12 .B sysstat
    2.13 resets all of the counts on all processors.
    2.14 .PP
    2.15-The
    2.16-.B swap
    2.17-device holds a text block giving memory usage statistics:
    2.18-.IP
    2.19-.EX
    2.20-\fIn\fP memory
    2.21-\fIn\fP pagesize
    2.22-\fIn\fP kernel
    2.23-\fIn\fP/\fIm\fP user
    2.24-\fIn\fP/\fIm\fP swap
    2.25-\fIa\fP/\fIn\fP/\fIm\fP kernel malloc
    2.26-\fIa\fP/\fIn\fP/\fIm\fP kernel draw
    2.27-.EE
    2.28-.PP
    2.29-These are total memory (bytes), system page size (bytes),
    2.30-kernel memory (pages), user memory (pages), swap space (pages),
    2.31-kernel malloced data (bytes), and kernel graphics data (bytes).
    2.32-The expression
    2.33-.IR n / m
    2.34-indicates
    2.35-.I n
    2.36-used out of
    2.37-.I m
    2.38-available.
    2.39-For kernel malloc and kernel draw,
    2.40-.IR a
    2.41-indicates the current allocation in bytes.
    2.42-These numbers are not blank padded.
    2.43-.PP
    2.44-To turn on swapping, write to
    2.45-.B swap
    2.46-the textual file descriptor number of a file or device on which to swap.
    2.47-See
    2.48-.IR swap (8).
    2.49-.PP
    2.50 Reads and writes to
    2.51 .IR mordor
    2.52 will inevitably cause the front to fall off.
     3.1new file mode 100644
     3.2--- /dev/null
     3.3+++ b/sys/man/3/swap
     3.4@@ -0,0 +1,46 @@
     3.5+.TH SWAP 3
     3.6+.SH NAME
     3.7+swap \- memory usage statistics and pagefile control
     3.8+.SH SYNOPSIS
     3.9+.nf
    3.10+.B bind -a #¶ /dev
    3.11+
    3.12+.B /dev/swap
    3.13+.fi
    3.14+.SH DESCRIPTION
    3.15+The
    3.16+.B swap
    3.17+device holds a text block giving memory usage statistics:
    3.18+.IP
    3.19+.EX
    3.20+\fIn\fP memory
    3.21+\fIn\fP pagesize
    3.22+\fIn\fP kernel
    3.23+\fIn\fP/\fIm\fP user
    3.24+\fIn\fP/\fIm\fP swap
    3.25+\fIa\fP/\fIn\fP/\fIm\fP kernel malloc
    3.26+\fIa\fP/\fIn\fP/\fIm\fP kernel draw
    3.27+.EE
    3.28+.PP
    3.29+These are total memory (bytes), system page size (bytes),
    3.30+kernel memory (pages), user memory (pages), swap space (pages),
    3.31+kernel malloced data (bytes), and kernel graphics data (bytes).
    3.32+The expression
    3.33+.IR n / m
    3.34+indicates
    3.35+.I n
    3.36+used out of
    3.37+.I m
    3.38+available.
    3.39+For kernel malloc and kernel draw,
    3.40+.IR a
    3.41+indicates the current allocation in bytes.
    3.42+These numbers are not blank padded.
    3.43+.PP
    3.44+To turn on swapping, write to
    3.45+.B swap
    3.46+the textual file descriptor number of a file or device on which to swap.
    3.47+.SH SEE ALSO
     3.48+.IR swap (8)
    3.49+.SH SOURCE
    3.50+.B /sys/src/9/port/devswap.c
     4.1--- a/sys/man/8/swap
     4.2+++ b/sys/man/8/swap
     4.3@@ -35,4 +35,5 @@ will lead to deadlock if the process isn
     4.4 ctl-message in
     4.5 .IR proc (3)).
     4.6 .SH "SEE ALSO"
     4.7+.IR swap (3),
     4.8 .IR proc (3)
     5.1--- a/sys/src/9/bcm/main.c
     5.2+++ b/sys/src/9/bcm/main.c
     5.3@@ -270,7 +270,6 @@ main(void)
     5.4 	links();
     5.5 	chandevreset();			/* most devices are discovered here */
     5.6 	pageinit();
     5.7-	swapinit();
     5.8 	userinit();
     5.9 	gpiomeminit();
    5.10 	schedinit();
     6.1--- a/sys/src/9/bcm/mkfile
     6.2+++ b/sys/src/9/bcm/mkfile
     6.3@@ -33,7 +33,6 @@ PORT=\
     6.4 	rdb.$O\
     6.5 	rebootcmd.$O\
     6.6 	segment.$O\
     6.7-	swap.$O\
     6.8 	syscallfmt.$O\
     6.9 	sysfile.$O\
    6.10 	sysproc.$O\
     7.1--- a/sys/src/9/bcm/picpuf
     7.2+++ b/sys/src/9/bcm/picpuf
     7.3@@ -1,6 +1,7 @@
     7.4 dev
     7.5 	root
     7.6 	cons
     7.7+	swap
     7.8 	env
     7.9 	pipe
    7.10 	proc
     8.1--- a/sys/src/9/bcm/pif
     8.2+++ b/sys/src/9/bcm/pif
     8.3@@ -1,6 +1,7 @@
     8.4 dev
     8.5 	root
     8.6 	cons
     8.7+	swap
     8.8 	env
     8.9 	pipe
    8.10 	proc
     9.1--- a/sys/src/9/boot/bootrc
     9.2+++ b/sys/src/9/boot/bootrc
     9.3@@ -10,7 +10,7 @@ unmount /root
     9.4 
     9.5 bind -q '#d' /fd
     9.6 bind -q '#p' /proc
     9.7-for(i in S f k æ t b m)
     9.8+for(i in ¶ P S f k æ t b m)
     9.9 	bind -qa '#'^$i /dev
    9.10 
    9.11 # bind in an ip interface
    10.1--- a/sys/src/9/kw/main.c
    10.2+++ b/sys/src/9/kw/main.c
    10.3@@ -322,7 +322,6 @@ wave(' ');
    10.4 	chandevreset();			/* most devices are discovered here */
    10.5 
    10.6 	pageinit();
    10.7-	swapinit();
    10.8 	userinit();
    10.9 	schedinit();
   10.10 	panic("schedinit returned");
    11.1--- a/sys/src/9/kw/mkfile
    11.2+++ b/sys/src/9/kw/mkfile
    11.3@@ -32,7 +32,6 @@ PORT=\
    11.4 	qio.$O\
    11.5 	qlock.$O\
    11.6 	segment.$O\
    11.7-	swap.$O\
    11.8 	syscallfmt.$O\
    11.9 	sysfile.$O\
   11.10 	sysproc.$O\
    12.1--- a/sys/src/9/kw/plug
    12.2+++ b/sys/src/9/kw/plug
    12.3@@ -3,6 +3,7 @@
    12.4 dev
    12.5 	root
    12.6 	cons
    12.7+	swap
    12.8 	env
    12.9 	pipe
   12.10 	proc
    13.1--- a/sys/src/9/mtx/main.c
    13.2+++ b/sys/src/9/mtx/main.c
    13.3@@ -35,7 +35,6 @@ main(void)
    13.4 	links();
    13.5 	chandevreset();
    13.6 	pageinit();
    13.7-	swapinit();
    13.8 	fpsave(&initfp);
    13.9 	initfp.fpscr = 0;
   13.10 	userinit();
    14.1--- a/sys/src/9/mtx/mkfile
    14.2+++ b/sys/src/9/mtx/mkfile
    14.3@@ -30,7 +30,6 @@ PORT=\
    14.4 	qlock.$O\
    14.5 	rdb.$O\
    14.6 	segment.$O\
    14.7-	swap.$O\
    14.8 	sysfile.$O\
    14.9 	sysproc.$O\
   14.10 	taslock.$O\
    15.1--- a/sys/src/9/mtx/mtx
    15.2+++ b/sys/src/9/mtx/mtx
    15.3@@ -1,6 +1,7 @@
    15.4 dev
    15.5 	root
    15.6 	cons
    15.7+	swap
    15.8 	arch
    15.9 	pnp		pci
   15.10 	env
    16.1--- a/sys/src/9/mtx/mtxcpu
    16.2+++ b/sys/src/9/mtx/mtxcpu
    16.3@@ -1,6 +1,7 @@
    16.4 dev
    16.5 	root
    16.6 	cons
    16.7+	swap
    16.8 	arch
    16.9 	pnp		pci
   16.10 	env
    17.1--- a/sys/src/9/omap/beagle
    17.2+++ b/sys/src/9/omap/beagle
    17.3@@ -2,6 +2,7 @@
    17.4 dev
    17.5 	root
    17.6 	cons
    17.7+	swap
    17.8 	env
    17.9 	pipe
   17.10 	proc
    18.1--- a/sys/src/9/omap/main.c
    18.2+++ b/sys/src/9/omap/main.c
    18.3@@ -276,7 +276,6 @@ wave('l');
    18.4 //	i8250console();			/* too early; see init0 */
    18.5 
    18.6 	pageinit();
    18.7-	swapinit();
    18.8 	userinit();
    18.9 	schedinit();
   18.10 }
    19.1--- a/sys/src/9/omap/mkfile
    19.2+++ b/sys/src/9/omap/mkfile
    19.3@@ -33,7 +33,6 @@ PORT=\
    19.4 	qio.$O\
    19.5 	qlock.$O\
    19.6 	segment.$O\
    19.7-	swap.$O\
    19.8 	sysfile.$O\
    19.9 	sysproc.$O\
   19.10 	taslock.$O\
    20.1--- a/sys/src/9/pc/main.c
    20.2+++ b/sys/src/9/pc/main.c
    20.3@@ -62,7 +62,6 @@ main(void)
    20.4 	chandevreset();
    20.5 	netconsole();
    20.6 	pageinit();
    20.7-	swapinit();
    20.8 	userinit();
    20.9 	schedinit();
   20.10 }
    21.1--- a/sys/src/9/pc/mkfile
    21.2+++ b/sys/src/9/pc/mkfile
    21.3@@ -35,7 +35,6 @@ PORT=\
    21.4 	rdb.$O\
    21.5 	rebootcmd.$O\
    21.6 	segment.$O\
    21.7-	swap.$O\
    21.8 	syscallfmt.$O\
    21.9 	sysfile.$O\
   21.10 	sysproc.$O\
    22.1--- a/sys/src/9/pc/pc
    22.2+++ b/sys/src/9/pc/pc
    22.3@@ -2,6 +2,7 @@
    22.4 dev
    22.5 	root
    22.6 	cons
    22.7+	swap
    22.8 	arch
    22.9 	pnp		pci
   22.10 	env
    23.1--- a/sys/src/9/pc64/main.c
    23.2+++ b/sys/src/9/pc64/main.c
    23.3@@ -332,7 +332,6 @@ main()
    23.4 	netconsole();
    23.5 	preallocpages();
    23.6 	pageinit();
    23.7-	swapinit();
    23.8 	userinit();
    23.9 	schedinit();
   23.10 }
    24.1--- a/sys/src/9/pc64/mkfile
    24.2+++ b/sys/src/9/pc64/mkfile
    24.3@@ -33,7 +33,6 @@ PORT=\
    24.4 	rdb.$O\
    24.5 	rebootcmd.$O\
    24.6 	segment.$O\
    24.7-	swap.$O\
    24.8 	syscallfmt.$O\
    24.9 	sysfile.$O\
   24.10 	sysproc.$O\
    25.1--- a/sys/src/9/pc64/pc64
    25.2+++ b/sys/src/9/pc64/pc64
    25.3@@ -2,6 +2,7 @@
    25.4 dev
    25.5 	root
    25.6 	cons
    25.7+	swap
    25.8 	arch
    25.9 	pnp		pci
   25.10 	env
    26.1--- a/sys/src/9/port/devcons.c
    26.2+++ b/sys/src/9/port/devcons.c
    26.3@@ -5,7 +5,6 @@
    26.4 #include	"fns.h"
    26.5 #include	"../port/error.h"
    26.6 
    26.7-#include	<pool.h>
    26.8 #include	<authsrv.h>
    26.9 
   26.10 void	(*consdebug)(void) = nil;
   26.11@@ -324,7 +323,6 @@ enum{
   26.12 	Qppid,
   26.13 	Qrandom,
   26.14 	Qreboot,
   26.15-	Qswap,
   26.16 	Qsysname,
   26.17 	Qsysstat,
   26.18 	Qtime,
   26.19@@ -357,7 +355,6 @@ static Dirtab consdir[]={
   26.20 	"ppid",		{Qppid},	NUMSIZE,	0444,
   26.21 	"random",	{Qrandom},	0,		0444,
   26.22 	"reboot",	{Qreboot},	0,		0664,
   26.23-	"swap",		{Qswap},	0,		0664,
   26.24 	"sysname",	{Qsysname},	0,		0664,
   26.25 	"sysstat",	{Qsysstat},	0,		0666,
   26.26 	"time",		{Qtime},	NUMSIZE+3*VLNUMSIZE,	0664,
   26.27@@ -471,8 +468,6 @@ consread(Chan *c, void *buf, long n, vlo
   26.28 	int i, k, id;
   26.29 	vlong offset = off;
   26.30 	extern char configfile[];
   26.31-	extern Image fscache;
   26.32-	extern Image swapimage;
   26.33 
   26.34 	if(n <= 0)
   26.35 		return n;
   26.36@@ -592,33 +587,6 @@ consread(Chan *c, void *buf, long n, vlo
   26.37 		poperror();
   26.38 		return n;
   26.39 
   26.40-	case Qswap:
   26.41-		snprint(tmp, sizeof tmp,
   26.42-			"%llud memory\n"
   26.43-			"%llud pagesize\n"
   26.44-			"%lud kernel\n"
   26.45-			"%lud/%lud user\n"
   26.46-			"%lud/%lud swap\n"
   26.47-			"%llud/%llud/%llud kernel malloc\n"
   26.48-			"%llud/%llud/%llud kernel draw\n"
   26.49-			"%llud/%llud/%llud kernel secret\n",
   26.50-			(uvlong)conf.npage*BY2PG,
   26.51-			(uvlong)BY2PG,
   26.52-			conf.npage-conf.upages,
   26.53-			palloc.user-palloc.freecount-fscache.pgref-swapimage.pgref, palloc.user,
   26.54-			conf.nswap-swapalloc.free, conf.nswap,
   26.55-			(uvlong)mainmem->curalloc,
   26.56-			(uvlong)mainmem->cursize,
   26.57-			(uvlong)mainmem->maxsize,
   26.58-			(uvlong)imagmem->curalloc,
   26.59-			(uvlong)imagmem->cursize,
   26.60-			(uvlong)imagmem->maxsize,
   26.61-			(uvlong)secrmem->curalloc,
   26.62-			(uvlong)secrmem->cursize,
   26.63-			(uvlong)secrmem->maxsize);
   26.64-
   26.65-		return readstr((ulong)offset, buf, n, tmp);
   26.66-
   26.67 	case Qsysname:
   26.68 		if(sysname == nil)
   26.69 			return 0;
   26.70@@ -669,8 +637,7 @@ conswrite(Chan *c, void *va, long n, vlo
   26.71 	long l, bp;
   26.72 	char *a;
   26.73 	Mach *mp;
   26.74-	int id, fd;
   26.75-	Chan *swc;
   26.76+	int id;
   26.77 	ulong offset;
   26.78 	Cmdbuf *cb;
   26.79 	Cmdtab *ct;
   26.80@@ -765,25 +732,6 @@ conswrite(Chan *c, void *va, long n, vlo
   26.81 		}
   26.82 		break;
   26.83 
   26.84-	case Qswap:
   26.85-		if(n >= sizeof buf)
   26.86-			error(Egreg);
   26.87-		memmove(buf, va, n);	/* so we can NUL-terminate */
   26.88-		buf[n] = 0;
   26.89-		/* start a pager if not already started */
   26.90-		if(strncmp(buf, "start", 5) == 0){
   26.91-			kickpager();
   26.92-			break;
   26.93-		}
   26.94-		if(!iseve())
   26.95-			error(Eperm);
   26.96-		if(buf[0]<'0' || '9'<buf[0])
   26.97-			error(Ebadarg);
   26.98-		fd = strtoul(buf, 0, 0);
   26.99-		swc = fdtochan(fd, ORDWR, 1, 1);
  26.100-		setswapchan(swc);
  26.101-		break;
  26.102-
  26.103 	case Qsysname:
  26.104 		if(offset != 0)
  26.105 			error(Ebadarg);
    27.1new file mode 100644
    27.2--- /dev/null
    27.3+++ b/sys/src/9/port/devswap.c
    27.4@@ -0,0 +1,612 @@
    27.5+#include	"u.h"
    27.6+#include	"../port/lib.h"
    27.7+#include	"mem.h"
    27.8+#include	"dat.h"
    27.9+#include	"fns.h"
   27.10+#include	"../port/error.h"
   27.11+
   27.12+#include	<libsec.h>
   27.13+#include	<pool.h>
   27.14+
   27.15+static int	canflush(Proc*, Segment*);
   27.16+static void	executeio(void);
   27.17+static void	pageout(Proc*, Segment*);
   27.18+static void	pagepte(int, Page**);
   27.19+static void	pager(void*);
   27.20+
   27.21+Image 	swapimage = {
   27.22+	.notext = 1,
   27.23+};
   27.24+
   27.25+static Chan	*swapchan;
   27.26+static uchar	*swapbuf;
   27.27+static AESstate *swapkey;
   27.28+
   27.29+static Page	**iolist;
   27.30+static int	ioptr;
   27.31+
   27.32+static ushort	ageclock;
   27.33+
   27.34+static void
   27.35+swapinit(void)
   27.36+{
   27.37+	swapalloc.swmap = xalloc(conf.nswap);
   27.38+	swapalloc.top = &swapalloc.swmap[conf.nswap];
   27.39+	swapalloc.alloc = swapalloc.swmap;
   27.40+	swapalloc.last = swapalloc.swmap;
   27.41+	swapalloc.free = conf.nswap;
   27.42+	swapalloc.xref = 0;
   27.43+
   27.44+	iolist = xalloc(conf.nswppo*sizeof(Page*));
   27.45+	if(swapalloc.swmap == nil || iolist == nil)
   27.46+		panic("swapinit: not enough memory");
   27.47+}
   27.48+
   27.49+static uintptr
   27.50+newswap(void)
   27.51+{
   27.52+	uchar *look;
   27.53+
   27.54+	lock(&swapalloc);
   27.55+	if(swapalloc.free == 0) {
   27.56+		unlock(&swapalloc);
   27.57+		return ~0;
   27.58+	}
   27.59+	look = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last);
   27.60+	if(look == nil)
   27.61+		look = memchr(swapalloc.swmap, 0, swapalloc.last-swapalloc.swmap);
   27.62+	*look = 2;	/* ref for pte + io transaction */
   27.63+	swapalloc.last = look;
   27.64+	swapalloc.free--;
   27.65+	unlock(&swapalloc);
   27.66+	return (look-swapalloc.swmap) * BY2PG;
   27.67+}
   27.68+
   27.69+void
   27.70+putswap(Page *p)
   27.71+{
   27.72+	uchar *idx;
   27.73+
   27.74+	lock(&swapalloc);
   27.75+	idx = &swapalloc.swmap[((uintptr)p)/BY2PG];
   27.76+	if(*idx == 0)
   27.77+		panic("putswap %#p ref == 0", p);
   27.78+
   27.79+	if(*idx == 255) {
   27.80+		if(swapalloc.xref == 0)
   27.81+			panic("putswap %#p xref == 0", p);
   27.82+
   27.83+		if(--swapalloc.xref == 0) {
   27.84+			for(idx = swapalloc.swmap; idx < swapalloc.top; idx++) {
   27.85+				if(*idx == 255) {
   27.86+					*idx = 0;
   27.87+					swapalloc.free++;
   27.88+				}
   27.89+			}
   27.90+		}
   27.91+	} else {
   27.92+		if(--(*idx) == 0)
   27.93+			swapalloc.free++;
   27.94+	}
   27.95+	unlock(&swapalloc);
   27.96+}
   27.97+
   27.98+void
   27.99+dupswap(Page *p)
  27.100+{
  27.101+	uchar *idx;
  27.102+
  27.103+	lock(&swapalloc);
  27.104+	idx = &swapalloc.swmap[((uintptr)p)/BY2PG];
  27.105+	if(*idx == 255)
  27.106+		swapalloc.xref++;
  27.107+	else {
  27.108+		if(++(*idx) == 255)
  27.109+			swapalloc.xref += 255;
  27.110+	}
  27.111+	unlock(&swapalloc);
  27.112+}
  27.113+
  27.114+int
  27.115+swapcount(uintptr daddr)
  27.116+{
  27.117+	return swapalloc.swmap[daddr/BY2PG];
  27.118+}
  27.119+
  27.120+void
  27.121+kickpager(void)
  27.122+{
  27.123+	static Ref started;
  27.124+
  27.125+	if(started.ref || incref(&started) != 1)
  27.126+		wakeup(&swapalloc.r);
  27.127+	else
  27.128+		kproc("pager", pager, 0);
  27.129+}
  27.130+
  27.131+static int
  27.132+reclaim(void)
  27.133+{
  27.134+	ulong np;
  27.135+
  27.136+	for(;;){
  27.137+		if((np = pagereclaim(&fscache, 1000)) > 0) {
  27.138+			if(0) print("reclaim: %lud fscache\n", np);
  27.139+		} else if((np = pagereclaim(&swapimage, 1000)) > 0) {
  27.140+			if(0) print("reclaim: %lud swap\n", np);
  27.141+		} else if((np = imagereclaim(1000)) > 0) {
  27.142+			if(0) print("reclaim: %lud image\n", np);
  27.143+		}
  27.144+		if(!needpages(nil))
  27.145+			return 1;	/* have pages, done */
  27.146+		if(np == 0)
  27.147+			return 0;	/* didnt reclaim, need to swap */
  27.148+		sched();
  27.149+	}
  27.150+}
  27.151+
  27.152+static void
  27.153+pager(void*)
  27.154+{
  27.155+	int i;
  27.156+	Segment *s;
  27.157+	Proc *p, *ep;
  27.158+
  27.159+	p = proctab(0);
  27.160+	ep = &p[conf.nproc];
  27.161+
  27.162+	while(waserror())
  27.163+		;
  27.164+
  27.165+	for(;;){
  27.166+		up->psstate = "Reclaim";
  27.167+		if(reclaim()){
  27.168+			up->psstate = "Idle";
  27.169+			wakeup(&palloc.pwait[0]);
  27.170+			wakeup(&palloc.pwait[1]);
  27.171+			sleep(&swapalloc.r, needpages, nil);
  27.172+			continue;
  27.173+		}
  27.174+
  27.175+		if(swapimage.c == nil || swapalloc.free == 0){
  27.176+		Killbig:
  27.177+			if(!freebroken())
  27.178+				killbig("out of memory");
  27.179+			sched();
  27.180+			continue;
  27.181+		}
  27.182+
  27.183+		i = ageclock;
  27.184+		do {
  27.185+			if(++p >= ep){
  27.186+				if(++ageclock == i)
  27.187+					goto Killbig;
  27.188+				p = proctab(0);
  27.189+			}
  27.190+		} while(p->state == Dead || p->noswap || !canqlock(&p->seglock));
  27.191+		up->psstate = "Pageout";
  27.192+		for(i = 0; i < NSEG; i++) {
  27.193+			if((s = p->seg[i]) != nil) {
  27.194+				switch(s->type&SG_TYPE) {
  27.195+				default:
  27.196+					break;
  27.197+				case SG_TEXT:
  27.198+					pageout(p, s);
  27.199+					break;
  27.200+				case SG_DATA:
  27.201+				case SG_BSS:
  27.202+				case SG_STACK:
  27.203+				case SG_SHARED:
  27.204+					pageout(p, s);
  27.205+					break;
  27.206+				}
  27.207+			}
  27.208+		}
  27.209+		qunlock(&p->seglock);
  27.210+
  27.211+		if(ioptr > 0) {
  27.212+			up->psstate = "I/O";
  27.213+			executeio();
  27.214+		}
  27.215+	}
  27.216+}
  27.217+
  27.218+static void
  27.219+pageout(Proc *p, Segment *s)
  27.220+{
  27.221+	int type, i, size;
  27.222+	short age;
  27.223+	Pte *l;
  27.224+	Page **pg, *entry;
  27.225+
  27.226+	if(!canqlock(s))	/* We cannot afford to wait, we will surely deadlock */
  27.227+		return;
  27.228+
  27.229+	if(!canflush(p, s)	/* Able to invalidate all tlbs with references */
  27.230+	|| waserror()) {
  27.231+		qunlock(s);
  27.232+		putseg(s);
  27.233+		return;
  27.234+	}
  27.235+
  27.236+	/* Pass through the pte tables looking for memory pages to swap out */
  27.237+	type = s->type&SG_TYPE;
  27.238+	size = s->mapsize;
  27.239+	for(i = 0; i < size; i++) {
  27.240+		l = s->map[i];
  27.241+		if(l == nil)
  27.242+			continue;
  27.243+		for(pg = l->first; pg <= l->last; pg++) {
  27.244+			entry = *pg;
  27.245+			if(pagedout(entry))
  27.246+				continue;
  27.247+			if(entry->modref & PG_REF) {
  27.248+				entry->modref &= ~PG_REF;
  27.249+				entry->refage = ageclock;
  27.250+				continue;
  27.251+			}
  27.252+			age = (short)(ageclock - entry->refage);
  27.253+			if(age < 16)
  27.254+				continue;
  27.255+			pagepte(type, pg);
  27.256+		}
  27.257+	}
  27.258+	poperror();
  27.259+	qunlock(s);
  27.260+	putseg(s);
  27.261+}
  27.262+
  27.263+static int
  27.264+canflush(Proc *p, Segment *s)
  27.265+{
  27.266+	int i;
  27.267+	Proc *ep;
  27.268+
  27.269+	if(incref(s) == 2)		/* Easy if we are the only user */
  27.270+		return canpage(p);
  27.271+
  27.272+	/* Now we must do hardwork to ensure all processes which have tlb
  27.273+	 * entries for this segment will be flushed if we succeed in paging it out
  27.274+	 */
  27.275+	p = proctab(0);
  27.276+	ep = &p[conf.nproc];
  27.277+	while(p < ep) {
  27.278+		if(p->state != Dead) {
  27.279+			for(i = 0; i < NSEG; i++)
  27.280+				if(p->seg[i] == s)
  27.281+					if(!canpage(p))
  27.282+						return 0;
  27.283+		}
  27.284+		p++;
  27.285+	}
  27.286+	return 1;
  27.287+}
  27.288+
  27.289+static void
  27.290+pagepte(int type, Page **pg)
  27.291+{
  27.292+	uintptr daddr;
  27.293+	Page *outp;
  27.294+
  27.295+	outp = *pg;
  27.296+	switch(type) {
  27.297+	case SG_TEXT:				/* Revert to demand load */
  27.298+		putpage(outp);
  27.299+		*pg = nil;
  27.300+		break;
  27.301+
  27.302+	case SG_DATA:
  27.303+	case SG_BSS:
  27.304+	case SG_STACK:
  27.305+	case SG_SHARED:
  27.306+		if(ioptr >= conf.nswppo)
  27.307+			break;
  27.308+
  27.309+		/*
  27.310+		 *  get a new swap address with swapcount 2, one for the pte
  27.311+		 *  and one extra ref for us while we write the page to disk
  27.312+		 */
  27.313+		daddr = newswap();
  27.314+		if(daddr == ~0)
  27.315+			break;
  27.316+
  27.317+		/* clear any pages referring to it from the cache */
  27.318+		cachedel(&swapimage, daddr);
  27.319+
  27.320+		/* forget anything that it used to cache */
  27.321+		uncachepage(outp);
  27.322+
  27.323+		/*
  27.324+		 *  enter it into the cache so that a fault happening
  27.325+		 *  during the write will grab the page from the cache
  27.326+		 *  rather than one partially written to the disk
  27.327+		 */
  27.328+		outp->daddr = daddr;
  27.329+		cachepage(outp, &swapimage);
  27.330+		*pg = (Page*)(daddr|PG_ONSWAP);
  27.331+
  27.332+		/* Add page to IO transaction list */
  27.333+		iolist[ioptr++] = outp;
  27.334+		break;
  27.335+	}
  27.336+}
  27.337+
  27.338+void
  27.339+pagersummary(void)
  27.340+{
  27.341+	print("%lud/%lud memory %lud/%lud swap %d iolist\n",
  27.342+		palloc.user-palloc.freecount,
  27.343+		palloc.user, conf.nswap-swapalloc.free, conf.nswap,
  27.344+		ioptr);
  27.345+}
  27.346+
  27.347+static void
  27.348+executeio(void)
  27.349+{
  27.350+	Page *outp;
  27.351+	int i, n;
  27.352+	Chan *c;
  27.353+	char *kaddr;
  27.354+	KMap *k;
  27.355+
  27.356+	c = swapimage.c;
  27.357+	for(i = 0; i < ioptr; i++) {
  27.358+		if(ioptr > conf.nswppo)
  27.359+			panic("executeio: ioptr %d > %d", ioptr, conf.nswppo);
  27.360+		outp = iolist[i];
  27.361+
  27.362+		assert(outp->ref > 0);
  27.363+		assert(outp->image == &swapimage);
  27.364+		assert(outp->daddr != ~0);
  27.365+
  27.366+		/* only write when swap address still in use */
  27.367+		if(swapcount(outp->daddr) > 1){
  27.368+			k = kmap(outp);
  27.369+			kaddr = (char*)VA(k);
  27.370+
  27.371+			if(waserror())
  27.372+				panic("executeio: page outp I/O error");
  27.373+
  27.374+			n = devtab[c->type]->write(c, kaddr, BY2PG, outp->daddr);
  27.375+			if(n != BY2PG)
  27.376+				nexterror();
  27.377+
  27.378+			kunmap(k);
  27.379+			poperror();
  27.380+		}
  27.381+
  27.382+		/* drop our extra swap reference */
  27.383+		putswap((Page*)outp->daddr);
  27.384+
  27.385+		/* Free up the page after I/O */
  27.386+		putpage(outp);
  27.387+	}
  27.388+	ioptr = 0;
  27.389+}
  27.390+
  27.391+int
  27.392+needpages(void*)
  27.393+{
  27.394+	return palloc.freecount < swapalloc.headroom;
  27.395+}
  27.396+
  27.397+static void
  27.398+setswapchan(Chan *c)
  27.399+{
  27.400+	uchar buf[sizeof(Dir)+100];
  27.401+	Dir d;
  27.402+	int n;
  27.403+
  27.404+	if(waserror()){
  27.405+		cclose(c);
  27.406+		nexterror();
  27.407+	}
  27.408+	if(swapimage.c != nil) {
  27.409+		if(swapalloc.free != conf.nswap)
  27.410+			error(Einuse);
  27.411+		cclose(swapimage.c);
  27.412+		swapimage.c = nil;
  27.413+	}
  27.414+
  27.415+	/*
  27.416+	 *  if this isn't a file, set the swap space
  27.417+	 *  to be at most the size of the partition
  27.418+	 */
  27.419+	if(devtab[c->type]->dc != L'M'){
  27.420+		n = devtab[c->type]->stat(c, buf, sizeof buf);
  27.421+		if(n <= 0 || convM2D(buf, n, &d, nil) == 0)
  27.422+			error("stat failed in setswapchan");
  27.423+		if(d.length < conf.nswppo*BY2PG)
  27.424+			error("swap device too small");
  27.425+		if(d.length < conf.nswap*BY2PG){
  27.426+			conf.nswap = d.length/BY2PG;
  27.427+			swapalloc.top = &swapalloc.swmap[conf.nswap];
  27.428+			swapalloc.free = conf.nswap;
  27.429+		}
  27.430+	}
  27.431+	c->flag &= ~CCACHE;
  27.432+	cclunk(c);
  27.433+	poperror();
  27.434+
  27.435+	swapchan = c;
  27.436+	swapimage.c = namec("#¶/swapfile", Aopen, ORDWR, 0);
  27.437+}
  27.438+
  27.439+enum {
  27.440+	Qdir,
  27.441+	Qswap,
  27.442+	Qswapfile,
  27.443+};
  27.444+
  27.445+static Dirtab swapdir[]={
  27.446+	".",		{Qdir, 0, QTDIR},	0,		DMDIR|0555,
  27.447+	"swap",		{Qswap},		0,		0664,
  27.448+	"swapfile",	{Qswapfile},		0,		0600,
  27.449+};
  27.450+
  27.451+static Chan*
  27.452+swapattach(char *spec)
  27.453+{
  27.454+	return devattach(L'¶', spec);
  27.455+}
  27.456+
  27.457+static Walkqid*
  27.458+swapwalk(Chan *c, Chan *nc, char **name, int nname)
  27.459+{
  27.460+	return devwalk(c, nc, name, nname, swapdir, nelem(swapdir), devgen);
  27.461+}
  27.462+
  27.463+static int
  27.464+swapstat(Chan *c, uchar *dp, int n)
  27.465+{
  27.466+	return devstat(c, dp, n, swapdir, nelem(swapdir), devgen);
  27.467+}
  27.468+
  27.469+static Chan*
  27.470+swapopen(Chan *c, int omode)
  27.471+{
  27.472+	uchar key[128/8];
  27.473+
  27.474+	switch((ulong)c->qid.path){
  27.475+	case Qswapfile:
  27.476+		if(!iseve() || omode != ORDWR)
  27.477+			error(Eperm);
  27.478+		if(swapimage.c != nil)
  27.479+			error(Einuse);
  27.480+		if(swapchan == nil)
  27.481+			error(Egreg);
  27.482+
  27.483+		c->mode = openmode(omode);
  27.484+		c->flag |= COPEN;
  27.485+		c->offset = 0;
  27.486+
  27.487+		swapbuf = mallocalign(BY2PG, BY2PG, 0, 0);
  27.488+		swapkey = secalloc(sizeof(AESstate)*2);
  27.489+		if(swapbuf == nil || swapkey == nil)
  27.490+			error(Enomem);
  27.491+
  27.492+		genrandom(key, sizeof(key));
  27.493+		setupAESstate(&swapkey[0], key, sizeof(key), nil);
  27.494+		genrandom(key, sizeof(key));
  27.495+		setupAESstate(&swapkey[1], key, sizeof(key), nil);
  27.496+		memset(key, 0, sizeof(key));
  27.497+
  27.498+		return c;
  27.499+	}
  27.500+	return devopen(c, omode, swapdir, nelem(swapdir), devgen);
  27.501+}
  27.502+
  27.503+static void
  27.504+swapclose(Chan *c)
  27.505+{
  27.506+	if((c->flag & COPEN) == 0)
  27.507+		return;
  27.508+	switch((ulong)c->qid.path){
  27.509+	case Qswapfile:
  27.510+		cclose(swapchan);
  27.511+		swapchan = nil;
  27.512+		secfree(swapkey);
  27.513+		swapkey = nil;
  27.514+		free(swapbuf);
  27.515+		swapbuf = nil;
  27.516+		break;
  27.517+	}
  27.518+}
  27.519+
  27.520+static long
  27.521+swapread(Chan *c, void *va, long n, vlong off)
  27.522+{
  27.523+	char tmp[256];		/* must be >= 18*NUMSIZE (Qswap) */
  27.524+
  27.525+	switch((ulong)c->qid.path){
  27.526+	case Qdir:
  27.527+		return devdirread(c, va, n, swapdir, nelem(swapdir), devgen);
  27.528+	case Qswap:
  27.529+		snprint(tmp, sizeof tmp,
  27.530+			"%llud memory\n"
  27.531+			"%llud pagesize\n"
  27.532+			"%lud kernel\n"
  27.533+			"%lud/%lud user\n"
  27.534+			"%lud/%lud swap\n"
  27.535+			"%llud/%llud/%llud kernel malloc\n"
  27.536+			"%llud/%llud/%llud kernel draw\n"
  27.537+			"%llud/%llud/%llud kernel secret\n",
  27.538+			(uvlong)conf.npage*BY2PG,
  27.539+			(uvlong)BY2PG,
  27.540+			conf.npage-conf.upages,
  27.541+			palloc.user-palloc.freecount-fscache.pgref-swapimage.pgref, palloc.user,
  27.542+			conf.nswap-swapalloc.free, conf.nswap,
  27.543+			(uvlong)mainmem->curalloc,
  27.544+			(uvlong)mainmem->cursize,
  27.545+			(uvlong)mainmem->maxsize,
  27.546+			(uvlong)imagmem->curalloc,
  27.547+			(uvlong)imagmem->cursize,
  27.548+			(uvlong)imagmem->maxsize,
  27.549+			(uvlong)secrmem->curalloc,
  27.550+			(uvlong)secrmem->cursize,
  27.551+			(uvlong)secrmem->maxsize);
  27.552+		return readstr((ulong)off, va, n, tmp);
  27.553+	case Qswapfile:
  27.554+		if(n != BY2PG)
  27.555+			error(Ebadarg);
  27.556+		if(devtab[swapchan->type]->read(swapchan, va, n, off) != n)
  27.557+			error(Eio);
  27.558+		aes_xts_decrypt(&swapkey[0], &swapkey[1], off, va, va, n);
  27.559+		return n;
  27.560+	}
  27.561+	error(Egreg);
  27.562+	return 0;
  27.563+}
  27.564+
  27.565+static long
  27.566+swapwrite(Chan *c, void *va, long n, vlong off)
  27.567+{
  27.568+	char buf[256];
  27.569+	
  27.570+	switch((ulong)c->qid.path){
  27.571+	case Qswap:
  27.572+		if(!iseve())
  27.573+			error(Eperm);
  27.574+		if(n >= sizeof buf)
  27.575+			error(Egreg);
  27.576+		memmove(buf, va, n);	/* so we can NUL-terminate */
  27.577+		buf[n] = 0;
  27.578+		/* start a pager if not already started */
  27.579+		if(strncmp(buf, "start", 5) == 0)
  27.580+			kickpager();
  27.581+		else if(buf[0]>='0' && '9'<=buf[0])
  27.582+			setswapchan(fdtochan(strtoul(buf, nil, 0), ORDWR, 1, 1));
  27.583+		else
  27.584+			error(Ebadctl);
  27.585+		return n;
  27.586+	case Qswapfile:
  27.587+		if(n != BY2PG)
  27.588+			error(Ebadarg);
  27.589+		aes_xts_encrypt(&swapkey[0], &swapkey[1], off, va, swapbuf, n);
  27.590+		if(devtab[swapchan->type]->write(swapchan, swapbuf, n, off) != n)
  27.591+			error(Eio);
  27.592+		return n;
  27.593+	}
  27.594+	error(Egreg);
  27.595+	return 0;
  27.596+}
  27.597+
  27.598+Dev swapdevtab = {
  27.599+	L'¶',
  27.600+	"swap",
  27.601+	devreset,
  27.602+	swapinit,
  27.603+	devshutdown,
  27.604+	swapattach,
  27.605+	swapwalk,
  27.606+	swapstat,
  27.607+	swapopen,
  27.608+	devcreate,
  27.609+	swapclose,
  27.610+	swapread,
  27.611+	devbread,
  27.612+	swapwrite,
  27.613+	devbwrite,
  27.614+	devremove,
  27.615+	devwstat,
  27.616+};
    28.1--- a/sys/src/9/port/portfns.h
    28.2+++ b/sys/src/9/port/portfns.h
    28.3@@ -318,7 +318,6 @@ int		setlabel(Label*);
    28.4 void		setmalloctag(void*, uintptr);
    28.5 void		setrealloctag(void*, uintptr);
    28.6 void		setregisters(Ureg*, char*, char*, int);
    28.7-void		setswapchan(Chan*);
    28.8 void		setupwatchpts(Proc*, Watchpt*, int);
    28.9 char*		skipslash(char*);
   28.10 void		sleep(Rendez*, int(*)(void*), void*);
   28.11@@ -332,7 +331,6 @@ void		srvrenameuser(char*, char*);
   28.12 void		shrrenameuser(char*, char*);
   28.13 int		swapcount(uintptr);
   28.14 int		swapfull(void);
   28.15-void		swapinit(void);
   28.16 void		syscallfmt(ulong syscallno, uintptr pc, va_list list);
   28.17 void		sysretfmt(ulong syscallno, va_list list, uintptr ret, uvlong start, uvlong stop);
   28.18 void		timeradd(Timer*);
    29.1--- a/sys/src/9/port/portmkfile
    29.2+++ b/sys/src/9/port/portmkfile
    29.3@@ -62,15 +62,15 @@ errstr.h:	../port/mkerrstr ../port/error
    29.4 %.db:		main.$O
    29.5 	$CC -s$stem main.c | dbfmt > $stem.db
    29.6 
    29.7-alloc.$O:	/sys/include/pool.h
    29.8+alloc.$O devswap.$O:	/sys/include/pool.h
    29.9 devmnt.$O:	/sys/include/fcall.h
   29.10 proc.$O proc.acid:	errstr.h
   29.11 devroot.$O:	errstr.h
   29.12 devaudio.$O:	../port/audioif.h
   29.13-devaoe.$O:	/$objtype/include/ureg.h
   29.14-devfs.$O:	/$objtype/include/ureg.h
   29.15-devsd.$O:	/$objtype/include/ureg.h
   29.16-sdscsi.$O:	/$objtype/include/ureg.h
   29.17+devaoe.$O:	../port/sd.h /$objtype/include/ureg.h
   29.18+devfs.$O:	../port/sd.h /$objtype/include/ureg.h
   29.19+devsd.$O:	../port/sd.h /$objtype/include/ureg.h
   29.20+sdscsi.$O:	../port/sd.h /$objtype/include/ureg.h
   29.21 trap.$O:	/$objtype/include/ureg.h
   29.22 devproc.$O:	/$objtype/include/ureg.h
   29.23 main.$O:	init.h
   29.24@@ -87,3 +87,5 @@ unthwack.$O:	../port/thwack.h
   29.25 devsdp.$O:	../port/thwack.h
   29.26 devproc.$O sysproc.$O:	/sys/include/tos.h
   29.27 devproc.$O edf.$O proc.$O: /sys/include/trace.h
   29.28+devcons.$O:	/sys/include/authsrv.h
   29.29+devcap.$O devfs.$O devsdp.$O devssl.$O devtls.$O devswap.$O random.$O: /sys/include/libsec.h
    30.1deleted file mode 100644
    30.2--- a/sys/src/9/port/swap.c
    30.3+++ /dev/null
    30.4@@ -1,430 +0,0 @@
    30.5-#include	"u.h"
    30.6-#include	"../port/lib.h"
    30.7-#include	"mem.h"
    30.8-#include	"dat.h"
    30.9-#include	"fns.h"
   30.10-#include	"../port/error.h"
   30.11-
   30.12-static int	canflush(Proc*, Segment*);
   30.13-static void	executeio(void);
   30.14-static void	pageout(Proc*, Segment*);
   30.15-static void	pagepte(int, Page**);
   30.16-static void	pager(void*);
   30.17-
   30.18-Image 	swapimage;
   30.19-
   30.20-static 	int	swopen;
   30.21-static	Page	**iolist;
   30.22-static	int	ioptr;
   30.23-
   30.24-static	ushort	ageclock;
   30.25-
   30.26-void
   30.27-swapinit(void)
   30.28-{
   30.29-	swapalloc.swmap = xalloc(conf.nswap);
   30.30-	swapalloc.top = &swapalloc.swmap[conf.nswap];
   30.31-	swapalloc.alloc = swapalloc.swmap;
   30.32-	swapalloc.last = swapalloc.swmap;
   30.33-	swapalloc.free = conf.nswap;
   30.34-	swapalloc.xref = 0;
   30.35-
   30.36-	iolist = xalloc(conf.nswppo*sizeof(Page*));
   30.37-	if(swapalloc.swmap == 0 || iolist == 0)
   30.38-		panic("swapinit: not enough memory");
   30.39-
   30.40-	swapimage.notext = 1;
   30.41-}
   30.42-
   30.43-static uintptr
   30.44-newswap(void)
   30.45-{
   30.46-	uchar *look;
   30.47-
   30.48-	lock(&swapalloc);
   30.49-	if(swapalloc.free == 0) {
   30.50-		unlock(&swapalloc);
   30.51-		return ~0;
   30.52-	}
   30.53-	look = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last);
   30.54-	if(look == nil)
   30.55-		look = memchr(swapalloc.swmap, 0, swapalloc.last-swapalloc.swmap);
   30.56-	*look = 2;	/* ref for pte + io transaction */
   30.57-	swapalloc.last = look;
   30.58-	swapalloc.free--;
   30.59-	unlock(&swapalloc);
   30.60-	return (look-swapalloc.swmap) * BY2PG;
   30.61-}
   30.62-
   30.63-void
   30.64-putswap(Page *p)
   30.65-{
   30.66-	uchar *idx;
   30.67-
   30.68-	lock(&swapalloc);
   30.69-	idx = &swapalloc.swmap[((uintptr)p)/BY2PG];
   30.70-	if(*idx == 0)
   30.71-		panic("putswap %#p ref == 0", p);
   30.72-
   30.73-	if(*idx == 255) {
   30.74-		if(swapalloc.xref == 0)
   30.75-			panic("putswap %#p xref == 0", p);
   30.76-
   30.77-		if(--swapalloc.xref == 0) {
   30.78-			for(idx = swapalloc.swmap; idx < swapalloc.top; idx++) {
   30.79-				if(*idx == 255) {
   30.80-					*idx = 0;
   30.81-					swapalloc.free++;
   30.82-				}
   30.83-			}
   30.84-		}
   30.85-	} else {
   30.86-		if(--(*idx) == 0)
   30.87-			swapalloc.free++;
   30.88-	}
   30.89-	unlock(&swapalloc);
   30.90-}
   30.91-
   30.92-void
   30.93-dupswap(Page *p)
   30.94-{
   30.95-	uchar *idx;
   30.96-
   30.97-	lock(&swapalloc);
   30.98-	idx = &swapalloc.swmap[((uintptr)p)/BY2PG];
   30.99-	if(*idx == 255)
  30.100-		swapalloc.xref++;
  30.101-	else {
  30.102-		if(++(*idx) == 255)
  30.103-			swapalloc.xref += 255;
  30.104-	}
  30.105-	unlock(&swapalloc);
  30.106-}
  30.107-
  30.108-int
  30.109-swapcount(uintptr daddr)
  30.110-{
  30.111-	return swapalloc.swmap[daddr/BY2PG];
  30.112-}
  30.113-
  30.114-void
  30.115-kickpager(void)
  30.116-{
  30.117-	static Ref started;
  30.118-
  30.119-	if(started.ref || incref(&started) != 1)
  30.120-		wakeup(&swapalloc.r);
  30.121-	else
  30.122-		kproc("pager", pager, 0);
  30.123-}
  30.124-
  30.125-static int
  30.126-reclaim(void)
  30.127-{
  30.128-	ulong np;
  30.129-
  30.130-	for(;;){
  30.131-		if((np = pagereclaim(&fscache, 1000)) > 0) {
  30.132-			if(0) print("reclaim: %lud fscache\n", np);
  30.133-		} else if((np = pagereclaim(&swapimage, 1000)) > 0) {
  30.134-			if(0) print("reclaim: %lud swap\n", np);
  30.135-		} else if((np = imagereclaim(1000)) > 0) {
  30.136-			if(0) print("reclaim: %lud image\n", np);
  30.137-		}
  30.138-		if(!needpages(nil))
  30.139-			return 1;	/* have pages, done */
  30.140-		if(np == 0)
  30.141-			return 0;	/* didnt reclaim, need to swap */
  30.142-		sched();
  30.143-	}
  30.144-}
  30.145-
  30.146-static void
  30.147-pager(void*)
  30.148-{
  30.149-	int i;
  30.150-	Segment *s;
  30.151-	Proc *p, *ep;
  30.152-
  30.153-	p = proctab(0);
  30.154-	ep = &p[conf.nproc];
  30.155-
  30.156-	while(waserror())
  30.157-		;
  30.158-
  30.159-	for(;;){
  30.160-		up->psstate = "Reclaim";
  30.161-		if(reclaim()){
  30.162-			up->psstate = "Idle";
  30.163-			wakeup(&palloc.pwait[0]);
  30.164-			wakeup(&palloc.pwait[1]);
  30.165-			sleep(&swapalloc.r, needpages, nil);
  30.166-			continue;
  30.167-		}
  30.168-
  30.169-		if(swapimage.c == nil || swapalloc.free == 0){
  30.170-		Killbig:
  30.171-			if(!freebroken())
  30.172-				killbig("out of memory");
  30.173-			sched();
  30.174-			continue;
  30.175-		}
  30.176-
  30.177-		i = ageclock;
  30.178-		do {
  30.179-			if(++p >= ep){
  30.180-				if(++ageclock == i)
  30.181-					goto Killbig;
  30.182-				p = proctab(0);
  30.183-			}
  30.184-		} while(p->state == Dead || p->noswap || !canqlock(&p->seglock));
  30.185-		up->psstate = "Pageout";
  30.186-		for(i = 0; i < NSEG; i++) {
  30.187-			if((s = p->seg[i]) != nil) {
  30.188-				switch(s->type&SG_TYPE) {
  30.189-				default:
  30.190-					break;
  30.191-				case SG_TEXT:
  30.192-					pageout(p, s);
  30.193-					break;
  30.194-				case SG_DATA:
  30.195-				case SG_BSS:
  30.196-				case SG_STACK:
  30.197-				case SG_SHARED:
  30.198-					pageout(p, s);
  30.199-					break;
  30.200-				}
  30.201-			}
  30.202-		}
  30.203-		qunlock(&p->seglock);
  30.204-
  30.205-		if(ioptr > 0) {
  30.206-			up->psstate = "I/O";
  30.207-			executeio();
  30.208-		}
  30.209-	}
  30.210-}
  30.211-
  30.212-static void
  30.213-pageout(Proc *p, Segment *s)
  30.214-{
  30.215-	int type, i, size;
  30.216-	short age;
  30.217-	Pte *l;
  30.218-	Page **pg, *entry;
  30.219-
  30.220-	if(!canqlock(s))	/* We cannot afford to wait, we will surely deadlock */
  30.221-		return;
  30.222-
  30.223-	if(!canflush(p, s)) {	/* Able to invalidate all tlbs with references */
  30.224-		qunlock(s);
  30.225-		putseg(s);
  30.226-		return;
  30.227-	}
  30.228-
  30.229-	if(waserror()) {
  30.230-		qunlock(s);
  30.231-		putseg(s);
  30.232-		return;
  30.233-	}
  30.234-
  30.235-	/* Pass through the pte tables looking for memory pages to swap out */
  30.236-	type = s->type&SG_TYPE;
  30.237-	size = s->mapsize;
  30.238-	for(i = 0; i < size; i++) {
  30.239-		l = s->map[i];
  30.240-		if(l == nil)
  30.241-			continue;
  30.242-		for(pg = l->first; pg <= l->last; pg++) {
  30.243-			entry = *pg;
  30.244-			if(pagedout(entry))
  30.245-				continue;
  30.246-			if(entry->modref & PG_REF) {
  30.247-				entry->modref &= ~PG_REF;
  30.248-				entry->refage = ageclock;
  30.249-				continue;
  30.250-			}
  30.251-			age = (short)(ageclock - entry->refage);
  30.252-			if(age < 16)
  30.253-				continue;
  30.254-			pagepte(type, pg);
  30.255-		}
  30.256-	}
  30.257-	poperror();
  30.258-	qunlock(s);
  30.259-	putseg(s);
  30.260-}
  30.261-
  30.262-static int
  30.263-canflush(Proc *p, Segment *s)
  30.264-{
  30.265-	int i;
  30.266-	Proc *ep;
  30.267-
  30.268-	if(incref(s) == 2)		/* Easy if we are the only user */
  30.269-		return canpage(p);
  30.270-
  30.271-	/* Now we must do hardwork to ensure all processes which have tlb
  30.272-	 * entries for this segment will be flushed if we succeed in paging it out
  30.273-	 */
  30.274-	p = proctab(0);
  30.275-	ep = &p[conf.nproc];
  30.276-	while(p < ep) {
  30.277-		if(p->state != Dead) {
  30.278-			for(i = 0; i < NSEG; i++)
  30.279-				if(p->seg[i] == s)
  30.280-					if(!canpage(p))
  30.281-						return 0;
  30.282-		}
  30.283-		p++;
  30.284-	}
  30.285-	return 1;
  30.286-}
  30.287-
  30.288-static void
  30.289-pagepte(int type, Page **pg)
  30.290-{
  30.291-	uintptr daddr;
  30.292-	Page *outp;
  30.293-
  30.294-	outp = *pg;
  30.295-	switch(type) {
  30.296-	case SG_TEXT:				/* Revert to demand load */
  30.297-		putpage(outp);
  30.298-		*pg = nil;
  30.299-		break;
  30.300-
  30.301-	case SG_DATA:
  30.302-	case SG_BSS:
  30.303-	case SG_STACK:
  30.304-	case SG_SHARED:
  30.305-		if(ioptr >= conf.nswppo)
  30.306-			break;
  30.307-
  30.308-		/*
  30.309-		 *  get a new swap address with swapcount 2, one for the pte
  30.310-		 *  and one extra ref for us while we write the page to disk
  30.311-		 */
  30.312-		daddr = newswap();
  30.313-		if(daddr == ~0)
  30.314-			break;
  30.315-
  30.316-		/* clear any pages referring to it from the cache */
  30.317-		cachedel(&swapimage, daddr);
  30.318-
  30.319-		/* forget anything that it used to cache */
  30.320-		uncachepage(outp);
  30.321-
  30.322-		/*
  30.323-		 *  enter it into the cache so that a fault happening
  30.324-		 *  during the write will grab the page from the cache
  30.325-		 *  rather than one partially written to the disk
  30.326-		 */
  30.327-		outp->daddr = daddr;
  30.328-		cachepage(outp, &swapimage);
  30.329-		*pg = (Page*)(daddr|PG_ONSWAP);
  30.330-
  30.331-		/* Add page to IO transaction list */
  30.332-		iolist[ioptr++] = outp;
  30.333-		break;
  30.334-	}
  30.335-}
  30.336-
  30.337-void
  30.338-pagersummary(void)
  30.339-{
  30.340-	print("%lud/%lud memory %lud/%lud swap %d iolist\n",
  30.341-		palloc.user-palloc.freecount,
  30.342-		palloc.user, conf.nswap-swapalloc.free, conf.nswap,
  30.343-		ioptr);
  30.344-}
  30.345-
  30.346-static void
  30.347-executeio(void)
  30.348-{
  30.349-	Page *outp;
  30.350-	int i, n;
  30.351-	Chan *c;
  30.352-	char *kaddr;
  30.353-	KMap *k;
  30.354-
  30.355-	c = swapimage.c;
  30.356-	for(i = 0; i < ioptr; i++) {
  30.357-		if(ioptr > conf.nswppo)
  30.358-			panic("executeio: ioptr %d > %d", ioptr, conf.nswppo);
  30.359-		outp = iolist[i];
  30.360-
  30.361-		assert(outp->ref > 0);
  30.362-		assert(outp->image == &swapimage);
  30.363-		assert(outp->daddr != ~0);
  30.364-
  30.365-		/* only write when swap address still in use */
  30.366-		if(swapcount(outp->daddr) > 1){
  30.367-			k = kmap(outp);
  30.368-			kaddr = (char*)VA(k);
  30.369-
  30.370-			if(waserror())
  30.371-				panic("executeio: page outp I/O error");
  30.372-
  30.373-			n = devtab[c->type]->write(c, kaddr, BY2PG, outp->daddr);
  30.374-			if(n != BY2PG)
  30.375-				nexterror();
  30.376-
  30.377-			kunmap(k);
  30.378-			poperror();
  30.379-		}
  30.380-
  30.381-		/* drop our extra swap reference */
  30.382-		putswap((Page*)outp->daddr);
  30.383-
  30.384-		/* Free up the page after I/O */
  30.385-		putpage(outp);
  30.386-	}
  30.387-	ioptr = 0;
  30.388-}
  30.389-
  30.390-int
  30.391-needpages(void*)
  30.392-{
  30.393-	return palloc.freecount < swapalloc.headroom;
  30.394-}
  30.395-
  30.396-void
  30.397-setswapchan(Chan *c)
  30.398-{
  30.399-	uchar dirbuf[sizeof(Dir)+100];
  30.400-	Dir d;
  30.401-	int n;
  30.402-
  30.403-	if(waserror()){
  30.404-		cclose(c);
  30.405-		nexterror();
  30.406-	}
  30.407-	if(swapimage.c != nil) {
  30.408-		if(swapalloc.free != conf.nswap)
  30.409-			error(Einuse);
  30.410-		cclose(swapimage.c);
  30.411-		swapimage.c = nil;
  30.412-	}
  30.413-
  30.414-	/*
  30.415-	 *  if this isn't a file, set the swap space
  30.416-	 *  to be at most the size of the partition
  30.417-	 */
  30.418-	if(devtab[c->type]->dc != L'M'){
  30.419-		n = devtab[c->type]->stat(c, dirbuf, sizeof dirbuf);
  30.420-		if(n <= 0 || convM2D(dirbuf, n, &d, nil) == 0)
  30.421-			error("stat failed in setswapchan");
  30.422-		if(d.length < conf.nswppo*BY2PG)
  30.423-			error("swap device too small");
  30.424-		if(d.length < conf.nswap*BY2PG){
  30.425-			conf.nswap = d.length/BY2PG;
  30.426-			swapalloc.top = &swapalloc.swmap[conf.nswap];
  30.427-			swapalloc.free = conf.nswap;
  30.428-		}
  30.429-	}
  30.430-	c->flag &= ~CCACHE;
  30.431-	cclunk(c);
  30.432-	swapimage.c = c;
  30.433-	poperror();
  30.434-}
    31.1--- a/sys/src/9/ppc/blast
    31.2+++ b/sys/src/9/ppc/blast
    31.3@@ -1,6 +1,7 @@
    31.4 dev
    31.5 	root
    31.6 	cons
    31.7+	swap
    31.8 	env
    31.9 	flash
   31.10 	pipe
    32.1--- a/sys/src/9/ppc/main.c
    32.2+++ b/sys/src/9/ppc/main.c
    32.3@@ -84,7 +84,6 @@ main(void)
    32.4 	links();
    32.5 	chandevreset();
    32.6 	pageinit();
    32.7-	swapinit();
    32.8 	sharedseginit();
    32.9 	fpsave(&initfp);
   32.10 	initfp.fpscr = 0;
    33.1--- a/sys/src/9/ppc/mkfile
    33.2+++ b/sys/src/9/ppc/mkfile
    33.3@@ -31,7 +31,6 @@ PORT=\
    33.4 	qlock.$O\
    33.5 	rdb.$O\
    33.6 	segment.$O\
    33.7-	swap.$O\
    33.8 	sysfile.$O\
    33.9 	sysproc.$O\
   33.10 	taslock.$O\
    34.1--- a/sys/src/9/sgi/indy
    34.2+++ b/sys/src/9/sgi/indy
    34.3@@ -1,6 +1,7 @@
    34.4 dev
    34.5 	root
    34.6 	cons
    34.7+	swap
    34.8 	uart
    34.9 	mnt
   34.10 	srv
    35.1--- a/sys/src/9/sgi/main.c
    35.2+++ b/sys/src/9/sgi/main.c
    35.3@@ -192,8 +192,6 @@ main(void)
    35.4 	initseg();
    35.5 	links();
    35.6 	chandevreset();
    35.7-
    35.8-	swapinit();
    35.9 	userinit();
   35.10 	schedinit();
   35.11 	panic("schedinit returned");
    36.1--- a/sys/src/9/sgi/mkfile
    36.2+++ b/sys/src/9/sgi/mkfile
    36.3@@ -38,7 +38,6 @@ PORT=\
    36.4 	rdb.$O\
    36.5 	rebootcmd.$O\
    36.6 	segment.$O\
    36.7-	swap.$O\
    36.8 	syscallfmt.$O\
    36.9 	sysfile.$O\
   36.10 	sysproc.$O\
    37.1--- a/sys/src/9/teg2/main.c
    37.2+++ b/sys/src/9/teg2/main.c
    37.3@@ -455,7 +455,6 @@ main(void)
    37.4 //	i8250console();			/* too early; see init0 */
    37.5 
    37.6 	pageinit();			/* prints "1020M memory: ⋯ */
    37.7-	swapinit();
    37.8 	userinit();
    37.9 
   37.10 	/*
    38.1--- a/sys/src/9/teg2/mkfile
    38.2+++ b/sys/src/9/teg2/mkfile
    38.3@@ -34,7 +34,6 @@ PORT=\
    38.4 	qio.$O\
    38.5 	qlock.$O\
    38.6 	segment.$O\
    38.7-	swap.$O\
    38.8 	syscallfmt.$O\
    38.9 	sysfile.$O\
   38.10 	sysproc.$O\
    39.1--- a/sys/src/9/teg2/ts
    39.2+++ b/sys/src/9/teg2/ts
    39.3@@ -2,6 +2,7 @@
    39.4 dev
    39.5 	root
    39.6 	cons
    39.7+	swap
    39.8 	env
    39.9 	pipe
   39.10 	proc
    40.1--- a/sys/src/9/xen/main.c
    40.2+++ b/sys/src/9/xen/main.c
    40.3@@ -103,8 +103,6 @@ main(void)
    40.4 //	conf.monitor = 1;
    40.5 	chandevreset();
    40.6 	pageinit();
    40.7-
    40.8-	swapinit();
    40.9 	userinit();
   40.10 	schedinit();
   40.11 }
    41.1--- a/sys/src/9/xen/mkfile
    41.2+++ b/sys/src/9/xen/mkfile
    41.3@@ -32,7 +32,6 @@ PORT=\
    41.4 	qlock.$O\
    41.5 	rebootcmd.$O\
    41.6 	segment.$O\
    41.7-	swap.$O\
    41.8 	sysfile.$O\
    41.9 	sysproc.$O\
   41.10 	taslock.$O\
    42.1--- a/sys/src/9/xen/xenpcf
    42.2+++ b/sys/src/9/xen/xenpcf
    42.3@@ -1,6 +1,7 @@
    42.4 dev
    42.5 	root		netif
    42.6 	cons
    42.7+	swap
    42.8 	uart
    42.9 	arch
   42.10 	env
    43.1--- a/sys/src/9/zynq/main.c
    43.2+++ b/sys/src/9/zynq/main.c
    43.3@@ -393,7 +393,6 @@ main(void)
    43.4 	archinit();
    43.5 	chandevreset();
    43.6 	pageinit();
    43.7-	swapinit();
    43.8 	screeninit();
    43.9 	userinit();
   43.10 	schedinit();
    44.1--- a/sys/src/9/zynq/mkfile
    44.2+++ b/sys/src/9/zynq/mkfile
    44.3@@ -31,7 +31,6 @@ PORT=\
    44.4 	qio.$O\
    44.5 	qlock.$O\
    44.6 	segment.$O\
    44.7-	swap.$O\
    44.8 	sysfile.$O\
    44.9 	sysproc.$O\
   44.10 	taslock.$O\
    45.1--- a/sys/src/9/zynq/zynq
    45.2+++ b/sys/src/9/zynq/zynq
    45.3@@ -1,6 +1,7 @@
    45.4 dev
    45.5 	root
    45.6 	cons
    45.7+	swap
    45.8 	arch
    45.9 	uart
   45.10 	mnt