
plan9front: sys/src/9/pc/mmu.c

changeset 7201: b73a652b1030
parent: 5badfb81b87c
child: 6e0c926efd3b
author: cinap_lenrek@felloff.net
date: Fri, 03 May 2019 23:52:49 +0200
permissions: -rw-r--r--
description: pc kernel: remove countpagerefs() (thanks BurnZeZ)

forgot to commit this...
1 /*
2  * Memory mappings. Life was easier when 2G of memory was enough.
3  *
4  * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
5  * (9load sits under 1M during the load). The memory from KZERO to the
6  * top of memory is mapped 1-1 with physical memory, starting at physical
7  * address 0. All kernel memory and data structures (i.e., the entries stored
8  * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
9  * then the kernel can only have 256MB of memory for itself.
10  *
11  * The 256M below KZERO comprises three parts. The top 4M, just below
12  * KZERO, is the virtual page table, a virtual address representation of
13  * the current page table tree. The 4M below that is used for temporary
14  * per-process mappings managed by kmap and kunmap. The remaining 248M is used
15  * for global (shared by all procs and all processors) device memory
16  * mappings and managed by vmap and vunmap. The total amount (256M)
17  * could probably be reduced somewhat if desired. The largest device
18  * mapping is that of the video card, and even though modern video cards
19  * have embarrassing amounts of memory, the video drivers only use one
20  * frame buffer worth (at most 16M). Each is described in more detail below.
21  *
22  * The VPT is a 4M frame constructed by inserting the pdb into itself.
23  * This short-circuits one level of the page tables, with the result that
24  * the contents of second-level page tables can be accessed at VPT.
25  * We use the VPT to edit the page tables (see mmu) after inserting them
26  * into the page directory. It is a convenient mechanism for mapping what
27  * might be otherwise-inaccessible pages. The idea was borrowed from
28  * the Exokernel.
29  *
30  * The VPT doesn't solve all our problems, because we still need to
31  * prepare page directories before we can install them. For that, we
32  * use tmpmap/tmpunmap, which map a single page at TMPADDR.
33  */
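/*
 * For concreteness, with KZERO at 0xF0000000 the split described above
 * works out roughly as follows (the symbolic names come from mem.h;
 * the values here are a sketch, the header is authoritative):
 *
 *	VMAP  = KZERO-256M = 0xE0000000   248M of vmap device mappings
 *	KMAP  = KZERO-8M   = 0xEF800000   4M of per-process kmap mappings
 *	VPT   = KZERO-4M   = 0xEFC00000   4M virtual page table
 *	KZERO =              0xF0000000   1-1 map of physical memory
 */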
34 
35 #include "u.h"
36 #include "../port/lib.h"
37 #include "mem.h"
38 #include "dat.h"
39 #include "fns.h"
40 #include "io.h"
41 
42 /*
43  * Simple segment descriptors with no translation.
44  */
45 #define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
46 #define EXECSEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
47 #define EXEC16SEGM(p) { 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
48 #define TSSSEGM(b,p) { ((b)<<16)|sizeof(Tss),\
49  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
50 
51 Segdesc gdt[NGDT] =
52 {
53 [NULLSEG] { 0, 0}, /* null descriptor */
54 [KDSEG] DATASEGM(0), /* kernel data/stack */
55 [KESEG] EXECSEGM(0), /* kernel code */
56 [UDSEG] DATASEGM(3), /* user data/stack */
57 [UESEG] EXECSEGM(3), /* user code */
58 [TSSSEG] TSSSEGM(0,0), /* tss segment */
59 [KESEG16] EXEC16SEGM(0), /* kernel code 16-bit */
60 };
61 
62 static void taskswitch(ulong, ulong);
63 static void memglobal(void);
64 
65 #define vpt ((ulong*)VPT)
66 #define VPTX(va) (((ulong)(va))>>12)
67 #define vpd (vpt+VPTX(VPT))
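/*
 * With the self-referential pdb entry installed (see mmuinit below),
 * the pte mapping any virtual address va is visible at vpt[VPTX(va)]
 * and its pde at vpd[PDX(va)].  A rough sketch of reading a 4K-page
 * mapping through these windows; the pde must be checked first so
 * that the vpt access itself cannot fault:
 *
 *	ulong pde, pte, pa;
 *
 *	pde = vpd[PDX(va)];
 *	if(pde & PTEVALID){
 *		pte = vpt[VPTX(va)];
 *		if(pte & PTEVALID)
 *			pa = PPN(pte) | (va & (BY2PG-1));
 *	}
 */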
68 
69 enum {
70  /* PAT entry used for write combining */
71  PATWC = 7,
72 };
73 
74 void
75 mmuinit(void)
76 {
77  ulong x, *p;
78  ushort ptr[3];
79  vlong v;
80 
81  if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n",
82  VPT, vpd, KMAP);
83 
84  memglobal();
85  m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;
86 
87  m->tss = mallocz(sizeof(Tss), 1);
88  if(m->tss == nil)
89  panic("mmuinit: no memory for Tss");
90  m->tss->iomap = 0xDFFF<<16;
91 
92  /*
93  * We used to keep the GDT in the Mach structure, but it
94  * turns out that that slows down access to the rest of the
95  * page. Since the Mach structure is accessed quite often,
96  * it pays off anywhere from a factor of 1.25 to 2 on real
97  * hardware to separate them (the AMDs are more sensitive
98  * than Intels in this regard). Under VMware it pays off
99  * a factor of about 10 to 100.
100  */
101  memmove(m->gdt, gdt, sizeof gdt);
102  x = (ulong)m->tss;
103  m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
104  m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
105 
106  ptr[0] = sizeof(gdt)-1;
107  x = (ulong)m->gdt;
108  ptr[1] = x & 0xFFFF;
109  ptr[2] = (x>>16) & 0xFFFF;
110  lgdt(ptr);
111 
112  ptr[0] = sizeof(Segdesc)*256-1;
113  x = IDTADDR;
114  ptr[1] = x & 0xFFFF;
115  ptr[2] = (x>>16) & 0xFFFF;
116  lidt(ptr);
117 
118  /* make kernel text unwritable */
119  for(x = KTZERO; x < (ulong)etext; x += BY2PG){
120  p = mmuwalk(m->pdb, x, 2, 0);
121  if(p == nil)
122  panic("mmuinit");
123  *p &= ~PTEWRITE;
124  }
125 
126  taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
127  ltr(TSSSEL);
128 
129  /* IA32_PAT write combining */
130  if((MACHP(0)->cpuiddx & Pat) != 0
131  && rdmsr(0x277, &v) != -1){
132  v &= ~(255LL<<(PATWC*8));
133  v |= 1LL<<(PATWC*8); /* WC */
134  wrmsr(0x277, v);
135  }
136 }
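/*
 * For reference: IA32_PAT (MSR 0x277) holds eight one-byte entries.
 * The code above leaves entries 0-6 untouched and sets entry PATWC (7)
 * to 1, the Write Combining type; patwc() below then selects that
 * entry on a per-page basis.
 */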
137 
138 /*
139  * On processors that support it, we set the PTEGLOBAL bit in
140  * page table and page directory entries that map kernel memory.
141  * Doing this tells the processor not to bother flushing them
142  * from the TLB when doing the TLB flush associated with a
143  * context switch (write to CR3). Since kernel memory mappings
144  * are never removed, this is safe. (If we ever remove kernel memory
145  * mappings, we can do a full flush by turning off the PGE bit in CR4,
146  * writing to CR3, and then turning the PGE bit back on.)
147  *
148  * See also mmukmap below.
149  *
150  * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
151  */
152 static void
153 memglobal(void)
154 {
155  int i, j;
156  ulong *pde, *pte;
157 
158  /* only need to do this once, on bootstrap processor */
159  if(m->machno != 0)
160  return;
161 
162  if(!m->havepge)
163  return;
164 
165  pde = m->pdb;
166  for(i=PDX(KZERO); i<1024; i++){
167  if(pde[i] & PTEVALID){
168  pde[i] |= PTEGLOBAL;
169  if(!(pde[i] & PTESIZE)){
170  pte = KADDR(pde[i]&~(BY2PG-1));
171  for(j=0; j<1024; j++)
172  if(pte[j] & PTEVALID)
173  pte[j] |= PTEGLOBAL;
174  }
175  }
176  }
177 }
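/*
 * Should a full flush including global entries ever be needed, the
 * sequence described above amounts to something like the following
 * (getcr4 is used elsewhere in this file; a matching putcr4 is
 * assumed; CR4.PGE is bit 7):
 *
 *	putcr4(getcr4() & ~0x80);
 *	putcr3(getcr3());
 *	putcr4(getcr4() | 0x80);
 *
 * Clearing CR4.PGE invalidates the global entries; the CR3 reload
 * catches the rest.
 */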
178 
179 /*
180  * Flush all the user-space and device-mapping mmu info
181  * for this process, because something has been deleted.
182  * It will be paged back in on demand.
183  */
184 void
185 flushmmu(void)
186 {
187  int s;
188 
189  s = splhi();
190  up->newtlb = 1;
191  mmuswitch(up);
192  splx(s);
193 }
194 
195 /*
196  * Flush a single page mapping from the tlb.
197  */
198 void
199 flushpg(ulong va)
200 {
201  if(m->cpuidfamily >= 4)
202  invlpg(va);
203  else
204  putcr3(getcr3());
205 }
206 
207 /*
208  * Allocate a new page for a page directory.
209  * We keep a small cache of pre-initialized
210  * page directories in each mach.
211  */
212 static Page*
213 mmupdballoc(void)
214 {
215  int s;
216  Page *page;
217  ulong *pdb;
218 
219  s = splhi();
220  m->pdballoc++;
221  if(m->pdbpool == 0){
222  spllo();
223  page = newpage(0, 0, 0);
224  page->va = (ulong)vpd;
225  splhi();
226  pdb = tmpmap(page);
227  memmove(pdb, m->pdb, BY2PG);
228  pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID; /* set up VPT */
229  tmpunmap(pdb);
230  }else{
231  page = m->pdbpool;
232  m->pdbpool = page->next;
233  m->pdbcnt--;
234  }
235  splx(s);
236  return page;
237 }
238 
239 static void
240 mmupdbfree(Proc *proc, Page *p)
241 {
242  if(islo())
243  panic("mmupdbfree: islo");
244  m->pdbfree++;
245  if(m->pdbcnt >= 10){
246  p->next = proc->mmufree;
247  proc->mmufree = p;
248  }else{
249  p->next = m->pdbpool;
250  m->pdbpool = p;
251  m->pdbcnt++;
252  }
253 }
254 
255 /*
256  * A user-space memory segment has been deleted, or the
257  * process is exiting. Clear all the pde entries for user-space
258  * memory mappings and device mappings. Any entries that
259  * are needed will be paged back in as necessary.
260  */
261 static void
262 mmuptefree(Proc* proc)
263 {
264  int s;
265  ulong *pdb;
266  Page **last, *page;
267 
268  if(proc->mmupdb == nil || proc->mmuused == nil)
269  return;
270  s = splhi();
271  pdb = tmpmap(proc->mmupdb);
272  last = &proc->mmuused;
273  for(page = *last; page; page = page->next){
274  pdb[page->daddr] = 0;
275  last = &page->next;
276  }
277  tmpunmap(pdb);
278  splx(s);
279  *last = proc->mmufree;
280  proc->mmufree = proc->mmuused;
281  proc->mmuused = 0;
282 }
283 
284 static void
285 taskswitch(ulong pdb, ulong stack)
286 {
287  Tss *tss;
288 
289  tss = m->tss;
290  tss->ss0 = KDSEL;
291  tss->esp0 = stack;
292  tss->ss1 = KDSEL;
293  tss->esp1 = stack;
294  tss->ss2 = KDSEL;
295  tss->esp2 = stack;
296  putcr3(pdb);
297 }
298 
299 void
300 mmuswitch(Proc* proc)
301 {
302  ulong *pdb;
303  ulong x;
304  int n;
305 
306  if(proc->newtlb){
307  mmuptefree(proc);
308  proc->newtlb = 0;
309  }
310 
311  if(proc->mmupdb != nil){
312  pdb = tmpmap(proc->mmupdb);
313  pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
314  tmpunmap(pdb);
315  taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
316  }else
317  taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
318 
319  memmove(&m->gdt[PROCSEG0], proc->gdt, sizeof(proc->gdt));
320  if((x = (ulong)proc->ldt) && (n = proc->nldt) > 0){
321  m->gdt[LDTSEG].d0 = (x<<16)|((n * sizeof(Segdesc)) - 1);
322  m->gdt[LDTSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGLDT|SEGPL(0)|SEGP;
323  lldt(LDTSEL);
324  } else
325  lldt(NULLSEL);
326 }
327 
328 /*
329  * Release any pages allocated for a page directory base or page-tables
330  * for this process:
331  * switch to the prototype pdb for this processor (m->pdb);
332  * call mmuptefree() to place all pages used for page-tables (proc->mmuused)
333  * onto the process' free list (proc->mmufree). This has the side-effect of
334  * cleaning any user entries in the pdb (proc->mmupdb);
335  * if there's a pdb, put it in the cache of pre-initialized pdbs
336  * for this processor (m->pdbpool) or on the process' free list;
337  * finally, place any pages freed back into the free pool (palloc).
338  * This routine is only called from schedinit() with palloc locked.
339  */
340 void
341 mmurelease(Proc* proc)
342 {
343  Page *page, *next;
344  ulong *pdb;
345 
346  if(islo())
347  panic("mmurelease: islo");
348  taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
349  if(proc->kmaptable != nil){
350  if(proc->mmupdb == nil)
351  panic("mmurelease: no mmupdb");
352  if(--proc->kmaptable->ref != 0)
353  panic("mmurelease: kmap ref %ld", proc->kmaptable->ref);
354  if(proc->nkmap)
355  panic("mmurelease: nkmap %d", proc->nkmap);
356  /*
357  * remove kmaptable from pdb before putting pdb up for reuse.
358  */
359  pdb = tmpmap(proc->mmupdb);
360  if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
361  panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
362  pdb[PDX(KMAP)], proc->kmaptable->pa);
363  pdb[PDX(KMAP)] = 0;
364  tmpunmap(pdb);
365  /*
366  * move kmaptable to free list.
367  */
368  pagechainhead(proc->kmaptable);
369  proc->kmaptable = nil;
370  }
371  if(proc->mmupdb != nil){
372  mmuptefree(proc);
373  mmupdbfree(proc, proc->mmupdb);
374  proc->mmupdb = nil;
375  }
376  for(page = proc->mmufree; page != nil; page = next){
377  next = page->next;
378  if(--page->ref != 0)
379  panic("mmurelease: page->ref %ld", page->ref);
380  pagechainhead(page);
381  }
382  if(proc->mmufree != nil)
383  pagechaindone();
384  proc->mmufree = nil;
385  if(proc->ldt != nil){
386  free(proc->ldt);
387  proc->ldt = nil;
388  proc->nldt = 0;
389  }
390 }
391 
392 /*
393  * Allocate and install pdb for the current process.
394  */
395 static void
396 upallocpdb(void)
397 {
398  int s;
399  ulong *pdb;
400  Page *page;
401 
402  if(up->mmupdb != nil)
403  return;
404  page = mmupdballoc();
405  s = splhi();
406  if(up->mmupdb != nil){
407  /*
408  * Perhaps we got an interrupt while
409  * mmupdballoc was sleeping and that
410  * interrupt allocated an mmupdb?
411  * Seems unlikely.
412  */
413  mmupdbfree(up, page);
414  splx(s);
415  return;
416  }
417  pdb = tmpmap(page);
418  pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
419  tmpunmap(pdb);
420  up->mmupdb = page;
421  putcr3(up->mmupdb->pa);
422  splx(s);
423 }
424 
425 /*
426  * Update the mmu in response to a user fault. pa may have PTEWRITE set.
427  */
428 void
429 putmmu(uintptr va, uintptr pa, Page*)
430 {
431  int old, s;
432  Page *page;
433 
434  if(up->mmupdb == nil)
435  upallocpdb();
436 
437  /*
438  * We should be able to get through this with interrupts
439  * turned on (if we get interrupted we'll just pick up
440  * where we left off) but we get many faults accessing
441  * vpt[] near the end of this function, and they always happen
442  * after the process has been switched out and then
443  * switched back, usually many times in a row (perhaps
444  * it cannot switch back successfully for some reason).
445  *
446  * In any event, I'm tired of searching for this bug.
447  * Turn off interrupts during putmmu even though
448  * we shouldn't need to. - rsc
449  */
450 
451  s = splhi();
452  if(!(vpd[PDX(va)]&PTEVALID)){
453  if(up->mmufree == 0){
454  spllo();
455  page = newpage(0, 0, 0);
456  splhi();
457  }
458  else{
459  page = up->mmufree;
460  up->mmufree = page->next;
461  }
462  vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
463  /* page is now mapped into the VPT - clear it */
464  memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
465  page->daddr = PDX(va);
466  page->next = up->mmuused;
467  up->mmuused = page;
468  }
469  old = vpt[VPTX(va)];
470  vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
471  if(old&PTEVALID)
472  flushpg(va);
473  if(getcr3() != up->mmupdb->pa)
474  print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa);
475  splx(s);
476 }
477 
478 /*
479  * Double-check the user MMU.
480  * Error checking only.
481  */
482 void
483 checkmmu(uintptr va, uintptr pa)
484 {
485  if(up->mmupdb == 0)
486  return;
487  if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
488  return;
489  if(PPN(vpt[VPTX(va)]) != pa)
490  print("%ld %s: va=%#p pa=%#p pte=%#08lux\n",
491  up->pid, up->text,
492  va, pa, vpt[VPTX(va)]);
493 }
494 
495 /*
496  * Walk the page-table pointed to by pdb and return a pointer
497  * to the entry for virtual address va at the requested level.
498  * If the entry is invalid and create isn't requested then bail
499  * out early. Otherwise, for the 2nd level walk, allocate a new
500  * page-table page and register it in the 1st level. This is used
501  * only to edit kernel mappings, which use pages from kernel memory,
502  * so it's okay to use KADDR to look at the tables.
503  */
504 ulong*
505 mmuwalk(ulong* pdb, ulong va, int level, int create)
506 {
507  ulong *table;
508  void *map;
509 
510  table = &pdb[PDX(va)];
511  if(!(*table & PTEVALID) && create == 0)
512  return 0;
513 
514  switch(level){
515 
516  default:
517  return 0;
518 
519  case 1:
520  return table;
521 
522  case 2:
523  if(*table & PTESIZE)
524  panic("mmuwalk2: va %luX entry %luX", va, *table);
525  if(!(*table & PTEVALID)){
526  /*
527  * Have to call low-level allocator from
528  * memory.c if we haven't set up the xalloc
529  * tables yet.
530  */
531  if(conf.mem[0].npage != 0)
532  map = xspanalloc(BY2PG, BY2PG, 0);
533  else
534  map = rampage();
535  if(map == nil)
536  panic("mmuwalk xspanalloc failed");
537  *table = PADDR(map)|PTEWRITE|PTEVALID;
538  }
539  table = KADDR(PPN(*table));
540  return &table[PTX(va)];
541  }
542 }
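/*
 * Typical use, mirroring the read-only-text loop in mmuinit above.
 * Level 1 returns a pointer to the pde for va, level 2 a pointer to
 * the 4K pte; create=0 means missing page tables are not allocated:
 *
 *	ulong *pte;
 *
 *	pte = mmuwalk(m->pdb, va, 2, 0);
 *	if(pte != nil && (*pte & PTEVALID))
 *		*pte &= ~PTEWRITE;	(write-protect va)
 */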
543 
544 /*
545  * Device mappings are shared by all procs and processors and
546  * live in the virtual range VMAP to VMAP+VMAPSIZE. The master
547  * copy of the mappings is stored in mach0->pdb, and they are
548  * paged in from there as necessary by vmapsync during faults.
549  */
550 
551 static Lock vmaplock;
552 
553 static int findhole(ulong *a, int n, int count);
554 static ulong vmapalloc(ulong size);
555 static void pdbunmap(ulong*, ulong, int);
556 
557 /*
558  * Add a device mapping to the vmap range.
559  */
560 void*
561 vmap(ulong pa, int size)
562 {
563  int osize;
564  ulong o, va;
565 
566  /*
567  * might be asking for less than a page.
568  */
569  osize = size;
570  o = pa & (BY2PG-1);
571  pa -= o;
572  size += o;
573 
574  size = ROUND(size, BY2PG);
575  if(pa == 0){
576  print("vmap pa=0 pc=%#p\n", getcallerpc(&pa));
577  return nil;
578  }
579  ilock(&vmaplock);
580  if((va = vmapalloc(size)) == 0
581  || pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
582  iunlock(&vmaplock);
583  return 0;
584  }
585  iunlock(&vmaplock);
586  /* avoid trap on local processor
587  for(i=0; i<size; i+=4*MB)
588  vmapsync(va+i);
589  */
590  USED(osize);
591 // print(" vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
592  return (void*)(va + o);
593 }
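/*
 * A driver typically uses this along the lines of the sketch below;
 * the physical address and size are only illustrative:
 *
 *	ulong *regs;
 *
 *	regs = vmap(0xFEE00000, BY2PG);
 *	if(regs == nil)
 *		panic("cannot vmap registers");
 *	... read and write regs[] ...
 *	vunmap(regs, BY2PG);
 */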
594 
595 static int
596 findhole(ulong *a, int n, int count)
597 {
598  int have, i;
599 
600  have = 0;
601  for(i=0; i<n; i++){
602  if(a[i] == 0)
603  have++;
604  else
605  have = 0;
606  if(have >= count)
607  return i+1 - have;
608  }
609  return -1;
610 }
611 
612 /*
613  * Look for free space in the vmap.
614  */
615 static ulong
616 vmapalloc(ulong size)
617 {
618  int i, n, o;
619  ulong *vpdb;
620  int vpdbsize;
621 
622  vpdb = &MACHP(0)->pdb[PDX(VMAP)];
623  vpdbsize = VMAPSIZE/(4*MB);
624 
625  if(size >= 4*MB){
626  n = (size+4*MB-1) / (4*MB);
627  if((o = findhole(vpdb, vpdbsize, n)) != -1)
628  return VMAP + o*4*MB;
629  return 0;
630  }
631  n = (size+BY2PG-1) / BY2PG;
632  for(i=0; i<vpdbsize; i++)
633  if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
634  if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
635  return VMAP + i*4*MB + o*BY2PG;
636  if((o = findhole(vpdb, vpdbsize, 1)) != -1)
637  return VMAP + o*4*MB;
638 
639  /*
640  * could span page directory entries, but not worth the trouble.
641  * not going to be very much contention.
642  */
643  return 0;
644 }
645 
646 /*
647  * Remove a device mapping from the vmap range.
648  * Since pdbunmap does not remove page tables, just entries,
649  * the call need not be interlocked with vmap.
650  */
651 void
652 vunmap(void *v, int size)
653 {
654  int i;
655  ulong va, o;
656  Mach *nm;
657  Proc *p;
658 
659  /*
660  * might not be aligned
661  */
662  va = (ulong)v;
663  o = va&(BY2PG-1);
664  va -= o;
665  size += o;
666  size = ROUND(size, BY2PG);
667 
668  if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
669  panic("vunmap va=%#.8lux size=%#x pc=%#.8lux",
670  va, size, getcallerpc(&v));
671 
672  pdbunmap(MACHP(0)->pdb, va, size);
673 
674  /*
675  * Flush mapping from all the tlbs and copied pdbs.
676  * This can be (and is) slow, since it is called only rarely.
677  * It is possible for vunmap to be called with up == nil,
678  * e.g. from the reset/init driver routines during system
679  * boot. In that case it suffices to flush the MACH(0) TLB
680  * and return.
681  */
682  if(up == nil){
683  putcr3(PADDR(MACHP(0)->pdb));
684  return;
685  }
686  for(i=0; i<conf.nproc; i++){
687  p = proctab(i);
688  if(p->state == Dead)
689  continue;
690  if(p != up)
691  p->newtlb = 1;
692  }
693  for(i=0; i<conf.nmach; i++){
694  nm = MACHP(i);
695  if(nm != m)
696  nm->flushmmu = 1;
697  }
698  flushmmu();
699  for(i=0; i<conf.nmach; i++){
700  nm = MACHP(i);
701  if(nm != m)
702  while(active.machs[nm->machno] && nm->flushmmu)
703  ;
704  }
705 }
706 
707 /*
708  * Add kernel mappings for pa -> va for a section of size bytes.
709  */
710 int
711 pdbmap(ulong *pdb, ulong pa, ulong va, int size)
712 {
713  int pse;
714  ulong pgsz, *pte, *table;
715  ulong flag, off;
716 
717  flag = pa&0xFFF;
718  pa &= ~0xFFF;
719 
720  if((MACHP(0)->cpuiddx & Pse) && (getcr4() & 0x10))
721  pse = 1;
722  else
723  pse = 0;
724 
725  for(off=0; off<size; off+=pgsz){
726  table = &pdb[PDX(va+off)];
727  if((*table&PTEVALID) && (*table&PTESIZE))
728  panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
729  va+off, pa+off, *table);
730 
731  /*
732  * Check if it can be mapped using a 4MB page:
733  * va, pa aligned and size >= 4MB and processor can do it.
734  */
735  if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){
736  *table = (pa+off)|flag|PTESIZE|PTEVALID;
737  pgsz = 4*MB;
738  }else{
739  pte = mmuwalk(pdb, va+off, 2, 1);
740  if(*pte&PTEVALID)
741  panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
742  va+off, pa+off, *pte);
743  *pte = (pa+off)|flag|PTEVALID;
744  pgsz = BY2PG;
745  }
746  }
747  return 0;
748 }
749 
750 /*
751  * Remove mappings. They must already exist, for sanity.
752  * Only used for kernel mappings, so okay to use KADDR.
753  */
754 static void
755 pdbunmap(ulong *pdb, ulong va, int size)
756 {
757  ulong vae;
758  ulong *table;
759 
760  vae = va+size;
761  while(va < vae){
762  table = &pdb[PDX(va)];
763  if(!(*table & PTEVALID))
764  panic("vunmap: not mapped");
765  if(*table & PTESIZE){
766  if(va & (4*MB-1))
767  panic("vunmap: misaligned: %#p", va);
768  *table = 0;
769  va += 4*MB;
770  continue;
771  }
772  table = KADDR(PPN(*table));
773  if(!(table[PTX(va)] & PTEVALID))
774  panic("vunmap: not mapped");
775  table[PTX(va)] = 0;
776  va += BY2PG;
777  }
778 }
779 
780 /*
781  * Handle a fault by bringing vmap up to date.
782  * We only copy pdb entries, and they never go away,
783  * so no locking is needed.
784  */
785 int
786 vmapsync(ulong va)
787 {
788  ulong entry, *table;
789 
790  if(va < VMAP || va >= VMAP+VMAPSIZE)
791  return 0;
792 
793  entry = MACHP(0)->pdb[PDX(va)];
794  if(!(entry&PTEVALID))
795  return 0;
796  if(!(entry&PTESIZE)){
797  /* make sure entry will help the fault */
798  table = KADDR(PPN(entry));
799  if(!(table[PTX(va)]&PTEVALID))
800  return 0;
801  }
802  vpd[PDX(va)] = entry;
803  /*
804  * TLB doesn't cache negative results, so no flush needed.
805  */
806  return 1;
807 }
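/*
 * The page-fault handler is expected to try this first for kernel
 * faults that land in the vmap window, along the lines of:
 *
 *	if(!user && vmapsync(addr))
 *		return;		(pde copied in; the access is retried)
 */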
808 
809 
810 /*
811  * KMap is used to map individual pages into virtual memory.
812  * It is rare to have more than a few KMaps at a time (in the
813  * absence of interrupts, only two at a time are ever used,
814  * but interrupts can stack). The mappings are local to a process,
815  * so we can use the same range of virtual address space for
816  * all processes without any coordination.
817  */
818 #define kpt (vpt+VPTX(KMAP))
819 #define NKPT (KMAPSIZE/BY2PG)
820 
821 KMap*
822 kmap(Page *page)
823 {
824  int i, o, s;
825 
826  if(up == nil)
827  panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
828  if(up->mmupdb == nil)
829  upallocpdb();
830  if(up->nkmap < 0)
831  panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
832 
833  /*
834  * Splhi shouldn't be necessary here, but paranoia reigns.
835  * See comment in putmmu above.
836  */
837  s = splhi();
838  up->nkmap++;
839  if(!(vpd[PDX(KMAP)]&PTEVALID)){
840  /* allocate page directory */
841  if(KMAPSIZE > BY2XPG)
842  panic("bad kmapsize");
843  if(up->kmaptable != nil)
844  panic("kmaptable");
845  spllo();
846  up->kmaptable = newpage(0, 0, 0);
847  splhi();
848  vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
849  flushpg((ulong)kpt);
850  memset(kpt, 0, BY2PG);
851  kpt[0] = page->pa|PTEWRITE|PTEVALID;
852  up->lastkmap = 0;
853  splx(s);
854  return (KMap*)KMAP;
855  }
856  if(up->kmaptable == nil)
857  panic("no kmaptable");
858  o = up->lastkmap+1;
859  for(i=0; i<NKPT; i++){
860  if(kpt[(i+o)%NKPT] == 0){
861  o = (i+o)%NKPT;
862  kpt[o] = page->pa|PTEWRITE|PTEVALID;
863  up->lastkmap = o;
864  splx(s);
865  return (KMap*)(KMAP+o*BY2PG);
866  }
867  }
868  panic("out of kmap");
869  return nil;
870 }
871 
872 void
873 kunmap(KMap *k)
874 {
875  ulong va;
876 
877  va = (ulong)k;
878  if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
879  panic("kunmap: no kmaps");
880  if(va < KMAP || va >= KMAP+KMAPSIZE)
881  panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k));
882  if(!(vpt[VPTX(va)]&PTEVALID))
883  panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k));
884  up->nkmap--;
885  if(up->nkmap < 0)
886  panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
887  vpt[VPTX(va)] = 0;
888  flushpg(va);
889 }
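/*
 * The usual pattern in the callers, with VA() from dat.h recovering
 * the virtual address from the KMap*, is:
 *
 *	KMap *k;
 *
 *	k = kmap(page);
 *	memset((void*)VA(k), 0, BY2PG);
 *	kunmap(k);
 */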
890 
891 /*
892  * Temporary one-page mapping used to edit page directories.
893  *
894  * The fasttmp #define controls whether the code optimizes
895  * the case where the page is already mapped in the physical
896  * memory window.
897  */
898 #define fasttmp 1
899 
900 void*
901 tmpmap(Page *p)
902 {
903  ulong i;
904  ulong *entry;
905 
906  if(islo())
907  panic("tmpmap: islo");
908 
909  if(fasttmp && p->pa < -KZERO)
910  return KADDR(p->pa);
911 
912  /*
913  * PDX(TMPADDR) == PDX(MACHADDR), so this
914  * entry is private to the processor and shared
915  * between up->mmupdb (if any) and m->pdb.
916  */
917  entry = &vpt[VPTX(TMPADDR)];
918  if(!(*entry&PTEVALID)){
919  for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
920  print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
921  panic("tmpmap: no entry");
922  }
923  if(PPN(*entry) != PPN(TMPADDR-KZERO))
924  panic("tmpmap: already mapped entry=%#.8lux", *entry);
925  *entry = p->pa|PTEWRITE|PTEVALID;
926  flushpg(TMPADDR);
927  return (void*)TMPADDR;
928 }
929 
930 void
931 tmpunmap(void *v)
932 {
933  ulong *entry;
934 
935  if(islo())
936  panic("tmpunmap: islo");
937  if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
938  return;
939  if(v != (void*)TMPADDR)
940  panic("tmpunmap: bad address");
941  entry = &vpt[VPTX(TMPADDR)];
942  if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
943  panic("tmpunmap: not mapped entry=%#.8lux", *entry);
944  *entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
945  flushpg(TMPADDR);
946 }
947 
948 /*
949  * These could go back to being macros once the kernel is debugged,
950  * but the extra checking is nice to have.
951  */
952 void*
953 kaddr(ulong pa)
954 {
955  if(pa >= (ulong)-KZERO)
956  panic("kaddr: pa=%#.8lux", pa);
957  return (void*)(pa+KZERO);
958 }
959 
960 ulong
961 paddr(void *v)
962 {
963  ulong va;
964 
965  va = (ulong)v;
966  if(va < KZERO)
967  panic("paddr: va=%#.8lux pc=%#p", va, getcallerpc(&v));
968  return va-KZERO;
969 }
970 
971 /*
972  * More debugging.
973  */
974 void
975 checkfault(ulong, ulong)
976 {
977 }
978 
979 /*
980  * Return the number of bytes that can be accessed via KADDR(pa).
981  * If pa is not a valid argument to KADDR, return 0.
982  */
983 ulong
984 cankaddr(ulong pa)
985 {
986  if(pa >= -KZERO)
987  return 0;
988  return -KZERO - pa;
989 }
990 
991 /*
992  * mark pages as write combining (used for framebuffer)
993  */
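/*
 * The mask/attr arithmetic below encodes the 3-bit PAT index into a
 * pte: index bit 0 goes to PWT (bit 3), index bit 1 to PCD (bit 4),
 * and index bit 2 to the PAT bit, which is bit 7 in a 4K pte but
 * bit 12 in a 4M pde.  With PATWC == 7 this gives
 *
 *	4K page: attr = 3<<3 | 1<<7
 *	4M page: attr = 3<<3 | 1<<12
 *
 * matching the masks chosen in the loop.
 */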
994 void
995 patwc(void *a, int n)
996 {
997  ulong *pte, mask, attr, va;
998  vlong v;
999  int z;
1000 
1001  /* check if pat is usable */
1002  if((MACHP(0)->cpuiddx & Pat) == 0
1003  || rdmsr(0x277, &v) == -1
1004  || ((v >> PATWC*8) & 7) != 1)
1005  return;
1006 
1007  /* set the bits for all pages in range */
1008  for(va = (ulong)a; n > 0; n -= z, va += z){
1009  pte = mmuwalk(MACHP(0)->pdb, va, 1, 0);
1010  if(pte && (*pte & (PTEVALID|PTESIZE)) == (PTEVALID|PTESIZE)){
1011  z = 4*MB - (va & (4*MB-1));
1012  mask = 3<<3 | 1<<12;
1013  } else {
1014  pte = mmuwalk(MACHP(0)->pdb, va, 2, 0);
1015  if(pte == 0 || (*pte & PTEVALID) == 0)
1016  panic("patwc: va=%#p", va);
1017  z = BY2PG - (va & (BY2PG-1));
1018  mask = 3<<3 | 1<<7;
1019  }
1020  attr = (((PATWC&3)<<3) | ((PATWC&4)<<5) | ((PATWC&4)<<10));
1021  *pte = (*pte & ~mask) | (attr & mask);
1022  }
1023 }