changelog shortlog tags branches changeset files revisions annotate raw help

Mercurial > hg > plan9front / sys/src/9/pc/sdnvme.c

changeset 5854: 30cf99b1c789
parent: f0c30306e7d5
child: 4042d5dd934f
author: cinap_lenrek@felloff.net
date: Thu, 30 Mar 2017 23:33:46 +0200
permissions: -rw-r--r--
description: sdnvme: don't write completion queue doorbell register when nothing has been processed

turns out on real hardware, the front falls off if we write
the completion queue doorbell registers without consuming
an entry. so only write the register when we have processed
something.
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "io.h"
7 #include "ureg.h"
8 #include "../port/error.h"
9 
10 #include "../port/sd.h"
11 
/* Short type names for the driver-private structures defined below. */
12 typedef struct WS WS;
13 typedef struct CQ CQ;
14 typedef struct SQ SQ;
15 typedef struct Ctlr Ctlr;
16 
/*
 * Wait state of one submitted command.  qcmd() fills in sleep, queue
 * and link and stores the WS in *link; nvmeintr() copies the completion
 * result into cdw0/status and clears *link, which wdone() tests for.
 */
17 struct WS
18 {
19  u32int cdw0; /* dword 0 of the completion entry */
20  ushort status; /* dword 3 of the completion entry, shifted right by 17 */
21  Rendez *sleep; /* rendezvous of the submitting process (&up->sleep) */
22  WS **link; /* slot in queue->wait[] that points back at this WS */
23  SQ *queue; /* submission queue the command was placed on */
24 };
25 
/*
 * Completion queue.  Entries are 4 dwords (16 bytes) each; see the
 * indexing in nvmeintr() and the sizing in cqalloc().
 */
26 struct CQ
27 {
28  u32int head; /* count of consumed entries; head & mask is the ring index */
29  u32int mask; /* number of entries - 1 */
30  u32int shift; /* log2 of the number of entries */
31  u32int *base; /* ring memory, nil when the queue is not allocated */
32  Ctlr *ctlr; /* owning controller */
33 };
34 
/*
 * Submission queue.  Entries are 16 dwords (64 bytes) each; see the
 * indexing in qcmd() and the sizing in sqalloc().
 */
35 struct SQ
36 {
37  u32int tail; /* count of queued commands; tail & mask is the ring index */
38  u32int mask; /* number of entries - 1 */
39  u32int shift; /* log2 of the number of entries */
40  u32int *base; /* ring memory */
41  WS **wait; /* one waiter slot per ring entry, nil when the slot is free */
42  Ctlr *ctlr; /* owning controller */
43 };
44 
/*
 * Per-controller state.  The embedded QLock is taken by qcmd() for
 * admin commands and released again by wcmd().
 */
45 struct Ctlr
46 {
47  QLock;
48 
49  Lock intr; /* held with ilock() while completion queues are scanned */
50  u32int ints; /* interrupt bits in use, written to IntMs/IntMc; 0 = disabled */
51  u32int irqc[2]; /* not referenced by the code visible in this file */
52 
53  Pcidev *pci;
54  u32int *reg; /* vmap()ed BAR0, indexed in 32-bit words by the enum below */
55 
56  u64int cap; /* Cap0 | Cap1<<32, read once at probe time */
57  uchar *ident; /* 0x1000 byte identify controller data, see identify() */
58  u32int *nsid; /* 0x1000 byte namespace id list, see identify() */
59  int nnsid; /* number of leading non-zero entries in nsid[] (max 1024) */
60 
61  u32int mps; /* mps = 1<<mpsshift */
62  u32int mpsshift;
63  u32int dstrd; /* bits 32-35 of cap; shift applied to doorbell indices */
64 
65  CQ cq[1+1]; /* cq[0] admin, cq[1] shared by the per-cpu submission queues */
66  SQ sq[1+MAXMACH]; /* sq[0] admin, sq[1+machno] one per cpu */
67 
68  Ctlr *next; /* list link, set up by nvmepnpctlrs() */
69 };
70 
71 /* controller registers, as indices into the u32int array Ctlr.reg */
72 enum {
73  Cap0, /* low half of the 64-bit capability value (Ctlr.cap) */
74  Cap1, /* high half */
75  Ver,
76  IntMs, /* written to mask interrupts */
77  IntMc, /* written to unmask interrupts */
78  CCfg, /* written with 0 to disable and a config word to enable the controller */
79 
80  CSts = 0x1C/4, /* polled after enable/disable: bit 0 ready, bit 1 fatal */
81  Nssr,
82  AQAttr, /* admin sq mask | admin cq mask<<16 */
83  ASQBase0, /* physical address of the admin submission queue, low/high */
84  ASQBase1,
85  ACQBase0, /* physical address of the admin completion queue, low/high */
86  ACQBase1,
87 
88  DBell = 0x1000/4, /* first doorbell; queue n uses (2n [+1 for cq]) << dstrd */
89 };
90 
/*
 * Claim the next entry of a submission queue and fill in dwords 0-9.
 * The caller sets the command specific dwords from e[10] on (they are
 * NOT cleared here) and must then call wcmd(ws) to start the command.
 *
 * adm != 0 uses the admin queue sq[0] and acquires the controller
 * QLock; otherwise the queue of the current cpu, sq[1+m->machno], is
 * used and the function returns at splhi().  wcmd() undoes either.
 *
 * ws	caller supplied wait state, registered in the queue's wait slot
 * opc	opcode, placed in the low bits of dword 0
 * nsid	namespace id, dword 1
 * mptr	optional pointer whose physical address goes to dwords 4-5
 * data,len	transfer buffer; its physical address goes to dwords 6-7.
 *	When len extends past the end of the first mps sized page, the
 *	address of the following page goes to dwords 8-9, otherwise 0.
 *
 * Returns a pointer to the 16 dword queue entry.
 */
91 static u32int*
92 qcmd(WS *ws, Ctlr *ctlr, int adm, u32int opc, u32int nsid, void *mptr, void *data, ulong len)
93 {
94  u32int cid, *e;
95  u64int pa;
96  SQ *sq;
97 
98  if(!adm){
99  Retry:
100  splhi();
101  sq = &ctlr->sq[1+m->machno];
102  } else {
103  qlock(ctlr);
104  sq = &ctlr->sq[0];
105  }
106  ws->sleep = &up->sleep;
107  ws->queue = sq;
108  ws->link = &sq->wait[sq->tail & sq->mask];
 /*
  * The slot is still owned by an earlier command that has not
  * completed: yield.  For per-cpu queues the queue is selected
  * again afterwards (presumably because the process may resume
  * on a different cpu -- confirm).
  */
109  while(*ws->link != nil){
110  sched();
111  if(!adm){
112  /* should be very rare */
113  goto Retry;
114  }
115  }
116  *ws->link = ws;
117 
 /* cid is the unmasked tail; nvmeintr() masks it again to find the wait slot */
118  e = &sq->base[((cid = sq->tail++) & sq->mask)<<4];
119  e[0] = opc | cid<<16;
120  e[1] = nsid;
121  e[2] = 0;
122  e[3] = 0;
123  if(mptr != nil){
124  pa = PADDR(mptr);
125  e[4] = pa;
126  e[5] = pa>>32;
127  } else {
128  e[4] = 0;
129  e[5] = 0;
130  }
131  if(len > 0){
132  pa = PADDR(data);
133  e[6] = pa;
134  e[7] = pa>>32;
 /* second pointer only when the buffer crosses into the next mps page */
135  if(len > ctlr->mps - (pa & ctlr->mps-1))
136  pa += ctlr->mps - (pa & ctlr->mps-1);
137  else
138  pa = 0;
139  } else {
140  e[6] = 0;
141  e[7] = 0;
142  pa = 0;
143  }
144  e[8] = pa;
145  e[9] = pa>>32;
146  return e;
147 }
148 
/*
 * Interrupt handler; also called directly from wcmd() to poll.
 * arg is the Ctlr.  Does nothing while ctlr->ints is 0.
 *
 * With ctlr->intr held and the controller's interrupts masked (IntMs),
 * every allocated completion queue is drained, highest index first.
 * For each new entry the waiting WS (if any) receives cdw0 and status,
 * its wait slot is freed and the sleeper is woken; then the queue's
 * head doorbell is written.  The doorbell is only written after an
 * entry has been consumed.  Finally interrupts are unmasked (IntMc).
 */
149 static void
150 nvmeintr(Ureg *, void *arg)
151 {
152  u32int phaseshift, *e;
153  WS *ws, **wp;
154  Ctlr *ctlr;
155  SQ *sq;
156  CQ *cq;
157 
158  ctlr = arg;
159  if(ctlr->ints == 0)
160  return;
161 
162  ilock(&ctlr->intr);
163  ctlr->reg[IntMs] = ctlr->ints;
164  for(cq = &ctlr->cq[nelem(ctlr->cq)-1]; cq >= ctlr->cq; cq--){
165  if(cq->base == nil)
166  continue;
 /* moves bit cq->shift of head (the wrap parity) up to bit 16 */
167  phaseshift = 16 - cq->shift;
168  for(;;){
169  e = &cq->base[(cq->head & cq->mask)<<2];
 /* stop when bit 16 of e[3] equals the wrap parity of head: entry is not new */
170  if(((e[3] ^ (cq->head << phaseshift)) & 0x10000) == 0)
171  break;
172 
173  if(0) iprint("nvmeintr: cq%d [%.4ux] %.8ux %.8ux %.8ux %.8ux\n",
174  (int)(cq - ctlr->cq), cq->head & cq->mask,
175  e[0], e[1], e[2], e[3]);
176 
 /* upper half of e[2] selects the submission queue, low bits of e[3] the wait slot */
177  sq = &ctlr->sq[e[2] >> 16];
178  wp = &sq->wait[e[3] & sq->mask];
179  if((ws = *wp) != nil && ws->link == wp){
 /* fetch the Rendez before the slot is released (presumably ws may vanish afterwards) */
180  Rendez *z = ws->sleep;
181  ws->cdw0 = e[0];
182  ws->status = e[3]>>17;
183  *wp = nil;
184  wakeup(z);
185  }
 /* completion queue head doorbell of this queue */
186  ctlr->reg[DBell + ((cq-ctlr->cq)*2+1 << ctlr->dstrd)] = ++cq->head & cq->mask;
187  }
188  }
189  ctlr->reg[IntMc] = ctlr->ints;
190  iunlock(&ctlr->intr);
191 }
192 
193 static int
194 wdone(void *arg)
195 {
196  WS *ws = arg;
197  return *ws->link != ws;
198 }
199 
/*
 * Start the command prepared by qcmd() and wait for its completion.
 * Writes the submission queue tail doorbell, then drops what qcmd()
 * acquired: spllo() for a per-cpu queue, qunlock() for the admin queue.
 * Returns the completion status stored in ws by nvmeintr().
 */
200 static u32int
201 wcmd(WS *ws)
202 {
203  SQ *sq = ws->queue;
204  Ctlr *ctlr = sq->ctlr;
205 
 /* make the queue entry visible before the doorbell write */
206  coherence();
207  ctlr->reg[DBell + ((sq-ctlr->sq)*2+0 << ctlr->dstrd)] = sq->tail & sq->mask;
208  if(sq > ctlr->sq) {
209  assert(sq == &ctlr->sq[1+m->machno]);
210  spllo();
211  } else
212  qunlock(sq->ctlr);
 /*
  * Any error raised while sleeping lands back here and the wait
  * simply continues; the function only returns after completion.
  */
213  while(waserror())
214  ;
215  tsleep(ws->sleep, wdone, ws, 5);
 /* not done after the first timeout: poll the completion queues by hand */
216  while(!wdone(ws)){
217  nvmeintr(nil, ctlr);
218  tsleep(ws->sleep, wdone, ws, 10);
219  }
220  poperror();
221  return ws->status;
222 }
223 
224 void
225 checkstatus(u32int status, char *info)
226 {
227  if(status == 0)
228  return;
229  snprint(up->genbuf, sizeof(up->genbuf), "%s: status %ux", info, status);
230  error(up->genbuf);
231 }
232 
233 static long
234 nvmebio(SDunit *u, int lun, int write, void *a, long count, uvlong lba)
235 {
236  u32int nsid, s, n, m, *e;
237  Ctlr *ctlr;
238  uchar *p;
239  WS ws;
240 
241  USED(lun);
242 
243  ctlr = u->dev->ctlr;
244  nsid = ctlr->nsid[u->subno];
245  s = u->secsize;
246  p = a;
247  while(count > 0){
248  m = (2*ctlr->mps - ((uintptr)p & ctlr->mps-1)) / s;
249  if((n = count) > m)
250  n = m;
251  e = qcmd(&ws, ctlr, 0, write ? 0x01 : 0x02, nsid, nil, p, n*s);
252  e[10] = lba;
253  e[11] = lba>>32;
254  e[12] = n-1;
255  e[13] = (count>n)<<6; /* sequential request */
256  e[14] = 0;
257  e[15] = 0;
258  checkstatus(wcmd(&ws), write ? "write" : "read");
259  p += n*s;
260  count -= n;
261  lba += n;
262  }
263  return p - (uchar*)a;
264 }
265 
266 static int
267 nvmerio(SDreq *r)
268 {
269  int i, count, rw;
270  uvlong lba;
271  SDunit *u;
272 
273  u = r->unit;
274  if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91)
275  return sdsetsense(r, SDok, 0, 0, 0);
276  if((i = sdfakescsi(r)) != SDnostatus)
277  return r->status = i;
278  if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus)
279  return i;
280  r->rlen = nvmebio(u, r->lun, rw == SDwrite, r->data, count, lba);
281  return r->status = SDok;
282 }
283 
284 static int
285 nvmeverify(SDunit *u)
286 {
287  Ctlr *ctlr = u->dev->ctlr;
288  return u->subno < ctlr->nnsid;
289 }
290 
291 static int
292 nvmeonline(SDunit *u)
293 {
294  u32int *e, lbaf;
295  uchar *info, *p;
296  Ctlr *ctlr;
297  WS ws;
298 
299  if(u->sectors != 0)
300  return 1;
301 
302  ctlr = u->dev->ctlr;
303  if((info = mallocalign(0x1000, ctlr->mps, 0, 0)) == nil)
304  return 0;
305 
306  e = qcmd(&ws, ctlr, 1, 0x06, ctlr->nsid[u->subno], nil, info, 0x1000);
307  e[10] = 0; // identify namespace
308  if(wcmd(&ws) != 0){
309  free(info);
310  return 0;
311  }
312  p = info;
313  u->sectors = p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24
314  | (u64int)p[4]<<32
315  | (u64int)p[5]<<40
316  | (u64int)p[6]<<48
317  | (u64int)p[7]<<56;
318  p = &info[128 + 4*(info[26]&15)];
319  lbaf = p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24;
320  u->secsize = 1<<((lbaf>>16)&0xFF);
321  free(info);
322 
323  memset(u->inquiry, 0, sizeof u->inquiry);
324  u->inquiry[2] = 2;
325  u->inquiry[3] = 2;
326  u->inquiry[4] = sizeof u->inquiry - 4;
327  memmove(u->inquiry+8, ctlr->ident+24, 20);
328 
329  return 2;
330 }
331 
332 static int
333 nvmerctl(SDunit *u, char *p, int l)
334 {
335  Ctlr *ctlr;
336  char *e, *s;
337 
338  if((ctlr = u->dev->ctlr) == nil || ctlr->ident == nil)
339  return 0;
340 
341  e = p+l;
342  s = p;
343 
344  p = seprint(p, e, "model\t%.20s\n", (char*)ctlr->ident+24);
345  p = seprint(p, e, "serial\t%.10s\n", (char*)ctlr->ident+4);
346  p = seprint(p, e, "firm\t%.6s\n", (char*)ctlr->ident+64);
347  p = seprint(p, e, "geometry %llud %lud\n", u->sectors, u->secsize);
348 
349  return p-s;
350 }
351 
352 static void*
353 cqalloc(Ctlr *ctlr, CQ *cq, u32int lgsize)
354 {
355  cq->ctlr = ctlr;
356  cq->head = 0;
357  cq->shift = lgsize-4;
358  cq->mask = (1<<cq->shift)-1;
359  if((cq->base = mallocalign(1<<lgsize, ctlr->mps, 0, 0)) == nil)
360  error(Enomem);
361  memset(cq->base, 0, 1<<lgsize);
362  return cq->base;
363 }
364 
365 static void*
366 sqalloc(Ctlr *ctlr, SQ *sq, u32int lgsize)
367 {
368  sq->ctlr = ctlr;
369  sq->tail = 0;
370  sq->shift = lgsize-6;
371  sq->mask = (1<<sq->shift)-1;
372  if((sq->base = mallocalign(1<<lgsize, ctlr->mps, 0, 0)) == nil)
373  error(Enomem);
374  if((sq->wait = mallocz(sizeof(WS*)*(sq->mask+1), 1)) == nil)
375  error(Enomem);
376  memset(sq->base, 0, 1<<lgsize);
377  return sq->base;
378 }
379 
/*
 * Create the i/o queues with admin commands: one completion queue
 * (cq[1]) shared by one submission queue per cpu (sq[1..conf.nmach]),
 * then add the completion queue's bit to the enabled interrupts.
 * Allocation failures and failed commands raise an error.
 */
380 static void
381 setupqueues(Ctlr *ctlr)
382 {
383  u32int lgsize, *e;
384  CQ *cq;
385  SQ *sq;
386  WS ws;
387  int i;
388 
 /*
  * Completion queue size in bytes (log2).  Starts at 1<<10 and
  * doubles until it reaches conf.nmach<<10 bytes, i.e. as many
  * 16 byte entries as all per-cpu submission queues (1<<12 bytes of
  * 64 byte entries each) hold together; capped at 1<<20 bytes and
  * at one mps sized page.  Note that << binds looser than + and -.
  */
389  /* Overkill */
390  lgsize = 12-6+4;
391  while(lgsize < 16+4 && lgsize < ctlr->mpsshift && 1<<lgsize < conf.nmach<<12-6+4)
392  lgsize++;
393 
394  /* CQID1: shared completion queue */
395  cq = &ctlr->cq[1];
396  cqalloc(ctlr, cq, lgsize);
397  e = qcmd(&ws, ctlr, 1, 0x05, ~0, nil, cq->base, 1<<lgsize);
 /* queue id | (entries-1)<<16 */
398  e[10] = (cq - ctlr->cq) | cq->mask<<16;
399  e[11] = 3; /* IEN | PC */
400  checkstatus(wcmd(&ws), "create completion queue");
401 
402  /* SQID[1..nmach]: submission queue per cpu */
403  for(i=1; i<=conf.nmach; i++){
404  sq = &ctlr->sq[i];
405  sqalloc(ctlr, sq, 12);
406  e = qcmd(&ws, ctlr, 1, 0x01, ~0, nil, sq->base, 0x1000);
407  e[10] = i | sq->mask<<16;
408  e[11] = (cq - ctlr->cq)<<16 | 1; /* CQID<<16 | PC */
409  checkstatus(wcmd(&ws), "create submission queue");
410  }
411 
 /* unmask the interrupt bit that corresponds to the new completion queue */
412  ilock(&ctlr->intr);
413  ctlr->ints |= 1<<(cq - ctlr->cq);
414  ctlr->reg[IntMc] = ctlr->ints;
415  iunlock(&ctlr->intr);
416 }
417 
418 static void
419 identify(Ctlr *ctlr)
420 {
421  u32int *e;
422  WS ws;
423 
424  if(ctlr->ident == nil)
425  if((ctlr->ident = mallocalign(0x1000, ctlr->mps, 0, 0)) == nil)
426  error(Enomem);
427  if(ctlr->nsid == nil)
428  if((ctlr->nsid = mallocalign(0x1000, ctlr->mps, 0, 0)) == nil)
429  error(Enomem);
430 
431  e = qcmd(&ws, ctlr, 1, 0x06, ~0, nil, ctlr->ident, 0x1000);
432  e[10] = 1; // identify controller
433  checkstatus(wcmd(&ws), "identify controller");
434 
435  e = qcmd(&ws, ctlr, 1, 0x06, 0, nil, ctlr->nsid, 0x1000);
436  e[10] = 2; // namespace list
437  checkstatus(wcmd(&ws), "namespace list");
438 
439  ctlr->nnsid = 0;
440  while(ctlr->nnsid < 1024 && ctlr->nsid[ctlr->nnsid] != 0)
441  ctlr->nnsid++;
442 }
443 
/*
 * Shut the controller down and release everything nvmeenable()
 * set up: interrupts are masked, the controller is disabled, the
 * interrupt handler is removed, bus mastering is switched off and all
 * queue and identify memory is freed.  Also used by nvmeenable() to
 * clean up after a failed initialisation.  Always returns 1.
 */
444 static int
445 nvmedisable(SDev *sd)
446 {
447  char name[32];
448  Ctlr *ctlr;
449  int i;
450 
451  ctlr = sd->ctlr;
452 
453  /* mask interrupts */
 /* ints = 0 also turns nvmeintr() into a no-op */
454  ilock(&ctlr->intr);
455  ctlr->ints = 0;
456  ctlr->reg[IntMs] = ~ctlr->ints;
457  iunlock(&ctlr->intr);
458 
459  /* disable controller */
460  ctlr->reg[CCfg] = 0;
461 
 /* wait up to 10 x 100ms for bit 0 of the status register to clear */
462  for(i = 0; i < 10; i++){
463  if((ctlr->reg[CSts] & 1) == 0)
464  break;
465  tsleep(&up->sleep, return0, nil, 100);
466  }
467 
 /* must be the same name string that nvmeenable() passed to intrenable() */
468  snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
469  intrdisable(ctlr->pci->intl, nvmeintr, ctlr, ctlr->pci->tbdf, name);
470 
471  pciclrbme(ctlr->pci); /* dma disable */
472 
473  for(i=0; i<nelem(ctlr->sq); i++){
474  free(ctlr->sq[i].base);
475  free(ctlr->sq[i].wait);
476  }
477  for(i=0; i<nelem(ctlr->cq); i++)
478  free(ctlr->cq[i].base);
479 
 /* leave no dangling pointers behind for a later nvmeenable() */
480  memset(ctlr->sq, 0, sizeof(ctlr->sq));
481  memset(ctlr->cq, 0, sizeof(ctlr->cq));
482 
483  free(ctlr->ident);
484  ctlr->ident = nil;
485  free(ctlr->nsid);
486  ctlr->nsid = nil;
487  ctlr->nnsid = 0;
488 
489  return 1;
490 }
491 
/*
 * Bring the controller up: install the interrupt handler, allocate
 * and register the admin queues, enable dma and interrupts, enable the
 * controller and wait for it to become ready, then identify it and
 * create the i/o queues.
 *
 * Returns 1 on success.  On any error the message is printed, the
 * controller is torn down again with nvmedisable(), sd->nunit is set
 * to 0 and 0 is returned.
 */
492 static int
493 nvmeenable(SDev *sd)
494 {
495  char name[32];
496  Ctlr *ctlr;
497  u64int pa;
498  int to;
499 
500  ctlr = sd->ctlr;
501 
502  snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
503  intrenable(ctlr->pci->intl, nvmeintr, ctlr, ctlr->pci->tbdf, name);
504 
 /* everything below may raise an error (allocation, timeout, failed command) */
505  if(waserror()){
506  print("%s: %s\n", name, up->errstr);
507  nvmedisable(sd);
508  sd->nunit = 0; /* hack: prevent further probing */
509  return 0;
510  }
511 
 /* admin queues: one mps sized page each */
512  pa = PADDR(cqalloc(ctlr, &ctlr->cq[0], ctlr->mpsshift));
513  ctlr->reg[ACQBase0] = pa;
514  ctlr->reg[ACQBase1] = pa>>32;
515 
516  pa = PADDR(sqalloc(ctlr, &ctlr->sq[0], ctlr->mpsshift));
517  ctlr->reg[ASQBase0] = pa;
518  ctlr->reg[ASQBase1] = pa>>32;
519 
520  ctlr->reg[AQAttr] = ctlr->sq[0].mask | ctlr->cq[0].mask<<16;
521 
522  /* dma enable */
523  pcisetbme(ctlr->pci);
524 
525  /* enable interrupt */
 /* bit 0 corresponds to the admin completion queue cq[0] */
526  ilock(&ctlr->intr);
527  ctlr->ints = 1;
528  ctlr->reg[IntMc] = ctlr->ints;
529  iunlock(&ctlr->intr);
530 
531  /* enable controller */
 /*
  * enable bit | page size (mpsshift-12) at bit 7 | 6 at bit 16 |
  * 4 at bit 20; 6 and 4 match log2 of the 64 byte submission and
  * 16 byte completion entry sizes used by this driver.
  */
532  ctlr->reg[CCfg] = 1 | (ctlr->mpsshift-12)<<7 | 6<<16 | 4<<20;
533 
 /* timeout from bits 24-31 of cap, counted in 500ms sleeps */
534  for(to = (ctlr->cap>>24) & 255; to >= 0; to--){
535  tsleep(&up->sleep, return0, nil, 500);
 /* ready (bit 0) set and fatal (bit 1) clear */
536  if((ctlr->reg[CSts] & 3) == 1)
537  goto Ready;
538  }
539  if(ctlr->reg[CSts] & 2)
540  error("fatal controller status during initialization");
541  error("controller initialization timeout");
542 Ready:
543  identify(ctlr);
544  setupqueues(ctlr);
545 
546  poperror();
547 
548  return 1;
549 }
550 
551 static Ctlr*
552 nvmepnpctlrs(void)
553 {
554  Ctlr *ctlr, *h, *t;
555  Pcidev *p;
556  int i;
557 
558  h = t = nil;
559  for(p = nil; p = pcimatch(p, 0, 0);){
560  if(p->ccrb != 1 || p->ccru != 8 || p->ccrp != 2)
561  continue;
562  if(p->mem[0].size == 0)
563  continue;
564  if((ctlr = malloc(sizeof(*ctlr))) == nil){
565  print("nvme: no memory for Ctlr\n");
566  break;
567  }
568  ctlr->pci = p;
569  ctlr->reg = vmap(p->mem[0].bar & ~0xF, p->mem[0].size);
570  if(ctlr->reg == nil){
571  print("nvme: can't vmap bar0\n");
572  Bad:
573  if(ctlr->reg != nil)
574  vunmap(ctlr->reg, p->mem[0].size);
575  free(ctlr);
576  continue;
577  }
578  ctlr->cap = ctlr->reg[Cap0];
579  ctlr->cap |= (u64int)ctlr->reg[Cap1]<<32;
580 
581  /* mask interrupts */
582  ctlr->ints = 0;
583  ctlr->reg[IntMs] = ~ctlr->ints;
584 
585  /* disable controller */
586  ctlr->reg[CCfg] = 0;
587 
588  if((ctlr->cap&(1ULL<<37)) == 0){
589  print("nvme: doesnt support NVM commactlr set: %ux\n",
590  (u32int)(ctlr->cap>>37) & 0xFF);
591  goto Bad;
592  }
593 
594  /* use 64K page size when possible */
595  ctlr->dstrd = (ctlr->cap >> 32) & 15;
596  for(i = (ctlr->cap >> 48) & 15; i < ((ctlr->cap >> 52) & 15); i++){
597  if(i >= 16-12) /* 64K */
598  break;
599  }
600  ctlr->mpsshift = i+12;
601  ctlr->mps = 1 << ctlr->mpsshift;
602 
603  if(h == nil)
604  h = ctlr;
605  else
606  t->next = ctlr;
607  t = ctlr;
608  }
609 
610  return h;
611 }
612 
/* tentative definition; nvmepnp() takes its address before the initialised definition below */
613 SDifc sdnvmeifc;
614 
615 static SDev*
616 nvmepnp(void)
617 {
618  SDev *s, *h, *t;
619  Ctlr *ctlr;
620  int id;
621 
622  h = t = nil;
623 
624  id = 'N';
625  for(ctlr = nvmepnpctlrs(); ctlr != nil; ctlr = ctlr->next){
626  if((s = malloc(sizeof(*s))) == nil)
627  break;
628  s->ctlr = ctlr;
629  s->idno = id++;
630  s->ifc = &sdnvmeifc;
631  s->nunit = 1024;
632  if(h)
633  t->next = s;
634  else
635  h = s;
636  t = s;
637  }
638 
639  return h;
640 }
641 
/*
 * Interface table that hooks this driver into the sd device layer;
 * entries left nil are not implemented by this driver.
 */
642 SDifc sdnvmeifc = {
643  "nvme", /* name */
644 
645  nvmepnp, /* pnp */
646  nil, /* legacy */
647  nvmeenable, /* enable */
648  nvmedisable, /* disable */
649 
650  nvmeverify, /* verify */
651  nvmeonline, /* online */
652  nvmerio, /* rio */
653  nvmerctl, /* rctl */
654  nil, /* wctl */
655 
656  nvmebio, /* bio */
657  nil, /* probe */
658  nil, /* clear */
659  nil, /* rtopctl */
660  nil, /* wtopctl */
661 };