changelog shortlog tags branches files raw gz bz2 help

Mercurial > hg > ventivac / changeset: manual page for rabin module. also changed variable name to match current adt name, instead of an earlier one.

changeset 144: 207beea1188c
parent 143: 23937fde3687
child 145: 735de23113c5
author: Mechiel Lukkien <mechiel@ueber.net>
date: Thu, 30 Aug 2007 14:13:56 +0200
files: TODO appl/lib/rabin.b man/2/rabin module/rabin.m
description: manual page for rabin module. also changed variable name to match current adt name, instead of an earlier one.
     1.1--- a/TODO	Mon Aug 20 18:18:59 2007 +0200
     1.2+++ b/TODO	Thu Aug 30 14:13:56 2007 +0200
     1.3@@ -1,7 +1,6 @@
     1.4 things left to do (maybe):
     1.5 - update manual page for venti.m? (describe Entry and Root)
     1.6 - manual page for vac.m?
     1.7-- manual page for rabin.m?
     1.8 - some more testing
     1.9 - look at memory requirements for vacfs and its qid caching
    1.10 - careful hunt for bugs in appl/lib/vac.b, perhaps also appl/lib/venti.b.
     2.1--- a/appl/lib/rabin.b	Mon Aug 20 18:18:59 2007 +0200
     2.2+++ b/appl/lib/rabin.b	Thu Aug 30 14:13:56 2007 +0200
     2.3@@ -39,46 +39,46 @@
     2.4 		return (nil, sprint("bad min/max"));
     2.5 	if(min < rcfg.width)
     2.6 		return (nil, "min < width");
     2.7-	c := ref Rfile(b, rcfg, min, max, array[max+rcfg.width] of byte, 0, 0, big 0);
     2.8+	r := ref Rfile(b, rcfg, min, max, array[max+rcfg.width] of byte, 0, 0, big 0);
     2.9 
    2.10-	(prime, width, mod) := (c.rcfg.prime, c.rcfg.width, c.rcfg.mod);
    2.11-	while(c.n < width) {
    2.12-		ch := c.b.getb();
    2.13+	(prime, width, mod) := (r.rcfg.prime, r.rcfg.width, r.rcfg.mod);
    2.14+	while(r.n < width) {
    2.15+		ch := r.b.getb();
    2.16 		if(ch == ERROR)
    2.17 			return (nil, sprint("reading: %r"));
    2.18 		if(ch == EOF)
    2.19 			break;
    2.20-		c.buf[c.n] = byte ch;
    2.21-		c.state = (prime*c.state + ch) % mod;
    2.22-		c.n++;
    2.23+		r.buf[r.n] = byte ch;
    2.24+		r.state = (prime*r.state + ch) % mod;
    2.25+		r.n++;
    2.26 	}
    2.27-	return (c, nil);
    2.28+	return (r, nil);
    2.29 }
    2.30 
    2.31-Rfile.read(c: self ref Rfile): (array of byte, big, string)
    2.32+Rfile.read(r: self ref Rfile): (array of byte, big, string)
    2.33 {
    2.34-	(prime, width, mod) := (c.rcfg.prime, c.rcfg.width, c.rcfg.mod);
    2.35+	(prime, width, mod) := (r.rcfg.prime, r.rcfg.width, r.rcfg.mod);
    2.36 	for(;;) {
    2.37-		ch := c.b.getb();
    2.38+		ch := r.b.getb();
    2.39 		if(ch == ERROR)
    2.40 			return (nil, big 0, sprint("reading: %r"));
    2.41 		if(ch == EOF) {
    2.42-			d := c.buf[:c.n];
    2.43-			off := c.off;
    2.44-			c.n = 0;
    2.45-			c.off += big len d;
    2.46+			d := r.buf[:r.n];
    2.47+			off := r.off;
    2.48+			r.n = 0;
    2.49+			r.off += big len d;
    2.50 			return (d, off, nil);
    2.51 		}
    2.52-		c.buf[c.n] = byte ch;
    2.53-		c.state = (mod+prime*c.state + ch - c.rcfg.tab[int c.buf[c.n-width]]) % mod;
    2.54-		c.n++;
    2.55-		if(c.n-width >= c.max || (c.n-width >= c.min && c.state == mod-1)) {
    2.56-			d := array[c.n-width] of byte;
    2.57-			d[:] = c.buf[:len d];
    2.58-			off := c.off;
    2.59-			c.buf[:] = c.buf[c.n-width:c.n];
    2.60-			c.n = width;
    2.61-			c.off += big len d;
    2.62+		r.buf[r.n] = byte ch;
    2.63+		r.state = (mod+prime*r.state + ch - r.rcfg.tab[int r.buf[r.n-width]]) % mod;
    2.64+		r.n++;
    2.65+		if(r.n-width >= r.max || (r.n-width >= r.min && r.state == mod-1)) {
    2.66+			d := array[r.n-width] of byte;
    2.67+			d[:] = r.buf[:len d];
    2.68+			off := r.off;
    2.69+			r.buf[:] = r.buf[r.n-width:r.n];
    2.70+			r.n = width;
    2.71+			r.off += big len d;
    2.72 			return (d, off, nil);
    2.73 		}
    2.74 	}
     3.1--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2+++ b/man/2/rabin	Thu Aug 30 14:13:56 2007 +0200
     3.3@@ -0,0 +1,52 @@
     3.4+.TH RABIN 2
     3.5+.SH NAME
     3.6+rabin \- rabin fingerprinting
     3.7+.SH SYNOPSIS
     3.8+.EX
     3.9+include "rabin.m";
    3.10+rabin := load Rabin Rabin->PATH;
    3.11+Rcfg, Rfile: import rabin;
    3.12+
    3.13+init:		fn(bufio: Bufio);
    3.14+open:		fn(rcfg: ref Rcfg, b: ref Iobuf, min, max: int): (ref Rfile, string);
    3.15+
    3.16+Rcfg: adt {
    3.17+	mk:     fn(prime, width, mod: int): (ref Rcfg, string);
    3.18+};
    3.19+
    3.20+Rfile: adt {
    3.21+	read:   fn(r: self ref Rfile): (array of byte, big, string);
    3.22+};
    3.23+.EE
    3.24+.SH DESCRIPTION
    3.25+.B Rabin
    3.26+implements a data fingerprinting algorithm.  A rolling checksum is calculated while reading data.  Certain checksum values are taken to be data boundaries and used for splitting the data into chunks.
    3.27+.PP
    3.28+.B Rcfg
    3.29+represents the parameters to the algorithm,
    3.30+.B Rcfg.mk
    3.31+creates a new instance.
    3.32+.I Prime
    3.33+should be a prime number.
    3.34+.I Width
    3.35+is the width of the rolling checksum window in bytes.  A wider window results in more diverse boundary patterns.  A window of 30 bytes should be reasonable for most uses.
    3.36+.I Mod
    3.37+effectively sets the mean desired chunk size.  The rolling checksum is calculated modulo
    3.38+.IR mod .
    3.39+All three parameters influence where chunk boundaries will be found.
    3.40+.PP
    3.41+.B Rfile
    3.42+represents a file to read chunks from.
    3.43+.B Open 
    3.44+returns an initialised Rfile or an error string.
    3.45+.I Min
    3.46+and
    3.47+.I max
    3.48+are the minimum and maximum size in bytes of chunks that will be returned.  Only the last chunk in a file can be smaller than the minimum chunk size.  Note that the mean chunk size may be off due to these parameters.
    3.49+Data is read from
    3.50+.B Iobuf
    3.51+.IR b .
    3.52+.B Rfile.read
    3.53+returns subsequent chunks of data and the file offset at which they were found, or an error message.  After end of file, the returned chunks are zero bytes long.
    3.54+.SH SOURCE
    3.55+.B /appl/lib/rabin.b
     4.1--- a/module/rabin.m	Mon Aug 20 18:18:59 2007 +0200
     4.2+++ b/module/rabin.m	Thu Aug 30 14:13:56 2007 +0200
     4.3@@ -23,6 +23,6 @@
     4.4 		state:	int;
     4.5 		off:	big;
     4.6 
     4.7-		read:	fn(c: self ref Rfile): (array of byte, big, string);
     4.8+		read:	fn(r: self ref Rfile): (array of byte, big, string);
     4.9 	};
    4.10 };