changelog shortlog tags branches files raw gz bz2 help

Mercurial > hg > plan9front / changeset: awk: make empty FS unicodely-correct.

changeset 7412: cae3f2645bdf
parent 7411: 82cc8a9cd294
child 7413: 9c36e8f222fb
author: Ori Bernstein <ori@eigenstate.org>
date: Wed, 09 Oct 2019 17:36:02 -0700
files: sys/src/cmd/awk/lib.c
description: awk: make empty FS Unicode-correct: with FS="", each field now holds one whole UTF-8 character (rune) instead of a single byte.
     1.1--- a/sys/src/cmd/awk/lib.c
     1.2+++ b/sys/src/cmd/awk/lib.c
     1.3@@ -249,7 +249,7 @@ void fldbld(void)	/* create fields from 
     1.4 	/* the fields are all stored in this one array with \0's */
     1.5 	char *r, *fr, sep;
     1.6 	Cell *p;
     1.7-	int i, j, n;
     1.8+	int i, j, n, w;
     1.9 
    1.10 	if (donefld)
    1.11 		return;
    1.12@@ -287,15 +287,18 @@ void fldbld(void)	/* create fields from 
    1.13 		}
    1.14 		*fr = 0;
    1.15 	} else if ((sep = *inputFS) == 0) {		/* new: FS="" => 1 char/field */
    1.16-		for (i = 0; *r != 0; r++) {
    1.17-			char buf[2];
    1.18+		for (i = 0; *r != 0; r += w) {
    1.19+			char buf[UTFmax + 1];
    1.20+			Rune chr;
    1.21+
    1.22 			i++;
    1.23 			if (i > nfields)
    1.24 				growfldtab(i);
    1.25 			if (freeable(fldtab[i]))
    1.26 				xfree(fldtab[i]->sval);
    1.27-			buf[0] = *r;
    1.28-			buf[1] = 0;
    1.29+			w = chartorune(&chr, r);
    1.30+			n = runetochar(buf, &chr);
    1.31+			buf[n] = 0;
    1.32 			fldtab[i]->sval = tostring(buf);
    1.33 			fldtab[i]->tval = FLD | STR;
    1.34 		}