changelog shortlog tags branches files raw gz bz2 help

Mercurial > hg > plan9front / changeset: libc: provide arm64 assembly versions for memmove() and memset()

changeset 7218: 0e07d7fce9f3
parent 7217: 871931727b28
child 7219: 029d634f0e18
author: cinap_lenrek@felloff.net
date: Thu, 09 May 2019 02:32:09 +0200
files: sys/src/ape/lib/ap/arm64/memmove.s sys/src/ape/lib/ap/arm64/memset.s sys/src/ape/lib/ap/arm64/mkfile sys/src/libc/arm64/memmove.s sys/src/libc/arm64/memset.s sys/src/libc/arm64/mkfile
description: libc: provide arm64 assembly versions for memmove() and memset()

just a first approximation, uses unaligned 8 byte loads and stores.
MOVP not yet implemented by the linker. no ZVA cache magic yet.
     1.1new file mode 100644
     1.2--- /dev/null
     1.3+++ b/sys/src/ape/lib/ap/arm64/memmove.s
     1.4@@ -0,0 +1,67 @@
     1.5+/*
     1.6+ * memmove(to, from, n), memcpy(to, from, n)
     1.7+ *
     1.8+ * Copy n bytes from `from` to `to` and return `to`.  Overlapping
     1.9+ * regions are handled by copying downwards when from < to.
    1.10+ *
    1.11+ * `to` arrives in R0, which is also the result register, so it must
    1.12+ * hold the original `to` again on return.  The forward loops advance R0
    1.13+ * and undo that at _fdone; the backward loops use R3 and leave R0 alone.
    1.14+ *
    1.15+ * R1 source cursor, R2 n (32-bit, zero-extended), R3/R4 loop bounds,
    1.16+ * R5 data.  Bulk copying uses 8-byte moves at whatever alignment the
    1.17+ * pointers have; the remaining n&7 bytes are moved one at a time.
    1.18+ */
    1.19+TEXT memcpy(SB), $-4
    1.20+TEXT memmove(SB), $-4
    1.21+	MOV	from+8(FP), R1
    1.22+	MOVWU	n+16(FP), R2
    1.23+
    1.24+	CMP	R0, R1			/* compare from with to */
    1.25+	BEQ	_done			/* same address: nothing to move */
    1.26+	BLT	_backward		/* from below to (signed compare): copy top down */
    1.27+
    1.28+_forward:
    1.29+	ADD	R0, R2, R3		/* R3 = to + n, end of destination */
    1.30+	BIC	$7, R2, R4		/* R4 = n & ~7, bytes moved 8 at a time */
    1.31+	CBZ	R4, _floop1		/* fewer than 8 bytes: byte loop only */
    1.32+	ADD	R0, R4, R4		/* R4 = to + (n & ~7) */
    1.33+
    1.34+_floop8:
    1.35+	MOV	(R1)8!, R5		/* load, then from += 8 */
    1.36+	MOV	R5, (R0)8!		/* store, then to += 8 */
    1.37+	CMP	R4, R0
    1.38+	BNE	_floop8
    1.39+
    1.40+_floop1:
    1.41+	CMP	R3, R0
    1.42+	BEQ	_fdone
    1.43+	MOVBU	(R1)1!, R5
    1.44+	MOVBU	R5, (R0)1!
    1.45+	B	_floop1
    1.46+
    1.47+_fdone:
    1.48+	SUB	R2, R0, R0		/* R0 is to + n here; wind back to `to` */
    1.49+
    1.50+_done:
    1.51+	RETURN
    1.52+
    1.53+_backward:
    1.54+	ADD	R2, R1, R1		/* R1 = from + n */
    1.55+	ADD	R2, R0, R3		/* R3 = to + n, destination cursor */
    1.56+	BIC	$7, R2, R4		/* R4 = n & ~7 */
    1.57+	CBZ	R4, _bloop1
    1.58+	SUB	R4, R3, R4		/* R4 = to + n - (n & ~7) */
    1.59+
    1.60+_bloop8:
    1.61+	MOV	-8(R1)!, R5		/* from -= 8, then load */
    1.62+	MOV	R5, -8(R3)!		/* cursor -= 8, then store */
    1.63+	CMP	R4, R3
    1.64+	BNE	_bloop8
    1.65+
    1.66+_bloop1:
    1.67+	CMP	R0, R3			/* stop once the cursor is back at `to` */
    1.68+	BEQ	_done
    1.69+	MOVBU	-1(R1)!, R5
    1.70+	MOVBU	R5, -1(R3)!
    1.71+	B	_bloop1
     2.1new file mode 100644
     2.2--- /dev/null
     2.3+++ b/sys/src/ape/lib/ap/arm64/memset.s
     2.4@@ -0,0 +1,40 @@
     2.5+/*
     2.6+ * memset(s, c, n)
     2.7+ *
     2.8+ * Store the low byte of c into each of the n bytes starting at s and
     2.9+ * return s.
    2.10+ *
    2.11+ * s arrives in R0, which is also the result register.  R0 is used as the
    2.12+ * store cursor, so it is wound back by n at _done.
    2.13+ *
    2.14+ * R1 fill value, R2 n (32-bit, zero-extended), R3 = s+n,
    2.15+ * R4 = s+(n&~7), the end of the part filled 8 bytes at a time.
    2.16+ */
    2.17+TEXT memset(SB), $-4
    2.18+	MOVBU	c+8(FP), R1
    2.19+	MOVWU	n+16(FP), R2
    2.20+
    2.21+	ADD	R0, R2, R3		/* R3 = s + n */
    2.22+	BIC	$7, R2, R4		/* R4 = n & ~7 */
    2.23+	CBZ	R4, _loop1		/* fewer than 8 bytes: byte loop only */
    2.24+	ADD	R0, R4, R4		/* R4 = s + (n & ~7) */
    2.25+
    2.26+	ORR	R1<<8, R1		/* replicate the fill byte ... */
    2.27+	ORR	R1<<16, R1
    2.28+	ORR	R1<<32, R1		/* ... into all 8 bytes of R1 */
    2.29+
    2.30+_loop8:
    2.31+	MOV	R1, (R0)8!		/* store 8 bytes, then cursor += 8 */
    2.32+	CMP	R4, R0
    2.33+	BNE	_loop8
    2.34+
    2.35+_loop1:
    2.36+	CMP	R3, R0
    2.37+	BEQ	_done
    2.38+
    2.39+	MOVBU	R1, (R0)1!
    2.40+	B	_loop1
    2.41+
    2.42+_done:
    2.43+	SUB	R2, R0, R0		/* R0 is s + n here; wind back to s */
    2.44+	RETURN
     3.1--- a/sys/src/ape/lib/ap/arm64/mkfile
     3.2+++ b/sys/src/ape/lib/ap/arm64/mkfile
     3.3@@ -6,6 +6,8 @@ OFILES=\
     3.4 	getfcr.$O\
     3.5 	lock.$O\
     3.6 	main9.$O\
     3.7+	memmove.$O\
     3.8+	memset.$O\
     3.9 	notetramp.$O\
    3.10 	setjmp.$O\
    3.11 	tas.$O\
     4.1new file mode 100644
     4.2--- /dev/null
     4.3+++ b/sys/src/libc/arm64/memmove.s
     4.4@@ -0,0 +1,67 @@
     4.5+/*
     4.6+ * memmove(to, from, n), memcpy(to, from, n)
     4.7+ *
     4.8+ * Copy n bytes from `from` to `to` and return `to`.  Overlapping
     4.9+ * regions are handled by copying downwards when from < to.
    4.10+ *
    4.11+ * `to` arrives in R0, which is also the result register, so it must
    4.12+ * hold the original `to` again on return.  The forward loops advance R0
    4.13+ * and undo that at _fdone; the backward loops use R3 and leave R0 alone.
    4.14+ *
    4.15+ * R1 source cursor, R2 n (32-bit, zero-extended), R3/R4 loop bounds,
    4.16+ * R5 data.  Bulk copying uses 8-byte moves at whatever alignment the
    4.17+ * pointers have; the remaining n&7 bytes are moved one at a time.
    4.18+ */
    4.19+TEXT memcpy(SB), $-4
    4.20+TEXT memmove(SB), $-4
    4.21+	MOV	from+8(FP), R1
    4.22+	MOVWU	n+16(FP), R2
    4.23+
    4.24+	CMP	R0, R1			/* compare from with to */
    4.25+	BEQ	_done			/* same address: nothing to move */
    4.26+	BLT	_backward		/* from below to (signed compare): copy top down */
    4.27+
    4.28+_forward:
    4.29+	ADD	R0, R2, R3		/* R3 = to + n, end of destination */
    4.30+	BIC	$7, R2, R4		/* R4 = n & ~7, bytes moved 8 at a time */
    4.31+	CBZ	R4, _floop1		/* fewer than 8 bytes: byte loop only */
    4.32+	ADD	R0, R4, R4		/* R4 = to + (n & ~7) */
    4.33+
    4.34+_floop8:
    4.35+	MOV	(R1)8!, R5		/* load, then from += 8 */
    4.36+	MOV	R5, (R0)8!		/* store, then to += 8 */
    4.37+	CMP	R4, R0
    4.38+	BNE	_floop8
    4.39+
    4.40+_floop1:
    4.41+	CMP	R3, R0
    4.42+	BEQ	_fdone
    4.43+	MOVBU	(R1)1!, R5
    4.44+	MOVBU	R5, (R0)1!
    4.45+	B	_floop1
    4.46+
    4.47+_fdone:
    4.48+	SUB	R2, R0, R0		/* R0 is to + n here; wind back to `to` */
    4.49+
    4.50+_done:
    4.51+	RETURN
    4.52+
    4.53+_backward:
    4.54+	ADD	R2, R1, R1		/* R1 = from + n */
    4.55+	ADD	R2, R0, R3		/* R3 = to + n, destination cursor */
    4.56+	BIC	$7, R2, R4		/* R4 = n & ~7 */
    4.57+	CBZ	R4, _bloop1
    4.58+	SUB	R4, R3, R4		/* R4 = to + n - (n & ~7) */
    4.59+
    4.60+_bloop8:
    4.61+	MOV	-8(R1)!, R5		/* from -= 8, then load */
    4.62+	MOV	R5, -8(R3)!		/* cursor -= 8, then store */
    4.63+	CMP	R4, R3
    4.64+	BNE	_bloop8
    4.65+
    4.66+_bloop1:
    4.67+	CMP	R0, R3			/* stop once the cursor is back at `to` */
    4.68+	BEQ	_done
    4.69+	MOVBU	-1(R1)!, R5
    4.70+	MOVBU	R5, -1(R3)!
    4.71+	B	_bloop1
     5.1new file mode 100644
     5.2--- /dev/null
     5.3+++ b/sys/src/libc/arm64/memset.s
     5.4@@ -0,0 +1,40 @@
     5.5+/*
     5.6+ * memset(s, c, n)
     5.7+ *
     5.8+ * Store the low byte of c into each of the n bytes starting at s and
     5.9+ * return s.
    5.10+ *
    5.11+ * s arrives in R0, which is also the result register.  R0 is used as the
    5.12+ * store cursor, so it is wound back by n at _done.
    5.13+ *
    5.14+ * R1 fill value, R2 n (32-bit, zero-extended), R3 = s+n,
    5.15+ * R4 = s+(n&~7), the end of the part filled 8 bytes at a time.
    5.16+ */
    5.17+TEXT memset(SB), $-4
    5.18+	MOVBU	c+8(FP), R1
    5.19+	MOVWU	n+16(FP), R2
    5.20+
    5.21+	ADD	R0, R2, R3		/* R3 = s + n */
    5.22+	BIC	$7, R2, R4		/* R4 = n & ~7 */
    5.23+	CBZ	R4, _loop1		/* fewer than 8 bytes: byte loop only */
    5.24+	ADD	R0, R4, R4		/* R4 = s + (n & ~7) */
    5.25+
    5.26+	ORR	R1<<8, R1		/* replicate the fill byte ... */
    5.27+	ORR	R1<<16, R1
    5.28+	ORR	R1<<32, R1		/* ... into all 8 bytes of R1 */
    5.29+
    5.30+_loop8:
    5.31+	MOV	R1, (R0)8!		/* store 8 bytes, then cursor += 8 */
    5.32+	CMP	R4, R0
    5.33+	BNE	_loop8
    5.34+
    5.35+_loop1:
    5.36+	CMP	R3, R0
    5.37+	BEQ	_done
    5.38+
    5.39+	MOVBU	R1, (R0)1!
    5.40+	B	_loop1
    5.41+
    5.42+_done:
    5.43+	SUB	R2, R0, R0		/* R0 is s + n here; wind back to s */
    5.44+	RETURN
     6.1--- a/sys/src/libc/arm64/mkfile
     6.2+++ b/sys/src/libc/arm64/mkfile
     6.3@@ -11,6 +11,8 @@ SFILES=\
     6.4 	getfcr.s\
     6.5 	main9.s\
     6.6 	main9p.s\
     6.7+	memmove.s\
     6.8+	memset.s\
     6.9 	setjmp.s\
    6.10 	tas.s\
    6.11