changelog shortlog tags branches files raw gz bz2 help

Mercurial > hg > plan9front / changeset: libc: use MOVP instruction for arm64 memmove() and memset()

changeset 7223: f6786e3ba3da
parent 7222: 761a37c3fc9a
child 7224: 31f1de7ff0d4
author: cinap_lenrek@felloff.net
date: Thu, 09 May 2019 11:40:54 +0200
files: sys/src/ape/lib/ap/arm64/memmove.s sys/src/ape/lib/ap/arm64/memset.s sys/src/libc/arm64/memmove.s sys/src/libc/arm64/memset.s
description: libc: use MOVP instruction for arm64 memmove() and memset()
     1.1--- a/sys/src/ape/lib/ap/arm64/memmove.s
     1.2+++ b/sys/src/ape/lib/ap/arm64/memmove.s
     1.3@@ -9,15 +9,15 @@ TEXT memmove(SB), $-4
     1.4 
     1.5 _forward:
     1.6 	ADD	R0, R2, R3
     1.7-	BIC	$7, R2, R4
     1.8+	BIC	$15, R2, R4
     1.9 	CBZ	R4, _floop1
    1.10 	ADD	R0, R4, R4
    1.11 
    1.12-_floop8:
    1.13-	MOV	(R1)8!, R5
    1.14-	MOV	R5, (R0)8!
    1.15+_floop16:
    1.16+	MOVP	(R1)16!, R5, R6
    1.17+	MOVP	R5, R6, (R0)16!
    1.18 	CMP	R4, R0
    1.19-	BNE	_floop8
    1.20+	BNE	_floop16
    1.21 
    1.22 _floop1:
    1.23 	CMP	R3, R0
    1.24@@ -32,15 +32,15 @@ TEXT memmove(SB), $-4
    1.25 _backward:
    1.26 	ADD	R2, R1, R1
    1.27 	ADD	R2, R0, R3
    1.28-	BIC	$7, R2, R4
    1.29+	BIC	$15, R2, R4
    1.30 	CBZ	R4, _bloop1
    1.31 	SUB	R4, R3, R4
    1.32 
    1.33-_bloop8:
    1.34-	MOV	-8(R1)!, R5
    1.35-	MOV	R5, -8(R3)!
    1.36+_bloop16:
    1.37+	MOVP	-16(R1)!, R5, R6
    1.38+	MOVP	R5, R6, -16(R3)!
    1.39 	CMP	R4, R3
    1.40-	BNE	_bloop8
    1.41+	BNE	_bloop16
    1.42 
    1.43 _bloop1:
    1.44 	CMP	R0, R3
     2.1--- a/sys/src/ape/lib/ap/arm64/memset.s
     2.2+++ b/sys/src/ape/lib/ap/arm64/memset.s
     2.3@@ -3,7 +3,7 @@ TEXT memset(SB), $-4
     2.4 	MOVWU	n+16(FP), R2
     2.5 
     2.6 	ADD	R0, R2, R3
     2.7-	BIC	$7, R2, R4
     2.8+	BIC	$15, R2, R4
     2.9 	CBZ	R4, _loop1
    2.10 	ADD	R0, R4, R4
    2.11 
    2.12@@ -11,10 +11,10 @@ TEXT memset(SB), $-4
    2.13 	ORR	R1<<16, R1
    2.14 	ORR	R1<<32, R1
    2.15 
    2.16-_loop8:
    2.17-	MOV	R1, (R0)8!
    2.18+_loop16:
    2.19+	MOVP	R1, R1, (R0)16!
    2.20 	CMP	R4, R0
    2.21-	BNE	_loop8
    2.22+	BNE	_loop16
    2.23 
    2.24 _loop1:
    2.25 	CMP	R3, R0
     3.1--- a/sys/src/libc/arm64/memmove.s
     3.2+++ b/sys/src/libc/arm64/memmove.s
     3.3@@ -9,15 +9,15 @@ TEXT memmove(SB), $-4
     3.4 
     3.5 _forward:
     3.6 	ADD	R0, R2, R3
     3.7-	BIC	$7, R2, R4
     3.8+	BIC	$15, R2, R4
     3.9 	CBZ	R4, _floop1
    3.10 	ADD	R0, R4, R4
    3.11 
    3.12-_floop8:
    3.13-	MOV	(R1)8!, R5
    3.14-	MOV	R5, (R0)8!
    3.15+_floop16:
    3.16+	MOVP	(R1)16!, R5, R6
    3.17+	MOVP	R5, R6, (R0)16!
    3.18 	CMP	R4, R0
    3.19-	BNE	_floop8
    3.20+	BNE	_floop16
    3.21 
    3.22 _floop1:
    3.23 	CMP	R3, R0
    3.24@@ -32,15 +32,15 @@ TEXT memmove(SB), $-4
    3.25 _backward:
    3.26 	ADD	R2, R1, R1
    3.27 	ADD	R2, R0, R3
    3.28-	BIC	$7, R2, R4
    3.29+	BIC	$15, R2, R4
    3.30 	CBZ	R4, _bloop1
    3.31 	SUB	R4, R3, R4
    3.32 
    3.33-_bloop8:
    3.34-	MOV	-8(R1)!, R5
    3.35-	MOV	R5, -8(R3)!
    3.36+_bloop16:
    3.37+	MOVP	-16(R1)!, R5, R6
    3.38+	MOVP	R5, R6, -16(R3)!
    3.39 	CMP	R4, R3
    3.40-	BNE	_bloop8
    3.41+	BNE	_bloop16
    3.42 
    3.43 _bloop1:
    3.44 	CMP	R0, R3
     4.1--- a/sys/src/libc/arm64/memset.s
     4.2+++ b/sys/src/libc/arm64/memset.s
     4.3@@ -3,7 +3,7 @@ TEXT memset(SB), $-4
     4.4 	MOVWU	n+16(FP), R2
     4.5 
     4.6 	ADD	R0, R2, R3
     4.7-	BIC	$7, R2, R4
     4.8+	BIC	$15, R2, R4
     4.9 	CBZ	R4, _loop1
    4.10 	ADD	R0, R4, R4
    4.11 
    4.12@@ -11,10 +11,10 @@ TEXT memset(SB), $-4
    4.13 	ORR	R1<<16, R1
    4.14 	ORR	R1<<32, R1
    4.15 
    4.16-_loop8:
    4.17-	MOV	R1, (R0)8!
    4.18+_loop16:
    4.19+	MOVP	R1, R1, (R0)16!
    4.20 	CMP	R4, R0
    4.21-	BNE	_loop8
    4.22+	BNE	_loop16
    4.23 
    4.24 _loop1:
    4.25 	CMP	R3, R0