// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// Equal compares two byte ranges described by (a_base, a_len) and
// (b_base, b_len) and stores a one-byte boolean result at ret+24(FP).
// NOTE(review): the argument layout (two 12-byte headers followed by a
// one-byte result) matches func Equal(a, b []byte) bool; the Go declaration
// is not in this file -- confirm against it.
//
// Fast paths handled here:
//   - lengths differ         -> false
//   - base pointers are equal -> true (no memory is read)
// Everything else tail-jumps to memeqbody with SI=a, DI=b, BX=len, AX=&ret.
TEXT ·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	neq			// different lengths can never be equal
	MOVL	a_base+0(FP), SI
	MOVL	b_base+12(FP), DI
	CMPL	SI, DI
	JEQ	eq			// same pointer and same length
	LEAL	ret+24(FP), AX		// memeqbody writes the result through AX
	JMP	memeqbody<>(SB)
neq:
	MOVB	$0, ret+24(FP)
	RET
eq:
	MOVB	$1, ret+24(FP)
	RET

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Returns true immediately when a and b are the same pointer; otherwise
// tail-jumps to memeqbody with SI=a, DI=b, BX=size, AX=&ret.
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	memeqbody<>(SB)
eq:
	MOVB	$1, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same as memequal, except that the byte count is not an argument: it is
// read from the closure that DX points at.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX		// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	memeqbody<>(SB)
eq:
	MOVB	$1, ret+8(FP)
	RET

// memeqbody is the shared comparison loop. It is entered by JMP (not CALL),
// so its RET returns directly to the caller of the entry point above.
//
// In:
//	a in SI
//	b in DI
//	count in BX
//	address of result byte in AX
// Out:
//	the byte at (AX) is set to 1 if the ranges are equal, 0 otherwise
// Clobbers: BX, CX, DX, SI, DI, X0-X7, flags.
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small			// fewer than 4 bytes: handled separately

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	// The SSE2 path is only taken when the CPU feature flag is set.
	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
	JNE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// Per-byte equality masks (0xff where equal), then AND all four
	// 16-byte masks together into X0.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX			// one bit per byte of the combined mask
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff		// all 16 bits set <=> all 64 bytes matched
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// Compare the 4 bytes that end exactly at the end of each range. This
	// path is only reached when the original count was at least 4, so the
	// load may overlap bytes that were already compared but never starts
	// before the beginning of the range.
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// Fewer than 4 bytes. A full 32-bit word is loaded from each side and
	// the bytes that are not part of the input are shifted out before the
	// result is taken.
small:
	CMPL	BX, $0
	JEQ	equal			// empty ranges are equal; ZF is set for SETEQ below

	// CX = -8*BX. The shifts below take their count from CX, which
	// amounts to shifting by 32 - 8*BX bits (24, 16 or 8).
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_high

	// load at SI won't cross a page boundary.
	// The wanted bytes are the low BX bytes of SI; the rest is discarded
	// by the SHLL at di_finish.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	// The load ends at the last input byte, so the wanted bytes arrive in
	// the high BX bytes and are shifted down to the low bytes.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	// The low 8*BX bits of the difference are zero exactly when the low
	// BX bytes of both words match. Shifting left by 32 - 8*BX drops the
	// unrelated high bytes and sets ZF from what remains.
	SUBL	SI, DI
	SHLL	CX, DI
equal:
	// Reached with ZF from either the CMPL BX, $0 above (count == 0) or
	// the SHLL just before this label.
	SETEQ	(AX)
	RET