// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// func castagnoliSSE42(crc uint32, p []byte) uint32
TEXT ·castagnoliSSE42(SB),NOSPLIT,$0
	MOVL crc+0(FP), AX  // CRC value
	MOVL p+4(FP), SI  // data pointer
	MOVL p_len+8(FP), CX  // len(p)

	NOTL AX

	/* If there's less than 8 bytes to process, we do it byte-by-byte. */
	CMPQ CX, $8
	JL cleanup

	/* Process individual bytes until the input is 8-byte aligned. */
startup:
	MOVQ SI, BX
	ANDQ $7, BX
	JZ aligned

	CRC32B (SI), AX
	DECQ CX
	INCQ SI
	JMP startup

aligned:
	/* The input is now 8-byte aligned and we can process 8-byte chunks. */
	CMPQ CX, $8
	JL cleanup

	CRC32Q (SI), AX
	ADDQ $8, SI
	SUBQ $8, CX
	JMP aligned

cleanup:
	/* We may have some bytes left over that we process one at a time. */
	CMPQ CX, $0
	JE done

	CRC32B (SI), AX
	INCQ SI
	DECQ CX
	JMP cleanup

done:
	NOTL AX
	MOVL AX, ret+16(FP)
	RET