// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

// Original code can be found at the link below:
// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl

// I changed some function names to better follow Go naming conventions.
// For instance, the functions aes_p8_set_{en,de}crypt_key became
// set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts,
// creating a new section (doEncryptKeyAsm). This was necessary to
// avoid overwriting the arguments when setDecryptKeyAsm calls setEncryptKeyAsm.
// There were other modifications as well, but the functionality was kept the same.
#include "textflag.h"

// Register assignments for set{En,De}cryptKeyAsm and doEncryptKeyAsm.
// The numbers in the trailing "// mnemonic n,n,n" comments throughout this
// file are the register numbers used by the upstream perlasm source.
#define INP R3
#define BITS R4
#define OUT R5
#define PTR R6
#define CNT R7
#define ROUNDS R8
#define TEMP R19
#define ZERO V0
#define IN0 V1
#define IN1 V2
#define KEY V3
#define RCON V4
#define MASK V5
#define TMP V6
#define STAGE V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11

// Register assignments for {en,de}cryptBlockAsm.
// These alias the general-purpose registers above (R3..R7); the block
// routines also reuse the vector names above for unrelated purposes
// (for example, ZERO/V0 carries the cipher state there).
#define BLK_INP R3
#define BLK_OUT R4
#define BLK_KEY R5
#define BLK_ROUNDS R6
#define BLK_IDX R7

// Constant table used by doEncryptKeyAsm (64 bytes):
//   +0x00  initial round-constant vector, loaded into RCON
//   +0x10  round-constant vector reloaded into RCON after the 8-iteration
//          loop of the 128-bit path ("last two round keys")
//   +0x20  permute control vector, loaded into MASK (rotate-n-splat)
//   +0x30  zeros; not referenced by the code in this file
DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x30(SB)/8, $0x0000000000000000
DATA ·rcon+0x38(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $64

// func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
//
// Loads the three arguments into INP, BITS and OUT and tail-jumps to
// doEncryptKeyAsm, which does the actual work and stores the result
// in ret+24(FP).
TEXT ·setEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD key+0(FP), INP
	MOVD keylen+8(FP), BITS
	MOVD enc+16(FP), OUT
	JMP ·doEncryptKeyAsm(SB)

// doEncryptKeyAsm is the key-expansion body shared by setEncryptKeyAsm
// (which enters it with JMP) and setDecryptKeyAsm (which enters it with CALL).
//
// On entry:
//   INP  = pointer to the user key
//   BITS = key length; it is validated against 128..256 and must be a
//          multiple of 64, i.e. it is a bit count
//   OUT  = pointer to the buffer receiving the expanded key
//
// On exit INP holds the status, which is also stored to ret+24(FP):
//    0  success
//   -1  INP or OUT was zero
//   -2  BITS < 128, BITS > 256, or BITS not a multiple of 64
//
// On success ROUNDS (R8) is 10, 12 or 14, the same value has been stored as
// a 32-bit word at the final OUT, and OUT is left pointing at that word.
// setDecryptKeyAsm relies on both: it subtracts 240 from OUT to get back to
// the first round key and uses ROUNDS as its loop bound.
//
// The stores to OUT go through OUTPERM/OUTMASK/OUTHEAD/OUTTAIL so that the
// destination does not need to be 16-byte aligned.
// NOTE(review): lvx/stvx operate on whole aligned quadwords, so this scheme
// reads and rewrites (unchanged) the bytes adjacent to an unaligned
// destination - confirm against the Power ISA if that matters to callers.
TEXT ·doEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
	// Do not change R10 since it's storing the LR value in setDecryptKeyAsm

	// Check arguments. PTR doubles as the pending exit code until the
	// checks have passed.
	MOVD $-1, PTR                          // li    6,-1       exit code to -1 (255)
	CMPU INP, $0                           // cmpldi r3,0      input key pointer set?
	BC 0x0E, 2, enc_key_abort              // beq-  .Lenc_key_abort
	CMPU OUT, $0                           // cmpldi r5,0      output key pointer set?
	BC 0x0E, 2, enc_key_abort              // beq-  .Lenc_key_abort
	MOVD $-2, PTR                          // li    6,-2       exit code to -2 (254)
	CMPW BITS, $128                        // cmpwi 4,128      greater than or equal to 128
	BC 0x0E, 0, enc_key_abort              // blt-  .Lenc_key_abort
	CMPW BITS, $256                        // cmpwi 4,256      less than or equal to 256
	BC 0x0E, 1, enc_key_abort              // bgt-  .Lenc_key_abort
	ANDCC $0x3f, BITS, TEMP                // andi. 0,4,0x3f   multiple of 64 (upstream uses r0; TEMP here)
	BC 0x06, 2, enc_key_abort              // bne-  .Lenc_key_abort

	MOVD $·rcon(SB), PTR                   // PTR point to rcon addr

	// Get key from memory and write aligned into VR
	NEG INP, R9                            // neg   9,3        R9 is ~INP + 1
	LVX (INP)(R0), IN0                     // lvx   1,0,3      Load key inside IN0
	ADD $15, INP, INP                      // addi  3,3,15     Add 15B to INP addr
	LVSR (R9)(R0), KEY                     // lvsr  3,0,9
	MOVD $0x20, R8                         // li    8,0x20     R8 = 32 (offset of the MASK entry in rcon)
	CMPW BITS, $192                        // cmpwi 4,192      Key size == 192? (result consumed by BLT/BEQ below)
	LVX (INP)(R0), IN1                     // lvx   2,0,3
	VSPLTISB $0x0f, MASK                   // vspltisb 5,0x0f  0x0f0f0f0f... mask
	LVX (PTR)(R0), RCON                    // lvx   4,0,6      Load first 16 bytes into RCON
	VXOR KEY, MASK, KEY                    // vxor  3,3,5      Adjust for byte swap
	LVX (PTR)(R8), MASK                    // lvx   5,8,6      Load rcon+0x20 into MASK
	ADD $0x10, PTR, PTR                    // addi  6,6,0x10   PTR to next 16 bytes of RCON
	VPERM IN0, IN1, KEY, IN0               // vperm 1,1,2,3    Align
	MOVD $8, CNT                           // li    7,8        CNT = 8
	VXOR ZERO, ZERO, ZERO                  // vxor  0,0,0      Zero to be zero :)
	MOVD CNT, CTR                          // mtctr 7          Set the counter to 8 (rounds)

	// Prepare the permute vector and select mask used for every store to OUT.
	LVSL (OUT)(R0), OUTPERM                // lvsl  8,0,5
	VSPLTISB $-1, OUTMASK                  // vspltisb 9,-1
	LVX (OUT)(R0), OUTHEAD                 // lvx   10,0,5
	VPERM OUTMASK, ZERO, OUTPERM, OUTMASK  // vperm 9,9,0,8

	// Dispatch on the CMPW BITS, $192 result: <192 -> loop128, ==192 -> l192,
	// otherwise l256. INP is advanced by 8 or 16 for the longer keys.
	BLT loop128                            // blt   .Loop128
	ADD $8, INP, INP                       // addi  3,3,8
	BEQ l192                               // beq   .L192
	ADD $8, INP, INP                       // addi  3,3,8
	JMP l256                               // b     .L256

loop128:
	// Key schedule (Round 1 to 8). Each iteration stores the current round
	// key, derives the next one in IN0, and doubles RCON.
	VPERM IN0, IN0, MASK, KEY              // vperm 3,1,1,5         Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP             // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8        Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY             // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5            Write to output
	ADD $16, OUT, OUT                      // addi 5,5,16           Point to the next round

	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VADDUWM RCON, RCON, RCON               // vadduwm 4,4,4         RCON += RCON
	VXOR IN0, KEY, IN0                     // vxor 1,1,3
	BC 0x10, 0, loop128                    // bdnz .Loop128

	LVX (PTR)(R0), RCON                    // lvx 4,0,6             Last two round keys (rcon+0x10)

	// Key schedule (Round 9)
	VPERM IN0, IN0, MASK, KEY              // vperm 3,1,1,5         Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP             // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8        Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY             // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5            Round 9
	ADD $16, OUT, OUT                      // addi 5,5,16

	// Key schedule (Round 10)
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VADDUWM RCON, RCON, RCON               // vadduwm 4,4,4
	VXOR IN0, KEY, IN0                     // vxor 1,1,3

	VPERM IN0, IN0, MASK, KEY              // vperm 3,1,1,5         Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP             // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8        Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY             // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5            Round 10
	ADD $16, OUT, OUT                      // addi 5,5,16

	// Key schedule (Round 11)
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VXOR IN0, KEY, IN0                     // vxor 1,1,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5            Round 11

	ADD $15, OUT, INP                      // addi 3,5,15           INP = address used by "done" for the tail merge
	ADD $0x50, OUT, OUT                    // addi 5,5,0x50         OUT = start + 240
	MOVD $10, ROUNDS                       // li 8,10
	JMP done                               // b .Ldone

l192:
	// 192-bit key: store the first round key, then run 4 iterations that
	// each emit three round keys.
	LVX (INP)(R0), TMP                     // lvx 6,0,3
	MOVD $4, CNT                           // li 7,4
	VPERM IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5
	ADD $16, OUT, OUT                      // addi 5,5,16
	VPERM IN1, TMP, KEY, IN1               // vperm 2,2,6,3
	VSPLTISB $8, KEY                       // vspltisb 3,8
	MOVD CNT, CTR                          // mtctr 7
	VSUBUBM MASK, KEY, MASK                // vsububm 5,5,3         Adjust the splat mask by 8 in every byte

loop192:
	VPERM IN1, IN1, MASK, KEY              // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP             // vsldoi 6,0,1,12
	VCIPHERLAST KEY, RCON, KEY             // vcipherlast 3,3,4

	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6

	VSLDOI $8, ZERO, IN1, STAGE            // vsldoi 7,0,2,8
	VSPLTW $3, IN0, TMP                    // vspltw 6,1,3
	VXOR TMP, IN1, TMP                     // vxor 6,6,2
	VSLDOI $12, ZERO, IN1, IN1             // vsldoi 2,0,2,12
	VADDUWM RCON, RCON, RCON               // vadduwm 4,4,4
	VXOR IN1, TMP, IN1                     // vxor 2,2,6
	VXOR IN0, KEY, IN0                     // vxor 1,1,3
	VXOR IN1, KEY, IN1                     // vxor 2,2,3
	VSLDOI $8, STAGE, IN0, STAGE           // vsldoi 7,7,1,8

	VPERM IN1, IN1, MASK, KEY              // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP             // vsldoi 6,0,1,12
	VPERM STAGE, STAGE, OUTPERM, OUTTAIL   // vperm 11,7,7,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY             // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5
	ADD $16, OUT, OUT                      // addi 5,5,16

	VSLDOI $8, IN0, IN1, STAGE             // vsldoi 7,1,2,8
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VPERM STAGE, STAGE, OUTPERM, OUTTAIL   // vperm 11,7,7,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5
	ADD $16, OUT, OUT                      // addi 5,5,16

	VSPLTW $3, IN0, TMP                    // vspltw 6,1,3
	VXOR TMP, IN1, TMP                     // vxor 6,6,2
	VSLDOI $12, ZERO, IN1, IN1             // vsldoi 2,0,2,12
	VADDUWM RCON, RCON, RCON               // vadduwm 4,4,4
	VXOR IN1, TMP, IN1                     // vxor 2,2,6
	VXOR IN0, KEY, IN0                     // vxor 1,1,3
	VXOR IN1, KEY, IN1                     // vxor 2,2,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5
	ADD $15, OUT, INP                      // addi 3,5,15           INP = address used by "done" for the tail merge
	ADD $16, OUT, OUT                      // addi 5,5,16
	BC 0x10, 0, loop192                    // bdnz .Loop192

	MOVD $12, ROUNDS                       // li 8,12
	ADD $0x20, OUT, OUT                    // addi 5,5,0x20         OUT = start + 240
	JMP done                               // b .Ldone

l256:
	// 256-bit key: store the first round key, then loop with CTR = 7. Each
	// pass stores two round keys; the loop exits via bdz after the second
	// store of the last pass.
	LVX (INP)(R0), TMP                     // lvx 6,0,3
	MOVD $7, CNT                           // li 7,7
	MOVD $14, ROUNDS                       // li 8,14
	VPERM IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5
	ADD $16, OUT, OUT                      // addi 5,5,16
	VPERM IN1, TMP, KEY, IN1               // vperm 2,2,6,3
	MOVD CNT, CTR                          // mtctr 7

loop256:
	VPERM IN1, IN1, MASK, KEY              // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP             // vsldoi 6,0,1,12
	VPERM IN1, IN1, OUTPERM, OUTTAIL       // vperm 11,2,2,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY             // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5
	ADD $16, OUT, OUT                      // addi 5,5,16

	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                     // vxor 1,1,6
	VADDUWM RCON, RCON, RCON               // vadduwm 4,4,4
	VXOR IN0, KEY, IN0                     // vxor 1,1,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE  // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD          // vor 10,11,11
	STVX STAGE, (OUT+R0)                   // stvx 7,0,5
	ADD $15, OUT, INP                      // addi 3,5,15           INP = address used by "done" for the tail merge
	ADD $16, OUT, OUT                      // addi 5,5,16
	BC 0x12, 0, done                       // bdz .Ldone

	VSPLTW $3, IN0, KEY                    // vspltw 3,1,3
	VSLDOI $12, ZERO, IN1, TMP             // vsldoi 6,0,2,12
	VSBOX KEY, KEY                         // vsbox 3,3

	VXOR IN1, TMP, IN1                     // vxor 2,2,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN1, TMP, IN1                     // vxor 2,2,6
	VSLDOI $12, ZERO, TMP, TMP             // vsldoi 6,0,6,12
	VXOR IN1, TMP, IN1                     // vxor 2,2,6

	VXOR IN1, KEY, IN1                     // vxor 2,2,3
	JMP loop256                            // b .Loop256

done:
	// Merge the pending OUTHEAD bytes with what is already in memory at INP
	// and write them back, then record the round count at OUT.
	LVX (INP)(R0), IN1                     // lvx 2,0,3
	VSEL OUTHEAD, IN1, OUTMASK, IN1        // vsel 2,10,2,9
	STVX IN1, (INP+R0)                     // stvx 2,0,3
	MOVD $0, PTR                           // li 6,0     set PTR to 0 (exit code 0)
	MOVW ROUNDS, 0(OUT)                    // stw 8,0(5) store the round count after the round keys

enc_key_abort:
	// Reached both on failure (PTR = -1 or -2) and by fall-through from
	// "done" (PTR = 0).
	MOVD PTR, INP                          // mr 3,6     set exit code with PTR value
	MOVD INP, ret+24(FP)                   // Put return value into the FP
	RET                                    // blr

// func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
//
// Expands the key with doEncryptKeyAsm and then reverses the order of the
// 16-byte round keys in place: the entry at INP is swapped with the entry at
// OUT while INP moves forward and OUT moves backward, ROUNDS/2 times.
//
// Returns 0 on success, or the non-zero status left in INP (R3) by
// doEncryptKeyAsm.
//
// LR is parked in R10 around the CALL because this function has no frame;
// R10 is only reused as scratch after LR has been restored.
TEXT ·setDecryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD key+0(FP), INP
	MOVD keylen+8(FP), BITS
	MOVD dec+16(FP), OUT

	MOVD LR, R10                           // mflr 10
	CALL ·doEncryptKeyAsm(SB)
	MOVD R10, LR                           // mtlr 10

	CMPW INP, $0                           // cmpwi 3,0  exit 0 = ok
	BC 0x06, 2, dec_key_abort              // bne- .Ldec_key_abort

	// doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
	SLW $4, ROUNDS, CNT                    // slwi 7,8,4     CNT = ROUNDS * 16
	SUB $240, OUT, INP                     // subi 3,5,240   INP = first round key
	SRW $1, ROUNDS, ROUNDS                 // srwi 8,8,1     ROUNDS = ROUNDS / 2
	ADD R7, INP, OUT                       // add 5,3,7      OUT = last round key (R7 is CNT)
	MOVD ROUNDS, CTR                       // mtctr 8

// dec_key will invert the key sequence in order to be used for decrypt.
// R6..R12 and TEMP are used as plain scratch registers here, so the
// PTR/CNT/ROUNDS values are no longer meaningful once the loop starts.
dec_key:
	MOVWZ 0(INP), TEMP                     // lwz 0, 0(3)    (upstream uses r0; TEMP here)
	MOVWZ 4(INP), R6                       // lwz 6, 4(3)
	MOVWZ 8(INP), R7                       // lwz 7, 8(3)
	MOVWZ 12(INP), R8                      // lwz 8, 12(3)
	ADD $16, INP, INP                      // addi 3,3,16
	MOVWZ 0(OUT), R9                       // lwz 9, 0(5)
	MOVWZ 4(OUT), R10                      // lwz 10,4(5)
	MOVWZ 8(OUT), R11                      // lwz 11,8(5)
	MOVWZ 12(OUT), R12                     // lwz 12,12(5)
	MOVW TEMP, 0(OUT)                      // stw 0, 0(5)
	MOVW R6, 4(OUT)                        // stw 6, 4(5)
	MOVW R7, 8(OUT)                        // stw 7, 8(5)
	MOVW R8, 12(OUT)                       // stw 8, 12(5)
	SUB $16, OUT, OUT                      // subi 5,5,16
	MOVW R9, -16(INP)                      // stw 9, -16(3)
	MOVW R10, -12(INP)                     // stw 10,-12(3)
	MOVW R11, -8(INP)                      // stw 11,-8(3)
	MOVW R12, -4(INP)                      // stw 12,-4(3)
	BC 0x10, 0, dec_key                    // bdnz .Ldeckey

	XOR R3, R3, R3                         // xor 3,3,3      Clean R3 (return 0)

dec_key_abort:
	MOVD R3, ret+24(FP)                    // Put return value into the FP
	RET                                    // blr

// func encryptBlockAsm(dst, src *byte, enc *uint32)
//
// Encrypts one 16-byte block from src into dst using the key schedule at
// enc. The round count is read from the 32-bit word at enc+240.
//
// Register use differs from the key-setup code even though the names are
// shared: ZERO (V0) holds the cipher state, IN0/IN1 alternate as the two
// most recently loaded round-key quadwords, MASK is the permute vector
// derived from the key address, and KEY/RCON are used for the output
// alignment.
//
// The loop runs rounds/2 - 1 times with two VCIPHER steps per pass; one more
// VCIPHER and the VCIPHERLAST follow it, for "rounds" steps in total.
//
// NOTE(review): src, dst and enc are all handled with lvsl/lvsr + vperm, so
// none appears to require 16-byte alignment; the two STVX at the end rewrite
// the quadwords at dst and dst+15 after merging with their previous contents
// - confirm lvx/stvx alignment behavior against the Power ISA.
TEXT ·encryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD dst+0(FP), BLK_OUT
	MOVD src+8(FP), BLK_INP
	MOVD enc+16(FP), BLK_KEY

	MOVWZ 240(BLK_KEY), BLK_ROUNDS         // lwz 6,240(5)   round count stored after the round keys
	MOVD $15, BLK_IDX                      // li 7,15

	// Load the input block (two quadword loads plus a permute).
	LVX (BLK_INP)(R0), ZERO                // lvx 0,0,3
	NEG BLK_OUT, R11                       // neg 11,4
	LVX (BLK_INP)(BLK_IDX), IN0            // lvx 1,7,3
	LVSL (BLK_INP)(R0), IN1                // lvsl 2,0,3
	VSPLTISB $0x0f, RCON                   // vspltisb 4,0x0f
	LVSR (R11)(R0), KEY                    // lvsr 3,0,11    output permute, used after the rounds
	VXOR IN1, RCON, IN1                    // vxor 2,2,4
	MOVD $16, BLK_IDX                      // li 7,16
	VPERM ZERO, IN0, IN1, ZERO             // vperm 0,0,1,2
	LVX (BLK_KEY)(R0), IN0                 // lvx 1,0,5
	LVSR (BLK_KEY)(R0), MASK               // lvsr 5,0,5
	SRW $1, BLK_ROUNDS, BLK_ROUNDS         // srwi 6,6,1     rounds / 2
	LVX (BLK_KEY)(BLK_IDX), IN1            // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX              // addi 7,7,16
	SUB $1, BLK_ROUNDS, BLK_ROUNDS         // subi 6,6,1     rounds / 2 - 1 loop passes

	VPERM IN1, IN0, MASK, IN0              // vperm 1,2,1,5
	VXOR ZERO, IN0, ZERO                   // vxor 0,0,1     XOR the first round key into the state
	LVX (BLK_KEY)(BLK_IDX), IN0            // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX              // addi 7,7,16
	MOVD BLK_ROUNDS, CTR                   // mtctr 6

loop_enc:
	VPERM IN0, IN1, MASK, IN1              // vperm 2,1,2,5
	VCIPHER ZERO, IN1, ZERO                // vcipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1            // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX              // addi 7,7,16
	VPERM IN1, IN0, MASK, IN0              // vperm 1,2,1,5
	VCIPHER ZERO, IN0, ZERO                // vcipher 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0            // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX              // addi 7,7,16
	BC 0x10, 0, loop_enc                   // bdnz .Loop_enc

	// Final two rounds.
	VPERM IN0, IN1, MASK, IN1              // vperm 2,1,2,5
	VCIPHER ZERO, IN1, ZERO                // vcipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1            // lvx 2,7,5
	VPERM IN1, IN0, MASK, IN0              // vperm 1,2,1,5
	VCIPHERLAST ZERO, IN0, ZERO            // vcipherlast 0,0,1

	// Store the result: build a select mask from KEY, permute the state,
	// and merge it into the two quadwords at dst and dst+15.
	VSPLTISB $-1, IN1                      // vspltisb 2,-1
	VXOR IN0, IN0, IN0                     // vxor 1,1,1
	MOVD $15, BLK_IDX                      // li 7,15
	VPERM IN1, IN0, KEY, IN1               // vperm 2,2,1,3
	VXOR KEY, RCON, KEY                    // vxor 3,3,4
	LVX (BLK_OUT)(R0), IN0                 // lvx 1,0,4
	VPERM ZERO, ZERO, KEY, ZERO            // vperm 0,0,0,3
	VSEL IN0, ZERO, IN1, IN0               // vsel 1,1,0,2
	LVX (BLK_OUT)(BLK_IDX), RCON           // lvx 4,7,4
	STVX IN0, (BLK_OUT+R0)                 // stvx 1,0,4
	VSEL ZERO, RCON, IN1, ZERO             // vsel 0,0,4,2
	STVX ZERO, (BLK_OUT+BLK_IDX)           // stvx 0,7,4

	RET                                    // blr

// func decryptBlockAsm(dst, src *byte, dec *uint32)
//
// Decrypts one 16-byte block from src into dst using the key schedule at
// dec. The instruction sequence is the same as in encryptBlockAsm except
// that VNCIPHER / VNCIPHERLAST replace VCIPHER / VCIPHERLAST; the round
// count is likewise read from the 32-bit word at dec+240.
//
// As in encryptBlockAsm, ZERO (V0) holds the cipher state and IN0/IN1
// alternate as the most recently loaded round-key quadwords.
TEXT ·decryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD dst+0(FP), BLK_OUT
	MOVD src+8(FP), BLK_INP
	MOVD dec+16(FP), BLK_KEY

	MOVWZ 240(BLK_KEY), BLK_ROUNDS         // lwz 6,240(5)   round count stored after the round keys
	MOVD $15, BLK_IDX                      // li 7,15

	// Load the input block (two quadword loads plus a permute).
	LVX (BLK_INP)(R0), ZERO                // lvx 0,0,3
	NEG BLK_OUT, R11                       // neg 11,4
	LVX (BLK_INP)(BLK_IDX), IN0            // lvx 1,7,3
	LVSL (BLK_INP)(R0), IN1                // lvsl 2,0,3
	VSPLTISB $0x0f, RCON                   // vspltisb 4,0x0f
	LVSR (R11)(R0), KEY                    // lvsr 3,0,11    output permute, used after the rounds
	VXOR IN1, RCON, IN1                    // vxor 2,2,4
	MOVD $16, BLK_IDX                      // li 7,16
	VPERM ZERO, IN0, IN1, ZERO             // vperm 0,0,1,2
	LVX (BLK_KEY)(R0), IN0                 // lvx 1,0,5
	LVSR (BLK_KEY)(R0), MASK               // lvsr 5,0,5
	SRW $1, BLK_ROUNDS, BLK_ROUNDS         // srwi 6,6,1     rounds / 2
	LVX (BLK_KEY)(BLK_IDX), IN1            // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX              // addi 7,7,16
	SUB $1, BLK_ROUNDS, BLK_ROUNDS         // subi 6,6,1     rounds / 2 - 1 loop passes

	VPERM IN1, IN0, MASK, IN0              // vperm 1,2,1,5
	VXOR ZERO, IN0, ZERO                   // vxor 0,0,1     XOR the first round key into the state
	LVX (BLK_KEY)(BLK_IDX), IN0            // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX              // addi 7,7,16
	MOVD BLK_ROUNDS, CTR                   // mtctr 6

loop_dec:
	VPERM IN0, IN1, MASK, IN1              // vperm 2,1,2,5
	VNCIPHER ZERO, IN1, ZERO               // vncipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1            // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX              // addi 7,7,16
	VPERM IN1, IN0, MASK, IN0              // vperm 1,2,1,5
	VNCIPHER ZERO, IN0, ZERO               // vncipher 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0            // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX              // addi 7,7,16
	BC 0x10, 0, loop_dec                   // bdnz .Loop_dec

	// Final two rounds.
	VPERM IN0, IN1, MASK, IN1              // vperm 2,1,2,5
	VNCIPHER ZERO, IN1, ZERO               // vncipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1            // lvx 2,7,5
	VPERM IN1, IN0, MASK, IN0              // vperm 1,2,1,5
	VNCIPHERLAST ZERO, IN0, ZERO           // vncipherlast 0,0,1

	// Store the result: build a select mask from KEY, permute the state,
	// and merge it into the two quadwords at dst and dst+15.
	VSPLTISB $-1, IN1                      // vspltisb 2,-1
	VXOR IN0, IN0, IN0                     // vxor 1,1,1
	MOVD $15, BLK_IDX                      // li 7,15
	VPERM IN1, IN0, KEY, IN1               // vperm 2,2,1,3
	VXOR KEY, RCON, KEY                    // vxor 3,3,4
	LVX (BLK_OUT)(R0), IN0                 // lvx 1,0,4
	VPERM ZERO, ZERO, KEY, ZERO            // vperm 0,0,0,3
	VSEL IN0, ZERO, IN1, IN0               // vsel 1,1,0,2
	LVX (BLK_OUT)(BLK_IDX), RCON           // lvx 4,7,4
	STVX IN0, (BLK_OUT+R0)                 // stvx 1,0,4
	VSEL ZERO, RCON, IN1, ZERO             // vsel 0,0,4,2
	STVX ZERO, (BLK_OUT+BLK_IDX)           // stvx 0,7,4

	RET                                    // blr