# Montgomery multiplication for 32-bit MIPS (see the .asciiz banner at the
# end of this file).
#
# Entry point: bn_mul_mont
#   $4        rp   result vector (written by .Lsub / .Lcopy)
#   $5        ap   first operand vector
#   $6        bp   second operand vector
#   $7        np   vector that is multiplied by m and finally subtracted
#                  from the accumulator (the modulus in Montgomery terms)
#   16($29)   fifth argument: a pointer; the word it points to is loaded
#             into $8 and used to derive m (presumably the Montgomery
#             constant n0 -- confirm against the caller)
#   20($29)   sixth argument: num, the number of 32-bit words per vector
#
# Returns $2 = 1 after a completed multiplication and $2 = 0 when num is
# outside 4..16 (nothing is computed in that case).  $4 is set to the same
# value as $2 on both paths.
#
# Register use inside bn_mul_mont_internal:
#   $8        word loaded through the fifth argument
#   $9        num, converted to a byte count (num*4)
#   $10/$11   lo0/hi0: running low/high words of the ap[j]*bp[i] column
#   $24/$25   lo1/hi1: running low/high words of the np[j]*m column
#   $12,$14   ap[j], np[j]
#   $13       bp[i]
#   $16/$17   low/high word of the pending ap[j]*bp[i] product
#   $18/$19   low/high word of the pending np[j]*m product
#   $23       m: low word of lo0 * $8
#   $15       tp write pointer into the stack scratch vector
#   $20       tp[j] word being accumulated; later &tp[num]
#   $21,$22   outer / inner loop byte offsets
#   $1,$2     carry/borrow scratch ($1 is the assembler temporary, hence
#             .set noat)
#
# tp is a scratch vector carved out of the stack: num words plus two extra
# words, with the stack pointer then rounded down to a 4096-byte boundary.

.text

.set	noat
.set	noreorder

.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	lw	$8,16($29)		# fifth argument (pointer)
	lw	$9,20($29)		# sixth argument: num
	slt	$1,$9,4
	bnez	$1,1f			# num < 4: refuse
	li	$2,0			# (delay slot) default return value 0
	slt	$1,$9,17		# only num < 17 is handled (original note: "on in-order CPU")
	# Plain bnez instead of the deprecated branch-likely bnezl: the delay
	# slot is a nop, so nullifying it on fall-through changes nothing, and
	# bnez also assembles for ISAs without branch-likely.
	bnez	$1,bn_mul_mont_internal
	nop
1:
	jr	$31
	li	$4,0			# (delay slot)
.end	bn_mul_mont

.align	5
.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$30,14*4,$31
	.mask	0x40000000|16711680,-4
	# Prologue: 14-word frame; save $30 and $16..$23, then keep the
	# incoming stack pointer in $30 so the epilogue can undo the
	# variable-size tp allocation below.
	sub	$29,14*4
	sw	$30,(14-1)*4($29)
	sw	$23,(14-2)*4($29)
	sw	$22,(14-3)*4($29)
	sw	$21,(14-4)*4($29)
	sw	$20,(14-5)*4($29)
	sw	$19,(14-6)*4($29)
	sw	$18,(14-7)*4($29)
	sw	$17,(14-8)*4($29)
	sw	$16,(14-9)*4($29)
	move	$30,$29

	.set	reorder
	lw	$8,0($8)		# dereference the fifth argument
	lw	$13,0($6)		# bp[0]
	lw	$12,0($5)		# ap[0]
	lw	$14,0($7)		# np[0]

	sub	$29,2*4			# place for two extra words
	sll	$9,2			# num -> byte count
	li	$1,-4096
	sub	$29,$9			# room for num words of tp
	and	$29,$1			# round tp down to a 4096-byte boundary

	# First pass (i = 0): tp = ap*bp[0] + np*m, lowest word dropped.
	multu	$12,$13			# ap[0]*bp[0]
	lw	$16,4($5)		# ap[1]
	lw	$18,4($7)		# np[1]
	mflo	$10
	mfhi	$11
	multu	$10,$8
	mflo	$23			# m = low word of lo0 * $8

	multu	$16,$13			# ap[1]*bp[0]
	mflo	$16
	mfhi	$17

	multu	$14,$23			# np[0]*m
	mflo	$24
	mfhi	$25
	multu	$18,$23			# np[1]*m
	addu	$24,$10			# low word is needed only for its carry
	sltu	$1,$24,$10
	addu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29			# tp write pointer
	li	$22,2*4			# j = 2 (byte offset)

.align	4
.L1st:
	.set	noreorder
	add	$12,$5,$22
	add	$14,$7,$22
	lw	$12,($12)		# ap[j]
	lw	$14,($14)		# np[j]

	multu	$12,$13			# ap[j]*bp[0]
	addu	$10,$16,$11		# lo0 = pending ap product + hi0
	addu	$24,$18,$25		# lo1 = pending np product + hi1
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	addu	$11,$17,$1		# hi0 = high word + carry
	addu	$25,$19,$2		# hi1 = high word + carry
	mflo	$16
	mfhi	$17

	addu	$24,$10			# lo1 += lo0
	sltu	$1,$24,$10
	multu	$14,$23			# np[j]*m
	addu	$25,$1
	addu	$22,4			# j++
	sw	$24,($15)		# store finished word to tp
	sltu	$2,$22,$9		# j < num ?
	mflo	$18
	mfhi	$19

	bnez	$2,.L1st
	add	$15,4			# (delay slot) advance tp
	.set	reorder

	# Drain the last pending products, then store the two topmost words:
	# hi0+hi1 and the carry out of that addition.
	addu	$10,$16,$11
	sltu	$1,$10,$11
	addu	$11,$17,$1

	addu	$24,$18,$25
	sltu	$2,$24,$25
	addu	$25,$19,$2
	addu	$24,$10
	sltu	$1,$24,$10
	addu	$25,$1

	sw	$24,($15)

	addu	$25,$11
	sltu	$1,$25,$11
	sw	$25,4($15)
	sw	$1,2*4($15)

	li	$21,4			# i = 1 (byte offset)

	# Remaining passes: tp = tp + ap*bp[i] + np*m, lowest word dropped.
.align	4
.Louter:
	add	$13,$6,$21
	lw	$13,($13)		# bp[i]
	lw	$12,($5)		# ap[0]
	lw	$16,4($5)		# ap[1]
	lw	$20,($29)		# tp[0]

	multu	$12,$13			# ap[0]*bp[i]
	lw	$14,($7)		# np[0]
	lw	$18,4($7)		# np[1]
	mflo	$10
	mfhi	$11
	addu	$10,$20			# lo0 += tp[0]
	multu	$10,$8
	sltu	$1,$10,$20
	addu	$11,$1
	mflo	$23			# m = low word of lo0 * $8

	multu	$16,$13			# ap[1]*bp[i]
	mflo	$16
	mfhi	$17

	multu	$14,$23			# np[0]*m
	mflo	$24
	mfhi	$25

	multu	$18,$23			# np[1]*m
	addu	$24,$10			# low word is needed only for its carry
	sltu	$1,$24,$10
	addu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29			# tp write pointer
	li	$22,2*4			# j = 2 (byte offset)
	lw	$20,4($15)		# next tp word to fold in

.align	4
.Linner:
	.set	noreorder
	add	$12,$5,$22
	add	$14,$7,$22
	lw	$12,($12)		# ap[j]
	lw	$14,($14)		# np[j]

	multu	$12,$13			# ap[j]*bp[i]
	addu	$10,$16,$11		# lo0 = pending ap product + hi0
	addu	$24,$18,$25		# lo1 = pending np product + hi1
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	addu	$11,$17,$1
	addu	$25,$19,$2
	mflo	$16
	mfhi	$17

	addu	$10,$20			# lo0 += previously stored tp word
	addu	$22,4			# j++
	multu	$14,$23			# np[j]*m
	sltu	$1,$10,$20
	addu	$24,$10			# lo1 += lo0
	addu	$11,$1
	sltu	$2,$24,$10
	lw	$20,2*4($15)		# fetch the tp word for the next round
	addu	$25,$2
	sltu	$1,$22,$9		# j < num ?
	mflo	$18
	mfhi	$19
	sw	$24,($15)		# store finished word to tp
	bnez	$1,.Linner
	add	$15,4			# (delay slot) advance tp
	.set	reorder

	# Drain the last pending products and fold in the old top words of tp.
	addu	$10,$16,$11
	sltu	$1,$10,$11
	addu	$11,$17,$1
	addu	$10,$20
	sltu	$2,$10,$20
	addu	$11,$2

	lw	$20,2*4($15)
	addu	$24,$18,$25
	sltu	$1,$24,$25
	addu	$25,$19,$1
	addu	$24,$10
	sltu	$2,$24,$10
	addu	$25,$2
	sw	$24,($15)

	addu	$24,$25,$11
	sltu	$25,$24,$11
	addu	$24,$20
	sltu	$1,$24,$20
	addu	$25,$1			# $25 = topmost overflow word of this pass
	sw	$24,4($15)
	sw	$25,2*4($15)

	addu	$21,4			# i++
	sltu	$2,$21,$9		# i < num ?
	bnez	$2,.Louter

	# Final reduction: rp = tp - np, word by word with borrow.
	.set	noreorder
	add	$20,$29,$9		# &tp[num]
	move	$15,$29
	move	$5,$29
	li	$11,0			# clear borrow bit

.align	4
.Lsub:
	lw	$10,($15)
	lw	$24,($7)
	add	$15,4
	add	$7,4
	subu	$24,$10,$24		# tp[i]-np[i]
	sgtu	$1,$24,$10		# borrow out of the subtraction
	subu	$10,$24,$11		# minus incoming borrow
	sgtu	$11,$10,$24		# borrow out of that step
	sw	$10,($4)
	or	$11,$1			# combined borrow for the next word
	sltu	$1,$15,$20
	bnez	$1,.Lsub
	add	$4,4			# (delay slot) advance rp

	# $11 becomes all-ones when the subtraction borrowed past the top
	# overflow word (keep tp) and zero otherwise (keep the difference
	# already stored in rp).
	subu	$11,$25,$11		# handle upmost overflow bit
	move	$15,$29
	sub	$4,$9			# restore rp
	not	$25,$11

	# NOTE(review): the source pointer itself is selected by the mask, so
	# the addresses read by .Lcopy depend on the borrow -- consider a
	# per-word masked select if constant memory access patterns matter.
	and	$5,$11,$29
	and	$6,$25,$4
	or	$5,$5,$6		# ap=borrow?tp:rp

	# Copy the selected vector into rp and wipe tp with zeros.
.align	4
.Lcopy:
	lw	$12,($5)
	add	$5,4
	sw	$0,($15)		# zero the scratch word
	add	$15,4
	sltu	$1,$15,$20
	sw	$12,($4)
	bnez	$1,.Lcopy
	add	$4,4			# (delay slot) advance rp

	li	$4,1
	li	$2,1			# return 1: multiplication performed

	# Epilogue: drop tp, restore saved registers, release the frame.
	.set	noreorder
	move	$29,$30
	lw	$30,(14-1)*4($29)
	lw	$23,(14-2)*4($29)
	lw	$22,(14-3)*4($29)
	lw	$21,(14-4)*4($29)
	lw	$20,(14-5)*4($29)
	lw	$19,(14-6)*4($29)
	lw	$18,(14-7)*4($29)
	lw	$17,(14-8)*4($29)
	lw	$16,(14-9)*4($29)
	jr	$31
	add	$29,14*4		# (delay slot)
.end	bn_mul_mont_internal
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"