.section #gk110_builtin_code
// Built-in integer division helpers for GK110, plus placeholder entry points
// for f64 rcp/rsq.
//
// Layout note: every `sched` line carries seven values and, throughout this
// file, is followed by seven instructions before the next `sched` line. The
// grouping is positional and runs across routine boundaries (the last group
// of gk110_div_u32 is completed by the first two instructions of
// gk110_div_s32). Do not insert, remove or reorder instructions without
// revisiting the sched values.
// NOTE(review): presumably one scheduling hint per following instruction --
// confirm against the ISA documentation.

// DIV U32
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT:   $r0: dividend, $r1: divisor
// OUTPUT:  $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// SIZE:    22 / 14 * 8 bytes
//
// NOTE(review): only $p0 is written in the code below; $p1 in the CLOBBER
// line looks unused here -- confirm.
// NOTE(review): 26 instructions and 4 sched lines are visible below; the SIZE
// figure does not obviously match -- confirm whether it is stale.
// No special handling of a zero divisor is visible in this routine.
//
gk110_div_u32:
   sched 0x28 0x04 0x28 0x04 0x28 0x28 0x28
   // Initial estimate of z: $r2 = 1 << (bfind(b) ^ 0x1f).
   // NOTE(review): assumes bfind yields the index of the highest set bit, so
   // the xor with 0x1f gives 31 - index -- confirm.
   bfind u32 $r2 $r1
   xor b32 $r2 $r2 0x1f
   mov b32 $r3 0x1
   shl b32 $r2 $r3 clamp $r2
   // $r1 = -b, kept negated for the refinement steps below.
   cvt u32 $r1 neg u32 $r1
   // Refinement step, repeated five times in total:
   //   $r3 = (-b) * z            (low 32 bits)
   //   z   = z + high32(z * $r3)
   mul $r3 u32 $r1 u32 $r2
   add $r2 (mul high u32 $r2 u32 $r3) $r2
   sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
   mul $r3 u32 $r1 u32 $r2
   add $r2 (mul high u32 $r2 u32 $r3) $r2
   mul $r3 u32 $r1 u32 $r2
   add $r2 (mul high u32 $r2 u32 $r3) $r2
   mul $r3 u32 $r1 u32 $r2
   add $r2 (mul high u32 $r2 u32 $r3) $r2
   mul $r3 u32 $r1 u32 $r2
   sched 0x04 0x28 0x04 0x28 0x28 0x2c 0x04
   add $r2 (mul high u32 $r2 u32 $r3) $r2
   // Keep the dividend in $r3, then form the quotient estimate
   // $r0 = high32(a * z).
   mov b32 $r3 $r0
   mul high $r0 u32 $r0 u32 $r2
   // $r2 = b again (negating the negated divisor); $r1 = (-b) * q + a, i.e.
   // the remainder for the current quotient estimate.
   cvt u32 $r2 neg u32 $r1
   add $r1 (mul u32 $r1 u32 $r0) $r3
   // First correction: while remainder >= b (unsigned), subtract b and bump q.
   set $p0 0x1 ge u32 $r1 $r2
   $p0 sub b32 $r1 $r1 $r2
   sched 0x28 0x2c 0x04 0x20 0x2e 0x28 0x20
   $p0 add b32 $r0 $r0 0x1
   // Second correction: the compare itself is predicated on $p0, so it only
   // runs when the first correction was applied.
   $p0 set $p0 0x1 ge u32 $r1 $r2
   $p0 sub b32 $r1 $r1 $r2
   $p0 add b32 $r0 $r0 0x1
   ret

// DIV S32, like DIV U32 after taking ABS(inputs)
//
// INPUT:   $r0: dividend, $r1: divisor
// OUTPUT:  $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
//
// $p2 records "dividend < 0"; $p3 records "(divisor < 0) xor $p2", i.e. the
// operands have different signs. They are consumed at the end to negate the
// modulus ($p2) and the quotient ($p3) respectively.
//
gk110_div_s32:
   // These two instructions come before this routine's first sched line:
   // they fill the last two slots of the sched group opened at the end of
   // gk110_div_u32 (5 + 2 = 7).
   set $p2 0x1 lt s32 $r0 0x0
   set $p3 0x1 lt s32 $r1 0x0 xor $p2
   sched 0x20 0x28 0x28 0x04 0x28 0x04 0x28
   // Work on absolute values from here on.
   cvt s32 $r0 abs s32 $r0
   cvt s32 $r1 abs s32 $r1
   // Same sequence as gk110_div_u32: initial estimate of z in $r2 ...
   bfind u32 $r2 $r1
   xor b32 $r2 $r2 0x1f
   mov b32 $r3 0x1
   shl b32 $r2 $r3 clamp $r2
   // ... negated divisor in $r1 ...
   cvt u32 $r1 neg u32 $r1
   sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
   // ... and five refinement steps of z.
   mul $r3 u32 $r1 u32 $r2
   add $r2 (mul high u32 $r2 u32 $r3) $r2
   mul $r3 u32 $r1 u32 $r2
   add $r2 (mul high u32 $r2 u32 $r3) $r2
   mul $r3 u32 $r1 u32 $r2
   add $r2 (mul high u32 $r2 u32 $r3) $r2
   mul $r3 u32 $r1 u32 $r2
   sched 0x28 0x28 0x04 0x28 0x04 0x28 0x28
   add $r2 (mul high u32 $r2 u32 $r3) $r2
   mul $r3 u32 $r1 u32 $r2
   add $r2 (mul high u32 $r2 u32 $r3) $r2
   // Quotient estimate in $r0, divisor magnitude back in $r2, remainder
   // estimate in $r1 (same steps as gk110_div_u32).
   mov b32 $r3 $r0
   mul high $r0 u32 $r0 u32 $r2
   cvt u32 $r2 neg u32 $r1
   add $r1 (mul u32 $r1 u32 $r0) $r3
   sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20
   // Up to two correction steps; the second is predicated on the first.
   set $p0 0x1 ge u32 $r1 $r2
   $p0 sub b32 $r1 $r1 $r2
   $p0 add b32 $r0 $r0 0x1
   $p0 set $p0 0x1 ge u32 $r1 $r2
   $p0 sub b32 $r1 $r1 $r2
   $p0 add b32 $r0 $r0 0x1
   // Restore signs: negate the quotient when the operand signs differed ...
   $p3 cvt s32 $r0 neg s32 $r0
   sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c
   // ... and negate the modulus when the dividend was negative.
   $p2 cvt s32 $r1 neg s32 $r1
   ret

// Both labels resolve to the same single `ret`: no f64 reciprocal or
// reciprocal-square-root code is present in this file, only the entry points.
gk110_rcp_f64:
gk110_rsq_f64:
   ret

// One 64-bit entry per routine above, in the order div_u32, div_s32, rcp_f64,
// rsq_f64.
// NOTE(review): presumably read by the code that loads this library to locate
// each routine -- the consumer is not visible here; keep the order unchanged.
.section #gk110_builtin_offsets
.b64 #gk110_div_u32
.b64 #gk110_div_s32
.b64 #gk110_rcp_f64
.b64 #gk110_rsq_f64