/* fuc microcode util functions for nvc0 PGRAPH
 *
 * Copyright 2011 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */

// Data-section helper macros.  These two are defined unconditionally; the
// code further down is only emitted when the code guard symbol is defined.
//
// First macro: emits one 32-bit mmio list entry.  The first argument is
// placed in the low bits and (second argument - 1) is stored from bit 26
// upward.  mmctx_size below decodes that upper field as a count of 4-byte
// units.
//
// Second macro: reserves storage for one command queue, 72 bytes in total:
// two 4-byte words (read by the queue routines as GET at +0x0 and PUT at
// +0x4) followed by 64 bytes that the queue routines address as 8 slots of
// 8 bytes (command word, data word).
define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)')
define(`queue_init', `.skip eval((2 * 4) + ((8 * 4) * 2))')

ifdef(`include_code', `
// Error codes
define(`E_BAD_COMMAND', 0x01)
define(`E_CMD_OVERFLOW', 0x02)

// Util macros to help with debugging ucode hangs etc
//
// The values below are bit numbers.  Each of the two trace macros takes
// one bit number as its only argument and writes a word with just that
// bit set to an I/O scratch register.  Both macros clobber $r8 and $r9.
//
// NOTE(review): the two macros target different I/O addresses
// (0x83c << 6 and 0x85c << 6) although both are annotated CC_SCRATCH[7];
// presumably these are separate set and clear ports - confirm against
// the hardware documentation.
define(`T_WAIT', 0)
define(`T_MMCTX', 1)
define(`T_STRWAIT', 2)
define(`T_STRINIT', 3)
define(`T_AUTO', 4)
define(`T_CHAN', 5)
define(`T_LOAD', 6)
define(`T_SAVE', 7)
define(`T_LCHAN', 8)
define(`T_LCTXH', 9)

define(`trace_set', `
	mov $r8 0x83c
	shl b32 $r8 6
	clear b32 $r9
	bset $r9 $1
	iowr I[$r8 + 0x000] $r9		// CC_SCRATCH[7]
')

define(`trace_clr', `
	mov $r8 0x85c
	shl b32 $r8 6
	clear b32 $r9
	bset $r9 $1
	iowr I[$r8 + 0x000] $r9		// CC_SCRATCH[7]
')

// queue_put - add request to queue
//
// In : $r13 queue pointer
//	$r14 command
//	$r15 data
//
// GET and PUT are kept modulo 16 while the slot index is taken modulo 8,
// so the queue is full when PUT equals GET with bit 3 flipped.  In that
// case the overflow error code is loaded into $r15, the external error
// routine is called and nothing is stored.
//
// Clobbers: $r8, $r9 (and $r15 on the overflow path)
//
queue_put:
	// make sure we have space..
	ld b32 $r8 D[$r13 + 0x0]	// GET
	ld b32 $r9 D[$r13 + 0x4]	// PUT
	xor $r8 8			// GET ^ 8 == PUT means the queue is full
	cmpu b32 $r8 $r9
	bra ne #queue_put_next
		mov $r15 E_CMD_OVERFLOW
		call #error
		ret

	// store cmd/data on queue
	queue_put_next:
	and $r8 $r9 7			// slot index = PUT & 7
	shl b32 $r8 3			// 8 bytes per slot
	add b32 $r8 $r13
	add b32 $r8 8			// skip the GET and PUT words
	st b32 D[$r8 + 0x0] $r14
	st b32 D[$r8 + 0x4] $r15

	// update PUT
	add b32 $r9 1
	and $r9 0xf			// wrap modulo 16
	st b32 D[$r13 + 0x4] $r9
	ret

// queue_get - fetch request from queue
//
// In : $r13 queue pointer
//
// Out:	$p1 clear on success (data available)
//	$r14 command
//	$r15 data
//
// $p1 is set on entry and only cleared once an entry has been fetched, so
// it is left set when the queue is empty (GET == PUT).  $r14 and $r15 are
// not written in that case.
//
// Clobbers: $r8, $r9
//
queue_get:
	bset $flags $p1
	ld b32 $r8 D[$r13 + 0x0]	// GET
	ld b32 $r9 D[$r13 + 0x4]	// PUT
	cmpu b32 $r8 $r9
	bra e #queue_get_done
		// fetch first cmd/data pair
		and $r9 $r8 7		// slot index = GET & 7
		shl b32 $r9 3		// 8 bytes per slot
		add b32 $r9 $r13
		add b32 $r9 8		// skip the GET and PUT words
		ld b32 $r14 D[$r9 + 0x0]
		ld b32 $r15 D[$r9 + 0x4]

		// update GET
		add b32 $r8 1
		and $r8 0xf		// wrap modulo 16
		st b32 D[$r13 + 0x0] $r8
		bclr $flags $p1
queue_get_done:
	ret

// nv_rd32 - read 32-bit value from nv register
//
// In : $r14 register
// Out: $r15 value
//
// Writes the register address with the PENDING bit to MMIO_CTRL, polls
// until PENDING reads back clear, then waits for DONE bit 6 to become set
// before fetching the result from MMIO_RDVAL.
//
// Clobbers: $r10, $r11, $r12, plus $r8 and $r9 through wait_doneo
//
nv_rd32:
	mov $r11 0x728
	shl b32 $r11 6
	mov b32 $r12 $r14
	bset $r12 31			// MMIO_CTRL_PENDING
	iowr I[$r11 + 0x000] $r12	// MMIO_CTRL
	nv_rd32_wait:
		// spin until MMIO_CTRL_PENDING reads back as zero
		iord $r12 I[$r11 + 0x000]
		xbit $r12 $r12 31
		bra ne #nv_rd32_wait
	mov $r10 6			// DONE_MMIO_RD
	call #wait_doneo
	iord $r15 I[$r11 + 0x100]	// MMIO_RDVAL
	ret

// nv_wr32 - write 32-bit value to nv register
//
// In : $r14 register
//      $r15 value
//
// The value is latched into MMIO_WRVAL before the request is triggered
// through MMIO_CTRL; the routine returns once PENDING reads back clear.
//
// Clobbers: $r11, $r12
//
nv_wr32:
	mov $r11 0x728
	shl b32 $r11 6
	iowr I[$r11 + 0x200] $r15	// MMIO_WRVAL
	mov b32 $r12 $r14
	bset $r12 31			// MMIO_CTRL_PENDING
	bset $r12 30			// MMIO_CTRL_WRITE
	iowr I[$r11 + 0x000] $r12	// MMIO_CTRL
	nv_wr32_wait:
		// spin until MMIO_CTRL_PENDING reads back as zero
		iord $r12 I[$r11 + 0x000]
		xbit $r12 $r12 31
		bra ne #nv_wr32_wait
	ret

// (re)set watchdog timer
//
// In : $r15 timeout
//
// Bit 31 is set in $r15 itself before the write, so the timeout register
// of the caller is modified.
//
// Clobbers: $r8, $r15
//
watchdog_reset:
	mov $r8 0x430
	shl b32 $r8 6
	bset $r15 31
	iowr I[$r8 + 0x000] $r15
	ret

// clear watchdog timer
//
// Writes $r0 (used as a zero source elsewhere in this file) to the same
// I/O register that watchdog_reset programs.
//
// Clobbers: $r8
//
watchdog_clear:
	mov $r8 0x430
	shl b32 $r8 6
	iowr I[$r8 + 0x000] $r0
	ret

// wait_done{z,o} - wait on FUC_DONE bit to become clear/set
//
// In : $r10 bit to wait on
//
// The generator macro below takes the routine label as its first argument
// and, as its second, the branch condition that keeps the poll loop
// spinning: ne spins while the bit is set (wait_donez, wait for zero) and
// e spins while the bit is clear (wait_doneo, wait for one).  The bit
// number is also written to CC_SCRATCH[6] before polling starts.
//
// Clobbers: $r8, $r9 ($r10 is preserved)
//
define(`wait_done', `
$1:
	trace_set(T_WAIT);
	mov $r8 0x818
	shl b32 $r8 6
	iowr I[$r8 + 0x000] $r10	// CC_SCRATCH[6] = wait bit
	wait_done_$1:
		mov $r8 0x400
		shl b32 $r8 6
		iord $r8 I[$r8 + 0x000]	// DONE
		xbit $r8 $r8 $r10
		bra $2 #wait_done_$1
	trace_clr(T_WAIT)
	ret
')
wait_done(wait_donez, ne)
wait_done(wait_doneo, e)

// mmctx_size - determine size of a mmio list transfer
//
// In : $r14 mmio list head
//      $r15 mmio list tail
// Out: $r15 transfer size (in bytes)
//
// Each 32-bit list entry contributes ((entry >> 26) + 1) * 4 bytes.
//
// NOTE(review): the loop reads an entry before comparing head against
// tail, so an empty list (head == tail) is not handled - callers are
// presumably expected to always pass at least one entry; confirm.
//
// Clobbers: $r8, $r9, $r14
//
mmctx_size:
	clear b32 $r9			// running total
	nv_mmctx_size_loop:
		ld b32 $r8 D[$r14]
		shr b32 $r8 26		// count field, stored minus one
		add b32 $r8 1
		shl b32 $r8 2		// 4 bytes per unit
		add b32 $r9 $r8
		add b32 $r14 4		// next list entry
		cmpu b32 $r14 $r15
		bra ne #nv_mmctx_size_loop
	mov b32 $r15 $r9
	ret

// mmctx_xfer - execute a list of mmio transfers
//
// In : $r10 flags
//		bit 0: direction (0 = save, 1 = load)
//		bit 1: set if first transfer
//		bit 2: set if last transfer
//	$r11 base
//	$r12 mmio list head
//	$r13 mmio list tail
//	$r14 multi_stride
//	$r15 multi_mask
//
// A zero base leaves MMCTX_BASE untouched and BASE_EN clear; a zero
// multi_stride leaves both MULTI registers untouched and MULTI_EN clear.
// When flag bit 2 is set the transfer is finished with STOP_TRIGGER,
// otherwise the routine waits for the queue to drain and for DONE bit 2
// to clear.
//
// NOTE(review): as in mmctx_size, an entry is queued before head is
// compared against tail, so an empty list is not handled.
//
// Clobbers: $r8, $r9, $r11, $r12, $r14, and $r10 when flag bit 2 is clear
//
mmctx_xfer:
	trace_set(T_MMCTX)
	mov $r8 0x710
	shl b32 $r8 6
	clear b32 $r9			// enable bits ORed into every entry
	or $r11 $r11			// test base for zero
	bra e #mmctx_base_disabled
		iowr I[$r8 + 0x000] $r11	// MMCTX_BASE
		bset $r9 0			// BASE_EN
	mmctx_base_disabled:
	or $r14 $r14			// test multi_stride for zero
	bra e #mmctx_multi_disabled
		iowr I[$r8 + 0x200] $r14 	// MMCTX_MULTI_STRIDE
		iowr I[$r8 + 0x300] $r15 	// MMCTX_MULTI_MASK
		bset $r9 1			// MULTI_EN
	mmctx_multi_disabled:
	add b32 $r8 0x100		// $r8 now addresses MMCTX_CTRL

	xbit $r11 $r10 0
	shl b32 $r11 16			// DIR
	bset $r11 12			// QLIMIT = 0x10
	xbit $r14 $r10 1
	shl b32 $r14 17
	or $r11 $r14			// START_TRIGGER
	iowr I[$r8 + 0x000] $r11	// MMCTX_CTRL

	// loop over the mmio list, and send requests to the hw
	mmctx_exec_loop:
		// wait for space in mmctx queue
		mmctx_wait_free:
			iord $r14 I[$r8 + 0x000] // MMCTX_CTRL
			and $r14 0x1f		// spin while the low 5 bits are zero
			bra e #mmctx_wait_free

		// queue up an entry
		ld b32 $r14 D[$r12]
		or $r14 $r9			// add BASE_EN / MULTI_EN
		iowr I[$r8 + 0x300] $r14
		add b32 $r12 4
		cmpu b32 $r12 $r13
		bra ne #mmctx_exec_loop

	xbit $r11 $r10 2		// last transfer?
	bra ne #mmctx_stop
		// wait for queue to empty
		mmctx_fini_wait:
			iord $r11 I[$r8 + 0x000]	// MMCTX_CTRL
			and $r11 0x1f
			cmpu b32 $r11 0x10		// same value as QLIMIT
			bra ne #mmctx_fini_wait
		mov $r10 2				// DONE_MMCTX
		call #wait_donez
		bra #mmctx_done
	mmctx_stop:
		xbit $r11 $r10 0
		shl b32 $r11 16			// DIR
		bset $r11 12			// QLIMIT = 0x10
		bset $r11 18			// STOP_TRIGGER
		iowr I[$r8 + 0x000] $r11	// MMCTX_CTRL
		mmctx_stop_wait:
			// wait for STOP_TRIGGER to clear
			iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
			xbit $r11 $r11 18
			bra ne #mmctx_stop_wait
	mmctx_done:
	trace_clr(T_MMCTX)
	ret

// Wait for DONE_STRAND
//
// Waits for DONE bit 2 to become clear; $r10 is saved and restored around
// the wait.
//
// NOTE(review): bit 2 is annotated DONE_MMCTX in mmctx_xfer but
// DONE_STRAND here - confirm which name is correct.
//
// Clobbers: $r8, $r9
//
strand_wait:
	push $r10
	mov $r10 2
	call #wait_donez
	pop $r10
	ret

// unknown - call before issuing strand commands
//
// Writes 0xc to the I/O register addressed by $r8 (0x4afc with sethi
// 0x20000), then waits via strand_wait.
//
// Clobbers: $r8, $r9
//
strand_pre:
	mov $r8 0x4afc
	sethi $r8 0x20000
	mov $r9 0xc
	iowr I[$r8] $r9
	call #strand_wait
	ret

// unknown - call after issuing strand commands
//
// Same register as strand_pre, but writes 0xd.
//
// Clobbers: $r8, $r9
//
strand_post:
	mov $r8 0x4afc
	sethi $r8 0x20000
	mov $r9 0xd
	iowr I[$r8] $r9
	call #strand_wait
	ret

// Selects strand set?!
//
// In: $r14 id
//
// Two-step sequence: 0xf then command 0xb, followed by the requested id
// then command 0xa, with a strand_wait after each command write.
//
// Clobbers: $r10, $r11, $r12, plus $r8 and $r9 through strand_wait
//
strand_set:
	mov $r10 0x4ffc
	sethi $r10 0x20000
	sub b32 $r11 $r10 0x500
	mov $r12 0xf
	iowr I[$r10 + 0x000] $r12		// 0x93c = 0xf
	mov $r12 0xb
	iowr I[$r11 + 0x000] $r12		// 0x928 = 0xb
	call #strand_wait
	iowr I[$r10 + 0x000] $r14		// 0x93c = <id>
	mov $r12 0xa
	iowr I[$r11 + 0x000] $r12		// 0x928 = 0xa
	call #strand_wait
	ret

// Initialise strand context data
//
// In : $r15 context base
// Out: $r15 context size (in bytes)
//
// Strandset(?) 3 hardcoded currently
//
// Offsets are tracked in 256-byte units: the base is shifted right by 8
// on entry to the loop and the final offset is shifted back left by 8
// before the base is subtracted to obtain the size.
//
// NOTE(review): the per-strand loop decrements the STRANDS count before
// testing it, so a count of zero is not handled - presumably the hardware
// always reports at least one strand; confirm.
//
// Clobbers: $r8, $r9, $r10, $r11, $r12, $r14
//
strand_ctx_init:
	trace_set(T_STRINIT)
	call #strand_pre
	mov $r14 3
	call #strand_set
	mov $r10 0x46fc
	sethi $r10 0x20000
	add b32 $r11 $r10 0x400
	iowr I[$r10 + 0x100] $r0	// STRAND_FIRST_GENE = 0
	mov $r12 1
	iowr I[$r11 + 0x000] $r12	// STRAND_CMD = LATCH_FIRST_GENE
	call #strand_wait
	sub b32 $r12 $r0 1
	iowr I[$r10 + 0x000] $r12	// STRAND_GENE_CNT = 0xffffffff
	mov $r12 2
	iowr I[$r11 + 0x000] $r12	// STRAND_CMD = LATCH_GENE_CNT
	call #strand_wait
	call #strand_post

	// read the size of each strand, poke the context offset of
	// each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
	// about it later then.
	mov $r8 0x880
	shl b32 $r8 6
	iord $r9 I[$r8 + 0x000]		// STRANDS
	add b32 $r8 0x2200
	shr b32 $r14 $r15 8		// running offset, 256-byte units
	ctx_init_strand_loop:
		iowr I[$r8 + 0x000] $r14	// STRAND_SAVE_SWBASE
		iowr I[$r8 + 0x100] $r14	// STRAND_LOAD_SWBASE
		iord $r10 I[$r8 + 0x200]	// STRAND_SIZE
		shr b32 $r10 6
		add b32 $r10 1
		add b32 $r14 $r10		// advance past this strand
		add b32 $r8 4			// registers of the next strand
		sub b32 $r9 1
		bra ne #ctx_init_strand_loop

	shl b32 $r14 8			// back to bytes
	sub b32 $r15 $r14 $r15		// size = end offset - context base
	trace_clr(T_STRINIT)
	ret
')