/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Author: Tom Stellard <thomas.stellard@amd.com>
 */

#include <assert.h>
#include <string.h>

#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_constants.h"

/**
 * Bookkeeping carried through the flow-control lowering pass (rc_vert_fc).
 */
struct vert_fc_state {
	/** Compiler whose program is being rewritten. */
	struct radeon_compiler *C;
	/** Incremented on every IF, decremented on every ENDIF. */
	unsigned BranchDepth;
	/** Incremented on every BGNLOOP, decremented on every ENDLOOP. */
	unsigned LoopDepth;
	/**
	 * Only read by the depth check in lower_bgnloop().
	 * NOTE(review): nothing in this file ever modifies it after the
	 * initial memset, so that check always compares against 0.
	 */
	unsigned LoopsReserved;
	/**
	 * Predicate register index that was current when a loop was entered,
	 * indexed by the LoopDepth at the time of the BGNLOOP.
	 */
	int PredStack[R500_PVS_MAX_LOOP_DEPTH];
	/** Temporary currently used as predicate register, -1 if none yet. */
	int PredicateReg;
	/** Set by lower_if() when the IF is directly followed by a BRK;
	 *  cleared again at the next ENDIF. */
	unsigned InCFBreak;
};

/**
 * Fill \p src so that it reads the W component of the current predicate
 * register (a temporary).
 */
static void build_pred_src(
	struct rc_src_register * src,
	struct vert_fc_state * fc_state)
{
	src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
					RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
	src->File = RC_FILE_TEMPORARY;
	src->Index = fc_state->PredicateReg;
}

/**
 * Fill \p dst so that it writes the W component of the current predicate
 * register (a temporary).
 */
static void build_pred_dst(
	struct rc_dst_register * dst,
	struct vert_fc_state * fc_state)
{
	dst->WriteMask = RC_MASK_W;
	dst->File = RC_FILE_TEMPORARY;
	dst->Index = fc_state->PredicateReg;
}

/**
 * Callback handed to rc_for_all_writes_mask() by reserve_predicate_reg().
 *
 * \p userdata is the per-temporary write mask table; the mask of every write
 * to a temporary with an index below R300_VS_MAX_TEMPS is OR-ed into it.
 * Writes to other register files or to higher indices are ignored.
 */
static void mark_write(void * userdata,	struct rc_instruction * inst,
		rc_register_file file,	unsigned int index, unsigned int mask)
{
	unsigned int * writemasks = userdata;

	if (file != RC_FILE_TEMPORARY)
		return;

	if (index >= R300_VS_MAX_TEMPS)
		return;

	writemasks[index] |= mask;
}

/**
 * Pick the lowest-numbered temporary that no instruction of the program
 * currently writes and make it the predicate register.
 *
 * \return 1 on success (fc_state->PredicateReg is updated), -1 if no
 * temporary is free (a compiler error is raised through rc_error()).
 */
static int reserve_predicate_reg(struct vert_fc_state * fc_state)
{
	int i;
	unsigned int writemasks[RC_REGISTER_MAX_INDEX];
	struct rc_instruction * inst;

	/* Collect the write mask of every temporary over the whole program. */
	memset(writemasks, 0, sizeof(writemasks));
	for(inst = fc_state->C->Program.Instructions.Next;
				inst != &fc_state->C->Program.Instructions;
				inst = inst->Next) {
		rc_for_all_writes_mask(inst, mark_write, writemasks);
	}

	for(i = 0; i < fc_state->C->max_temp_regs; i++) {
		/* Most of the control flow instructions only write the
		 * W component of the Predicate Register, but
		 * the docs say that ME_PRED_SET_CLR and
		 * ME_PRED_SET_RESTORE write all components of the
		 * register, so we must reserve a register that has
		 * all its components free. */
		if (!writemasks[i]) {
			fc_state->PredicateReg = i;
			return 1;
		}
	}

	rc_error(fc_state->C, "No free temporary to use for"
			" predicate stack counter.\n");
	return -1;
}

/**
 * Lower a BGNLOOP: insert, in front of \p inst, the instruction that sets up
 * the predicate register used inside the loop.
 *
 * For an outermost loop that is not inside a branch the predicate is
 * initialised with ME_PRED_SEQ on a zero source.  Otherwise the current
 * predicate register is saved in PredStack, a fresh one is reserved and the
 * old value is copied into it with an ADD of zero.
 *
 * The BGNLOOP instruction itself is left untouched.  The caller is expected
 * to increment LoopDepth afterwards.
 */
static void lower_bgnloop(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	struct rc_instruction * new_inst;

	/* Validate the nesting depth before touching the program so that a
	 * failure does not leave a stray instruction behind. */
	if ((!fc_state->C->is_r500
		&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
	     || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
		rc_error(fc_state->C, "Loops are nested too deep.");
		return;
	}

	/* PredStack[LoopDepth] may be written below; LoopsReserved is not
	 * maintained by this pass, so bound the index explicitly instead of
	 * writing past the end of the array. */
	if (fc_state->LoopDepth >= sizeof(fc_state->PredStack)
					/ sizeof(fc_state->PredStack[0])) {
		rc_error(fc_state->C, "Loops are nested too deep.");
		return;
	}

	new_inst = rc_insert_new_instruction(fc_state->C, inst->Prev);

	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
		if (fc_state->PredicateReg == -1) {
			if (reserve_predicate_reg(fc_state) == -1) {
				return;
			}
		}

		/* Initialize the predicate bit to true. */
		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
		new_inst->U.I.SrcReg[0].Index = 0;
		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
	} else {
		fc_state->PredStack[fc_state->LoopDepth] =
						fc_state->PredicateReg;
		/* Copy the current predicate value to this loop's
		 * predicate register */

		/* Use the old predicate value for src0 */
		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);

		/* Reserve this loop's predicate register */
		if (reserve_predicate_reg(fc_state) == -1) {
			return;
		}

		/* Copy the old predicate value to the new register */
		new_inst->U.I.Opcode = RC_OPCODE_ADD;
		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
		new_inst->U.I.SrcReg[1].Index = 0;
		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
	}
}

/**
 * Lower a BRK in place into a write to the predicate register.
 *
 * At loop depth 1 the instruction becomes an RCP of a zero source with the
 * destination predicate mode RC_PRED_INV; at any other depth it becomes
 * ME_PRED_SET_CLR with RC_PRED_SET.
 */
static void lower_brk(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	if (fc_state->LoopDepth == 1) {
		inst->U.I.Opcode = RC_OPCODE_RCP;
		inst->U.I.DstReg.Pred = RC_PRED_INV;
		inst->U.I.SrcReg[0].Index = 0;
		inst->U.I.SrcReg[0].File = RC_FILE_NONE;
		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
	} else {
		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
		inst->U.I.DstReg.Pred = RC_PRED_SET;
	}

	build_pred_dst(&inst->U.I.DstReg, fc_state);
}

/**
 * Lower an ENDLOOP of a loop that saved a predicate register on entry:
 * insert an ME_PRED_SET_RESTORE right after \p inst that writes the loop's
 * predicate register and reads the one saved in PredStack, which becomes
 * the current predicate register again.
 *
 * Must be called before LoopDepth is decremented for this ENDLOOP.
 */
static void lower_endloop(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	struct rc_instruction * new_inst;

	/* LoopDepth is unsigned; an unmatched ENDLOOP would otherwise index
	 * PredStack far out of bounds below. */
	assert(fc_state->LoopDepth > 0);

	new_inst = rc_insert_new_instruction(fc_state->C, inst);

	new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
	build_pred_dst(&new_inst->U.I.DstReg, fc_state);
	/* Restore the previous predicate register. */
	fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
	build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
}

/**
 * Lower an IF in place into a predicate-setting instruction.
 *
 * An IF at the outermost level, or an IF directly followed by a BRK inside
 * a depth-1 loop, becomes ME_PRED_SNEQ (or ME_PRED_SEQ with RC_PRED_SET in
 * the break case).  Every other IF becomes VE_PRED_SNEQ_PUSH, with the
 * current predicate as src0 and the original condition moved to src1.
 *
 * The caller is expected to increment BranchDepth afterwards.
 */
static void lower_if(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	/* Reserve a temporary to use as our predicate stack counter, if we
	 * don't already have one. */
	if (fc_state->PredicateReg == -1) {
		/* If we are inside a loop, the Predicate Register should
		 * have already been defined. */
		assert(fc_state->LoopDepth == 0);

		if (reserve_predicate_reg(fc_state) == -1) {
			return;
		}
	}

	/* Remember that this IF only guards a BRK; rc_vert_fc() consults the
	 * flag again when it reaches the matching ENDIF. */
	if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
		fc_state->InCFBreak = 1;
	}

	if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0)
			|| (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) {
		if (fc_state->InCFBreak) {
			inst->U.I.Opcode = RC_ME_PRED_SEQ;
			inst->U.I.DstReg.Pred = RC_PRED_SET;
		} else {
			inst->U.I.Opcode = RC_ME_PRED_SNEQ;
		}
	} else {
		unsigned swz;
		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
		memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
						sizeof(inst->U.I.SrcReg[1]));
		swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
		/* VE_PRED_SNEQ_PUSH needs the branch condition to be in the
		 * w component */
		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
	}
	build_pred_dst(&inst->U.I.DstReg, fc_state);
}

/**
 * Compiler pass: rewrite IF/ELSE/ENDIF/BGNLOOP/BRK/ENDLOOP of a vertex
 * program into predicate register operations and mark every other
 * instruction inside a branch or loop as predicated (RC_PRED_SET).
 *
 * The pass stops as soon as the compiler's Error flag is set.
 *
 * \param c    compiler holding the program to rewrite
 * \param user unused
 */
void rc_vert_fc(struct radeon_compiler *c, void *user)
{
	struct rc_instruction * inst;
	struct vert_fc_state fc_state;

	memset(&fc_state, 0, sizeof(fc_state));
	fc_state.PredicateReg = -1;
	fc_state.C = c;

	for(inst = c->Program.Instructions.Next;
					inst != &c->Program.Instructions;
					inst = inst->Next) {

		switch (inst->U.I.Opcode) {

		case RC_OPCODE_BGNLOOP:
			lower_bgnloop(inst, &fc_state);
			fc_state.LoopDepth++;
			break;

		case RC_OPCODE_BRK:
			lower_brk(inst, &fc_state);
			break;

		case RC_OPCODE_ENDLOOP:
			if (fc_state.BranchDepth != 0
					|| fc_state.LoopDepth != 1) {
				lower_endloop(inst, &fc_state);
				/* Skip the PRED_RESTORE that was just
				 * inserted after this instruction.  This
				 * must only happen when it really was
				 * inserted: otherwise the instruction that
				 * follows the ENDLOOP would never be
				 * visited, or the iterator would step over
				 * the end of the list. */
				inst = inst->Next;
			}
			fc_state.LoopDepth--;
			break;

		case RC_OPCODE_IF:
			lower_if(inst, &fc_state);
			fc_state.BranchDepth++;
			break;

		case RC_OPCODE_ELSE:
			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
			build_pred_dst(&inst->U.I.DstReg, &fc_state);
			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
			break;

		case RC_OPCODE_ENDIF:
			if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
				/* The IF/BRK pair was lowered without a
				 * push, so there is nothing to pop: drop the
				 * ENDIF and continue from its predecessor. */
				struct rc_instruction * to_delete = inst;
				inst = inst->Prev;
				rc_remove_instruction(to_delete);
			} else {
				inst->U.I.Opcode = RC_ME_PRED_SET_POP;
				build_pred_dst(&inst->U.I.DstReg, &fc_state);
				build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
			}
			fc_state.InCFBreak = 0;
			fc_state.BranchDepth--;
			break;

		default:
			/* Ordinary instruction inside control flow: make it
			 * conditional on the predicate. */
			if (fc_state.BranchDepth || fc_state.LoopDepth) {
				inst->U.I.DstReg.Pred = RC_PRED_SET;
			}
			break;
		}

		if (c->Error) {
			return;
		}
	}
}