/*
 * Copyright (C) 2009 Nicolai Haehnle.
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 * Nicolai Haehnle
 * Tom Stellard <thomas.stellard@amd.com>
 */

#include "radeon_dataflow.h"

#include "radeon_code.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_swizzle.h"

/**
 * Replace source operand \p src of \p inst with a read from a fresh
 * temporary register.
 *
 * The original source is handed to SwizzleCaps->Split(), which returns a
 * list of write masks ("phases").  For every phase one MOV into the
 * temporary is created via rc_insert_new_instruction(c, inst->Prev); the MOV
 * carries a copy of the original source (and of the instruction's PreSub)
 * with the channels outside the phase set to RC_SWIZZLE_UNUSED.
 *
 * Afterwards the operand of \p inst reads the temporary with an identity
 * swizzle on the channels that were in use, and with Negate and Abs cleared.
 *
 * \param c    compiler context
 * \param inst instruction whose operand is rewritten
 * \param src  index into inst->U.I.SrcReg of the operand to rewrite
 */
static void rewrite_source(struct radeon_compiler * c,
		struct rc_instruction * inst, unsigned src)
{
	struct rc_swizzle_split split;
	unsigned int tempreg = rc_find_free_temporary(c);
	unsigned int usemask;

	/* Collect the destination channels that actually read something. */
	usemask = 0;
	for(unsigned int chan = 0; chan < 4; ++chan) {
		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
			usemask |= 1 << chan;
	}

	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);

	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
		unsigned int masked_negate;

		mov->U.I.Opcode = RC_OPCODE_MOV;
		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		mov->U.I.DstReg.Index = tempreg;
		mov->U.I.DstReg.WriteMask = split.Phase[phase];
		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
		mov->U.I.PreSub = inst->U.I.PreSub;

		/* Channels that are not written in this phase must not read
		 * anything. */
		for(unsigned int chan = 0; chan < 4; ++chan) {
			if (!GET_BIT(split.Phase[phase], chan))
				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
		}

		/* Normalize the negate mask when the channels of this phase
		 * agree: none negated -> 0, all negated -> XYZW.  A mixed
		 * mask is left as copied from the original source. */
		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
		if (masked_negate == 0)
			mov->U.I.SrcReg[0].Negate = 0;
		else if (masked_negate == split.Phase[phase])
			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
	}

	/* Point the original operand at the temporary. */
	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[src].Index = tempreg;
	inst->U.I.SrcReg[src].Swizzle = 0;
	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
	inst->U.I.SrcReg[src].Abs = 0;
	for(unsigned int chan = 0; chan < 4; ++chan) {
		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
	}
}

/**
 * This function will attempt to rewrite non-native swizzles that read from
 * immediate registers by rearranging the immediates to allow the
 * instruction to use native swizzles.
 *
 * On success \p reg is modified in place: its swizzle is replaced, its index
 * refers to a newly added immediate vec4, its file is RC_FILE_CONSTANT and
 * its negate mask is cleared (negation of inline constants is folded into
 * the stored values).
 *
 * \param c   compiler context; a new immediate is added to its constant list
 * \param reg source register to rewrite
 * \return 1 if \p reg was rewritten, 0 if \p reg is not an immediate and
 *         reads at least one real (X/Y/Z/W) channel.
 */
static unsigned try_rewrite_constant(struct radeon_compiler *c,
					struct rc_src_register *reg)
{
	unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz;
	unsigned all_inline = 0;
	float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};

	if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
		/* The register does not contain immediates, but if all
		 * the swizzles are inline constants, we can still rewrite
		 * it. */
		new_swizzle = RC_SWIZZLE_XYZW;
		for (chan = 0 ; chan < 4; chan++) {
			unsigned inline_swz = GET_SWZ(reg->Swizzle, chan);
			if (inline_swz <= RC_SWIZZLE_W) {
				return 0;
			}
			if (inline_swz == RC_SWIZZLE_UNUSED) {
				SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED);
			}
		}
		all_inline = 1;
	} else {
		new_swizzle = reg->Swizzle;
	}

	swz = RC_SWIZZLE_UNUSED;
	found_swizzle = 1;
	/* Check if all channels have the same swizzle.  If they do we can skip
	 * the search for a native swizzle.  We only need to check the first
	 * three channels, because any swizzle is legal in the fourth channel.
	 */
	for (chan = 0; chan < 3; chan++) {
		unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
		if (chan_swz == RC_SWIZZLE_UNUSED) {
			continue;
		}
		if (swz == RC_SWIZZLE_UNUSED) {
			swz = chan_swz;
		} else if (swz != chan_swz) {
			found_swizzle = 0;
			break;
		}
	}

	/* Find a legal swizzle */

	/* This loop attempts to find a native swizzle where all the
	 * channels are different. */
	while (!found_swizzle && !all_inline) {
		swz0 = GET_SWZ(new_swizzle, 0);
		swz1 = GET_SWZ(new_swizzle, 1);
		swz2 = GET_SWZ(new_swizzle, 2);

		/* Swizzle .W. is never legal. */
		if (swz1 == RC_SWIZZLE_W ||
			swz1 == RC_SWIZZLE_UNUSED ||
			swz1 == RC_SWIZZLE_ZERO ||
			swz1 == RC_SWIZZLE_HALF ||
			swz1 == RC_SWIZZLE_ONE) {
			/* We choose Z, because there are two non-repeating
			 * swizzle combinations of the form .Z.  There is
			 * only one combination each for .X. and .Y. */
			SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
			continue;
		}

		if (swz2 == RC_SWIZZLE_UNUSED) {
			/* We choose Y, because there are two non-repeating
			 * swizzle combinations of the form ..Y */
			SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
			continue;
		}

		switch (swz0) {
		/* X.. */
		case RC_SWIZZLE_X:
			/* Legal swizzles that start with X: XYZ, XXX */
			switch (swz1) {
			/* XX. */
			case RC_SWIZZLE_X:
				/*  The new swizzle will be:
				 *  ZXY (XX. => ZX. => ZXY) */
				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
				break;
			/* XY. */
			case RC_SWIZZLE_Y:
				/* The new swizzle is XYZ */
				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
				found_swizzle = 1;
				break;
			/* XZ. */
			case RC_SWIZZLE_Z:
				/* XZZ */
				if (swz2 == RC_SWIZZLE_Z) {
					/* The new swizzle is XYZ */
					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
					found_swizzle = 1;
				} else { /* XZ[^Z] */
					/* The new swizzle will be:
					 * YZX (XZ. => YZ. => YZX) */
					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
				}
				break;
			/* XW. Should have already been handled. */
			case RC_SWIZZLE_W:
				assert(0);
				break;
			}
			break;
		/* Y.. */
		case RC_SWIZZLE_Y:
			/* Legal swizzles that start with Y: YYY, YZX */
			switch (swz1) {
			/* YY. */
			case RC_SWIZZLE_Y:
				/* The new swizzle will be:
				 * XYZ (YY. => XY. => XYZ) */
				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
				break;
			/* YZ. */
			case RC_SWIZZLE_Z:
				/* The new swizzle is YZX */
				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
				found_swizzle = 1;
				break;
			/* YX. */
			case RC_SWIZZLE_X:
				/* YXX */
				if (swz2 == RC_SWIZZLE_X) {
					/* The new swizzle is YZX */
					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
					found_swizzle = 1;
				} else { /* YX[^X] */
					/* The new swizzle will be:
					 * ZXY (YX. => ZX. => ZXY) */
					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
				}
				break;
			/* YW. Should have already been handled. */
			case RC_SWIZZLE_W:
				assert(0);
				break;
			}
			break;
		/* Z.. */
		case RC_SWIZZLE_Z:
			/* Legal swizzles that start with Z: ZZZ, ZXY */
			switch (swz1) {
			/* ZZ. */
			case RC_SWIZZLE_Z:
				/* The new swizzle will be:
				 * WZY (ZZ. => WZ. => WZY) */
				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
				break;
			/* ZX. */
			case RC_SWIZZLE_X:
				/* The new swizzle is ZXY */
				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
				found_swizzle = 1;
				break;
			/* ZY. */
			case RC_SWIZZLE_Y:
				/* ZYY */
				if (swz2 == RC_SWIZZLE_Y) {
					/* The new swizzle is ZXY */
					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
					found_swizzle = 1;
				} else { /* ZY[^Y] */
					/* The new swizzle will be:
					 * XYZ (ZY. => XY. => XYZ) */
					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
				}
				break;
			/* ZW. Should have already been handled. */
			case RC_SWIZZLE_W:
				assert(0);
				break;
			}
			break;

		/* W.. */
		case RC_SWIZZLE_W:
			/* Legal swizzles that start with W: WWW, WZY */
			switch (swz1) {
			/* WW. Should have already been handled. */
			case RC_SWIZZLE_W:
				assert(0);
				break;
			/* WZ. */
			case RC_SWIZZLE_Z:
				/* The new swizzle will be WZY */
				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
				found_swizzle = 1;
				break;
			/* WX. */
			case RC_SWIZZLE_X:
			/* WY. */
			case RC_SWIZZLE_Y:
				/* W[XY]Y */
				if (swz2 == RC_SWIZZLE_Y) {
					/* The new swizzle will be WZY */
					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
					found_swizzle = 1;
				} else { /* W[XY][^Y] */
					/* The new swizzle will be:
					 * ZXY (WX. => XX. => ZX. => ZXY) or
					 * XYZ (WY. => XY. => XYZ) */
					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
				}
				break;
			}
			break;
		/* U.. 0.. 1.. H..*/
		case RC_SWIZZLE_UNUSED:
		case RC_SWIZZLE_ZERO:
		case RC_SWIZZLE_ONE:
		case RC_SWIZZLE_HALF:
			SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
			break;
		}
	}

	/* Handle the swizzle in the w channel. */
	swz3 = GET_SWZ(reg->Swizzle, 3);

	/* We can skip this if the swizzle in channel w is an inline constant. */
	if (swz3 <= RC_SWIZZLE_W) {
		for (chan = 0; chan < 3; chan++) {
			unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
			unsigned new_swz = GET_SWZ(new_swizzle, chan);
			/* If the swizzle in the w channel is the same as the
			 * swizzle in any other channels, we need to rewrite it.
			 * For example:
			 * reg->Swizzle == XWZW
			 * new_swizzle  == XYZX
			 * Since the swizzle in the y channel is being
			 * rewritten from W -> Y we need to change the swizzle
			 * in the w channel from W -> Y as well.
			 */
			if (old_swz == swz3) {
				SET_SWZ(new_swizzle, 3,
						GET_SWZ(new_swizzle, chan));
				break;
			}

			/* The swizzle in channel w will be overwritten by one
			 * of the new swizzles. */
			if (new_swz == swz3) {
				/* Find an unused swizzle */
				unsigned i;
				unsigned used = 0;
				for (i = 0; i < 3; i++) {
					used |= 1 << GET_SWZ(new_swizzle, i);
				}
				/* There is deliberately no break here: the
				 * last free slot found is the one that stays
				 * selected, and the outer loop keeps running
				 * so that a later old_swz == swz3 match can
				 * still override this choice. */
				for (i = 0; i < 4; i++) {
					if (used & (1 << i)) {
						continue;
					}
					SET_SWZ(new_swizzle, 3, i);
				}
			}
		}
	}

	/* Fill the new immediate: the value each channel used to read is
	 * stored in the slot that the channel's new swizzle selects. */
	for (chan = 0; chan < 4; chan++) {
		unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
		unsigned new_swz = GET_SWZ(new_swizzle, chan);

		if (old_swz == RC_SWIZZLE_UNUSED) {
			continue;
		}

		/* We don't need to change the swizzle in channel w if it is
		 * an inline constant.  These are always legal in the w channel.
		 *
		 * Swizzles with a value > RC_SWIZZLE_W are inline constants.
		 */
		if (chan == 3 && old_swz > RC_SWIZZLE_W) {
			continue;
		}

		assert(new_swz <= RC_SWIZZLE_W);

		switch (old_swz) {
		case RC_SWIZZLE_ZERO:
			imms[new_swz] = 0.0f;
			break;
		case RC_SWIZZLE_HALF:
			if (reg->Negate & (1 << chan)) {
				imms[new_swz] = -0.5f;
			} else {
				imms[new_swz] = 0.5f;
			}
			break;
		case RC_SWIZZLE_ONE:
			if (reg->Negate & (1 << chan)) {
				imms[new_swz] = -1.0f;
			} else {
				imms[new_swz] = 1.0f;
			}
			break;
		default:
			imms[new_swz] = rc_get_constant_value(c, reg->Index,
					reg->Swizzle, reg->Negate, chan);
			break;
		}
		SET_SWZ(reg->Swizzle, chan, new_swz);
	}
	reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants,
							imms);
	/* We need to set the register file to CONSTANT in case we are
	 * converting a non-constant register with constant swizzles (e.g.
	 * ONE, ZERO, HALF).
	 */
	reg->File = RC_FILE_CONSTANT;
	reg->Negate = 0;
	return 1;
}

/**
 * Walk every instruction of the program and make sure each source operand
 * uses a swizzle that SwizzleCaps->IsNative() accepts.
 *
 * For a non-native operand, when the compiler is not targeting r500 and the
 * constant count is still below R300_PFS_NUM_CONST_REGS, the operand is
 * first offered to try_rewrite_constant().  If that is not applicable or
 * fails, the operand is routed through a temporary by rewrite_source().
 *
 * When RC_DBG_LOG is set in c->Debug, the constant list is printed at the
 * end.
 *
 * \param c    compiler context
 * \param user not used by this function
 */
void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
{
	struct rc_instruction * inst;

	for(inst = c->Program.Instructions.Next;
					inst != &c->Program.Instructions;
					inst = inst->Next) {
		const struct rc_opcode_info * opcode =
					rc_get_opcode_info(inst->U.I.Opcode);
		unsigned int src;

		for(src = 0; src < opcode->NumSrcRegs; ++src) {
			struct rc_src_register *reg = &inst->U.I.SrcReg[src];
			if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) {
				continue;
			}
			if (!c->is_r500 &&
			    c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS &&
			    try_rewrite_constant(c, reg)) {
				continue;
			}
			rewrite_source(c, inst, src);
		}
	}
	if (c->Debug & RC_DBG_LOG)
		rc_constants_print(&c->Program.Constants);
}