C++程序  |  303行  |  8.47 KB

/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Author: Tom Stellard <thomas.stellard@amd.com>
 */

#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_constants.h"

/* Bookkeeping carried across instructions while rc_vert_fc() walks the
 * program and rewrites flow-control opcodes into predicate operations. */
struct vert_fc_state {
	/* Compiler whose program is being rewritten. */
	struct radeon_compiler *C;
	/* Incremented for every IF and decremented for every ENDIF. */
	unsigned BranchDepth;
	/* Incremented for every BGNLOOP and decremented for every ENDLOOP. */
	unsigned LoopDepth;
	/* Only read by the nesting check in lower_bgnloop(); nothing in this
	 * file ever changes it from its initial value of zero. */
	unsigned LoopsReserved;
	/* Predicate register index saved when a nested loop begins, indexed
	 * by the LoopDepth at that point; read back by lower_endloop(). */
	int PredStack[R500_PVS_MAX_LOOP_DEPTH];
	/* Index of the temporary currently used as the predicate register,
	 * or -1 while none has been reserved yet. */
	int PredicateReg;
	/* Set to 1 by lower_if() when the IF is immediately followed by a
	 * BRK; cleared again when the matching ENDIF is processed. */
	unsigned InCFBreak;
};

/* Make a source operand read the W channel of the current predicate
 * register. Only File, Index and Swizzle are touched; every other field of
 * the operand keeps whatever value it already had. */
static void build_pred_src(
	struct rc_src_register * src,
	struct vert_fc_state * fc_state)
{
	const int pred_index = fc_state->PredicateReg;

	src->File = RC_FILE_TEMPORARY;
	src->Index = pred_index;
	src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
					RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
}

/* Make a destination operand write the W channel of the current predicate
 * register. Only File, Index and WriteMask are touched; other fields of the
 * operand (such as Pred) are left as they were. */
static void build_pred_dst(
	struct rc_dst_register * dst,
	struct vert_fc_state * fc_state)
{
	const int pred_index = fc_state->PredicateReg;

	dst->File = RC_FILE_TEMPORARY;
	dst->Index = pred_index;
	dst->WriteMask = RC_MASK_W;
}

/* Write callback used with rc_for_all_writes_mask().
 *
 * userdata is an array of per-register write masks indexed by temporary
 * register number. Writes to temporaries with an index below
 * R300_VS_MAX_TEMPS are OR-ed into that array; every other write is
 * ignored. inst is not used. */
static void mark_write(void * userdata,	struct rc_instruction * inst,
		rc_register_file file,	unsigned int index, unsigned int mask)
{
	unsigned int * masks_by_index = userdata;

	if (file == RC_FILE_TEMPORARY && index < R300_VS_MAX_TEMPS) {
		masks_by_index[index] |= mask;
	}
}

static int reserve_predicate_reg(struct vert_fc_state * fc_state)
{
	int i;
	unsigned int writemasks[RC_REGISTER_MAX_INDEX];
	struct rc_instruction * inst;
	memset(writemasks, 0, sizeof(writemasks));
	for(inst = fc_state->C->Program.Instructions.Next;
				inst != &fc_state->C->Program.Instructions;
				inst = inst->Next) {
		rc_for_all_writes_mask(inst, mark_write, writemasks);
	}

	for(i = 0; i < fc_state->C->max_temp_regs; i++) {
		/* Most of the control flow instructions only write the
		 * W component of the Predicate Register, but
		 * the docs say that ME_PRED_SET_CLR and
		 * ME_PRED_SET_RESTORE write all components of the
		 * register, so we must reserve a register that has
		 * all its components free. */
		if (!writemasks[i]) {
			fc_state->PredicateReg = i;
			break;
		}
	}
	if (i == fc_state->C->max_temp_regs) {
		rc_error(fc_state->C, "No free temporary to use for"
				" predicate stack counter.\n");
		return -1;
	}
	return 1;
}

static void lower_bgnloop(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	struct rc_instruction * new_inst =
			rc_insert_new_instruction(fc_state->C, inst->Prev);

	if ((!fc_state->C->is_r500
		&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
	     || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
		rc_error(fc_state->C, "Loops are nested too deep.");
		return;
	}

	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
		if (fc_state->PredicateReg == -1) {
			if (reserve_predicate_reg(fc_state) == -1) {
				return;
			}
		}

		/* Initialize the predicate bit to true. */
		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
		new_inst->U.I.SrcReg[0].Index = 0;
		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
	} else {
		fc_state->PredStack[fc_state->LoopDepth] =
						fc_state->PredicateReg;
		/* Copy the current predicate value to this loop's
		 * predicate register */

		/* Use the old predicate value for src0 */
		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);

		/* Reserve this loop's predicate register */
		if (reserve_predicate_reg(fc_state) == -1) {
			return;
		}

		/* Copy the old predicate value to the new register */
		new_inst->U.I.Opcode = RC_OPCODE_ADD;
		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
		new_inst->U.I.SrcReg[1].Index = 0;
		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
	}

}

/* Rewrite a BRK instruction in place so that it writes the W channel of
 * the current predicate register.
 *
 * At loop depth 1 the instruction becomes an RCP of constant zero,
 * predicated with RC_PRED_INV. At any other depth it becomes an
 * ME_PRED_SET_CLR predicated with RC_PRED_SET. */
static void lower_brk(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	const int in_outermost_loop = (fc_state->LoopDepth == 1);

	/* The destination register is the same in both cases. */
	build_pred_dst(&inst->U.I.DstReg, fc_state);

	if (!in_outermost_loop) {
		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
		inst->U.I.DstReg.Pred = RC_PRED_SET;
		return;
	}

	inst->U.I.Opcode = RC_OPCODE_RCP;
	inst->U.I.DstReg.Pred = RC_PRED_INV;
	inst->U.I.SrcReg[0].File = RC_FILE_NONE;
	inst->U.I.SrcReg[0].Index = 0;
	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
}

/* Insert an ME_PRED_SET_RESTORE after an ENDLOOP and switch back to the
 * predicate register that was active before the loop began.
 *
 * The new instruction's destination is this loop's predicate register; its
 * source is the register index that lower_bgnloop() saved in PredStack,
 * which also becomes fc_state->PredicateReg again. */
static void lower_endloop(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	struct rc_instruction * restore =
			rc_insert_new_instruction(fc_state->C, inst);
	const unsigned saved_slot = fc_state->LoopDepth - 1;

	restore->U.I.Opcode = RC_ME_PRED_SET_RESTORE;

	/* The destination must be built while PredicateReg still names the
	 * loop's own register ... */
	build_pred_dst(&restore->U.I.DstReg, fc_state);

	/* ... and the source only after the outer register is restored. */
	fc_state->PredicateReg = fc_state->PredStack[saved_slot];
	build_pred_src(&restore->U.I.SrcReg[0], fc_state);
}

/* Rewrite an IF instruction in place as a predicate-setting instruction.
 *
 * A predicate register is reserved first if none exists. If the IF is
 * immediately followed by a BRK, fc_state->InCFBreak is set. The opcode is
 * then chosen as follows:
 *  - outside any branch or loop, or for a break-IF at loop depth 1:
 *    ME_PRED_SEQ with Pred = RC_PRED_SET when InCFBreak is set, otherwise
 *    ME_PRED_SNEQ;
 *  - everywhere else: VE_PRED_SNEQ_PUSH, with the original condition moved
 *    to src1 and the current predicate register as src0.
 * In every case the destination is the predicate register. */
static void lower_if(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	int at_top_level;
	int break_in_outer_loop;

	/* Reserve a temporary to use as our predicate stack counter, if we
	 * don't already have one. */
	if (fc_state->PredicateReg == -1) {
		/* If we are inside a loop, the Predicate Register should
		 * have already been defined. */
		assert(fc_state->LoopDepth == 0);

		if (reserve_predicate_reg(fc_state) == -1) {
			return;
		}
	}

	if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
		fc_state->InCFBreak = 1;
	}

	at_top_level = fc_state->BranchDepth == 0
				&& fc_state->LoopDepth == 0;
	break_in_outer_loop = fc_state->LoopDepth == 1
				&& fc_state->InCFBreak;

	if (!at_top_level && !break_in_outer_loop) {
		unsigned cond_swz;

		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;

		/* Move the branch condition to src1 before src0 is
		 * overwritten with the predicate register. */
		inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
		cond_swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);

		/* VE_PRED_SNEQ_PUSH needs the branch condition to be in the
		 * w component */
		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, cond_swz);
		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
	} else if (fc_state->InCFBreak) {
		inst->U.I.Opcode = RC_ME_PRED_SEQ;
		inst->U.I.DstReg.Pred = RC_PRED_SET;
	} else {
		inst->U.I.Opcode = RC_ME_PRED_SNEQ;
	}

	build_pred_dst(&inst->U.I.DstReg, fc_state);
}

/* Compiler pass: lower vertex-program flow control to predicate operations.
 *
 * Walks every instruction of c->Program once and rewrites BGNLOOP, BRK,
 * ENDLOOP, IF, ELSE and ENDIF using the lower_*() helpers above. Any other
 * instruction found inside a branch or a loop has its destination marked
 * with RC_PRED_SET. The walk stops as soon as c->Error is set.
 *
 * c    - compiler whose program is rewritten in place.
 * user - unused. */
void rc_vert_fc(struct radeon_compiler *c, void *user)
{
	struct rc_instruction * inst;
	struct vert_fc_state fc_state;

	memset(&fc_state, 0, sizeof(fc_state));
	fc_state.PredicateReg = -1;
	fc_state.C = c;

	for(inst = c->Program.Instructions.Next;
					inst != &c->Program.Instructions;
					inst = inst->Next) {

		switch (inst->U.I.Opcode) {

		case RC_OPCODE_BGNLOOP:
			lower_bgnloop(inst, &fc_state);
			fc_state.LoopDepth++;
			break;

		case RC_OPCODE_BRK:
			lower_brk(inst, &fc_state);
			break;

		case RC_OPCODE_ENDLOOP:
			if (fc_state.BranchDepth != 0
					|| fc_state.LoopDepth != 1) {
				lower_endloop(inst, &fc_state);
				/* Skip the PRED_SET_RESTORE that was just
				 * inserted after this ENDLOOP. This must only
				 * happen when an instruction was actually
				 * inserted; otherwise the real successor of
				 * ENDLOOP would be skipped (or, if ENDLOOP is
				 * the last instruction, the walk would step
				 * past the list head and start over). */
				inst = inst->Next;
			}
			fc_state.LoopDepth--;
			break;
		case RC_OPCODE_IF:
			lower_if(inst, &fc_state);
			fc_state.BranchDepth++;
			break;

		case RC_OPCODE_ELSE:
			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
			build_pred_dst(&inst->U.I.DstReg, &fc_state);
			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
			break;

		case RC_OPCODE_ENDIF:
			if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
				/* A break-IF at loop depth 1 needs no pop:
				 * drop the ENDIF and resume from the
				 * instruction before it. */
				struct rc_instruction * to_delete = inst;
				inst = inst->Prev;
				rc_remove_instruction(to_delete);
			} else {
				inst->U.I.Opcode = RC_ME_PRED_SET_POP;
				build_pred_dst(&inst->U.I.DstReg, &fc_state);
				build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
			}
			fc_state.InCFBreak = 0;
			fc_state.BranchDepth--;
			break;

		default:
			/* Ordinary instructions inside a branch or loop are
			 * predicated. */
			if (fc_state.BranchDepth || fc_state.LoopDepth) {
				inst->U.I.DstReg.Pred = RC_PRED_SET;
			}
			break;
		}

		if (c->Error) {
			return;
		}
	}
}