C++程序  |  754行  |  18.64 KB

/*
 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * \file
 */

#include "radeon_compiler_util.h"

#include "radeon_compiler.h"
#include "radeon_dataflow.h"
/**
 * Build a mask from the channel values stored in a swizzle.
 *
 * Each of the four channels of swz contributes the bit (1 << value); any
 * bit that falls outside RC_MASK_XYZW is dropped from the result.
 * @param swz The swizzle to convert.
 * @return The accumulated mask, restricted to RC_MASK_XYZW.
 */
unsigned int rc_swizzle_to_writemask(unsigned int swz)
{
	unsigned int result = 0;
	unsigned int chan = 0;

	while (chan < 4) {
		result |= 1 << GET_SWZ(swz, chan);
		chan++;
	}

	return result & RC_MASK_XYZW;
}

/**
 * Read one channel of a swizzle.
 * @param swz The swizzle to read from.
 * @param idx The channel selector.  A selector with bit 0x4 set is not used
 * as an index; it is handed back unchanged.
 * @return idx itself when (idx & 0x4) is non-zero, otherwise
 * GET_SWZ(swz, idx).
 */
rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
{
	if (!(idx & 0x4))
		return GET_SWZ(swz, idx);
	return idx;
}

/**
 * Normalize a swizzle so that all four channels carry a value, no matter
 * how many channels the owning instruction actually uses.
 * @param initial_value The swizzle whose first 'channels' channels are
 * meaningful.
 * @param channels The number of meaningful channels in initial_value.
 * @return initial_value with every channel from 'channels' up to 3 set to
 * RC_SWIZZLE_UNUSED.
 */
unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels)
{
	unsigned int chan = channels;

	while (chan < 4) {
		SET_SWZ(initial_value, chan, RC_SWIZZLE_UNUSED);
		chan++;
	}
	return initial_value;
}

/**
 * Build a swizzle by looking up four selectors in src.
 *
 * Each selector is resolved with get_swz() and the results are packed
 * three bits per channel, X in the lowest bits and W in the highest.
 * @param src The swizzle the selectors are looked up in.
 * @param swz_x Selector for the X channel of the result.
 * @param swz_y Selector for the Y channel of the result.
 * @param swz_z Selector for the Z channel of the result.
 * @param swz_w Selector for the W channel of the result.
 * @return The packed swizzle.
 */
unsigned int combine_swizzles4(unsigned int src,
		rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
{
	const unsigned int x = get_swz(src, swz_x);
	const unsigned int y = get_swz(src, swz_y);
	const unsigned int z = get_swz(src, swz_z);
	const unsigned int w = get_swz(src, swz_w);

	return x | (y << 3) | (z << 6) | (w << 9);
}

/**
 * Apply the swizzle swz on top of the swizzle src.
 *
 * The X, Y, Z and W channels of swz are used as the four selectors that are
 * looked up in src.
 * @param src The swizzle the selectors are looked up in.
 * @param swz The swizzle providing the four selectors.
 * @return The packed, combined swizzle.
 */
unsigned int combine_swizzles(unsigned int src, unsigned int swz)
{
	return combine_swizzles4(src,
			GET_SWZ(swz, RC_SWIZZLE_X),
			GET_SWZ(swz, RC_SWIZZLE_Y),
			GET_SWZ(swz, RC_SWIZZLE_Z),
			GET_SWZ(swz, RC_SWIZZLE_W));
}

/**
 * Convert a single-channel mask into the matching swizzle value.
 * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
 * @return The swizzle value for that channel, or RC_SWIZZLE_UNUSED when mask
 * is none of the four single-channel masks.
 */
rc_swizzle rc_mask_to_swizzle(unsigned int mask)
{
	rc_swizzle swz;

	switch (mask) {
	case RC_MASK_X:
		swz = RC_SWIZZLE_X;
		break;
	case RC_MASK_Y:
		swz = RC_SWIZZLE_Y;
		break;
	case RC_MASK_Z:
		swz = RC_SWIZZLE_Z;
		break;
	case RC_MASK_W:
		swz = RC_SWIZZLE_W;
		break;
	default:
		swz = RC_SWIZZLE_UNUSED;
		break;
	}
	return swz;
}

/**
 * Reorder the bits of a mask according to a swizzle.
 *
 * Bit 'chan' of the result is bit GET_SWZ(swizzle, chan) of mask.  Channels
 * whose swizzle value is 4 or greater leave their result bit cleared.
 * @param swizzle The swizzle describing the reordering.
 * @param mask The mask whose bits are picked.
 * @return The reordered mask.
 */
unsigned swizzle_mask(unsigned swizzle, unsigned mask)
{
	unsigned reordered = 0;
	unsigned chan = 0;

	while (chan < 4) {
		const unsigned src_chan = GET_SWZ(swizzle, chan);
		if (src_chan <= 3)
			reordered |= GET_BIT(mask, src_chan) << chan;
		chan++;
	}
	return reordered;
}

/**
 * Tell whether the source swizzles of an instruction have to be adjusted
 * when its writemask is rewritten.
 * @param info Opcode information of the instruction.
 * @return 0 for texture instructions and for DP2, DP3, DP4, DDX and DDY;
 * 1 for every other opcode.
 */
static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
{
	unsigned int need_rewrite = 1;

	if (info->HasTexture)
		return 0;

	switch (info->Opcode) {
	case RC_OPCODE_DP2:
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		need_rewrite = 0;
		break;
	default:
		break;
	}
	return need_rewrite;
}

/**
 * Move the channels of old_swizzle to the positions named by
 * conversion_swizzle.
 *
 * For every channel i, the value held in channel i of old_swizzle is stored
 * in channel GET_SWZ(conversion_swizzle, i) of the result.  Channels of the
 * result that are never written stay RC_SWIZZLE_UNUSED.
 *
 * @param old_swizzle The swizzle to convert.
 * @param conversion_swizzle Maps each old channel to its new channel.  An
 * entry that does not name one of the four channels (any value above 3,
 * which covers RC_SWIZZLE_UNUSED) means the old channel is dropped.
 * @return A swizzle that results from converting old_swizzle using
 * conversion_swizzle
 */
unsigned int rc_adjust_channels(
	unsigned int old_swizzle,
	unsigned int conversion_swizzle)
{
	unsigned int i;
	unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
	for (i = 0; i < 4; i++) {
		unsigned int new_chan = GET_SWZ(conversion_swizzle, i);
		/* Only 0..3 are valid destination channels.  Using a larger
		 * value as a channel index would write outside the four
		 * 3-bit channels of new_swizzle, so skip it the same way
		 * rc_normal_rewrite_writemask() does. */
		if (new_chan > 3) {
			continue;
		}
		SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i));
	}
	return new_swizzle;
}

/**
 * Translate a writemask through a conversion swizzle.
 *
 * For every bit i set in old_mask, the bit named by channel i of
 * conversion_swizzle is set in the result.
 *
 * @param old_mask The writemask to translate.
 * @param conversion_swizzle Maps each old channel to its new channel.  An
 * entry that does not name one of the four channels (any value above 3,
 * which covers RC_SWIZZLE_UNUSED) contributes no bit.
 * @return The translated writemask; only bits 0..3 can be set.
 */
static unsigned int rewrite_writemask(
	unsigned int old_mask,
	unsigned int conversion_swizzle)
{
	unsigned int new_mask = 0;
	unsigned int i;

	for (i = 0; i < 4; i++) {
		unsigned int new_chan;

		if (!GET_BIT(old_mask, i)) {
			continue;
		}
		new_chan = GET_SWZ(conversion_swizzle, i);
		/* A value above 3 is not a channel; shifting by it would set
		 * a bit outside the four-channel mask. */
		if (new_chan > 3) {
			continue;
		}
		new_mask |= (1 << new_chan);
	}

	return new_mask;
}

/**
 * Rewrite the writemask of a pair sub instruction and, where required,
 * adjust the swizzles of its source arguments to match.
 *
 * conversion_swizzle describes how the old writemask maps onto the new one;
 * rc_rewrite_swizzle() documents how conversion swizzles are encoded.
 * Source swizzles are left alone for opcodes that srcs_need_rewrite()
 * reports as not needing a rewrite.
 * @param sub The sub instruction to modify in place.
 * @param conversion_swizzle Mapping from old channels to new channels.
 */
void rc_pair_rewrite_writemask(
	struct rc_pair_sub_instruction * sub,
	unsigned int conversion_swizzle)
{
	const struct rc_opcode_info * opcode_info =
					rc_get_opcode_info(sub->Opcode);
	unsigned int arg;

	sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);

	if (srcs_need_rewrite(opcode_info)) {
		for (arg = 0; arg < opcode_info->NumSrcRegs; arg++) {
			sub->Arg[arg].Swizzle = rc_adjust_channels(
					sub->Arg[arg].Swizzle,
					conversion_swizzle);
		}
	}
}

/**
 * Source-register callback used by rc_normal_rewrite_writemask().
 * @param userdata Points to the unsigned int conversion swizzle.
 * @param inst The instruction being visited (not used here).
 * @param src The source register whose swizzle is adjusted in place.
 */
static void normal_rewrite_writemask_cb(
	void * userdata,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	const unsigned int conversion = *(unsigned int *)userdata;

	src->Swizzle = rc_adjust_channels(src->Swizzle, conversion);
}

/**
 * Counterpart of rc_pair_rewrite_writemask() for normal instructions.
 *
 * The destination writemask is translated through conversion_swizzle.  For
 * texture instructions the texture swizzle (which must still be
 * RC_SWIZZLE_XYZW on entry) is updated so that each new channel selects the
 * old channel it came from.  For opcodes whose sources need it, every source
 * swizzle read by the instruction is adjusted as well.
 * @param inst The instruction to modify in place.
 * @param conversion_swizzle Mapping from old channels to new channels.
 */
void rc_normal_rewrite_writemask(
	struct rc_instruction * inst,
	unsigned int conversion_swizzle)
{
	struct rc_sub_instruction * sub = &inst->U.I;
	const struct rc_opcode_info * opcode_info =
					rc_get_opcode_info(sub->Opcode);
	unsigned int chan;

	sub->DstReg.WriteMask = rewrite_writemask(
				sub->DstReg.WriteMask, conversion_swizzle);

	if (opcode_info->HasTexture) {
		assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
		for (chan = 0; chan < 4; chan++) {
			const unsigned int target =
					GET_SWZ(conversion_swizzle, chan);
			/* Entries above 3 do not name a channel. */
			if (target <= 3) {
				SET_SWZ(sub->TexSwizzle, target, chan);
			}
		}
	}

	if (srcs_need_rewrite(opcode_info)) {
		rc_for_all_reads_src(inst, normal_rewrite_writemask_cb,
						&conversion_swizzle);
	}
}

/**
 * Replace each channel value 'swz' of swizzle with
 * GET_SWZ(conversion_swizzle, swz).
 *
 * Examples: to turn every X in swizzle into Y, pass Y___ (0xff9) as
 * conversion_swizzle.  To turn every Y into X, pass _X__ (0xfc7).  To swap
 * X and Y, pass YX__ (0xfc1).
 *
 * Channel values above 3 are kept as they are, and so are values whose
 * entry in conversion_swizzle is RC_SWIZZLE_UNUSED.
 * @param swizzle The swizzle to change
 * @param conversion_swizzle Describes the conversion to perform on the swizzle
 * @return A converted swizzle
 */
unsigned int rc_rewrite_swizzle(
	unsigned int swizzle,
	unsigned int conversion_swizzle)
{
	unsigned int result = swizzle;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		const unsigned int current = GET_SWZ(swizzle, chan);
		unsigned int replacement = current;

		if (current <= 3) {
			const unsigned int mapped =
					GET_SWZ(conversion_swizzle, current);
			if (mapped != RC_SWIZZLE_UNUSED) {
				replacement = mapped;
			}
		}
		SET_SWZ(result, chan, replacement);
	}
	return result;
}

/**
 * Left multiplication of a register with a swizzle
 */
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
{
	struct rc_src_register tmp = srcreg;
	int i;
	tmp.Swizzle = 0;
	tmp.Negate = 0;
	for(i = 0; i < 4; ++i) {
		rc_swizzle swz = GET_SWZ(swizzle, i);
		if (swz < 4) {
			tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
			tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
		} else {
			tmp.Swizzle |= swz << (i*3);
		}
	}
	return tmp;
}

/**
 * Reset a source register: all fields are zeroed and the swizzle is then
 * set to RC_SWIZZLE_XYZW.
 * @param reg The register to reset in place.
 */
void reset_srcreg(struct rc_src_register* reg)
{
	memset(reg, 0, sizeof(*reg));
	reg->Swizzle = RC_SWIZZLE_XYZW;
}

/**
 * Compute which components written by a destination are read by a source.
 * @param src_file Register file of the source.
 * @param src_idx Register index of the source.
 * @param src_swz Swizzle of the source.
 * @param dst_file Register file of the destination.
 * @param dst_idx Register index of the destination.
 * @param dst_mask Writemask of the destination.
 * @return RC_MASK_NONE when source and destination are different registers,
 * otherwise dst_mask limited to the components selected by src_swz.
 */
unsigned int rc_src_reads_dst_mask(
		rc_register_file src_file,
		unsigned int src_idx,
		unsigned int src_swz,
		rc_register_file dst_file,
		unsigned int dst_idx,
		unsigned int dst_mask)
{
	if (src_file == dst_file && src_idx == dst_idx) {
		return dst_mask & rc_swizzle_to_writemask(src_swz);
	}
	return RC_MASK_NONE;
}

/**
 * Classify the channels a swizzle selects.
 *
 * X, Y and Z count as RGB, W counts as Alpha; every other channel value is
 * ignored.
 * @param swizzle The swizzle to inspect.
 * @return A bit mask specifying whether this swizzle will select from an RGB
 * source (RC_SOURCE_RGB), an Alpha source (RC_SOURCE_ALPHA), or both;
 * RC_SOURCE_NONE if it selects from neither.
 */
unsigned int rc_source_type_swz(unsigned int swizzle)
{
	unsigned int type = RC_SOURCE_NONE;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		switch (GET_SWZ(swizzle, chan)) {
		case RC_SWIZZLE_X:
		case RC_SWIZZLE_Y:
		case RC_SWIZZLE_Z:
			type |= RC_SOURCE_RGB;
			break;
		case RC_SWIZZLE_W:
			type |= RC_SOURCE_ALPHA;
			break;
		default:
			break;
		}
	}
	return type;
}

/**
 * Classify the channels named by a mask.
 * @param mask The mask to inspect.
 * @return RC_SOURCE_RGB if any bit of RC_MASK_XYZ is set, RC_SOURCE_ALPHA if
 * RC_MASK_W is set, both or'd together if both apply, RC_SOURCE_NONE if
 * neither applies.
 */
unsigned int rc_source_type_mask(unsigned int mask)
{
	unsigned int type = RC_SOURCE_NONE;

	if (mask & RC_MASK_W) {
		type |= RC_SOURCE_ALPHA;
	}
	if (mask & RC_MASK_XYZ) {
		type |= RC_SOURCE_RGB;
	}

	return type;
}

/**
 * One source select recorded while checking whether an instruction can use
 * a presubtract operation: a register plus the kind of source it is read as.
 */
struct src_select {
	/* Register file of the source. */
	rc_register_file File;
	/* Register index of the source. */
	int Index;
	/* Source type bits; the code in this file fills it with the result
	 * of rc_source_type_swz(). */
	unsigned int SrcType;
};

/**
 * Working state of rc_inst_can_use_presub().
 */
struct can_use_presub_data {
	/* Source selects collected so far; entries are appended by
	 * can_use_presub_data_add_select(). */
	struct src_select Selects[5];
	/* Number of entries of Selects that are in use. */
	unsigned int SelectCount;
	/* The source register that the presubtract result would replace. */
	const struct rc_src_register * ReplaceReg;
	/* Set to 1 once ReplaceReg has been seen and skipped by
	 * can_use_presub_read_cb(). */
	unsigned int ReplaceRemoved;
};

/**
 * Append one source select to data->Selects and bump data->SelectCount.
 * No bounds check is done; the caller must make sure there is room.
 * @param data The collection to append to.
 * @param file Register file of the source.
 * @param index Register index of the source.
 * @param src_type Source type bits of the source.
 */
static void can_use_presub_data_add_select(
	struct can_use_presub_data * data,
	rc_register_file file,
	unsigned int index,
	unsigned int src_type)
{
	struct src_select * const slot = &data->Selects[data->SelectCount];

	slot->File = file;
	slot->Index = index;
	slot->SrcType = src_type;
	data->SelectCount++;
}

/**
 * Source-register callback that records the source selects of an
 * instruction in a struct can_use_presub_data.
 *
 * The first time the register equal to ReplaceReg is visited it is skipped
 * and ReplaceRemoved is set.  Sources in RC_FILE_NONE are skipped too.
 * Every other source is appended to the list of selects.
 * @param userdata Points to the struct can_use_presub_data to fill.
 * @param inst The instruction being visited (not used here).
 * @param src The source register being visited.
 */
static void can_use_presub_read_cb(
	void * userdata,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	struct can_use_presub_data * data = userdata;

	if (src == data->ReplaceReg && !data->ReplaceRemoved) {
		data->ReplaceRemoved = 1;
		return;
	}

	if (src->File != RC_FILE_NONE) {
		can_use_presub_data_add_select(data, src->File, src->Index,
					rc_source_type_swz(src->Swizzle));
	}
}

/**
 * Check whether an instruction could read the result of a presubtract
 * operation in place of one of its sources.
 *
 * The check counts how many RGB and how many Alpha source selects the
 * instruction would need once replace_reg is removed and the presubtract
 * sources are added; more than 3 of either kind is rejected.
 * @param inst The instruction to check.
 * @param presub_op The presubtract operation; RC_PRESUB_NONE always passes.
 * @param presub_writemask Not used by this function.
 * @param replace_reg The source of inst that would be replaced.
 * @param presub_src0 First source of the presubtract operation.
 * @param presub_src1 Second source of the presubtract operation; only read
 * when the operation takes more than one source.
 * @return 1 if the instruction can use the presubtract result, 0 otherwise.
 */
unsigned int rc_inst_can_use_presub(
	struct rc_instruction * inst,
	rc_presubtract_op presub_op,
	unsigned int presub_writemask,
	const struct rc_src_register * replace_reg,
	const struct rc_src_register * presub_src0,
	const struct rc_src_register * presub_src1)
{
	const struct rc_opcode_info * opcode_info =
					rc_get_opcode_info(inst->U.I.Opcode);
	struct can_use_presub_data data;
	unsigned int presub_src_count;
	unsigned int type0, type1;
	unsigned int outer, inner;
	int num_rgb = 0;
	int num_alpha = 0;

	if (presub_op == RC_PRESUB_NONE) {
		return 1;
	}

	if (opcode_info->HasTexture) {
		return 0;
	}

	/* An instruction can't use more than one presubtract value, unless
	 * both presubtract operations are the same and read from the same
	 * registers.
	 * XXX For now we will limit instructions to only one presubtract
	 * value.*/
	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
		return 0;
	}

	/* Collect the selects of the instruction itself... */
	memset(&data, 0, sizeof(data));
	data.ReplaceReg = replace_reg;
	rc_for_all_reads_src(inst, can_use_presub_read_cb, &data);

	/* ...followed by the selects of the presubtract sources. */
	presub_src_count = rc_presubtract_src_reg_count(presub_op);

	type0 = rc_source_type_swz(presub_src0->Swizzle);
	can_use_presub_data_add_select(&data, presub_src0->File,
					presub_src0->Index, type0);

	if (presub_src_count > 1) {
		type1 = rc_source_type_swz(presub_src1->Swizzle);
		can_use_presub_data_add_select(&data, presub_src1->File,
					presub_src1->Index, type1);

		/* Two presub sources that read the same register still
		 * occupy two different source selects.  The de-duplication
		 * below would count them once, so add one here to
		 * compensate. */
		if (presub_src0->File == presub_src1->File
		    && presub_src0->Index == presub_src1->Index) {
			const unsigned int shared = type0 & type1;
			if (shared & RC_SOURCE_RGB) {
				num_rgb++;
			}
			if (shared & RC_SOURCE_ALPHA) {
				num_alpha++;
			}
		}
	}

	/* Count the RGB and Alpha source selects.  When the same register
	 * shows up again later in the list, the earlier entry is ignored for
	 * the source types the later one covers. */
	for (outer = 0; outer < data.SelectCount; outer++) {
		const struct src_select * cur = &data.Selects[outer];
		unsigned int remaining = cur->SrcType;

		for (inner = outer + 1; inner < data.SelectCount; inner++) {
			const struct src_select * later = &data.Selects[inner];
			if (cur->File == later->File
			    && cur->Index == later->Index) {
				remaining &= ~later->SrcType;
			}
		}

		if (remaining & RC_SOURCE_RGB) {
			num_rgb++;
		}
		if (remaining & RC_SOURCE_ALPHA) {
			num_alpha++;
		}
	}

	return (num_rgb <= 3 && num_alpha <= 3) ? 1 : 0;
}

/**
 * State shared between rc_get_max_index() and max_callback().
 */
struct max_data {
	/* Largest register index seen so far; only meaningful once
	 * HasFileType is set. */
	unsigned int Max;
	/* Set to 1 as soon as a register of the requested file is seen. */
	unsigned int HasFileType;
	/* The register file being searched for. */
	rc_register_file File;
};

/**
 * Read/write callback that tracks the largest index used in one register
 * file.
 * @param userdata Points to the struct max_data to update.
 * @param inst The instruction being visited (not used here).
 * @param file Register file of the access.
 * @param index Register index of the access.
 * @param mask Component mask of the access (not used here).
 */
static void max_callback(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct max_data * data = (struct max_data*)userdata;

	if (file != data->File) {
		return;
	}
	/* Keep the current maximum unless this is the first hit or a
	 * larger index. */
	if (data->HasFileType && index <= data->Max) {
		return;
	}
	data->Max = index;
	data->HasFileType = 1;
}

/**
 * Find the largest register index of a register file that any instruction
 * of the program reads or writes.
 * @param c The compiler whose program is scanned.
 * @param file The register file to look for.
 * @return The maximum index of the specified register file used by the
 * program, or -1 if the program never accesses that file.
 */
int rc_get_max_index(
	struct radeon_compiler * c,
	rc_register_file file)
{
	struct max_data data;
	struct rc_instruction * cur = c->Program.Instructions.Next;

	data.Max = 0;
	data.HasFileType = 0;
	data.File = file;

	while (cur != &c->Program.Instructions) {
		rc_for_all_reads_mask(cur, max_callback, &data);
		rc_for_all_writes_mask(cur, max_callback, &data);
		cur = cur->Next;
	}

	if (data.HasFileType) {
		return (int)data.Max;
	}
	return -1;
}

/**
 * Collect the components that a pair sub instruction reads through one of
 * its sources.
 *
 * Only arguments that use the given source and whose swizzle has exactly
 * the source type src_type are taken into account.
 * @param sub The sub instruction to inspect.
 * @param source Index of the source.
 * @param src_type The source type the argument swizzle must match.
 * @return The union of the masks of all matching argument swizzles.
 */
static unsigned int get_source_readmask(
	struct rc_pair_sub_instruction * sub,
	unsigned int source,
	unsigned int src_type)
{
	const struct rc_opcode_info * opcode_info =
					rc_get_opcode_info(sub->Opcode);
	unsigned int mask = 0;
	unsigned int arg;

	for (arg = 0; arg < opcode_info->NumSrcRegs; arg++) {
		if (sub->Arg[arg].Source == source
		    && rc_source_type_swz(sub->Arg[arg].Swizzle) == src_type) {
			mask |= rc_swizzle_to_writemask(sub->Arg[arg].Swizzle);
		}
	}
	return mask;
}

/**
 * Try to remove a source from a pair instruction.
 *
 * The removal is refused when the RGB or Alpha sub instruction reads a
 * component through this source that new_readmask does not contain.  On
 * success the matching RGB and/or Alpha source slot is zeroed.
 * @param inst The pair instruction to modify.
 * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
 * @param source The index of the source to remove
 * @param new_readmask A mask representing the components that are read by
 * the source that is intended to replace the one you are removing.  If you
 * want to remove a source only and not replace it, this parameter should be
 * zero.
 * @return 1 if the source was successfully removed, 0 if it was not
 */
unsigned int rc_pair_remove_src(
	struct rc_instruction * inst,
	unsigned int src_type,
	unsigned int source,
	unsigned int new_readmask)
{
	unsigned int readmask;

	readmask = get_source_readmask(&inst->U.P.RGB, source, src_type);
	readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type);

	/* Some component that is still read would not be provided. */
	if (readmask & ~new_readmask) {
		return 0;
	}

	if (src_type & RC_SOURCE_RGB) {
		memset(&inst->U.P.RGB.Src[source], 0,
			sizeof(struct rc_pair_instruction_source));
	}
	if (src_type & RC_SOURCE_ALPHA) {
		memset(&inst->U.P.Alpha.Src[source], 0,
			sizeof(struct rc_pair_instruction_source));
	}

	return 1;
}

/**
 * Report the flow control opcode of an instruction.
 *
 * Normal instructions are looked up by their own opcode, all other
 * instructions by the opcode of their RGB sub instruction.
 * @param inst The instruction to inspect.
 * @return RC_OPCODE_NOP if inst is not a flow control instruction.
 * @return The opcode of inst if it is a flow control instruction.
 */
rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst)
{
	const struct rc_opcode_info * info;

	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
		/* A flow control instruction shouldn't have an alpha
		 * instruction. */
		assert(!info->IsFlowControl ||
				inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
	} else {
		info = rc_get_opcode_info(inst->U.I.Opcode);
	}

	return info->IsFlowControl ? info->Opcode : RC_OPCODE_NOP;
}

/**
 * Walk backwards from an ENDLOOP to the BGNLOOP that opens the same loop,
 * stepping over any complete loops nested in between.
 * @param endloop The ENDLOOP instruction to start from.
 * @return The BGNLOOP instruction that starts the loop ended by endloop, or
 * NULL if the walk gets back to endloop without finding one.
 */
struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop)
{
	unsigned int nesting = 0;
	struct rc_instruction * cur = endloop->Prev;

	while (cur != endloop) {
		switch (rc_get_flow_control_inst(cur)) {
		case RC_OPCODE_ENDLOOP:
			/* An inner loop ends here. */
			nesting++;
			break;
		case RC_OPCODE_BGNLOOP:
			if (nesting == 0) {
				return cur;
			}
			nesting--;
			break;
		default:
			break;
		}
		cur = cur->Prev;
	}
	return NULL;
}

/**
 * Walk forwards from a BGNLOOP to the ENDLOOP that closes the same loop,
 * stepping over any complete loops nested in between.
 * @param bgnloop The BGNLOOP instruction to start from.
 * @return The ENDLOOP instruction that ends the loop started by bgnloop, or
 * NULL if the walk gets back to bgnloop without finding one.
 */
struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
{
	unsigned int nesting = 0;
	struct rc_instruction * cur = bgnloop->Next;

	while (cur != bgnloop) {
		switch (rc_get_flow_control_inst(cur)) {
		case RC_OPCODE_BGNLOOP:
			/* An inner loop starts here. */
			nesting++;
			break;
		case RC_OPCODE_ENDLOOP:
			if (nesting == 0) {
				return cur;
			}
			nesting--;
			break;
		default:
			break;
		}
		cur = cur->Next;
	}
	return NULL;
}

/**
 * Build a conversion swizzle that maps the channels of old_mask onto the
 * channels of new_mask.
 *
 * The set bits of old_mask are paired, in ascending order, with the set
 * bits of new_mask.  Channels of old_mask that are not set, or that are left
 * over once new_mask has no more set bits, stay RC_SWIZZLE_UNUSED.
 * @param old_mask The mask to convert from.
 * @param new_mask The mask to convert to.
 * @return A conversion swizzle for converting from old_mask->new_mask
 */
unsigned int rc_make_conversion_swizzle(
	unsigned int old_mask,
	unsigned int new_mask)
{
	unsigned int conversion = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
	unsigned int next_new = 0;
	unsigned int old_chan;

	for (old_chan = 0; old_chan < 4; old_chan++) {
		if (!GET_BIT(old_mask, old_chan)) {
			continue;
		}
		/* Advance to the next channel enabled in new_mask. */
		while (next_new < 4 && !GET_BIT(new_mask, next_new)) {
			next_new++;
		}
		if (next_new < 4) {
			SET_SWZ(conversion, old_chan, next_new);
			next_new++;
		}
	}
	return conversion;
}

/**
 * Tell whether a register holds an immediate constant.
 * @param c The compiler whose constant table is consulted.
 * @param file Register file of the register.
 * @param index Register index of the register.
 * @return 1 if the register contains an immediate value, 0 otherwise.  A
 * register outside RC_FILE_CONSTANT, or an index beyond the end of the
 * constant table, is never an immediate.
 */
unsigned int rc_src_reg_is_immediate(
	struct radeon_compiler * c,
	unsigned int file,
	unsigned int index)
{
	if (file != RC_FILE_CONSTANT) {
		return 0;
	}
	/* Don't read past the end of the constant table; this is the same
	 * range check rc_get_constant_value() performs. */
	if (index >= c->Program.Constants.Count) {
		return 0;
	}
	return c->Program.Constants.Constants[index].Type ==
						RC_CONSTANT_IMMEDIATE;
}

/**
 * Read one component of an immediate constant, with swizzle and negate
 * applied.
 * @param c The compiler whose constant table is read.
 * @param index Index of the constant.
 * @param swizzle Swizzle of the source that reads the constant.
 * @param negate Negate mask of the source that reads the constant.
 * @param chan The channel of the source to evaluate.
 * @return The immediate value in the specified register, negated if the
 * negate bit of chan is set.  If the swizzle does not select X, Y, Z or W
 * for chan, or index is out of range, an error is reported through
 * rc_error() and 0.0f is returned.
 */
float rc_get_constant_value(
	struct radeon_compiler * c,
	unsigned int index,
	unsigned int swizzle,
	unsigned int negate,
	unsigned int chan)
{
	const int swz = GET_SWZ(swizzle, chan);
	float sign;

	if (swz >= 4 || index >= c->Program.Constants.Count) {
		rc_error(c, "get_constant_value: Can't find a value.\n");
		return 0.0f;
	}

	sign = GET_BIT(negate, chan) ? -1.0f : 1.0f;
	return sign * c->Program.Constants.Constants[index].u.Immediate[swz];
}

/**
 * Return the component value (RC_SWIZZLE_*) of the first channel of the
 * swizzle that is not RC_SWIZZLE_UNUSED.  This is only useful for scalar
 * instructions that are known to use only one channel of the swizzle.
 * @param swizzle The swizzle to inspect; asserted to have a used channel.
 * @return The value of the first used channel.
 */
unsigned int rc_get_scalar_src_swz(unsigned int swizzle)
{
	unsigned int swz = RC_SWIZZLE_UNUSED;
	unsigned int chan = 0;

	do {
		swz = GET_SWZ(swizzle, chan);
		chan++;
	} while (swz == RC_SWIZZLE_UNUSED && chan < 4);

	assert(swz != RC_SWIZZLE_UNUSED);
	return swz;
}