/* * Copyright 2014 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * This utility transforms the geometry shader to emulate point sprite by * drawing a quad. It also adds an extra output for the original point position * if the point position is to be written to a stream output buffer. * Note: It assumes the driver will add a constant for the inverse viewport * after the user defined constants. */ #include "util/u_debug.h" #include "util/u_math.h" #include "tgsi_info.h" #include "tgsi_point_sprite.h" #include "tgsi_transform.h" #include "pipe/p_state.h" #define INVALID_INDEX 9999 /* Set swizzle based on the immediates (0, 1, 0, -1) */ static inline unsigned set_swizzle(int x, int y, int z, int w) { static const unsigned map[3] = {TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y}; assert(x >= -1); assert(x <= 1); assert(y >= -1); assert(y <= 1); assert(z >= -1); assert(z <= 1); assert(w >= -1); assert(w <= 1); return map[x+1] | (map[y+1] << 2) | (map[z+1] << 4) | (map[w+1] << 6); } static inline unsigned get_swizzle(unsigned swizzle, unsigned component) { assert(component < 4); return (swizzle >> (component * 2)) & 0x3; } struct psprite_transform_context { struct tgsi_transform_context base; unsigned num_tmp; unsigned num_out; unsigned num_orig_out; unsigned num_const; unsigned num_imm; unsigned point_size_in; // point size input unsigned point_size_out; // point size output unsigned point_size_tmp; // point size temp unsigned point_pos_in; // point pos input unsigned point_pos_out; // point pos output unsigned point_pos_sout; // original point pos for streamout unsigned point_pos_tmp; // point pos temp unsigned point_scale_tmp; // point scale temp unsigned point_color_out; // point color output unsigned point_color_tmp; // point color temp unsigned point_imm; // point immediates unsigned point_ivp; // point inverseViewport constant unsigned point_dir_swz[4]; // point direction swizzle unsigned point_coord_swz[4]; // point coord swizzle unsigned point_coord_enable; // point coord enable mask unsigned point_coord_decl; // point coord output declared mask unsigned point_coord_out; // point coord output starting index unsigned point_coord_aa; // aa point coord semantic index unsigned point_coord_k; // aa point coord threshold distance unsigned stream_out_point_pos:1; // set if to stream out original point pos unsigned aa_point:1; // set if doing aa point unsigned out_tmp_index[PIPE_MAX_SHADER_OUTPUTS]; int max_generic; // max generic semantic index }; static inline struct psprite_transform_context * psprite_transform_context(struct tgsi_transform_context *ctx) { return (struct psprite_transform_context *) ctx; } /** * TGSI declaration transform callback. */ static void psprite_decl(struct tgsi_transform_context *ctx, struct tgsi_full_declaration *decl) { struct psprite_transform_context *ts = psprite_transform_context(ctx); if (decl->Declaration.File == TGSI_FILE_INPUT) { if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) { ts->point_size_in = decl->Range.First; } else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { ts->point_pos_in = decl->Range.First; } } else if (decl->Declaration.File == TGSI_FILE_OUTPUT) { if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) { ts->point_size_out = decl->Range.First; } else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { ts->point_pos_out = decl->Range.First; } else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && decl->Semantic.Index < 32) { ts->point_coord_decl |= 1 << decl->Semantic.Index; ts->max_generic = MAX2(ts->max_generic, (int)decl->Semantic.Index); } ts->num_out = MAX2(ts->num_out, decl->Range.Last + 1); } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1); } else if (decl->Declaration.File == TGSI_FILE_CONSTANT) { ts->num_const = MAX2(ts->num_const, decl->Range.Last + 1); } ctx->emit_declaration(ctx, decl); } /** * TGSI immediate declaration transform callback. */ static void psprite_immediate(struct tgsi_transform_context *ctx, struct tgsi_full_immediate *imm) { struct psprite_transform_context *ts = psprite_transform_context(ctx); ctx->emit_immediate(ctx, imm); ts->num_imm++; } /** * TGSI transform prolog callback. */ static void psprite_prolog(struct tgsi_transform_context *ctx) { struct psprite_transform_context *ts = psprite_transform_context(ctx); unsigned point_coord_enable, en; int i; /* Replace output registers with temporary registers */ for (i = 0; i < ts->num_out; i++) { ts->out_tmp_index[i] = ts->num_tmp++; } ts->num_orig_out = ts->num_out; /* Declare a tmp register for point scale */ ts->point_scale_tmp = ts->num_tmp++; if (ts->point_size_out != INVALID_INDEX) ts->point_size_tmp = ts->out_tmp_index[ts->point_size_out]; else ts->point_size_tmp = ts->num_tmp++; assert(ts->point_pos_out != INVALID_INDEX); ts->point_pos_tmp = ts->out_tmp_index[ts->point_pos_out]; ts->out_tmp_index[ts->point_pos_out] = INVALID_INDEX; /* Declare one more tmp register for point coord threshold distance * if we are generating anti-aliased point. */ if (ts->aa_point) ts->point_coord_k = ts->num_tmp++; tgsi_transform_temps_decl(ctx, ts->point_size_tmp, ts->num_tmp-1); /* Declare an extra output for the original point position for stream out */ if (ts->stream_out_point_pos) { ts->point_pos_sout = ts->num_out++; tgsi_transform_output_decl(ctx, ts->point_pos_sout, TGSI_SEMANTIC_GENERIC, 0, 0); } /* point coord outputs to be declared */ point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl; /* Declare outputs for those point coord that are enabled but are not * already declared in this shader. */ ts->point_coord_out = ts->num_out; if (point_coord_enable) { for (i = 0, en = point_coord_enable; en; en>>=1, i++) { if (en & 0x1) { tgsi_transform_output_decl(ctx, ts->num_out++, TGSI_SEMANTIC_GENERIC, i, 0); ts->max_generic = MAX2(ts->max_generic, (int)i); } } } /* add an extra generic output for aa point texcoord */ if (ts->aa_point) { ts->point_coord_aa = ts->max_generic + 1; assert((ts->point_coord_enable & (1 << ts->point_coord_aa)) == 0); ts->point_coord_enable |= 1 << (ts->point_coord_aa); tgsi_transform_output_decl(ctx, ts->num_out++, TGSI_SEMANTIC_GENERIC, ts->point_coord_aa, 0); } /* Declare extra immediates */ ts->point_imm = ts->num_imm; tgsi_transform_immediate_decl(ctx, 0, 1, 0.5, -1); /* Declare point constant - * constant.xy -- inverseViewport * constant.z -- current point size * constant.w -- max point size * The driver needs to add this constant to the constant buffer */ ts->point_ivp = ts->num_const++; tgsi_transform_const_decl(ctx, ts->point_ivp, ts->point_ivp); /* If this geometry shader does not specify point size, * get the current point size from the point constant. */ if (ts->point_size_out == INVALID_INDEX) { struct tgsi_full_instruction inst; inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_MOV; inst.Instruction.NumDstRegs = 1; tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_XYZW); inst.Instruction.NumSrcRegs = 1; tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z); ctx->emit_instruction(ctx, &inst); } } /** * Add the point sprite emulation instructions at the emit vertex instruction */ static void psprite_emit_vertex_inst(struct tgsi_transform_context *ctx, struct tgsi_full_instruction *vert_inst) { struct psprite_transform_context *ts = psprite_transform_context(ctx); struct tgsi_full_instruction inst; unsigned point_coord_enable, en; unsigned i, j, s; /* new point coord outputs */ point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl; /* OUTPUT[pos_sout] = TEMP[pos] */ if (ts->point_pos_sout != INVALID_INDEX) { tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, ts->point_pos_sout, TGSI_WRITEMASK_XYZW, TGSI_FILE_TEMPORARY, ts->point_pos_tmp); } /** * Set up the point scale vector * scale = pointSize * pos.w * inverseViewport */ /* MUL point_scale.x, point_size.x, point_pos.w */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W, false); /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_MUL; inst.Instruction.NumDstRegs = 1; tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_XY); inst.Instruction.NumSrcRegs = 2; tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z); ctx->emit_instruction(ctx, &inst); /** * Set up the point coord threshold distance * k = 0.5 - 1 / pointsize */ if (ts->aa_point) { tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV, TGSI_FILE_TEMPORARY, ts->point_coord_k, TGSI_WRITEMASK_X, TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, false); tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, ts->point_coord_k, TGSI_WRITEMASK_X, TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Z, TGSI_FILE_TEMPORARY, ts->point_coord_k, TGSI_SWIZZLE_X, true); } for (i = 0; i < 4; i++) { unsigned point_dir_swz = ts->point_dir_swz[i]; unsigned point_coord_swz = ts->point_coord_swz[i]; /* All outputs need to be emitted for each vertex */ for (j = 0; j < ts->num_orig_out; j++) { if (ts->out_tmp_index[j] != INVALID_INDEX) { tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, j, TGSI_WRITEMASK_XYZW, TGSI_FILE_TEMPORARY, ts->out_tmp_index[j]); } } /* pos = point_scale * point_dir + point_pos */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_MAD; inst.Instruction.NumDstRegs = 1; tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT, ts->point_pos_out, TGSI_WRITEMASK_XYZW); inst.Instruction.NumSrcRegs = 3; tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, ts->point_imm, get_swizzle(point_dir_swz, 0), get_swizzle(point_dir_swz, 1), get_swizzle(point_dir_swz, 2), get_swizzle(point_dir_swz, 3)); tgsi_transform_src_reg(&inst.Src[2], TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W); ctx->emit_instruction(ctx, &inst); /* point coord */ for (j = 0, s = 0, en = point_coord_enable; en; en>>=1, s++) { unsigned dstReg; if (en & 0x1) { dstReg = ts->point_coord_out + j; inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_MOV; inst.Instruction.NumDstRegs = 1; tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT, dstReg, TGSI_WRITEMASK_XYZW); inst.Instruction.NumSrcRegs = 1; tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE, ts->point_imm, get_swizzle(point_coord_swz, 0), get_swizzle(point_coord_swz, 1), get_swizzle(point_coord_swz, 2), get_swizzle(point_coord_swz, 3)); ctx->emit_instruction(ctx, &inst); /* MOV point_coord.z point_coord_k.x */ if (s == ts->point_coord_aa) { tgsi_transform_op1_swz_inst(ctx, TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, dstReg, TGSI_WRITEMASK_Z, TGSI_FILE_TEMPORARY, ts->point_coord_k, TGSI_SWIZZLE_X); } j++; /* the next point coord output offset */ } } /* Emit the EMIT instruction for each vertex of the quad */ ctx->emit_instruction(ctx, vert_inst); } /* Emit the ENDPRIM instruction for the quad */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_ENDPRIM; inst.Instruction.NumDstRegs = 0; inst.Instruction.NumSrcRegs = 1; inst.Src[0] = vert_inst->Src[0]; ctx->emit_instruction(ctx, &inst); } /** * TGSI instruction transform callback. */ static void psprite_inst(struct tgsi_transform_context *ctx, struct tgsi_full_instruction *inst) { struct psprite_transform_context *ts = psprite_transform_context(ctx); if (inst->Instruction.Opcode == TGSI_OPCODE_EMIT) { psprite_emit_vertex_inst(ctx, inst); } else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && inst->Dst[0].Register.Index == ts->point_size_out) { /** * Replace point size output reg with tmp reg. * The tmp reg will be later used as a src reg for computing * the point scale factor. */ inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY; inst->Dst[0].Register.Index = ts->point_size_tmp; ctx->emit_instruction(ctx, inst); /* Clamp the point size */ /* MAX point_size_tmp.x, point_size_tmp.x, point_imm.y */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y, false); /* MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W, false); } else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && inst->Dst[0].Register.Index == ts->point_pos_out) { /** * Replace point pos output reg with tmp reg. */ inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY; inst->Dst[0].Register.Index = ts->point_pos_tmp; ctx->emit_instruction(ctx, inst); } else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { /** * Replace output reg with tmp reg. */ inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY; inst->Dst[0].Register.Index = ts->out_tmp_index[inst->Dst[0].Register.Index]; ctx->emit_instruction(ctx, inst); } else { ctx->emit_instruction(ctx, inst); } } /** * TGSI property instruction transform callback. * Transforms a point into a 4-vertex triangle strip. */ static void psprite_property(struct tgsi_transform_context *ctx, struct tgsi_full_property *prop) { switch (prop->Property.PropertyName) { case TGSI_PROPERTY_GS_OUTPUT_PRIM: prop->u[0].Data = PIPE_PRIM_TRIANGLE_STRIP; break; case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: prop->u[0].Data *= 4; break; default: break; } ctx->emit_property(ctx, prop); } /** * TGSI utility to transform a geometry shader to support point sprite. */ struct tgsi_token * tgsi_add_point_sprite(const struct tgsi_token *tokens_in, const unsigned point_coord_enable, const bool sprite_origin_lower_left, const bool stream_out_point_pos, int *aa_point_coord_index) { struct psprite_transform_context transform; const uint num_new_tokens = 200; /* should be enough */ const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens; struct tgsi_token *new_tokens; /* setup transformation context */ memset(&transform, 0, sizeof(transform)); transform.base.transform_declaration = psprite_decl; transform.base.transform_instruction = psprite_inst; transform.base.transform_property = psprite_property; transform.base.transform_immediate = psprite_immediate; transform.base.prolog = psprite_prolog; transform.point_size_in = INVALID_INDEX; transform.point_size_out = INVALID_INDEX; transform.point_size_tmp = INVALID_INDEX; transform.point_pos_in = INVALID_INDEX; transform.point_pos_out = INVALID_INDEX; transform.point_pos_sout = INVALID_INDEX; transform.point_pos_tmp = INVALID_INDEX; transform.point_scale_tmp = INVALID_INDEX; transform.point_imm = INVALID_INDEX; transform.point_coord_aa = INVALID_INDEX; transform.point_coord_k = INVALID_INDEX; transform.stream_out_point_pos = stream_out_point_pos; transform.point_coord_enable = point_coord_enable; transform.aa_point = aa_point_coord_index != NULL; transform.max_generic = -1; /* point sprite directions based on the immediates (0, 1, 0.5, -1) */ /* (-1, -1, 0, 0) */ transform.point_dir_swz[0] = set_swizzle(-1, -1, 0, 0); /* (-1, 1, 0, 0) */ transform.point_dir_swz[1] = set_swizzle(-1, 1, 0, 0); /* (1, -1, 0, 0) */ transform.point_dir_swz[2] = set_swizzle(1, -1, 0, 0); /* (1, 1, 0, 0) */ transform.point_dir_swz[3] = set_swizzle(1, 1, 0, 0); /* point coord based on the immediates (0, 1, 0, -1) */ if (sprite_origin_lower_left) { /* (0, 0, 0, 1) */ transform.point_coord_swz[0] = set_swizzle(0, 0, 0, 1); /* (0, 1, 0, 1) */ transform.point_coord_swz[1] = set_swizzle(0, 1, 0, 1); /* (1, 0, 0, 1) */ transform.point_coord_swz[2] = set_swizzle(1, 0, 0, 1); /* (1, 1, 0, 1) */ transform.point_coord_swz[3] = set_swizzle(1, 1, 0, 1); } else { /* (0, 1, 0, 1) */ transform.point_coord_swz[0] = set_swizzle(0, 1, 0, 1); /* (0, 0, 0, 1) */ transform.point_coord_swz[1] = set_swizzle(0, 0, 0, 1); /* (1, 1, 0, 1) */ transform.point_coord_swz[2] = set_swizzle(1, 1, 0, 1); /* (1, 0, 0, 1) */ transform.point_coord_swz[3] = set_swizzle(1, 0, 0, 1); } /* allocate new tokens buffer */ new_tokens = tgsi_alloc_tokens(new_len); if (!new_tokens) return NULL; /* transform the shader */ tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base); if (aa_point_coord_index) *aa_point_coord_index = transform.point_coord_aa; return new_tokens; }