/*
* Copyright © 2016 Red Hat
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <stdbool.h>
#include "st_tgsi_lower_yuv.h"
#include "tgsi/tgsi_transform.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_debug.h"
#include "util/bitscan.h"
/*
 * Per-invocation state of the YUV lowering pass.
 */
struct tgsi_yuv_transform {
/* Generic transform context.  Must remain the first member:
 * tgsi_yuv_transform() casts a pointer to it back to this struct.
 */
struct tgsi_transform_context base;
/* Scan results for the input shader, filled in by tgsi_scan_shader(). */
struct tgsi_shader_info info;
/* Source registers naming the four CSC immediates set up by emit_immed(). */
struct tgsi_full_src_register imm[4];
/* Two scratch temporaries declared by emit_decls(), each available both
 * as a source and as a destination register.  Indexed with A / B below.
 */
struct {
struct tgsi_full_src_register src;
struct tgsi_full_dst_register dst;
} tmp[2];
#define A 0
#define B 1
/* Maps a primary sampler (used for Y) to its extra plane samplers:
 * [0] is the U (3-plane) or UV (2-plane) sampler, [1] is the V sampler
 * and is only assigned for 3-plane formats.  Slots are handed out from
 * free_slots in emit_decls().
 */
unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
/* Set once emit_decls() has run, which happens on the first instruction. */
bool first_instruction_emitted;
/* Bitmask of sampler slots that may be claimed for extra plane samplers. */
unsigned free_slots;
/* Bitmask of samplers to lower as 2-plane YUV (Y + UV). */
unsigned lower_nv12;
/* Bitmask of samplers to lower as 3-plane YUV (Y + U + V). */
unsigned lower_iyuv;
};
/*
 * Recover the YUV lowering state from the generic transform context that
 * is embedded as its first member.
 */
static inline struct tgsi_yuv_transform *
tgsi_yuv_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_yuv_transform *yuv = (struct tgsi_yuv_transform *)tctx;

   return yuv;
}
/*
 * Initialise *dst as a copy of *orig_dst whose write mask is narrowed to
 * the components also present in wrmask.
 *
 * The resulting mask must not be empty; this is asserted.
 */
static void
reg_dst(struct tgsi_full_dst_register *dst,
        const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
{
   *dst = *orig_dst;
   dst->Register.WriteMask = dst->Register.WriteMask & wrmask;

   assert(dst->Register.WriteMask);
}
/*
 * Unpack the four swizzle selectors of src into swiz[0..3], in X, Y, Z, W
 * order.  swiz must have room for four entries.
 */
static inline void
get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
{
   const unsigned selectors[4] = {
      src->SwizzleX,
      src->SwizzleY,
      src->SwizzleZ,
      src->SwizzleW,
   };
   unsigned chan;

   for (chan = 0; chan < 4; chan++)
      swiz[chan] = selectors[chan];
}
/*
 * Initialise *src as a copy of *orig_src with an additional swizzle
 * applied on top of the one orig_src already carries.
 *
 * sx/sy/sz/sw select, for each output component, which component of the
 * *already swizzled* original is read (i.e. the swizzles are composed).
 */
static void
reg_src(struct tgsi_full_src_register *src,
        const struct tgsi_full_src_register *orig_src,
        unsigned sx, unsigned sy, unsigned sz, unsigned sw)
{
   unsigned base_swiz[4];

   /* Capture the original selectors before they are overwritten below. */
   get_swiz(base_swiz, &orig_src->Register);

   *src = *orig_src;

   src->Register.SwizzleW = base_swiz[sw];
   src->Register.SwizzleZ = base_swiz[sz];
   src->Register.SwizzleY = base_swiz[sy];
   src->Register.SwizzleX = base_swiz[sx];
}
/* Placeholder selector for components whose value is irrelevant, so that
 * SWIZ(_, X, Y, _) can be written; it simply aliases TGSI_SWIZZLE_X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
/* Expands four component letters (X, Y, Z, W or _) into the four
 * comma-separated TGSI_SWIZZLE_* arguments taken by reg_src().
 */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
static inline struct tgsi_full_instruction
tex_instruction(unsigned samp)
{
struct tgsi_full_instruction inst;
inst = tgsi_default_full_instruction();
inst.Instruction.Opcode = TGSI_OPCODE_TEX;
inst.Instruction.Texture = 1;
inst.Texture.Texture = TGSI_TEXTURE_2D;
inst.Instruction.NumDstRegs = 1;
inst.Instruction.NumSrcRegs = 2;
inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
inst.Src[1].Register.Index = samp;
return inst;
}
static inline struct tgsi_full_instruction
mov_instruction(void)
{
struct tgsi_full_instruction inst;
inst = tgsi_default_full_instruction();
inst.Instruction.Opcode = TGSI_OPCODE_MOV;
inst.Instruction.Saturate = 0;
inst.Instruction.NumDstRegs = 1;
inst.Instruction.NumSrcRegs = 1;
return inst;
}
static inline struct tgsi_full_instruction
dp3_instruction(void)
{
struct tgsi_full_instruction inst;
inst = tgsi_default_full_instruction();
inst.Instruction.Opcode = TGSI_OPCODE_DP3;
inst.Instruction.NumDstRegs = 1;
inst.Instruction.NumSrcRegs = 2;
return inst;
}
static void
emit_immed(struct tgsi_transform_context *tctx, int idx,
float x, float y, float z, float w)
{
struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
struct tgsi_shader_info *info = &ctx->info;
struct tgsi_full_immediate immed;
immed = tgsi_default_full_immediate();
immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
immed.u[0].Float = x;
immed.u[1].Float = y;
immed.u[2].Float = z;
immed.u[3].Float = w;
tctx->emit_immediate(tctx, &immed);
ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE;
ctx->imm[idx].Register.Index = info->immediate_count + idx;
ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X;
ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y;
ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z;
ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W;
}
static void
emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
{
tgsi_transform_sampler_decl(tctx, samp);
tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D,
TGSI_RETURN_TYPE_FLOAT);
}
/* Emit extra declarations we need:
 *  + 2 TEMP to hold intermediate results
 *  + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
 *    lowered YUV sampler
 *  + extra immediates for doing CSC
 *
 * Also fills in ctx->imm[], ctx->tmp[] and ctx->sampler_map[] so that the
 * lowering helpers can refer to what was declared here.
 */
static void
emit_decls(struct tgsi_transform_context *tctx)
{
   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
   struct tgsi_shader_info *info = &ctx->info;
   unsigned mask, tempbase, i;
   struct tgsi_full_declaration decl;

   /*
    * Declare immediates for CSC conversion:
    */

   /* ITU-R BT.601 conversion */
   emit_immed(tctx, 0, 1.164,  0.000,  1.596, 0.0);
   emit_immed(tctx, 1, 1.164, -0.392, -0.813, 0.0);
   emit_immed(tctx, 2, 1.164,  2.017,  0.000, 0.0);
   emit_immed(tctx, 3, 0.0625, 0.500,  0.500, 1.0);

   /*
    * Declare extra samplers / sampler-views:
    */
   mask = ctx->lower_nv12 | ctx->lower_iyuv;
   while (mask) {
      unsigned extra, y_samp = u_bit_scan(&mask);

      /* Every lowered sampler needs at least one spare slot; scanning an
       * empty mask would not yield a usable sampler index.
       */
      assert(ctx->free_slots);
      extra = u_bit_scan(&ctx->free_slots);
      ctx->sampler_map[y_samp][0] = extra;
      emit_samp(tctx, extra);

      /* 3-plane formats need a second extra sampler for the V plane.
       * Use an unsigned shift so that testing bit 31 is well defined.
       */
      if (ctx->lower_iyuv & (1u << y_samp)) {
         assert(ctx->free_slots);
         extra = u_bit_scan(&ctx->free_slots);
         ctx->sampler_map[y_samp][1] = extra;
         emit_samp(tctx, extra);
      }
   }

   /*
    * Declare extra temp:
    */
   tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;

   for (i = 0; i < 2; i++) {
      decl = tgsi_default_full_declaration();
      decl.Declaration.File = TGSI_FILE_TEMPORARY;
      decl.Range.First = decl.Range.Last = tempbase + i;
      tctx->emit_declaration(tctx, &decl);

      /* Identity-swizzled source view of the new temporary. */
      ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
      ctx->tmp[i].src.Register.Index = tempbase + i;
      ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
      ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
      ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
      ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;

      /* Fully writable destination view of the same temporary. */
      ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
      ctx->tmp[i].dst.Register.Index = tempbase + i;
      ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
   }
}
/* call with YUV in tmpA.xyz */
static void
yuv_to_rgb(struct tgsi_transform_context *tctx,
struct tgsi_full_dst_register *dst)
{
struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
struct tgsi_full_instruction inst;
/*
* IMM[0] FLT32 { 1.164, 0.000, 1.596, 0.0 }
* IMM[1] FLT32 { 1.164, -0.392, -0.813, 0.0 }
* IMM[2] FLT32 { 1.164, 2.017, 0.000, 0.0 }
* IMM[3] FLT32 { 0.0625, 0.500, 0.500, 1.0 }
*/
/* SUB tmpA.xyz, tmpA, imm[3] */
inst = tgsi_default_full_instruction();
inst.Instruction.Opcode = TGSI_OPCODE_ADD;
inst.Instruction.Saturate = 0;
inst.Instruction.NumDstRegs = 1;
inst.Instruction.NumSrcRegs = 2;
reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
inst.Src[1].Register.Negate = 1;
tctx->emit_instruction(tctx, &inst);
/* DP3 dst.x, tmpA, imm[0] */
inst = dp3_instruction();
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
tctx->emit_instruction(tctx, &inst);
/* DP3 dst.y, tmpA, imm[1] */
inst = dp3_instruction();
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
tctx->emit_instruction(tctx, &inst);
/* DP3 dst.z, tmpA, imm[2] */
inst = dp3_instruction();
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
tctx->emit_instruction(tctx, &inst);
/* MOV dst.w, imm[0].x */
inst = mov_instruction();
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
tctx->emit_instruction(tctx, &inst);
}
static void
lower_nv12(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction *originst)
{
struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
struct tgsi_full_instruction inst;
struct tgsi_full_src_register *coord = &originst->Src[0];
unsigned samp = originst->Src[1].Register.Index;
/* sample Y:
* TEX tempA.x, coord, texture[samp], 2D;
*/
inst = tex_instruction(samp);
reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
tctx->emit_instruction(tctx, &inst);
/* sample UV:
* TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
* MOV tempA.yz, tempB._xy_
*/
inst = tex_instruction(ctx->sampler_map[samp][0]);
reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
tctx->emit_instruction(tctx, &inst);
inst = mov_instruction();
reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
tctx->emit_instruction(tctx, &inst);
/* At this point, we have YUV in tempA.xyz, rest is common: */
yuv_to_rgb(tctx, &originst->Dst[0]);
}
static void
lower_iyuv(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction *originst)
{
struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
struct tgsi_full_instruction inst;
struct tgsi_full_src_register *coord = &originst->Src[0];
unsigned samp = originst->Src[1].Register.Index;
/* sample Y:
* TEX tempA.x, coord, texture[samp], 2D;
*/
inst = tex_instruction(samp);
reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
tctx->emit_instruction(tctx, &inst);
/* sample U:
* TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D;
* MOV tempA.y, tempB._x__
*/
inst = tex_instruction(ctx->sampler_map[samp][0]);
reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
tctx->emit_instruction(tctx, &inst);
inst = mov_instruction();
reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _));
tctx->emit_instruction(tctx, &inst);
/* sample V:
* TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D;
* MOV tempA.z, tempB.__x_
*/
inst = tex_instruction(ctx->sampler_map[samp][1]);
reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
tctx->emit_instruction(tctx, &inst);
inst = mov_instruction();
reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _));
tctx->emit_instruction(tctx, &inst);
/* At this point, we have YUV in tempA.xyz, rest is common: */
yuv_to_rgb(tctx, &originst->Dst[0]);
}
/*
 * Per-instruction callback of the transform.
 *
 * On the first instruction the extra declarations are emitted.  A TEX
 * whose sampler is flagged in lower_nv12 / lower_iyuv is replaced by the
 * corresponding lowered sequence; every other instruction is passed
 * through unchanged.
 */
static void
transform_instr(struct tgsi_transform_context *tctx,
                struct tgsi_full_instruction *inst)
{
   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);

   if (!ctx->first_instruction_emitted) {
      emit_decls(tctx);
      ctx->first_instruction_emitted = true;
   }

   /* TODO what other tex opcode's can be used w/ external eglimgs? */
   if (inst->Instruction.Opcode == TGSI_OPCODE_TEX) {
      const unsigned samp = inst->Src[1].Register.Index;
      /* Unsigned shift: the masks are unsigned, and shifting a signed 1
       * into the top bit would be undefined.
       */
      const unsigned samp_bit = 1u << samp;

      if (ctx->lower_nv12 & samp_bit) {
         lower_nv12(tctx, inst);
         return;
      }
      if (ctx->lower_iyuv & samp_bit) {
         lower_iyuv(tctx, inst);
         return;
      }
   }

   /* Not a lowered YUV fetch: emit as-is. */
   tctx->emit_instruction(tctx, inst);
}
/*
 * Produce a copy of `tokens` in which TEX fetches from the samplers
 * flagged in lower_nv12 (2-plane) or lower_iyuv (3-plane) are replaced by
 * per-plane fetches plus a YUV -> RGB conversion.
 *
 * free_slots is a bitmask of sampler slots that may be claimed for the
 * extra plane samplers.  lower_nv12 and lower_iyuv must not overlap.
 *
 * Returns a token buffer obtained from tgsi_alloc_tokens(), or NULL if
 * that allocation fails.  The input tokens are not freed here.
 *
 * NOTE(review): the output buffer has a fixed headroom of 120 tokens and
 * the result of tgsi_transform_shader() is not checked -- confirm this is
 * sufficient for shaders that sample lowered YUV textures several times.
 */
extern const struct tgsi_token *
st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
                  unsigned lower_nv12, unsigned lower_iyuv)
{
   struct tgsi_yuv_transform ctx;
   struct tgsi_token *out_tokens;
   int out_len;

   /* bitmasks should be mutually exclusive */
   assert(!(lower_nv12 & lower_iyuv));

   /* Debug aid:
    *   tgsi_dump(tokens, 0);
    *   debug_printf("\n");
    */

   memset(&ctx, 0, sizeof(ctx));
   ctx.free_slots = free_slots;
   ctx.lower_nv12 = lower_nv12;
   ctx.lower_iyuv = lower_iyuv;
   ctx.base.transform_instruction = transform_instr;
   tgsi_scan_shader(tokens, &ctx.info);

   /* TODO better job of figuring out how many extra tokens we need..
    * this is a pain about tgsi_transform :-/
    */
   out_len = tgsi_num_tokens(tokens) + 120;
   out_tokens = tgsi_alloc_tokens(out_len);
   if (out_tokens == NULL)
      return NULL;

   tgsi_transform_shader(tokens, out_tokens, out_len, &ctx.base);

   /* Debug aid:
    *   tgsi_dump(out_tokens, 0);
    *   debug_printf("\n");
    */

   return out_tokens;
}