/*
* Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Vadim Girlin
*/
#define FBC_DEBUG 0
#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif
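// usage (see finalize_alu_src below): FBC_DUMP( sblog << ...; dump::dump_op(n); );
// the enclosed statements are compiled in only when FBC_DEBUG is nonzero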
#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
namespace r600_sb {
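// rv6xx workaround: a GPR written by the immediately preceding ALU group
// must not be used for relative (AR-indexed) addressing in the next group
// (see the add_nop checks in finalize_alu_src); a group containing a single
// NOP is inserted between the two groups to avoid the problem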
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
alu_group_node *g = sh.create_alu_group();
alu_node *a = sh.create_alu();
a->bc.set_op(ALU_OP0_NOP);
a->bc.last = 1;
g->push_back(a);
b4->insert_before(g);
}
int bc_finalizer::run() {
run_on(sh.root);
regions_vec &rv = sh.get_regions();
for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
++I) {
region_node *r = *I;
assert(r);
bool loop = r->is_loop();
if (loop)
finalize_loop(r);
else
finalize_if(r);
r->expand();
}
cf_peephole();
// workaround for some problems on r6xx/r7xx:
// add an ALU NOP clause to every vertex (VS/ES) shader
if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
alu_group_node *g = sh.create_alu_group();
alu_node *a = sh.create_alu();
a->bc.set_op(ALU_OP0_NOP);
a->bc.last = 1;
g->push_back(a);
c->push_back(g);
sh.root->push_back(c);
c = sh.create_cf(CF_OP_NOP);
sh.root->push_back(c);
last_cf = c;
}
// CF_ALU clause words have no end_of_program bit, so if the program would
// end with an ALU clause, append a NOP to carry the EOP flag
if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
last_cf = sh.create_cf(CF_OP_NOP);
sh.root->push_back(last_cf);
}
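// Cayman has no end_of_program bit; the program must be terminated with an
// explicit CF_END instruction instead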
if (ctx.is_cayman()) {
if (!last_cf) {
cf_node *c = sh.create_cf(CF_OP_CF_END);
sh.root->push_back(c);
} else
last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
} else
last_cf->bc.end_of_program = 1;
for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
cf_node *le = last_export[t];
if (le)
le->bc.set_op(CF_OP_EXPORT_DONE);
}
sh.ngpr = ngpr;
sh.nstack = nstack;
return 0;
}
void bc_finalizer::finalize_loop(region_node* r) {
update_nstack(r);
cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
// Update last_cf, but don't overwrite it if it's outside the current
// loop nest, since it may point to a cf that is later in program order.
// Checking a single parent level is sufficient because regions are
// processed in reverse order, from the innermost to the outermost
// loop nest level.
if (!last_cf || last_cf->get_parent_region() == r) {
last_cf = loop_end;
}
loop_start->jump_after(loop_end);
loop_end->jump_after(loop_start);
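// LOOP_START's address points just past LOOP_END (the loop exit), and
// LOOP_END's points just past LOOP_START (the top of the loop body);
// jump_after() records the intent and cf_peephole() resolves the targets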
for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
I != E; ++I) {
depart_node *dep = *I;
cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
loop_break->jump(loop_end);
dep->push_back(loop_break);
dep->expand();
}
// FIXME produces unnecessary LOOP_CONTINUE
for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
I != E; ++I) {
repeat_node *rep = *I;
if (!(rep->parent == r && rep->prev == NULL)) {
cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
loop_cont->jump(loop_end);
rep->push_back(loop_cont);
}
rep->expand();
}
r->push_front(loop_start);
r->push_back(loop_end);
}
void bc_finalizer::finalize_if(region_node* r) {
update_nstack(r);
// expecting the following control flow structure here:
// - region
// {
// - depart/repeat 1 (it may be depart/repeat for some outer region)
// {
// - if
// {
// - depart/repeat 2 (possibly for outer region)
// {
// - some optional code
// }
// }
// - optional <else> code ...
// }
// }
container_node *repdep1 = static_cast<container_node*>(r->first);
assert(repdep1->is_depart() || repdep1->is_repeat());
if_node *n_if = static_cast<if_node*>(repdep1->first);
if (n_if) {
assert(n_if->is_if());
container_node *repdep2 = static_cast<container_node*>(n_if->first);
assert(repdep2->is_depart() || repdep2->is_repeat());
cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
cf_node *if_pop = sh.create_cf(CF_OP_POP);
if (!last_cf || last_cf->get_parent_region() == r) {
last_cf = if_pop;
}
if_pop->bc.pop_count = 1;
if_pop->jump_after(if_pop);
r->push_front(if_jump);
r->push_back(if_pop);
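// resulting CF layout: JUMP ... [ELSE ...] POP; the JUMP targets the ELSE
// (or a point just past the POP when no else branch is emitted), and the
// jump_after() targets are resolved later in cf_peephole()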
/* depart/repeat 1 is actually part of the "else" code:
 * if it is a depart or repeat targeting an outer loop region it will
 * insert a LOOP_BREAK or LOOP_CONTINUE here, so we need to emit the
 * ELSE clause even when there is no explicit else code.
 */
bool has_else = n_if->next;
if (repdep1->is_depart()) {
depart_node *dep1 = static_cast<depart_node*>(repdep1);
if (dep1->target != r && dep1->target->is_loop())
has_else = true;
}
if (repdep1->is_repeat()) {
repeat_node *rep1 = static_cast<repeat_node*>(repdep1);
if (rep1->target != r && rep1->target->is_loop())
has_else = true;
}
if (has_else) {
cf_node *nelse = sh.create_cf(CF_OP_ELSE);
n_if->insert_after(nelse);
if_jump->jump(nelse);
nelse->jump_after(if_pop);
nelse->bc.pop_count = 1;
} else {
if_jump->jump_after(if_pop);
if_jump->bc.pop_count = 1;
}
n_if->expand();
}
for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
I != E; ++I) {
(*I)->expand();
}
r->departs.clear();
assert(r->repeats.empty());
}
void bc_finalizer::run_on(container_node* c) {
node *prev_node = NULL;
for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
node *n = *I;
if (n->is_alu_group()) {
finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
} else {
if (n->is_alu_clause()) {
cf_node *c = static_cast<cf_node*>(n);
if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
if (ctx.stack_workaround_8xx) {
region_node *r = c->get_parent_region();
if (r) {
unsigned ifs, loops;
unsigned elems = get_stack_depth(r, loops, ifs);
unsigned dmod1 = elems % ctx.stack_entry_size;
unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;
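// apply the workaround when the current stack usage is at, or one
// element below, a stack entry boundary (i.e. elems or elems + 1
// is a multiple of ctx.stack_entry_size)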
if (elems && (!dmod1 || !dmod2))
c->flags |= NF_ALU_STACK_WORKAROUND;
}
} else if (ctx.stack_workaround_9xx) {
region_node *r = c->get_parent_region();
if (r) {
unsigned ifs, loops;
get_stack_depth(r, loops, ifs);
if (loops >= 2)
c->flags |= NF_ALU_STACK_WORKAROUND;
}
}
}
last_cf = c;
} else if (n->is_fetch_inst()) {
finalize_fetch(static_cast<fetch_node*>(n));
} else if (n->is_cf_inst()) {
finalize_cf(static_cast<cf_node*>(n));
}
if (n->is_container())
run_on(static_cast<container_node*>(n));
}
prev_node = n;
}
}
void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
alu_node *last = NULL;
alu_group_node *prev_g = NULL;
bool add_nop = false;
if (prev_node && prev_node->is_alu_group()) {
prev_g = static_cast<alu_group_node*>(prev_node);
}
for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
alu_node *n = static_cast<alu_node*>(*I);
unsigned slot = n->bc.slot;
value *d = n->dst.empty() ? NULL : n->dst[0];
if (d && d->is_special_reg()) {
assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() || d->is_lds_oq() || d->is_lds_access());
d = NULL;
}
sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);
if (d) {
assert(fdst.chan() == slot || slot == SLOT_TRANS);
}
if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
n->bc.dst_gpr = fdst.sel();
n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;
if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
n->bc.dst_rel = 1;
update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
} else {
n->bc.dst_rel = 0;
}
n->bc.write_mask = d != NULL;
n->bc.last = 0;
if (n->bc.op_ptr->flags & AF_PRED) {
n->bc.update_pred = (n->dst[1] != NULL);
n->bc.update_exec_mask = (n->dst[2] != NULL);
}
// FIXME handle predication here
n->bc.pred_sel = PRED_SEL_OFF;
update_ngpr(n->bc.dst_gpr);
add_nop |= finalize_alu_src(g, n, prev_g);
last = n;
}
if (add_nop) {
if (sh.get_ctx().r6xx_gpr_index_workaround) {
insert_rv6xx_load_ar_workaround(g);
}
}
last->bc.last = 1;
}
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
vvec &sv = a->src;
bool add_nop = false;
FBC_DUMP(
sblog << "finalize_alu_src: ";
dump::dump_op(a);
sblog << "\n";
);
unsigned si = 0;
for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
value *v = *I;
assert(v);
bc_alu_src &src = a->bc.src[si];
sel_chan sc;
src.rel = 0;
sel_chan gpr;
switch (v->kind) {
case VLK_REL_REG:
sc = v->get_final_gpr();
src.sel = sc.sel();
src.chan = sc.chan();
if (!v->rel->is_const()) {
src.rel = 1;
update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
if (prev && !add_nop) {
for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
alu_node *pn = static_cast<alu_node*>(*pI);
if (pn->bc.dst_gpr == src.sel) {
add_nop = true;
break;
}
}
}
} else
src.rel = 0;
break;
case VLK_REG:
gpr = v->get_final_gpr();
src.sel = gpr.sel();
src.chan = gpr.chan();
update_ngpr(src.sel);
break;
case VLK_TEMP:
src.sel = v->gpr.sel();
src.chan = v->gpr.chan();
update_ngpr(src.sel);
break;
case VLK_UNDEF:
case VLK_CONST: {
literal lv = v->literal_value;
src.chan = 0;
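// common constants map to the hardware's inline sources; anything
// else is placed in one of the ALU group's shared literal slots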
if (lv == literal(0))
src.sel = ALU_SRC_0;
else if (lv == literal(0.5f))
src.sel = ALU_SRC_0_5;
else if (lv == literal(1.0f))
src.sel = ALU_SRC_1;
else if (lv == literal(1))
src.sel = ALU_SRC_1_INT;
else if (lv == literal(-1))
src.sel = ALU_SRC_M_1_INT;
else {
src.sel = ALU_SRC_LITERAL;
src.chan = g->literal_chan(lv);
src.value = lv;
}
break;
}
case VLK_KCACHE: {
cf_node *clause = static_cast<cf_node*>(g->parent);
assert(clause->is_alu_clause());
sel_chan k = translate_kcache(clause, v);
assert(k && "kcache translation failed");
src.sel = k.sel();
src.chan = k.chan();
break;
}
case VLK_SPECIAL_REG:
if (v->select.sel() == SV_LDS_OQA) {
src.sel = ALU_SRC_LDS_OQ_A_POP;
src.chan = 0;
} else if (v->select.sel() == SV_LDS_OQB) {
src.sel = ALU_SRC_LDS_OQ_B_POP;
src.chan = 0;
} else {
src.sel = ALU_SRC_0;
src.chan = 0;
}
break;
case VLK_PARAM:
case VLK_SPECIAL_CONST:
src.sel = v->select.sel();
src.chan = v->select.chan();
break;
default:
assert(!"unknown value kind");
break;
}
if (prev && !add_nop) {
for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
alu_node *pn = static_cast<alu_node*>(*pI);
if (pn->bc.dst_rel) {
if (pn->bc.dst_gpr == src.sel) {
add_nop = true;
break;
}
}
}
}
}
while (si < 3) {
a->bc.src[si++].sel = 0;
}
return add_nop;
}
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
int reg = -1;
for (unsigned chan = 0; chan < 4; ++chan) {
dst.bc.dst_sel[chan] = SEL_MASK;
unsigned sel = SEL_MASK;
value *v = src.src[arg_start + chan];
if (!v || v->is_undef()) {
sel = SEL_MASK;
} else if (v->is_const()) {
literal l = v->literal_value;
if (l == literal(0))
sel = SEL_0;
else if (l == literal(1.0f))
sel = SEL_1;
else {
sblog << "invalid fetch constant operand " << chan << " ";
dump::dump_op(&src);
sblog << "\n";
abort();
}
} else if (v->is_any_gpr()) {
unsigned vreg = v->gpr.sel();
unsigned vchan = v->gpr.chan();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid fetch source operand " << chan << " ";
dump::dump_op(&src);
sblog << "\n";
abort();
}
sel = vchan;
} else {
sblog << "invalid fetch source operand " << chan << " ";
dump::dump_op(&src);
sblog << "\n";
abort();
}
dst.bc.src_sel[chan] = sel;
}
if (reg >= 0)
update_ngpr(reg);
dst.bc.src_gpr = reg >= 0 ? reg : 0;
}
void bc_finalizer::emit_set_grad(fetch_node* f) {
assert(f->src.size() == 12 || f->src.size() == 13);
unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
unsigned arg_start = 0;
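// the gradient values follow the sample arguments in f->src:
// [4..7] feed SET_GRADIENTS_V and [8..11] feed SET_GRADIENTS_H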
for (unsigned op = 0; op < 2; ++op) {
fetch_node *n = sh.create_fetch();
n->bc.set_op(ops[op]);
arg_start += 4;
copy_fetch_src(*n, *f, arg_start);
f->insert_before(n);
}
}
void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
assert(f.src.size() == 8);
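// the offset values occupy f.src[4..7]; f.src[0..3] are the regular
// sample arguments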
fetch_node *n = sh.create_fetch();
n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);
copy_fetch_src(*n, f, 4);
f.insert_before(n);
}
void bc_finalizer::finalize_fetch(fetch_node* f) {
int reg = -1;
// src
unsigned src_count = 4;
unsigned flags = f->bc.op_ptr->flags;
if (flags & FF_VTX) {
src_count = 1;
} else if (flags & FF_GDS) {
src_count = 2;
} else if (flags & FF_USEGRAD) {
emit_set_grad(f);
} else if (flags & FF_USE_TEXTURE_OFFSETS) {
emit_set_texture_offsets(*f);
}
for (unsigned chan = 0; chan < src_count; ++chan) {
unsigned sel = f->bc.src_sel[chan];
if (sel > SEL_W)
continue;
value *v = f->src[chan];
if (v->is_undef()) {
sel = SEL_MASK;
} else if (v->is_const()) {
literal l = v->literal_value;
if (l == literal(0))
sel = SEL_0;
else if (l == literal(1.0f))
sel = SEL_1;
else {
sblog << "invalid fetch constant operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
} else if (v->is_any_gpr()) {
unsigned vreg = v->gpr.sel();
unsigned vchan = v->gpr.chan();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid fetch source operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
sel = vchan;
} else {
sblog << "invalid fetch source operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
f->bc.src_sel[chan] = sel;
}
if (reg >= 0)
update_ngpr(reg);
f->bc.src_gpr = reg >= 0 ? reg : 0;
// dst
reg = -1;
unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};
for (unsigned chan = 0; chan < 4; ++chan) {
unsigned sel = f->bc.dst_sel[chan];
if (sel == SEL_MASK)
continue;
value *v = f->dst[chan];
if (!v)
continue;
if (v->is_any_gpr()) {
unsigned vreg = v->gpr.sel();
unsigned vchan = v->gpr.chan();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid fetch dst operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
dst_swz[vchan] = sel;
} else {
sblog << "invalid fetch dst operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
}
for (unsigned i = 0; i < 4; ++i)
f->bc.dst_sel[i] = dst_swz[i];
if ((flags & FF_GDS) && reg == -1) {
// GDS ops may have no GPR destination at all
f->bc.dst_sel[0] = SEL_MASK;
f->bc.dst_gpr = 0;
return;
}
assert(reg >= 0);
if (reg >= 0)
update_ngpr(reg);
f->bc.dst_gpr = reg >= 0 ? reg : 0;
}
void bc_finalizer::finalize_cf(cf_node* c) {
unsigned flags = c->bc.op_ptr->flags;
c->bc.end_of_program = 0;
last_cf = c;
if (flags & CF_EXP) {
c->bc.set_op(CF_OP_EXPORT);
last_export[c->bc.type] = c;
int reg = -1;
for (unsigned chan = 0; chan < 4; ++chan) {
unsigned sel = c->bc.sel[chan];
if (sel > SEL_W)
continue;
value *v = c->src[chan];
if (v->is_undef()) {
sel = SEL_MASK;
} else if (v->is_const()) {
literal l = v->literal_value;
if (l == literal(0))
sel = SEL_0;
else if (l == literal(1.0f))
sel = SEL_1;
else {
sblog << "invalid export constant operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
} else if (v->is_any_gpr()) {
unsigned vreg = v->gpr.sel();
unsigned vchan = v->gpr.chan();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid export source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
sel = vchan;
} else {
sblog << "invalid export source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
c->bc.sel[chan] = sel;
}
if (reg >= 0)
update_ngpr(reg);
c->bc.rw_gpr = reg >= 0 ? reg : 0;
} else if (flags & CF_MEM) {
int reg = -1;
unsigned mask = 0;
for (unsigned chan = 0; chan < 4; ++chan) {
value *v = c->src[chan];
if (!v || v->is_undef())
continue;
if (!v->is_any_gpr() || v->gpr.chan() != chan) {
sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
unsigned vreg = v->gpr.sel();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
mask |= (1 << chan);
}
if (reg >= 0)
update_ngpr(reg);
c->bc.rw_gpr = reg >= 0 ? reg : 0;
c->bc.comp_mask = mask;
if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {
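// bc.type & 1 selects the indexed export variants; the second group
// of four source values supplies the index GPR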
reg = -1;
for (unsigned chan = 0; chan < 4; ++chan) {
value *v = c->src[4 + chan];
if (!v || v->is_undef())
continue;
if (!v->is_any_gpr() || v->gpr.chan() != chan) {
sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
unsigned vreg = v->gpr.sel();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
}
assert(reg >= 0);
if (reg >= 0)
update_ngpr(reg);
c->bc.index_gpr = reg >= 0 ? reg : 0;
}
} else if (flags & CF_CALL) {
update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
}
}
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
unsigned sel = v->select.kcache_sel();
unsigned bank = v->select.kcache_bank();
unsigned chan = v->select.chan();
static const unsigned kc_base[] = {128, 160, 256, 288};
sel &= 4095;
unsigned line = sel >> 4;
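// each kcache line covers 16 constants; a locked line is remapped into
// the ALU constant address space starting at kc_base[k].
// e.g. sel == 0x23 lies in line 2; if that line is locked in kcache
// slot 1 at kc.addr == 2, the final select is 160 + (0x23 - 0x20) == 163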
for (unsigned k = 0; k < 4; ++k) {
bc_kcache &kc = alu->bc.kc[k];
if (kc.mode == KC_LOCK_NONE)
break;
if (kc.bank == bank && (kc.addr == line ||
(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {
sel = kc_base[k] + (sel - (kc.addr << 4));
return sel_chan(sel, chan);
}
}
assert(!"kcache translation error");
return 0;
}
void bc_finalizer::update_ngpr(unsigned gpr) {
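// GPRs from MAX_GPR - ctx.alu_temp_gprs upwards are reserved as ALU
// temporaries and are never counted in ngpr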
if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
ngpr = gpr + 1;
}
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
unsigned &ifs, unsigned add) {
unsigned stack_elements = add;
bool has_non_wqm_push = (add != 0);
region_node *r = n->is_region() ?
static_cast<region_node*>(n) : n->get_parent_region();
loops = 0;
ifs = 0;
while (r) {
if (r->is_loop()) {
++loops;
} else {
++ifs;
has_non_wqm_push = true;
}
r = r->get_parent_region();
}
stack_elements += (loops * ctx.stack_entry_size) + ifs;
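// e.g. two nested loops inside one if frame need
// 2 * ctx.stack_entry_size + 1 elements, before the per-chip
// reservations below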
// reserve additional elements in some cases
switch (ctx.hw_class) {
case HW_CLASS_R600:
case HW_CLASS_R700:
// If any non-WQM push is invoked, 2 elements should be reserved.
if (has_non_wqm_push)
stack_elements += 2;
break;
case HW_CLASS_CAYMAN:
// If any stack operation is invoked, 2 elements should be reserved.
if (stack_elements)
stack_elements += 2;
break;
case HW_CLASS_EVERGREEN:
// According to the docs we need to reserve 1 element for each of the
// following cases:
// 1) non-WQM push is used with WQM/LOOP frames on stack
// 2) ALU_ELSE_AFTER is used at the point of max stack usage
// NOTE:
// The conditions above were found to be insufficient; there are other
// cases where stack space must be reserved as well, so we always
// reserve 1 stack element whenever a non-WQM push is on the stack.
// Condition 2 is ignored for now because we don't use this instruction.
if (has_non_wqm_push)
++stack_elements;
break;
case HW_CLASS_UNKNOWN:
assert(0);
}
return stack_elements;
}
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
unsigned loops = 0;
unsigned ifs = 0;
unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;
// XXX all chips expect this value to be computed using 4 as entry size,
// not the real entry size
unsigned stack_entries = (elems + 3) >> 2;
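// e.g. elems == 5 gives (5 + 3) >> 2 == 2 stack entries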
if (nstack < stack_entries)
nstack = stack_entries;
}
void bc_finalizer::cf_peephole() {
if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
I = N) {
N = I; ++N;
cf_node *c = static_cast<cf_node*>(*I);
if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
(c->flags & NF_ALU_STACK_WORKAROUND)) {
cf_node *push = sh.create_cf(CF_OP_PUSH);
c->insert_before(push);
push->jump(c);
c->bc.set_op(CF_OP_ALU);
}
}
}
for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
I = N) {
N = I; ++N;
cf_node *c = static_cast<cf_node*>(*I);
if (c->jump_after_target) {
if (c->jump_target->next == NULL) {
c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
if (last_cf == c->jump_target)
last_cf = static_cast<cf_node*>(c->jump_target->next);
}
c->jump_target = static_cast<cf_node*>(c->jump_target->next);
c->jump_after_target = false;
}
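// fold a POP that immediately follows a plain ALU clause into the
// clause itself (CF_OP_ALU_POP_AFTER), saving one CF instruction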
if (c->is_cf_op(CF_OP_POP)) {
node *p = c->prev;
if (p->is_alu_clause()) {
cf_node *a = static_cast<cf_node*>(p);
if (a->bc.op == CF_OP_ALU) {
a->bc.set_op(CF_OP_ALU_POP_AFTER);
c->remove();
}
}
} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
// if JUMP is immediately followed by its jump target,
// then JUMP is useless and we can eliminate it
c->remove();
}
}
}
} // namespace r600_sb