/*
* Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Vadim Girlin
*/
#define RA_DEBUG 0
#if RA_DEBUG
#define RA_DUMP(q) do { q } while (0)
#else
#define RA_DUMP(q)
#endif
#include "sb_shader.h"
#include "sb_pass.h"
namespace r600_sb {
int ra_coalesce::run() {
return sh.coal.run();
}
void coalescer::add_edge(value* a, value* b, unsigned cost) {
assert(a->is_sgpr() && b->is_sgpr());
edges.insert(new ra_edge(a,b, cost));
}
void coalescer::create_chunk(value *v) {
assert(v->is_sgpr());
ra_chunk *c = new ra_chunk();
c->values.push_back(v);
if (v->is_chan_pinned())
c->flags |= RCF_PIN_CHAN;
if (v->is_reg_pinned()) {
c->flags |= RCF_PIN_REG;
}
c->pin = v->pin_gpr;
RA_DUMP(
sblog << "create_chunk: ";
dump_chunk(c);
);
all_chunks.push_back(c);
v->chunk = c;
}
void coalescer::unify_chunks(ra_edge *e) {
ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
RA_DUMP(
sblog << "unify_chunks: ";
dump_chunk(c1);
dump_chunk(c2);
);
if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
c1->flags |= RCF_PIN_CHAN;
c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
}
if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
c1->flags |= RCF_PIN_REG;
c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
}
c1->values.reserve(c1->values.size() + c2->values.size());
for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
++I) {
(*I)->chunk = c1;
c1->values.push_back(*I);
}
chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
assert(F != all_chunks.end());
all_chunks.erase(F);
c1->cost += c2->cost + e->cost;
delete c2;
}
bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
unsigned pin_flags = (c1->flags & c2->flags) &
(RCF_PIN_CHAN | RCF_PIN_REG);
if ((pin_flags & RCF_PIN_CHAN) &&
c1->pin.chan() != c2->pin.chan())
return true;
if ((pin_flags & RCF_PIN_REG) &&
c1->pin.sel() != c2->pin.sel())
return true;
for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
++I) {
value *v1 = *I;
for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
++I) {
value *v2 = *I;
if (!v1->v_equal(v2) && v1->interferences.contains(v2))
return true;
}
}
return false;
}
void coalescer::build_chunks() {
for (edge_queue::iterator I = edges.begin(), E = edges.end();
I != E; ++I) {
ra_edge *e = *I;
if (!e->a->chunk)
create_chunk(e->a);
if (!e->b->chunk)
create_chunk(e->b);
ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
if (c1 == c2) {
c1->cost += e->cost;
} else if (!chunks_interference(c1, c2))
unify_chunks(e);
}
}
ra_constraint* coalescer::create_constraint(constraint_kind kind) {
ra_constraint *c = new ra_constraint(kind);
all_constraints.push_back(c);
return c;
}
void coalescer::dump_edges() {
sblog << "######## affinity edges\n";
for (edge_queue::iterator I = edges.begin(), E = edges.end();
I != E; ++I) {
ra_edge* e = *I;
sblog << " ra_edge ";
dump::dump_val(e->a);
sblog << " <-> ";
dump::dump_val(e->b);
sblog << " cost = " << e->cost << "\n";
}
}
void coalescer::dump_chunks() {
sblog << "######## chunks\n";
for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
I != E; ++I) {
ra_chunk* c = *I;
dump_chunk(c);
}
}
void coalescer::dump_constraint_queue() {
sblog << "######## constraints\n";
for (constraint_queue::iterator I = constraints.begin(),
E = constraints.end(); I != E; ++I) {
ra_constraint* c = *I;
dump_constraint(c);
}
}
void coalescer::dump_chunk(ra_chunk* c) {
sblog << " ra_chunk cost = " << c->cost << " : ";
dump::dump_vec(c->values);
if (c->flags & RCF_PIN_REG)
sblog << " REG = " << c->pin.sel();
if (c->flags & RCF_PIN_CHAN)
sblog << " CHAN = " << c->pin.chan();
sblog << (c->flags & RCF_GLOBAL ? " GLOBAL" : "");
sblog << "\n";
}
void coalescer::dump_constraint(ra_constraint* c) {
sblog << " ra_constraint: ";
switch (c->kind) {
case CK_PACKED_BS: sblog << "PACKED_BS"; break;
case CK_PHI: sblog << "PHI"; break;
case CK_SAME_REG: sblog << "SAME_REG"; break;
default: sblog << "UNKNOWN_KIND"; assert(0); break;
}
sblog << " cost = " << c->cost << " : ";
dump::dump_vec(c->values);
sblog << "\n";
}
void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
++I) {
value *v = *I;
s.add_set(v->interferences);
}
s.remove_vec(c->values);
}
void coalescer::build_chunk_queue() {
for (chunk_vec::iterator I = all_chunks.begin(),
E = all_chunks.end(); I != E; ++I) {
ra_chunk *c = *I;
if (!c->is_fixed())
chunks.insert(c);
}
}
void coalescer::build_constraint_queue() {
for (constraint_vec::iterator I = all_constraints.begin(),
E = all_constraints.end(); I != E; ++I) {
ra_constraint *c = *I;
unsigned cost = 0;
if (c->values.empty() || !c->values.front()->is_sgpr())
continue;
if (c->kind != CK_SAME_REG)
continue;
for (vvec::iterator I = c->values.begin(), E = c->values.end();
I != E; ++I) {
value *v = *I;
if (!v->chunk)
create_chunk(v);
else
cost += v->chunk->cost;
}
c->cost = cost;
constraints.insert(c);
}
}
void coalescer::color_chunks() {
for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
I != E; ++I) {
ra_chunk *c = *I;
if (c->is_fixed() || c->values.size() == 1)
continue;
sb_bitset rb;
val_set interf;
get_chunk_interferences(c, interf);
RA_DUMP(
sblog << "color_chunks: ";
dump_chunk(c);
sblog << "\n interferences: ";
dump::dump_set(sh,interf);
sblog << "\n";
);
init_reg_bitset(rb, interf);
unsigned pass = c->is_reg_pinned() ? 0 : 1;
unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;
unsigned color = 0;
while (pass < 2) {
unsigned rs, re;
if (pass == 0) {
rs = c->pin.sel();
re = rs + 1;
} else {
rs = 0;
re = sh.num_nontemp_gpr();
}
for (unsigned reg = rs; reg < re; ++reg) {
for (unsigned chan = cs; chan < ce; ++chan) {
unsigned bit = sel_chan(reg, chan);
if (bit >= rb.size() || !rb.get(bit)) {
color = bit;
break;
}
}
if (color)
break;
}
if (color)
break;
++pass;
}
assert(color);
color_chunk(c, color);
}
}
void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
value *v = *I;
if (!v->is_any_gpr())
continue;
unsigned gpr = v->get_final_gpr();
if (!gpr)
continue;
if (gpr) {
if (gpr >= bs.size())
bs.resize(gpr + 64);
bs.set(gpr, 1);
}
}
}
void coalescer::color_chunk(ra_chunk *c, sel_chan color) {
vvec vv = c->values;
for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
++I) {
value *v = *I;
if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
detach_value(v);
continue;
}
if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
detach_value(v);
continue;
}
v->gpr = color;
if (v->constraint && v->constraint->kind == CK_PHI)
v->fix();
RA_DUMP(
sblog << " assigned " << color << " to ";
dump::dump_val(v);
sblog << "\n";
);
}
c->pin = color;
if (c->is_reg_pinned()) {
c->fix();
}
}
coalescer::~coalescer() {
// FIXME use pool allocator ??
for (constraint_vec::iterator I = all_constraints.begin(),
E = all_constraints.end(); I != E; ++I) {
delete (*I);
}
for (chunk_vec::iterator I = all_chunks.begin(),
E = all_chunks.end(); I != E; ++I) {
delete (*I);
}
for (edge_queue::iterator I = edges.begin(), E = edges.end();
I != E; ++I) {
delete (*I);
}
}
int coalescer::run() {
int r;
RA_DUMP( dump_edges(); );
build_chunks();
RA_DUMP( dump_chunks(); );
build_constraint_queue();
RA_DUMP( dump_constraint_queue(); );
if ((r = color_constraints()))
return r;
build_chunk_queue();
color_chunks();
return 0;
}
void coalescer::color_phi_constraint(ra_constraint* c) {
}
ra_chunk* coalescer::detach_value(value *v) {
vvec::iterator F = std::find(v->chunk->values.begin(),
v->chunk->values.end(), v);
assert(F != v->chunk->values.end());
v->chunk->values.erase(F);
create_chunk(v);
if (v->is_reg_pinned()) {
v->chunk->fix();
}
RA_DUMP(
sblog << " detached : ";
dump_chunk(v->chunk);
);
return v->chunk;
}
int coalescer::color_reg_constraint(ra_constraint *c) {
unsigned k, cnt = c->values.size();
vvec & cv = c->values;
ra_chunk *ch[4];
unsigned swz[4] = {0, 1, 2, 3};
val_set interf[4];
sb_bitset rb[4];
bool reg_pinned = false;
unsigned pin_reg = ~0;
unsigned chan_mask = 0;
k = 0;
for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
value *v = *I;
if (!v->chunk)
create_chunk(v);
ch[k] = v->chunk;
if (v->chunk->is_chan_pinned()) {
unsigned chan = 1 << v->chunk->pin.chan();
if (chan & chan_mask) { // channel already in use
ch[k] = detach_value(v);
assert(!ch[k]->is_chan_pinned());
} else {
chan_mask |= chan;
}
}
if (v->chunk->is_reg_pinned()) {
if (!reg_pinned) {
reg_pinned = true;
pin_reg = v->chunk->pin.sel();
}
}
get_chunk_interferences(ch[k], interf[k]);
init_reg_bitset(rb[k], interf[k]);
}
unsigned start_reg, end_reg;
start_reg = 0;
end_reg = sh.num_nontemp_gpr();
unsigned min_reg = end_reg;
unsigned min_swz[4];
unsigned i, pass = reg_pinned ? 0 : 1;
bool done = false;
while (pass < 2) {
unsigned rs, re;
if (pass == 0) {
re = pin_reg + 1;
rs = pin_reg;
} else {
re = end_reg;
rs = start_reg;
}
min_reg = re;
// cycle on swizzle combinations
do {
for (i = 0; i < cnt; ++i) {
if (ch[i]->flags & RCF_PIN_CHAN)
if (ch[i]->pin.chan() != swz[i])
break;
}
if (i != cnt)
continue;
// looking for minimal reg number such that the constrained chunks
// may be colored with the current swizzle combination
for (unsigned reg = rs; reg < min_reg; ++reg) {
for (i = 0; i < cnt; ++i) {
unsigned bit = sel_chan(reg, swz[i]);
if (bit < rb[i].size() && rb[i].get(bit))
break;
}
if (i == cnt) {
done = true;
min_reg = reg;
std::copy(swz, swz + 4, min_swz);
break;
}
}
if (pass == 0 && done)
break;
} while (std::next_permutation(swz, swz + 4));
if (!done && pass) {
sblog << "sb: ra_coalesce - out of registers\n";
return -1;
}
if (pass == 0 && done)
break;
++pass;
};
assert(done);
RA_DUMP(
sblog << "min reg = " << min_reg << " min_swz = "
<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
);
for (i = 0; i < cnt; ++i) {
sel_chan color(min_reg, min_swz[i]);
ra_chunk *cc = ch[i];
if (cc->is_fixed()) {
if (cc->pin != color)
cc = detach_value(cv[i]);
else
continue;
}
color_chunk(cc, color);
cc->fix();
cc->set_prealloc();
}
return 0;
}
int coalescer::color_constraints() {
int r;
for (constraint_queue::iterator I = constraints.begin(),
E = constraints.end(); I != E; ++I) {
ra_constraint *c = *I;
RA_DUMP(
sblog << "color_constraints: ";
dump_constraint(c);
);
if (c->kind == CK_SAME_REG) {
if ((r = color_reg_constraint(c)))
return r;
} else if (c->kind == CK_PHI)
color_phi_constraint(c);
}
return 0;
}
} // namespace r600_sb