/* * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Vadim Girlin */ #ifndef SB_SCHED_H_ #define SB_SCHED_H_ namespace r600_sb { typedef sb_map<node*, unsigned> uc_map; // resource trackers for scheduler // rp = read port // uc = use count typedef sb_set<unsigned> kc_lines; class rp_kcache_tracker { unsigned rp[4]; unsigned uc[4]; const unsigned sel_count; unsigned kc_sel(sel_chan r) { return sel_count == 4 ? (unsigned)r : ((r - 1) >> 1) + 1; } public: rp_kcache_tracker(shader &sh); bool try_reserve(node *n); void unreserve(node *n); bool try_reserve(sel_chan r); void unreserve(sel_chan r); void reset(); unsigned num_sels() { return !!rp[0] + !!rp[1] + !!rp[2] + !!rp[3]; } unsigned get_lines(kc_lines &lines); }; class literal_tracker { literal lt[4]; unsigned uc[4]; public: literal_tracker() : lt(), uc() {} bool try_reserve(alu_node *n); void unreserve(alu_node *n); bool try_reserve(literal l); void unreserve(literal l); void reset(); unsigned count() { return !!uc[0] + !!uc[1] + !!uc[2] + !!uc[3]; } void init_group_literals(alu_group_node *g); }; class rp_gpr_tracker { // rp[cycle][elem] unsigned rp[3][4]; unsigned uc[3][4]; public: rp_gpr_tracker() : rp(), uc() {} bool try_reserve(alu_node *n); void unreserve(alu_node *n); bool try_reserve(unsigned cycle, unsigned sel, unsigned chan); void unreserve(unsigned cycle, unsigned sel, unsigned chan); void reset(); void dump(); }; class alu_group_tracker { shader &sh; rp_kcache_tracker kc; rp_gpr_tracker gpr; literal_tracker lt; alu_node * slots[5]; unsigned available_slots; unsigned max_slots; typedef std::map<value*, unsigned> value_index_map; value_index_map vmap; bool has_mova; bool uses_ar; bool has_predset; bool has_kill; bool updates_exec_mask; bool consumes_lds_oqa; bool produces_lds_oqa; unsigned chan_count[4]; // param index + 1 (0 means that group doesn't refer to Params) // we can't use more than one param index in a group unsigned interp_param; unsigned next_id; node_vec packed_ops; void assign_slot(unsigned slot, alu_node *n); public: alu_group_tracker(shader &sh); // FIXME use fast bs correctness check (values for same chan <= 3) ?? bool try_reserve(alu_node *n); bool try_reserve(alu_packed_node *p); void reinit(); void reset(bool keep_packed = false); sel_chan get_value_id(value *v); void update_flags(alu_node *n); alu_node* slot(unsigned i) { return slots[i]; } unsigned used_slots() { return (~available_slots) & ((1 << max_slots) - 1); } unsigned inst_count() { return __builtin_popcount(used_slots()); } unsigned literal_count() { return lt.count(); } unsigned literal_slot_count() { return (literal_count() + 1) >> 1; }; unsigned slot_count() { return inst_count() + literal_slot_count(); } bool get_consumes_lds_oqa() { return consumes_lds_oqa; } bool get_produces_lds_oqa() { return produces_lds_oqa; } alu_group_node* emit(); rp_kcache_tracker& kcache() { return kc; } bool has_update_exec_mask() { return updates_exec_mask; } unsigned avail_slots() { return available_slots; } void discard_all_slots(container_node &removed_nodes); void discard_slots(unsigned slot_mask, container_node &removed_nodes); bool has_ar_load() { return has_mova; } }; class alu_kcache_tracker { bc_kcache kc[4]; sb_set<unsigned> lines; unsigned max_kcs; public: alu_kcache_tracker(sb_hw_class hc) : kc(), lines(), max_kcs(hc >= HW_CLASS_EVERGREEN ? 4 : 2) {} void reset(); bool try_reserve(alu_group_tracker >); bool update_kc(); void init_clause(bc_cf &bc) { memcpy(bc.kc, kc, sizeof(kc)); } }; class alu_clause_tracker { shader &sh; alu_kcache_tracker kt; unsigned slot_count; alu_group_tracker grp0; alu_group_tracker grp1; unsigned group; cf_node *clause; bool push_exec_mask; unsigned outstanding_lds_oqa_reads; public: container_node conflict_nodes; // current values of AR and PR registers that we have to preload // till the end of clause (in fact, beginning, because we're scheduling // bottom-up) value *current_ar; value *current_pr; // current values of CF_IDX registers that need preloading value *current_idx[2]; alu_clause_tracker(shader &sh); void reset(); // current group alu_group_tracker& grp() { return group ? grp1 : grp0; } // previous group alu_group_tracker& prev_grp() { return group ? grp0 : grp1; } void emit_group(); void emit_clause(container_node *c); bool check_clause_limits(); void new_group(); bool is_empty(); alu_node* create_ar_load(value *v, chan_select ar_channel); void discard_current_group(); unsigned total_slots() { return slot_count; } }; class post_scheduler : public pass { container_node ready, ready_copies; // alu only container_node pending, bb_pending; bb_node *cur_bb; val_set live; // values live at the end of the alu clause uc_map ucm; alu_clause_tracker alu; typedef std::map<sel_chan, value*> rv_map; rv_map regmap, prev_regmap; val_set cleared_interf; void emit_index_registers(); public: post_scheduler(shader &sh) : pass(sh), ready(), ready_copies(), pending(), cur_bb(), live(), ucm(), alu(sh), regmap(), cleared_interf() {} virtual int run(); bool run_on(container_node *n); bool schedule_bb(bb_node *bb); void load_index_register(value *v, unsigned idx); void process_fetch(container_node *c); bool process_alu(container_node *c); bool schedule_alu(container_node *c); bool prepare_alu_group(); void release_op(node *n); void release_src_values(node *n); void release_src_vec(vvec &vv, bool src); void release_src_val(value *v); void init_uc_val(container_node *c, value *v); void init_uc_vec(container_node *c, vvec &vv, bool src); unsigned init_ucm(container_node *c, node *n); void init_regmap(); bool check_interferences(); unsigned try_add_instruction(node *n); bool check_copy(node *n); void dump_group(alu_group_tracker &rt); bool unmap_dst(alu_node *n); bool unmap_dst_val(value *d); bool map_src(alu_node *n); bool map_src_vec(vvec &vv, bool src); bool map_src_val(value *v); bool recolor_local(value *v); void update_local_interferences(); void update_live_src_vec(vvec &vv, val_set *born, bool src); void update_live_dst_vec(vvec &vv); void update_live(node *n, val_set *born); void process_group(); void set_color_local_val(value *v, sel_chan color); void set_color_local(value *v, sel_chan color); void add_interferences(value *v, sb_bitset &rb, val_set &vs); void init_globals(val_set &s, bool prealloc); void recolor_locals(); void dump_regmap(); void emit_load_ar(); void emit_clause(); void process_ready_copies(); }; } // namespace r600_sb #endif /* SB_SCHED_H_ */