/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#ifndef SB_SHADER_H_
#define SB_SHADER_H_

#include <list>
#include <string>
#include <map>

#include "sb_ir.h"
#include "sb_expr.h"

namespace r600_sb {

struct shader_input {
	unsigned comp_mask;
	unsigned preloaded;
};

struct error_info {
	node *n;
	unsigned arg_index;
	std::string message;
};

typedef std::multimap<node*, error_info> error_map;

class sb_context;

typedef std::vector<shader_input> inputs_vec;
typedef std::vector<gpr_array*> gpr_array_vec;

struct ra_edge {
	value *a, *b;
	unsigned cost;

	ra_edge(value *a, value *b, unsigned cost) : a(a), b(b), cost(cost) {}
};

enum chunk_flags {
	RCF_GLOBAL = (1 << 0),
	RCF_PIN_CHAN = (1 << 1),
	RCF_PIN_REG = (1 << 2),

	RCF_FIXED = (1 << 3),

	RCF_PREALLOC = (1 << 4)
};

enum dce_flags {
	DF_REMOVE_DEAD  = (1 << 0),
	DF_REMOVE_UNUSED = (1 << 1),
	DF_EXPAND = (1 << 2),
};

inline dce_flags operator |(dce_flags l, dce_flags r) {
	return (dce_flags)((unsigned)l|(unsigned)r);
}

inline chunk_flags operator |(chunk_flags l, chunk_flags r) {
	return (chunk_flags)((unsigned)l|(unsigned)r);
}
inline chunk_flags& operator |=(chunk_flags &l, chunk_flags r) {
	l = l | r;
	return l;
}

inline chunk_flags& operator &=(chunk_flags &l, chunk_flags r) {
	l = (chunk_flags)((unsigned)l & (unsigned)r);
	return l;
}

inline chunk_flags operator ~(chunk_flags r) {
	return (chunk_flags)~(unsigned)r;
}

struct ra_chunk {
	vvec values;
	chunk_flags flags;
	unsigned cost;
	sel_chan pin;

	ra_chunk() : values(), flags(), cost(), pin() {}

	bool is_fixed() { return flags & RCF_FIXED; }
	void fix() { flags |= RCF_FIXED; }

	bool is_global() { return flags & RCF_GLOBAL; }
	void set_global() {	flags |= RCF_GLOBAL; }

	bool is_reg_pinned() { return flags & RCF_PIN_REG; }
	bool is_chan_pinned() { return flags & RCF_PIN_CHAN; }

	bool is_prealloc() { return flags & RCF_PREALLOC; }
	void set_prealloc() { flags |= RCF_PREALLOC; }
};

typedef std::vector<ra_chunk*> chunk_vector;

class ra_constraint {
public:
	ra_constraint(constraint_kind kind) : kind(kind), cost(0) {}

	constraint_kind kind;
	vvec values;
	unsigned cost;

	void update_values();
	bool check();
};

typedef std::vector<ra_constraint*> constraint_vec;
typedef std::vector<ra_chunk*> chunk_vec;

// priority queue
// FIXME use something more suitale or custom class ?

template <class T>
struct cost_compare {
	bool operator ()(const T& t1, const T& t2) {
		return t1->cost > t2->cost;
	}
};

template <class T, class Comp>
class queue {
	typedef std::vector<T> container;
	container cont;

public:
	queue() : cont() {}

	typedef typename container::iterator iterator;

	iterator begin() { return cont.begin(); }
	iterator end() { return cont.end(); }

	iterator insert(const T& t) {
		iterator I = std::upper_bound(begin(), end(), t, Comp());
		if (I == end())
			cont.push_back(t);
		else
			cont.insert(I, t);

		return I;
	}

	void erase(const T& t) {
		std::pair<iterator, iterator> R =
				std::equal_range(begin(), end(), t, Comp());
		iterator F = std::find(R.first, R.second, t);
		if (F != R.second)
			cont.erase(F);
	}
};

typedef queue<ra_chunk*, cost_compare<ra_chunk*> > chunk_queue;
typedef queue<ra_edge*, cost_compare<ra_edge*> > edge_queue;
typedef queue<ra_constraint*, cost_compare<ra_constraint*> > constraint_queue;

typedef std::set<ra_chunk*> chunk_set;

class shader;

class coalescer {

	shader &sh;

	edge_queue edges;
	chunk_queue chunks;
	constraint_queue constraints;

	constraint_vec all_constraints;
	chunk_vec all_chunks;

public:

	coalescer(shader &sh) : sh(sh), edges(), chunks(), constraints() {}
	~coalescer();

	int run();

	void add_edge(value *a, value *b, unsigned cost);
	void build_chunks();
	void build_constraint_queue();
	void build_chunk_queue();
	int color_constraints();
	void color_chunks();

	ra_constraint* create_constraint(constraint_kind kind);

	enum ac_cost {
		phi_cost = 10000,
		copy_cost = 1,
	};

	void dump_edges();
	void dump_chunks();
	void dump_constraint_queue();

	static void dump_chunk(ra_chunk *c);
	static void dump_constraint(ra_constraint* c);

	void get_chunk_interferences(ra_chunk *c, val_set &s);

private:

	void create_chunk(value *v);
	void unify_chunks(ra_edge *e);
	bool chunks_interference(ra_chunk *c1, ra_chunk *c2);

	int color_reg_constraint(ra_constraint *c);
	void color_phi_constraint(ra_constraint *c);


	void init_reg_bitset(sb_bitset &bs, val_set &vs);

	void color_chunk(ra_chunk *c, sel_chan color);

	ra_chunk* detach_value(value *v);
};



class shader {

	sb_context &ctx;

	typedef sb_map<uint32_t, value*> value_map;
	value_map reg_values;

	// read-only values
	value_map const_values; // immediate constants key -const  value (uint32_t)
	value_map special_ro_values; //  key - hw alu_sel & chan
	value_map kcache_values;

	gpr_array_vec gpr_arrays;

	unsigned next_temp_value_index;

	unsigned prep_regs_count;

	value* pred_sels[2];

	regions_vec regions;
	inputs_vec inputs;

	value *undef;

	sb_value_pool val_pool;
	sb_pool pool;

	std::vector<node*> all_nodes;

public:
	shader_stats src_stats, opt_stats;

	error_map errors;

	bool optimized;

	unsigned id;

	coalescer coal;

	static const unsigned temp_regid_offset = 512;

	bbs_vec bbs;

	const shader_target target;

	value_table vt;
	expr_handler ex;

	container_node *root;

	bool compute_interferences;

	bool has_alu_predication;
	bool uses_gradients;

	bool safe_math;

	unsigned ngpr, nstack;

	unsigned dce_flags;

	shader(sb_context &sctx, shader_target t, unsigned id);

	~shader();

	sb_context &get_ctx() const { return ctx; }

	value* get_const_value(const literal & v);
	value* get_special_value(unsigned sv_id, unsigned version = 0);
	value* create_temp_value();
	value* get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
                         unsigned version = 0);


	value* get_special_ro_value(unsigned sel);
	value* get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode);

	value* get_value_version(value* v, unsigned ver);

	void init();
	void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src);

	void dump_ir();

	void add_gpr_array(unsigned gpr_start, unsigned gpr_count,
	                   unsigned comp_mask);

	value* get_pred_sel(int sel);
	bool assign_slot(alu_node *n, alu_node *slots[5]);

	gpr_array* get_gpr_array(unsigned reg, unsigned chan);

	void add_input(unsigned gpr, bool preloaded = false,
	               unsigned comp_mask = 0xF);

	const inputs_vec & get_inputs() {return inputs; }

	regions_vec & get_regions() { return regions; }

	void init_call_fs(cf_node *cf);

	value *get_undef_value();
	void set_undef(val_set &s);

	node* create_node(node_type nt, node_subtype nst,
	                  node_flags flags = NF_EMPTY);
	alu_node* create_alu();
	alu_group_node* create_alu_group();
	alu_packed_node* create_alu_packed();
	cf_node* create_cf();
	cf_node* create_cf(unsigned op);
	fetch_node* create_fetch();
	region_node* create_region();
	depart_node* create_depart(region_node *target);
	repeat_node* create_repeat(region_node *target);
	container_node* create_container(node_type nt = NT_LIST,
	                                 node_subtype nst = NST_LIST,
	                                 node_flags flags = NF_EMPTY);
	if_node* create_if();
	bb_node* create_bb(unsigned id, unsigned loop_level);

	value* get_value_by_uid(unsigned id) { return val_pool[id - 1]; }

	cf_node* create_clause(node_subtype nst);

	void create_bbs();
	void expand_bbs();

	alu_node* create_mov(value* dst, value* src);
	alu_node* create_copy_mov(value *dst, value *src, unsigned affcost = 1);

	const char * get_shader_target_name();

	std::string get_full_target_name();

	void create_bbs(container_node* n, bbs_vec &bbs, int loop_level = 0);
	void expand_bbs(bbs_vec &bbs);

	sched_queue_id get_queue_id(node* n);

	void simplify_dep_rep(node *dr);

	unsigned first_temp_gpr();
	unsigned num_nontemp_gpr();

	gpr_array_vec& arrays() { return gpr_arrays; }

	void set_uses_kill();

	void fill_array_values(gpr_array *a, vvec &vv);

	alu_node* clone(alu_node *n);

	sb_value_pool& get_value_pool() { return val_pool; }

	void collect_stats(bool opt);

private:
	value* create_value(value_kind k, sel_chan regid, unsigned ver);
	value* get_value(value_kind kind, sel_chan id,
	                         unsigned version = 0);
	value* get_ro_value(value_map &vm, value_kind vk, unsigned key);
};

}

#endif /* SHADER_H_ */