/*-------------------------------------------------------------------------
 * drawElements Quality Program Reference Renderer
 * -----------------------------------------------
 *
 * Copyright 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Reference renderer interface.
 *//*--------------------------------------------------------------------*/

#include "rrRenderer.hpp"
#include "tcuVectorUtil.hpp"
#include "tcuTextureUtil.hpp"
#include "tcuFloat.hpp"
#include "rrPrimitiveAssembler.hpp"
#include "rrFragmentOperations.hpp"
#include "rrRasterizer.hpp"
#include "deMemory.h"

#include <set>

namespace rr
{
namespace
{

typedef double ClipFloat; // floating point type used in clipping

typedef tcu::Vector<ClipFloat, 4> ClipVec4;

struct RasterizationInternalBuffers
{
	std::vector<FragmentPacket>		fragmentPackets;
	std::vector<GenericVec4>		shaderOutputs;
	std::vector<Fragment>			shadedFragments;
	float*							fragmentDepthBuffer;
};

deUint32 readIndexArray (const IndexType type, const void* ptr, size_t ndx)
{
	switch (type)
	{
		case INDEXTYPE_UINT8:
			return ((const deUint8*)ptr)[ndx];

		case INDEXTYPE_UINT16:
		{
			deUint16 retVal;
			deMemcpy(&retVal, (const deUint8*)ptr + ndx * sizeof(deUint16), sizeof(deUint16));

			return retVal;
		}

		case INDEXTYPE_UINT32:
		{
			deUint32 retVal;
			deMemcpy(&retVal, (const deUint8*)ptr + ndx * sizeof(deUint32), sizeof(deUint32));

			return retVal;
		}

		default:
			DE_ASSERT(false);
			return 0;
	}
}

tcu::IVec4 getBufferSize (const rr::MultisampleConstPixelBufferAccess& multisampleBuffer)
{
	return tcu::IVec4(0, 0, multisampleBuffer.raw().getHeight(), multisampleBuffer.raw().getDepth());
}

bool isEmpty (const rr::MultisampleConstPixelBufferAccess& access)
{
	return access.raw().getWidth() == 0 || access.raw().getHeight() == 0 || access.raw().getDepth() == 0;
}

struct DrawContext
{
	int primitiveID;

	DrawContext (void)
		: primitiveID(0)
	{
	}
};

/*--------------------------------------------------------------------*//*!
 * \brief Calculates intersection of two rects given as (left, bottom, width, height)
 *//*--------------------------------------------------------------------*/
tcu::IVec4 rectIntersection (const tcu::IVec4& a, const tcu::IVec4& b)
{
	const tcu::IVec2 pos	= tcu::IVec2(de::max(a.x(), b.x()), de::max(a.y(), b.y()));
	const tcu::IVec2 endPos	= tcu::IVec2(de::min(a.x() + a.z(), b.x() + b.z()), de::min(a.y() + a.w(), b.y() + b.w()));

	return tcu::IVec4(pos.x(), pos.y(), endPos.x() - pos.x(), endPos.y() - pos.y());
}

void convertPrimitiveToBaseType(std::vector<pa::Triangle>& output, std::vector<pa::Triangle>& input)
{
	std::swap(output, input);
}

void convertPrimitiveToBaseType(std::vector<pa::Line>& output, std::vector<pa::Line>& input)
{
	std::swap(output, input);
}

void convertPrimitiveToBaseType(std::vector<pa::Point>& output, std::vector<pa::Point>& input)
{
	std::swap(output, input);
}

void convertPrimitiveToBaseType(std::vector<pa::Line>& output, std::vector<pa::LineAdjacency>& input)
{
	output.resize(input.size());
	for (size_t i = 0; i < input.size(); ++i)
	{
		const int adjacentProvokingVertex	= input[i].provokingIndex;
		const int baseProvokingVertexIndex	= adjacentProvokingVertex-1;
		output[i] = pa::Line(input[i].v1, input[i].v2, baseProvokingVertexIndex);
	}
}

void convertPrimitiveToBaseType(std::vector<pa::Triangle>& output, std::vector<pa::TriangleAdjacency>& input)
{
	output.resize(input.size());
	for (size_t i = 0; i < input.size(); ++i)
	{
		const int adjacentProvokingVertex	= input[i].provokingIndex;
		const int baseProvokingVertexIndex	= adjacentProvokingVertex/2;
		output[i] = pa::Triangle(input[i].v0, input[i].v2, input[i].v4, baseProvokingVertexIndex);
	}
}

namespace cliputil
{

/*--------------------------------------------------------------------*//*!
 * \brief Get clipped portion of the second endpoint
 *
 * Calculate the intersection of line segment v0-v1 and a given plane. Line
 * segment is defined by a pair of one-dimensional homogeneous coordinates.
 *
 *//*--------------------------------------------------------------------*/
ClipFloat getSegmentVolumeEdgeClip (const ClipFloat v0,
									const ClipFloat w0,
									const ClipFloat v1,
									const ClipFloat w1,
									const ClipFloat plane)
{
	return (plane*w0 - v0) / ((v1 - v0) - plane*(w1 - w0));
}

/*--------------------------------------------------------------------*//*!
 * \brief Get clipped portion of the endpoint
 *
 * How much (in [0-1] range) of a line segment v0-v1 would be clipped
 * of the v0 end of the line segment by clipping.
 *//*--------------------------------------------------------------------*/
ClipFloat getLineEndpointClipping (const ClipVec4& v0, const ClipVec4& v1)
{
	const ClipFloat clipVolumeSize = (ClipFloat)1.0;

	if (v0.z() > v0.w())
	{
		// Clip +Z
		return getSegmentVolumeEdgeClip(v0.z(), v0.w(), v1.z(), v1.w(), clipVolumeSize);
	}
	else if (v0.z() < -v0.w())
	{
		// Clip -Z
		return getSegmentVolumeEdgeClip(v0.z(), v0.w(), v1.z(), v1.w(), -clipVolumeSize);
	}
	else
	{
		// no clipping
		return (ClipFloat)0.0;
	}
}

ClipVec4 vec4ToClipVec4 (const tcu::Vec4& v)
{
	return ClipVec4((ClipFloat)v.x(), (ClipFloat)v.y(), (ClipFloat)v.z(), (ClipFloat)v.w());
}

tcu::Vec4 clipVec4ToVec4 (const ClipVec4& v)
{
	return tcu::Vec4((float)v.x(), (float)v.y(), (float)v.z(), (float)v.w());
}

class ClipVolumePlane
{
public:
	virtual bool		pointInClipVolume			(const ClipVec4& p) const						= 0;
	virtual ClipFloat	clipLineSegmentEnd			(const ClipVec4& v0, const ClipVec4& v1) const	= 0;
	virtual ClipVec4	getLineIntersectionPoint	(const ClipVec4& v0, const ClipVec4& v1) const	= 0;
};

template <int Sign, int CompNdx>
class ComponentPlane : public ClipVolumePlane
{
	DE_STATIC_ASSERT(Sign == +1 || Sign == -1);

public:
	bool		pointInClipVolume			(const ClipVec4& p) const;
	ClipFloat	clipLineSegmentEnd			(const ClipVec4& v0, const ClipVec4& v1) const;
	ClipVec4	getLineIntersectionPoint	(const ClipVec4& v0, const ClipVec4& v1) const;
};

template <int Sign, int CompNdx>
bool ComponentPlane<Sign, CompNdx>::pointInClipVolume (const ClipVec4& p) const
{
	const ClipFloat clipVolumeSize = (ClipFloat)1.0;

	return (ClipFloat)(Sign * p[CompNdx]) <= clipVolumeSize * p.w();
}

template <int Sign, int CompNdx>
ClipFloat ComponentPlane<Sign, CompNdx>::clipLineSegmentEnd (const ClipVec4& v0, const ClipVec4& v1) const
{
	const ClipFloat clipVolumeSize = (ClipFloat)1.0;

	return getSegmentVolumeEdgeClip(v0[CompNdx], v0.w(),
									v1[CompNdx], v1.w(),
									(ClipFloat)Sign * clipVolumeSize);
}

template <int Sign, int CompNdx>
ClipVec4 ComponentPlane<Sign, CompNdx>::getLineIntersectionPoint (const ClipVec4& v0, const ClipVec4& v1) const
{
	// A point on line might be far away, causing clipping ratio (clipLineSegmentEnd) to become extremely close to 1.0
	// even if the another point is not on the plane. Prevent clipping ratio from saturating by using points on line
	// that are (nearly) on this and (nearly) on the opposite plane.

	const ClipVec4 	clippedV0	= tcu::mix(v0, v1, ComponentPlane<+1, CompNdx>().clipLineSegmentEnd(v0, v1));
	const ClipVec4 	clippedV1	= tcu::mix(v0, v1, ComponentPlane<-1, CompNdx>().clipLineSegmentEnd(v0, v1));
	const ClipFloat	clipRatio	= clipLineSegmentEnd(clippedV0, clippedV1);

	// Find intersection point of line from v0 to v1 and the current plane. Avoid ratios near 1.0
	if (clipRatio <= (ClipFloat)0.5)
		return tcu::mix(clippedV0, clippedV1, clipRatio);
	else
	{
		const ClipFloat complementClipRatio = clipLineSegmentEnd(clippedV1, clippedV0);
		return tcu::mix(clippedV1, clippedV0, complementClipRatio);
	}
}

struct TriangleVertex
{
	ClipVec4	position;
	ClipFloat	weight[3];		//!< barycentrics
};

struct SubTriangle
{
	TriangleVertex vertices[3];
};

void clipTriangleOneVertex (std::vector<TriangleVertex>& clippedEdges, const ClipVolumePlane& plane, const TriangleVertex& clipped, const TriangleVertex& v1, const TriangleVertex& v2)
{
	const ClipFloat	degenerateLimit = (ClipFloat)1.0;

	// calc clip pos
	TriangleVertex	mid1;
	TriangleVertex	mid2;
	bool			outputDegenerate = false;

	{
		const TriangleVertex&	inside	= v1;
		const TriangleVertex&	outside	= clipped;
		      TriangleVertex&	middle	= mid1;

		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);

		if (hitDist >= degenerateLimit)
		{
			// do not generate degenerate triangles
			outputDegenerate = true;
		}
		else
		{
			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);

			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
		}
	}

	{
		const TriangleVertex&	inside	= v2;
		const TriangleVertex&	outside	= clipped;
		      TriangleVertex&	middle	= mid2;

		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);

		if (hitDist >= degenerateLimit)
		{
			// do not generate degenerate triangles
			outputDegenerate = true;
		}
		else
		{
			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);

			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
		}
	}

	if (!outputDegenerate)
	{
		// gen quad (v1) -> mid1 -> mid2 -> (v2)
		clippedEdges.push_back(v1);
		clippedEdges.push_back(mid1);
		clippedEdges.push_back(mid2);
		clippedEdges.push_back(v2);
	}
	else
	{
		// don't modify
		clippedEdges.push_back(v1);
		clippedEdges.push_back(clipped);
		clippedEdges.push_back(v2);
	}
}

void clipTriangleTwoVertices (std::vector<TriangleVertex>& clippedEdges, const ClipVolumePlane& plane, const TriangleVertex& v0, const TriangleVertex& clipped1, const TriangleVertex& clipped2)
{
	const ClipFloat	unclippableLimit = (ClipFloat)1.0;

	// calc clip pos
	TriangleVertex	mid1;
	TriangleVertex	mid2;
	bool			unclippableVertex1 = false;
	bool			unclippableVertex2 = false;

	{
		const TriangleVertex&	inside	= v0;
		const TriangleVertex&	outside	= clipped1;
		      TriangleVertex&	middle	= mid1;

		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);

		if (hitDist >= unclippableLimit)
		{
			// this edge cannot be clipped because the edge is really close to the volume boundary
			unclippableVertex1 = true;
		}
		else
		{
			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);

			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
		}
	}

	{
		const TriangleVertex&	inside	= v0;
		const TriangleVertex&	outside	= clipped2;
		      TriangleVertex&	middle	= mid2;

		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);

		if (hitDist >= unclippableLimit)
		{
			// this edge cannot be clipped because the edge is really close to the volume boundary
			unclippableVertex2 = true;
		}
		else
		{
			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);

			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
		}
	}

	if (!unclippableVertex1 && !unclippableVertex2)
	{
		// gen triangle (v0) -> mid1 -> mid2
		clippedEdges.push_back(v0);
		clippedEdges.push_back(mid1);
		clippedEdges.push_back(mid2);
	}
	else if (!unclippableVertex1 && unclippableVertex2)
	{
		// clip just vertex 1
		clippedEdges.push_back(v0);
		clippedEdges.push_back(mid1);
		clippedEdges.push_back(clipped2);
	}
	else if (unclippableVertex1 && !unclippableVertex2)
	{
		// clip just vertex 2
		clippedEdges.push_back(v0);
		clippedEdges.push_back(clipped1);
		clippedEdges.push_back(mid2);
	}
	else
	{
		// don't modify
		clippedEdges.push_back(v0);
		clippedEdges.push_back(clipped1);
		clippedEdges.push_back(clipped2);
	}
}

void clipTriangleToPlane (std::vector<TriangleVertex>& clippedEdges, const TriangleVertex* vertices, const ClipVolumePlane& plane)
{
	const bool v0Clipped = !plane.pointInClipVolume(vertices[0].position);
	const bool v1Clipped = !plane.pointInClipVolume(vertices[1].position);
	const bool v2Clipped = !plane.pointInClipVolume(vertices[2].position);
	const int  clipCount = ((v0Clipped) ? (1) : (0)) + ((v1Clipped) ? (1) : (0)) + ((v2Clipped) ? (1) : (0));

	if (clipCount == 0)
	{
		// pass
		clippedEdges.insert(clippedEdges.begin(), vertices, vertices + 3);
	}
	else if (clipCount == 1)
	{
		// clip one vertex
		if (v0Clipped)			clipTriangleOneVertex(clippedEdges, plane, vertices[0], vertices[1], vertices[2]);
		else if (v1Clipped)		clipTriangleOneVertex(clippedEdges, plane, vertices[1], vertices[2], vertices[0]);
		else					clipTriangleOneVertex(clippedEdges, plane, vertices[2], vertices[0], vertices[1]);
	}
	else if (clipCount == 2)
	{
		// clip two vertices
		if (!v0Clipped)			clipTriangleTwoVertices(clippedEdges, plane, vertices[0], vertices[1], vertices[2]);
		else if (!v1Clipped)	clipTriangleTwoVertices(clippedEdges, plane, vertices[1], vertices[2], vertices[0]);
		else					clipTriangleTwoVertices(clippedEdges, plane, vertices[2], vertices[0], vertices[1]);
	}
	else if (clipCount == 3)
	{
		// discard
	}
	else
	{
		DE_ASSERT(DE_FALSE);
	}
}

} // cliputil

tcu::Vec2 to2DCartesian (const tcu::Vec4& p)
{
	return tcu::Vec2(p.x(), p.y()) / p.w();
}

float cross2D (const tcu::Vec2& a, const tcu::Vec2& b)
{
	return tcu::cross(tcu::Vec3(a.x(), a.y(), 0.0f), tcu::Vec3(b.x(), b.y(), 0.0f)).z();
}

void flatshadePrimitiveVertices (pa::Triangle& target, size_t outputNdx)
{
	const rr::GenericVec4 flatValue = target.getProvokingVertex()->outputs[outputNdx];
	target.v0->outputs[outputNdx] = flatValue;
	target.v1->outputs[outputNdx] = flatValue;
	target.v2->outputs[outputNdx] = flatValue;
}

void flatshadePrimitiveVertices (pa::Line& target, size_t outputNdx)
{
	const rr::GenericVec4 flatValue = target.getProvokingVertex()->outputs[outputNdx];
	target.v0->outputs[outputNdx] = flatValue;
	target.v1->outputs[outputNdx] = flatValue;
}

void flatshadePrimitiveVertices (pa::Point& target, size_t outputNdx)
{
	DE_UNREF(target);
	DE_UNREF(outputNdx);
}

template <typename ContainerType>
void flatshadeVertices (const Program& program, ContainerType& list)
{
	// flatshade
	const std::vector<rr::VertexVaryingInfo>& fragInputs = (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());

	for (size_t inputNdx = 0; inputNdx < fragInputs.size(); ++inputNdx)
		if (fragInputs[inputNdx].flatshade)
			for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
				flatshadePrimitiveVertices(*it, inputNdx);
}

/*--------------------------------------------------------------------*//*!
 * Clip triangles to the clip volume.
 *//*--------------------------------------------------------------------*/
void clipPrimitives (std::vector<pa::Triangle>&		list,
					 const Program&					program,
					 bool							clipWithZPlanes,
					 VertexPacketAllocator&			vpalloc)
{
	using namespace cliputil;

	cliputil::ComponentPlane<+1, 0> clipPosX;
	cliputil::ComponentPlane<-1, 0> clipNegX;
	cliputil::ComponentPlane<+1, 1> clipPosY;
	cliputil::ComponentPlane<-1, 1> clipNegY;
	cliputil::ComponentPlane<+1, 2> clipPosZ;
	cliputil::ComponentPlane<-1, 2> clipNegZ;

	const std::vector<rr::VertexVaryingInfo>&	fragInputs			= (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
	const ClipVolumePlane*						planes[]			= { &clipPosX, &clipNegX, &clipPosY, &clipNegY, &clipPosZ, &clipNegZ };
	const int									numPlanes			= (clipWithZPlanes) ? (6) : (4);

	std::vector<pa::Triangle>					outputTriangles;

	for (int inputTriangleNdx = 0; inputTriangleNdx < (int)list.size(); ++inputTriangleNdx)
	{
		bool clippedByPlane[6];

		// Needs clipping?
		{
			bool discardPrimitive	= false;
			bool fullyInClipVolume	= true;

			for (int planeNdx = 0; planeNdx < numPlanes; ++planeNdx)
			{
				const ClipVolumePlane*	plane			= planes[planeNdx];
				const bool				v0InsidePlane	= plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v0->position));
				const bool				v1InsidePlane	= plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v1->position));
				const bool				v2InsidePlane	= plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v2->position));

				// Fully outside
				if (!v0InsidePlane && !v1InsidePlane && !v2InsidePlane)
				{
					discardPrimitive = true;
					break;
				}
				// Partially outside
				else if (!v0InsidePlane || !v1InsidePlane || !v2InsidePlane)
				{
					clippedByPlane[planeNdx] = true;
					fullyInClipVolume = false;
				}
				// Fully inside
				else
					clippedByPlane[planeNdx] = false;
			}

			if (discardPrimitive)
				continue;

			if (fullyInClipVolume)
			{
				outputTriangles.push_back(list[inputTriangleNdx]);
				continue;
			}
		}

		// Clip
		{
			std::vector<SubTriangle>	subTriangles	(1);
			SubTriangle&				initialTri		= subTriangles[0];

			initialTri.vertices[0].position = vec4ToClipVec4(list[inputTriangleNdx].v0->position);
			initialTri.vertices[0].weight[0] = (ClipFloat)1.0;
			initialTri.vertices[0].weight[1] = (ClipFloat)0.0;
			initialTri.vertices[0].weight[2] = (ClipFloat)0.0;

			initialTri.vertices[1].position = vec4ToClipVec4(list[inputTriangleNdx].v1->position);
			initialTri.vertices[1].weight[0] = (ClipFloat)0.0;
			initialTri.vertices[1].weight[1] = (ClipFloat)1.0;
			initialTri.vertices[1].weight[2] = (ClipFloat)0.0;

			initialTri.vertices[2].position = vec4ToClipVec4(list[inputTriangleNdx].v2->position);
			initialTri.vertices[2].weight[0] = (ClipFloat)0.0;
			initialTri.vertices[2].weight[1] = (ClipFloat)0.0;
			initialTri.vertices[2].weight[2] = (ClipFloat)1.0;

			// Clip all subtriangles to all relevant planes
			for (int planeNdx = 0; planeNdx < numPlanes; ++planeNdx)
			{
				std::vector<SubTriangle> nextPhaseSubTriangles;

				if (!clippedByPlane[planeNdx])
					continue;

				for (int subTriangleNdx = 0; subTriangleNdx < (int)subTriangles.size(); ++subTriangleNdx)
				{
					std::vector<TriangleVertex> convexPrimitive;

					// Clip triangle and form a convex n-gon ( n c {3, 4} )
					clipTriangleToPlane(convexPrimitive, subTriangles[subTriangleNdx].vertices, *planes[planeNdx]);

					// Subtriangle completely discarded
					if (convexPrimitive.empty())
						continue;

					DE_ASSERT(convexPrimitive.size() == 3 || convexPrimitive.size() == 4);

					//Triangulate planar convex n-gon
					{
						TriangleVertex& v0 = convexPrimitive[0];

						for (int subsubTriangleNdx = 1; subsubTriangleNdx + 1 < (int)convexPrimitive.size(); ++subsubTriangleNdx)
						{
							const float				degenerateEpsilon	= 1.0e-6f;
							const TriangleVertex&	v1					= convexPrimitive[subsubTriangleNdx];
							const TriangleVertex&	v2					= convexPrimitive[subsubTriangleNdx + 1];
							const float				visibleArea			= de::abs(cross2D(to2DCartesian(clipVec4ToVec4(v1.position)) - to2DCartesian(clipVec4ToVec4(v0.position)),
																						  to2DCartesian(clipVec4ToVec4(v2.position)) - to2DCartesian(clipVec4ToVec4(v0.position))));

							// has surface area (is not a degenerate)
							if (visibleArea >= degenerateEpsilon)
							{
								SubTriangle subsubTriangle;

								subsubTriangle.vertices[0] = v0;
								subsubTriangle.vertices[1] = v1;
								subsubTriangle.vertices[2] = v2;

								nextPhaseSubTriangles.push_back(subsubTriangle);
							}
						}
					}
				}

				subTriangles.swap(nextPhaseSubTriangles);
			}

			// Rebuild pa::Triangles from subtriangles
			for (int subTriangleNdx = 0; subTriangleNdx < (int)subTriangles.size(); ++subTriangleNdx)
			{
				VertexPacket*	p0				= vpalloc.alloc();
				VertexPacket*	p1				= vpalloc.alloc();
				VertexPacket*	p2				= vpalloc.alloc();
				pa::Triangle	ngonFragment	(p0, p1, p2, -1);

				p0->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[0].position);
				p1->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[1].position);
				p2->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[2].position);

				for (size_t outputNdx = 0; outputNdx < fragInputs.size(); ++outputNdx)
				{
					if (fragInputs[outputNdx].type == GENERICVECTYPE_FLOAT)
					{
						const tcu::Vec4 out0 = list[inputTriangleNdx].v0->outputs[outputNdx].get<float>();
						const tcu::Vec4 out1 = list[inputTriangleNdx].v1->outputs[outputNdx].get<float>();
						const tcu::Vec4 out2 = list[inputTriangleNdx].v2->outputs[outputNdx].get<float>();

						p0->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[0].weight[0] * out0
											   + (float)subTriangles[subTriangleNdx].vertices[0].weight[1] * out1
											   + (float)subTriangles[subTriangleNdx].vertices[0].weight[2] * out2;

						p1->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[1].weight[0] * out0
											   + (float)subTriangles[subTriangleNdx].vertices[1].weight[1] * out1
											   + (float)subTriangles[subTriangleNdx].vertices[1].weight[2] * out2;

						p2->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[2].weight[0] * out0
											   + (float)subTriangles[subTriangleNdx].vertices[2].weight[1] * out1
											   + (float)subTriangles[subTriangleNdx].vertices[2].weight[2] * out2;
					}
					else
					{
						// only floats are interpolated, all others must be flatshaded then
						p0->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
						p1->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
						p2->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
					}
				}

				outputTriangles.push_back(ngonFragment);
			}
		}
	}

	// output result
	list.swap(outputTriangles);
}

/*--------------------------------------------------------------------*//*!
 * Clip lines to the near and far clip planes.
 *
 * Clipping to other planes is a by-product of the viewport test  (i.e.
 * rasterization area selection).
 *//*--------------------------------------------------------------------*/
void clipPrimitives (std::vector<pa::Line>& 		list,
					 const Program& 				program,
					 bool 							clipWithZPlanes,
					 VertexPacketAllocator&			vpalloc)
{
	DE_UNREF(vpalloc);

	using namespace cliputil;

	// Lines are clipped only by the far and the near planes here. Line clipping by other planes done in the rasterization phase

	const std::vector<rr::VertexVaryingInfo>&	fragInputs	= (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
	std::vector<pa::Line>						visibleLines;

	// Z-clipping disabled, don't do anything
	if (!clipWithZPlanes)
		return;

	for (size_t ndx = 0; ndx < list.size(); ++ndx)
	{
		pa::Line& l = list[ndx];

		// Totally discarded?
		if ((l.v0->position.z() < -l.v0->position.w() && l.v1->position.z() < -l.v1->position.w()) ||
			(l.v0->position.z() >  l.v0->position.w() && l.v1->position.z() >  l.v1->position.w()))
			continue; // discard

		// Something is visible

		const ClipVec4	p0	= vec4ToClipVec4(l.v0->position);
		const ClipVec4	p1	= vec4ToClipVec4(l.v1->position);
		const ClipFloat	t0	= getLineEndpointClipping(p0, p1);
		const ClipFloat	t1	= getLineEndpointClipping(p1, p0);

		// Not clipped at all?
		if (t0 == (ClipFloat)0.0 && t1 == (ClipFloat)0.0)
		{
			visibleLines.push_back(pa::Line(l.v0, l.v1, -1));
		}
		else
		{
			// Clip position
			l.v0->position = clipVec4ToVec4(tcu::mix(p0, p1, t0));
			l.v1->position = clipVec4ToVec4(tcu::mix(p1, p0, t1));

			// Clip attributes
			for (size_t outputNdx = 0; outputNdx < fragInputs.size(); ++outputNdx)
			{
				// only floats are clipped, other types are flatshaded
				if (fragInputs[outputNdx].type == GENERICVECTYPE_FLOAT)
				{
					const tcu::Vec4 a0 = l.v0->outputs[outputNdx].get<float>();
					const tcu::Vec4 a1 = l.v1->outputs[outputNdx].get<float>();

					l.v0->outputs[outputNdx] = tcu::mix(a0, a1, (float)t0);
					l.v1->outputs[outputNdx] = tcu::mix(a1, a0, (float)t1);
				}
			}

			visibleLines.push_back(pa::Line(l.v0, l.v1, -1));
		}
	}

	// return visible in list
	std::swap(visibleLines, list);
}

/*--------------------------------------------------------------------*//*!
 * Discard points not within clip volume. Clipping is a by-product
 * of the viewport test.
 *//*--------------------------------------------------------------------*/
void clipPrimitives (std::vector<pa::Point>&		list,
					 const Program&					program,
					 bool							clipWithZPlanes,
					 VertexPacketAllocator&			vpalloc)
{
	DE_UNREF(vpalloc);
	DE_UNREF(program);

	std::vector<pa::Point> visiblePoints;

	// Z-clipping disabled, don't do anything
	if (!clipWithZPlanes)
		return;

	for (size_t ndx = 0; ndx < list.size(); ++ndx)
	{
		pa::Point& p = list[ndx];

		// points are discarded if Z is not in range. (Wide) point clipping is done in the rasterization phase
		if (de::inRange(p.v0->position.z(), -p.v0->position.w(), p.v0->position.w()))
			visiblePoints.push_back(pa::Point(p.v0));
	}

	// return visible in list
	std::swap(visiblePoints, list);
}

void transformVertexClipCoordsToWindowCoords (const RenderState& state, VertexPacket& packet)
{
	// To normalized device coords
	{
		packet.position = tcu::Vec4(packet.position.x()/packet.position.w(),
									packet.position.y()/packet.position.w(),
									packet.position.z()/packet.position.w(),
									1.0f               /packet.position.w());
	}

	// To window coords
	{
		const WindowRectangle&	viewport	= state.viewport.rect;
		const float				halfW		= (float)(viewport.width) / 2.0f;
		const float				halfH		= (float)(viewport.height) / 2.0f;
		const float				oX			= (float)viewport.left + halfW;
		const float				oY			= (float)viewport.bottom + halfH;
		const float				zn			= state.viewport.zn;
		const float				zf			= state.viewport.zf;

		packet.position = tcu::Vec4(packet.position.x()*halfW + oX,
									packet.position.y()*halfH + oY,
									packet.position.z()*(zf - zn)/2.0f + (zn + zf)/2.0f,
									packet.position.w());
	}
}

void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Triangle& target)
{
	transformVertexClipCoordsToWindowCoords(state, *target.v0);
	transformVertexClipCoordsToWindowCoords(state, *target.v1);
	transformVertexClipCoordsToWindowCoords(state, *target.v2);
}

void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Line& target)
{
	transformVertexClipCoordsToWindowCoords(state, *target.v0);
	transformVertexClipCoordsToWindowCoords(state, *target.v1);
}

void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Point& target)
{
	transformVertexClipCoordsToWindowCoords(state, *target.v0);
}

template <typename ContainerType>
void transformClipCoordsToWindowCoords (const RenderState& state, ContainerType& list)
{
	for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
		transformPrimitiveClipCoordsToWindowCoords(state, *it);
}

void makeSharedVerticeDistinct (VertexPacket*& packet, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
{
	// distinct
	if (vertices.find(packet) == vertices.end())
	{
		vertices.insert(packet);
	}
	else
	{
		VertexPacket* newPacket = vpalloc.alloc();

		// copy packet output values
		newPacket->position		= packet->position;
		newPacket->pointSize	= packet->pointSize;
		newPacket->primitiveID	= packet->primitiveID;

		for (size_t outputNdx = 0; outputNdx < vpalloc.getNumVertexOutputs(); ++outputNdx)
			newPacket->outputs[outputNdx] = packet->outputs[outputNdx];

		// no need to insert new packet to "vertices" as newPacket is unique
		packet = newPacket;
	}
}

void makeSharedVerticesDistinct (pa::Triangle& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
{
	makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
	makeSharedVerticeDistinct(target.v1, vertices, vpalloc);
	makeSharedVerticeDistinct(target.v2, vertices, vpalloc);
}

void makeSharedVerticesDistinct (pa::Line& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
{
	makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
	makeSharedVerticeDistinct(target.v1, vertices, vpalloc);
}

void makeSharedVerticesDistinct (pa::Point& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
{
	makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
}

template <typename ContainerType>
void makeSharedVerticesDistinct (ContainerType& list, VertexPacketAllocator& vpalloc)
{
	std::set<VertexPacket*, std::less<void*> > vertices;

	for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
		makeSharedVerticesDistinct(*it, vertices, vpalloc);
}

void generatePrimitiveIDs (pa::Triangle& target, int id)
{
	target.v0->primitiveID = id;
	target.v1->primitiveID = id;
	target.v2->primitiveID = id;
}

void generatePrimitiveIDs (pa::Line& target, int id)
{
	target.v0->primitiveID = id;
	target.v1->primitiveID = id;
}

void generatePrimitiveIDs (pa::Point& target, int id)
{
	target.v0->primitiveID = id;
}

template <typename ContainerType>
void generatePrimitiveIDs (ContainerType& list, DrawContext& drawContext)
{
	for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
		generatePrimitiveIDs(*it, drawContext.primitiveID++);
}

static float findTriangleVertexDepthSlope (const tcu::Vec4& p, const tcu::Vec4& v0, const tcu::Vec4& v1)
{
	// screen space
	const tcu::Vec3 ssp		=  p.swizzle(0, 1, 2);
	const tcu::Vec3 ssv0	= v0.swizzle(0, 1, 2);
	const tcu::Vec3 ssv1	= v1.swizzle(0, 1, 2);

	// dx & dy

	const tcu::Vec3 a		= ssv0.swizzle(0,1,2) - ssp.swizzle(0,1,2);
	const tcu::Vec3 b		= ssv1.swizzle(0,1,2) - ssp.swizzle(0,1,2);
	const float		epsilon	= 0.0001f;
	const float		det		= (a.x() * b.y() - b.x() * a.y());

	// degenerate triangle, it won't generate any fragments anyway. Return value doesn't matter
	if (de::abs(det) < epsilon)
		return 0.0f;

	const tcu::Vec2	dxDir	= tcu::Vec2( b.y(), -a.y()) / det;
	const tcu::Vec2	dyDir	= tcu::Vec2(-b.x(),  a.x()) / det;

	const float		dzdx	= dxDir.x() * a.z() + dxDir.y() * b.z();
	const float		dzdy	= dyDir.x() * a.z() + dyDir.y() * b.z();

	// approximate using max(|dz/dx|, |dz/dy|)
	return de::max(de::abs(dzdx), de::abs(dzdy));
}

static float findPrimitiveMaximumDepthSlope (const pa::Triangle& triangle)
{
	const float d1 = findTriangleVertexDepthSlope(triangle.v0->position, triangle.v1->position, triangle.v2->position);
	const float d2 = findTriangleVertexDepthSlope(triangle.v1->position, triangle.v2->position, triangle.v0->position);
	const float d3 = findTriangleVertexDepthSlope(triangle.v2->position, triangle.v0->position, triangle.v1->position);

	return de::max(d1, de::max(d2, d3));
}

static float getFloatingPointMinimumResolvableDifference (float maxZValue, tcu::TextureFormat::ChannelType type)
{
	if (type == tcu::TextureFormat::FLOAT)
	{
		// 32f
		const int maxExponent = tcu::Float32(maxZValue).exponent();
		return tcu::Float32::construct(+1, maxExponent - 23, 1 << 23).asFloat();
	}

	// unexpected format
	DE_ASSERT(false);
	return 0.0f;
}

static float getFixedPointMinimumResolvableDifference (int numBits)
{
	return tcu::Float32::construct(+1, -numBits, 1 << 23).asFloat();
}

static float findPrimitiveMinimumResolvableDifference (const pa::Triangle& triangle, const rr::MultisampleConstPixelBufferAccess& depthAccess)
{
	const float								maxZvalue		= de::max(de::max(triangle.v0->position.z(), triangle.v1->position.z()), triangle.v2->position.z());
	const tcu::TextureFormat				format			= depthAccess.raw().getFormat();
	const tcu::TextureFormat::ChannelOrder	order			= format.order;

	if (order == tcu::TextureFormat::D)
	{
		// depth only
		const tcu::TextureFormat::ChannelType	channelType		= format.type;
		const tcu::TextureChannelClass			channelClass	= tcu::getTextureChannelClass(channelType);
		const int								numBits			= tcu::getTextureFormatBitDepth(format).x();

		if (channelClass == tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
			return getFloatingPointMinimumResolvableDifference(maxZvalue, channelType);
		else
			// \note channelClass might be CLASS_LAST but that's ok
			return getFixedPointMinimumResolvableDifference(numBits);
	}
	else if (order == tcu::TextureFormat::DS)
	{
		// depth stencil, special cases for possible combined formats
		if (format.type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
			return getFloatingPointMinimumResolvableDifference(maxZvalue, tcu::TextureFormat::FLOAT);
		else if (format.type == tcu::TextureFormat::UNSIGNED_INT_24_8)
			return getFixedPointMinimumResolvableDifference(24);
	}

	// unexpected format
	DE_ASSERT(false);
	return 0.0f;
}

void writeFragmentPackets (const RenderState&					state,
						   const RenderTarget&					renderTarget,
						   const Program&						program,
						   const FragmentPacket*				fragmentPackets,
						   int									numRasterizedPackets,
						   rr::FaceType							facetype,
						   const std::vector<rr::GenericVec4>&	fragmentOutputArray,
						   const float*							depthValues,
						   std::vector<Fragment>&				fragmentBuffer)
{
	const int			numSamples		= renderTarget.getNumSamples();
	const size_t		numOutputs		= program.fragmentShader->getOutputs().size();
	FragmentProcessor	fragProcessor;

	DE_ASSERT(fragmentOutputArray.size() >= (size_t)numRasterizedPackets*4*numOutputs);
	DE_ASSERT(fragmentBuffer.size()      >= (size_t)numRasterizedPackets*4);

	// Translate fragments but do not set the value yet
	{
		int	fragCount = 0;
		for (int packetNdx = 0; packetNdx < numRasterizedPackets; ++packetNdx)
		for (int fragNdx = 0; fragNdx < 4; fragNdx++)
		{
			const FragmentPacket&	packet	= fragmentPackets[packetNdx];
			const int				xo		= fragNdx%2;
			const int				yo		= fragNdx/2;

			if (getCoverageAnyFragmentSampleLive(packet.coverage, numSamples, xo, yo))
			{
				Fragment& fragment		= fragmentBuffer[fragCount++];

				fragment.pixelCoord		= packet.position + tcu::IVec2(xo, yo);
				fragment.coverage		= (deUint32)((packet.coverage & getCoverageFragmentSampleBits(numSamples, xo, yo)) >> getCoverageOffset(numSamples, xo, yo));
				fragment.sampleDepths	= (depthValues) ? (&depthValues[(packetNdx*4 + yo*2 + xo)*numSamples]) : (DE_NULL);
			}
		}
	}

	// Set per output output values
	{
		rr::FragmentOperationState noStencilDepthWriteState(state.fragOps);
		noStencilDepthWriteState.depthMask						= false;
		noStencilDepthWriteState.stencilStates[facetype].sFail	= STENCILOP_KEEP;
		noStencilDepthWriteState.stencilStates[facetype].dpFail	= STENCILOP_KEEP;
		noStencilDepthWriteState.stencilStates[facetype].dpPass	= STENCILOP_KEEP;

		int	fragCount = 0;
		for (size_t outputNdx = 0; outputNdx < numOutputs; ++outputNdx)
		{
			// Only the last output-pass has default state, other passes have stencil & depth writemask=0
			const rr::FragmentOperationState& fragOpsState = (outputNdx == numOutputs-1) ? (state.fragOps) : (noStencilDepthWriteState);

			for (int packetNdx = 0; packetNdx < numRasterizedPackets; ++packetNdx)
			for (int fragNdx = 0; fragNdx < 4; fragNdx++)
			{
				const FragmentPacket&	packet	= fragmentPackets[packetNdx];
				const int				xo		= fragNdx%2;
				const int				yo		= fragNdx/2;

				// Add only fragments that have live samples to shaded fragments queue.
				if (getCoverageAnyFragmentSampleLive(packet.coverage, numSamples, xo, yo))
				{
					Fragment& fragment		= fragmentBuffer[fragCount++];
					fragment.value			= fragmentOutputArray[(packetNdx*4 + fragNdx) * numOutputs + outputNdx];
				}
			}

			// Execute per-fragment ops and write
			fragProcessor.render(renderTarget.getColorBuffer((int)outputNdx), renderTarget.getDepthBuffer(), renderTarget.getStencilBuffer(), &fragmentBuffer[0], fragCount, facetype, fragOpsState);
		}
	}
}

void rasterizePrimitive (const RenderState&					state,
						 const RenderTarget&				renderTarget,
						 const Program&						program,
						 const pa::Triangle&				triangle,
						 const tcu::IVec4&					renderTargetRect,
						 RasterizationInternalBuffers&		buffers)
{
	const int			numSamples		= renderTarget.getNumSamples();
	const float			depthClampMin	= de::min(state.viewport.zn, state.viewport.zf);
	const float			depthClampMax	= de::max(state.viewport.zn, state.viewport.zf);
	TriangleRasterizer	rasterizer		(renderTargetRect, numSamples, state.rasterization);
	float				depthOffset		= 0.0f;

	rasterizer.init(triangle.v0->position, triangle.v1->position, triangle.v2->position);

	// Culling
	const FaceType visibleFace = rasterizer.getVisibleFace();
	if ((state.cullMode == CULLMODE_FRONT	&& visibleFace == FACETYPE_FRONT) ||
		(state.cullMode == CULLMODE_BACK	&& visibleFace == FACETYPE_BACK))
		return;

	// Shading context
	FragmentShadingContext shadingContext(triangle.v0->outputs, triangle.v1->outputs, triangle.v2->outputs, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, triangle.v2->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples);

	// Polygon offset
	if (buffers.fragmentDepthBuffer && state.fragOps.polygonOffsetEnabled)
	{
		const float maximumDepthSlope			= findPrimitiveMaximumDepthSlope(triangle);
		const float minimumResolvableDifference	= findPrimitiveMinimumResolvableDifference(triangle, renderTarget.getDepthBuffer());

		depthOffset = maximumDepthSlope * state.fragOps.polygonOffsetFactor + minimumResolvableDifference * state.fragOps.polygonOffsetUnits;
	}

	// Execute rasterize - shade - write loop
	for (;;)
	{
		const int	maxFragmentPackets		= (int)buffers.fragmentPackets.size();
		int			numRasterizedPackets	= 0;

		// Rasterize

		rasterizer.rasterize(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);

		// numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()

		if (!numRasterizedPackets)
			break; // Rasterization finished.

		// Polygon offset
		if (buffers.fragmentDepthBuffer && state.fragOps.polygonOffsetEnabled)
			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx] + depthOffset, 0.0f, 1.0f);

		// Shade

		program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);

		// Depth clamp
		if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);

		// Handle fragment shader outputs

		writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, visibleFace, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
	}
}

void rasterizePrimitive (const RenderState&					state,
						 const RenderTarget&				renderTarget,
						 const Program&						program,
						 const pa::Line&					line,
						 const tcu::IVec4&					renderTargetRect,
						 RasterizationInternalBuffers&		buffers)
{
	const int					numSamples			= renderTarget.getNumSamples();
	const float					depthClampMin		= de::min(state.viewport.zn, state.viewport.zf);
	const float					depthClampMax		= de::max(state.viewport.zn, state.viewport.zf);
	const bool					msaa				= numSamples > 1;
	FragmentShadingContext		shadingContext		(line.v0->outputs, line.v1->outputs, DE_NULL, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, line.v1->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples);
	SingleSampleLineRasterizer	aliasedRasterizer	(renderTargetRect);
	MultiSampleLineRasterizer	msaaRasterizer		(numSamples, renderTargetRect);

	// Initialize rasterization.
	if (msaa)
		msaaRasterizer.init(line.v0->position, line.v1->position, state.line.lineWidth);
	else
		aliasedRasterizer.init(line.v0->position, line.v1->position, state.line.lineWidth);

	for (;;)
	{
		const int	maxFragmentPackets		= (int)buffers.fragmentPackets.size();
		int			numRasterizedPackets	= 0;

		// Rasterize

		if (msaa)
			msaaRasterizer.rasterize	(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
		else
			aliasedRasterizer.rasterize	(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);

		// numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()

		if (!numRasterizedPackets)
			break; // Rasterization finished.

		// Shade

		program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);

		// Depth clamp
		if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);

		// Handle fragment shader outputs

		writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, rr::FACETYPE_FRONT, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
	}
}

void rasterizePrimitive (const RenderState&					state,
						 const RenderTarget&				renderTarget,
						 const Program&						program,
						 const pa::Point&					point,
						 const tcu::IVec4&					renderTargetRect,
						 RasterizationInternalBuffers&		buffers)
{
	const int			numSamples		= renderTarget.getNumSamples();
	const float			depthClampMin	= de::min(state.viewport.zn, state.viewport.zf);
	const float			depthClampMax	= de::max(state.viewport.zn, state.viewport.zf);
	TriangleRasterizer	rasterizer1		(renderTargetRect, numSamples, state.rasterization);
	TriangleRasterizer	rasterizer2		(renderTargetRect, numSamples, state.rasterization);

	// draw point as two triangles
	const float offset				= point.v0->pointSize / 2.0f;
	const tcu::Vec4		w0			= tcu::Vec4(point.v0->position.x() + offset, point.v0->position.y() + offset, point.v0->position.z(), point.v0->position.w());
	const tcu::Vec4		w1			= tcu::Vec4(point.v0->position.x() - offset, point.v0->position.y() + offset, point.v0->position.z(), point.v0->position.w());
	const tcu::Vec4		w2			= tcu::Vec4(point.v0->position.x() - offset, point.v0->position.y() - offset, point.v0->position.z(), point.v0->position.w());
	const tcu::Vec4		w3			= tcu::Vec4(point.v0->position.x() + offset, point.v0->position.y() - offset, point.v0->position.z(), point.v0->position.w());

	rasterizer1.init(w0, w1, w2);
	rasterizer2.init(w0, w2, w3);

	// Shading context
	FragmentShadingContext shadingContext(point.v0->outputs, DE_NULL, DE_NULL, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, point.v0->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples);

	// Execute rasterize - shade - write loop
	for (;;)
	{
		const int	maxFragmentPackets		= (int)buffers.fragmentPackets.size();
		int			numRasterizedPackets	= 0;

		// Rasterize both triangles

		rasterizer1.rasterize(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
		if (numRasterizedPackets != maxFragmentPackets)
		{
			float* const	depthBufferAppendPointer	= (buffers.fragmentDepthBuffer) ? (buffers.fragmentDepthBuffer + numRasterizedPackets*numSamples*4) : (DE_NULL);
			int				numRasterizedPackets2		= 0;

			rasterizer2.rasterize(&buffers.fragmentPackets[numRasterizedPackets], depthBufferAppendPointer, maxFragmentPackets - numRasterizedPackets, numRasterizedPackets2);

			numRasterizedPackets += numRasterizedPackets2;
		}

		// numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()

		if (!numRasterizedPackets)
			break; // Rasterization finished.

		// Shade

		program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);

		// Depth clamp
		if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);

		// Handle fragment shader outputs

		writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, rr::FACETYPE_FRONT, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
	}
}

template <typename ContainerType>
void rasterize (const RenderState&					state,
				const RenderTarget&					renderTarget,
				const Program&						program,
				const ContainerType&				list)
{
	const int						numSamples			= renderTarget.getNumSamples();
	const int						numFragmentOutputs	= (int)program.fragmentShader->getOutputs().size();
	const size_t					maxFragmentPackets	= 128;

	const tcu::IVec4				viewportRect		= tcu::IVec4(state.viewport.rect.left, state.viewport.rect.bottom, state.viewport.rect.width, state.viewport.rect.height);
	const tcu::IVec4				bufferRect			= getBufferSize(renderTarget.getColorBuffer(0));
	const tcu::IVec4				renderTargetRect	= rectIntersection(viewportRect, bufferRect);

	// shared buffers for all primitives
	std::vector<FragmentPacket>		fragmentPackets		(maxFragmentPackets);
	std::vector<GenericVec4>		shaderOutputs		(maxFragmentPackets*4*numFragmentOutputs);
	std::vector<Fragment>			shadedFragments		(maxFragmentPackets*4);
	std::vector<float>				depthValues			(0);
	float*							depthBufferPointer	= DE_NULL;

	RasterizationInternalBuffers	buffers;

	// calculate depth only if we have a depth buffer
	if (!isEmpty(renderTarget.getDepthBuffer()))
	{
		depthValues.resize(maxFragmentPackets*4*numSamples);
		depthBufferPointer = &depthValues[0];
	}

	// set buffers
	buffers.fragmentPackets.swap(fragmentPackets);
	buffers.shaderOutputs.swap(shaderOutputs);
	buffers.shadedFragments.swap(shadedFragments);
	buffers.fragmentDepthBuffer = depthBufferPointer;

	// rasterize
	for (typename ContainerType::const_iterator it = list.begin(); it != list.end(); ++it)
		rasterizePrimitive(state, renderTarget, program, *it, renderTargetRect, buffers);
}

/*--------------------------------------------------------------------*//*!
 * Draws transformed triangles, lines or points to render target
 *//*--------------------------------------------------------------------*/
template <typename ContainerType>
void drawBasicPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, ContainerType& primList, VertexPacketAllocator& vpalloc)
{
	const bool clipZ = !state.fragOps.depthClampEnabled;

	// Transform feedback

	// Flatshading
	flatshadeVertices(program, primList);

	// Clipping
	// \todo [jarkko] is creating & swapping std::vectors really a good solution?
	clipPrimitives(primList, program, clipZ, vpalloc);

	// Transform vertices to window coords
	transformClipCoordsToWindowCoords(state, primList);

	// Rasterize and paint
	rasterize(state, renderTarget, program, primList);
}

void copyVertexPacketPointers(const VertexPacket** dst, const pa::Point& in)
{
	dst[0] = in.v0;
}

void copyVertexPacketPointers(const VertexPacket** dst, const pa::Line& in)
{
	dst[0] = in.v0;
	dst[1] = in.v1;
}

void copyVertexPacketPointers(const VertexPacket** dst, const pa::Triangle& in)
{
	dst[0] = in.v0;
	dst[1] = in.v1;
	dst[2] = in.v2;
}

void copyVertexPacketPointers(const VertexPacket** dst, const pa::LineAdjacency& in)
{
	dst[0] = in.v0;
	dst[1] = in.v1;
	dst[2] = in.v2;
	dst[3] = in.v3;
}

void copyVertexPacketPointers(const VertexPacket** dst, const pa::TriangleAdjacency& in)
{
	dst[0] = in.v0;
	dst[1] = in.v1;
	dst[2] = in.v2;
	dst[3] = in.v3;
	dst[4] = in.v4;
	dst[5] = in.v5;
}

template <PrimitiveType DrawPrimitiveType> // \note DrawPrimitiveType  can only be Points, line_strip, or triangle_strip
void drawGeometryShaderOutputAsPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, VertexPacket* const* vertices, size_t numVertices, VertexPacketAllocator& vpalloc)
{
	// Run primitive assembly for generated stream

	const size_t															assemblerPrimitiveCount		= PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::getPrimitiveCount(numVertices);
	std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::BaseType>	inputPrimitives				(assemblerPrimitiveCount);

	PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::exec(inputPrimitives.begin(), vertices, numVertices, state.provokingVertexConvention); // \note input Primitives are baseType_t => only basic primitives (non adjacency) will compile

	// Make shared vertices distinct

	makeSharedVerticesDistinct(inputPrimitives, vpalloc);

	// Draw assembled primitives

	drawBasicPrimitives(state, renderTarget, program, inputPrimitives, vpalloc);
}

template <PrimitiveType DrawPrimitiveType>
void drawWithGeometryShader(const RenderState& state, const RenderTarget& renderTarget, const Program& program, std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::Type>& input, DrawContext& drawContext)
{
	// Vertices outputted by geometry shader may have different number of output variables than the original, create new memory allocator
	VertexPacketAllocator vpalloc(program.geometryShader->getOutputs().size());

	// Run geometry shader for all primitives
	GeometryEmitter					emitter			(vpalloc, program.geometryShader->getNumVerticesOut());
	std::vector<PrimitivePacket>	primitives		(input.size());
	const int						numInvocations	= (int)program.geometryShader->getNumInvocations();
	const int						verticesIn		= PrimitiveTypeTraits<DrawPrimitiveType>::Type::NUM_VERTICES;

	for (size_t primitiveNdx = 0; primitiveNdx < input.size(); ++primitiveNdx)
	{
		primitives[primitiveNdx].primitiveIDIn = drawContext.primitiveID++;
		copyVertexPacketPointers(primitives[primitiveNdx].vertices, input[primitiveNdx]);
	}

	if (primitives.empty())
		return;

	for (int invocationNdx = 0; invocationNdx < numInvocations; ++invocationNdx)
	{
		// Shading invocation

		program.geometryShader->shadePrimitives(emitter, verticesIn, &primitives[0], (int)primitives.size(), invocationNdx);

		// Find primitives in the emitted vertices

		std::vector<VertexPacket*> emitted;
		emitter.moveEmittedTo(emitted);

		for (size_t primitiveBegin = 0; primitiveBegin < emitted.size();)
		{
			size_t primitiveEnd;

			// Find primitive begin
			if (!emitted[primitiveBegin])
			{
				++primitiveBegin;
				continue;
			}

			// Find primitive end

			primitiveEnd = primitiveBegin + 1;
			for (; (primitiveEnd < emitted.size()) && emitted[primitiveEnd]; ++primitiveEnd); // find primitive end

			// Draw range [begin, end)

			switch (program.geometryShader->getOutputType())
			{
				case rr::GEOMETRYSHADEROUTPUTTYPE_POINTS:			drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_POINTS>			(state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
				case rr::GEOMETRYSHADEROUTPUTTYPE_LINE_STRIP:		drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_LINE_STRIP>		(state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
				case rr::GEOMETRYSHADEROUTPUTTYPE_TRIANGLE_STRIP:	drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP>	(state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
				default:
					DE_ASSERT(DE_FALSE);
			}

			// Next primitive
			primitiveBegin = primitiveEnd + 1;
		}
	}
}

/*--------------------------------------------------------------------*//*!
 * Assembles, tesselates, runs geometry shader and draws primitives of any type from vertex list.
 *//*--------------------------------------------------------------------*/
template <PrimitiveType DrawPrimitiveType>
void drawAsPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, VertexPacket* const* vertices, int numVertices, DrawContext& drawContext, VertexPacketAllocator& vpalloc)
{
	// Assemble primitives (deconstruct stips & loops)
	const size_t															assemblerPrimitiveCount		= PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::getPrimitiveCount(numVertices);
	std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::Type>		inputPrimitives				(assemblerPrimitiveCount);

	PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::exec(inputPrimitives.begin(), vertices, (size_t)numVertices, state.provokingVertexConvention);

	// Tesselate
	//if (state.tesselation)
	//	primList = state.tesselation.exec(primList);

	// Geometry shader
	if (program.geometryShader)
	{
		// If there is an active geometry shader, it will convert any primitive type to basic types
		drawWithGeometryShader<DrawPrimitiveType>(state, renderTarget, program, inputPrimitives, drawContext);
	}
	else
	{
		std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::BaseType> basePrimitives;

		// convert types from X_adjacency to X
		convertPrimitiveToBaseType(basePrimitives, inputPrimitives);

		// Make shared vertices distinct. Needed for that the translation to screen space happens only once per vertex, and for flatshading
		makeSharedVerticesDistinct(basePrimitives, vpalloc);

		// A primitive ID will be generated even if no geometry shader is active
		generatePrimitiveIDs(basePrimitives, drawContext);

		// Draw as a basic type
		drawBasicPrimitives(state, renderTarget, program, basePrimitives, vpalloc);
	}
}

bool isValidCommand (const DrawCommand& command, int numInstances)
{
	// numInstances should be valid
	if (numInstances < 1)
		return false;

	// Shaders should have the same varyings
	if (command.program.geometryShader)
	{
		if (command.program.vertexShader->getOutputs() != command.program.geometryShader->getInputs())
			return false;

		if (command.program.geometryShader->getOutputs() != command.program.fragmentShader->getInputs())
			return false;
	}
	else
	{
		if (command.program.vertexShader->getOutputs() != command.program.fragmentShader->getInputs())
			return false;
	}

	// Shader input/output types are set
	for (size_t varyingNdx = 0; varyingNdx < command.program.vertexShader->getInputs().size(); ++varyingNdx)
		if (command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
			command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
			command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
			return false;
	for (size_t varyingNdx = 0; varyingNdx < command.program.vertexShader->getOutputs().size(); ++varyingNdx)
		if (command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
			command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
			command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
			return false;

	for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getInputs().size(); ++varyingNdx)
		if (command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
			command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
			command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
			return false;
	for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getOutputs().size(); ++varyingNdx)
		if (command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
			command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
			command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
			return false;

	if (command.program.geometryShader)
	{
		for (size_t varyingNdx = 0; varyingNdx < command.program.geometryShader->getInputs().size(); ++varyingNdx)
			if (command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
				command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
				command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
				return false;
		for (size_t varyingNdx = 0; varyingNdx < command.program.geometryShader->getOutputs().size(); ++varyingNdx)
			if (command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
				command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
				command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
				return false;
	}

	// Enough vertex inputs?
	if ((size_t)command.numVertexAttribs < command.program.vertexShader->getInputs().size())
		return false;

	// There is a fragment output sink for each output?
	if ((size_t)command.renderTarget.getNumColorBuffers() < command.program.fragmentShader->getOutputs().size())
		return false;

	// All destination buffers should have same number of samples and same size
	for (int outputNdx = 0; outputNdx < command.renderTarget.getNumColorBuffers(); ++outputNdx)
	{
		if (getBufferSize(command.renderTarget.getColorBuffer(0)) != getBufferSize(command.renderTarget.getColorBuffer(outputNdx)))
			return false;

		if (command.renderTarget.getNumSamples() != command.renderTarget.getColorBuffer(outputNdx).getNumSamples())
			return false;
	}

	// All destination buffers should have same basic type as matching fragment output
	for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getOutputs().size(); ++varyingNdx)
	{
		const tcu::TextureChannelClass	colorbufferClass = tcu::getTextureChannelClass(command.renderTarget.getColorBuffer((int)varyingNdx).raw().getFormat().type);
		const GenericVecType			colorType		 = (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));

		if (command.program.fragmentShader->getOutputs()[varyingNdx].type != colorType)
			return false;
	}

	// Integer values are flatshaded
	for (size_t outputNdx = 0; outputNdx < command.program.vertexShader->getOutputs().size(); ++outputNdx)
	{
		if (!command.program.vertexShader->getOutputs()[outputNdx].flatshade &&
			(command.program.vertexShader->getOutputs()[outputNdx].type == GENERICVECTYPE_INT32 ||
			 command.program.vertexShader->getOutputs()[outputNdx].type == GENERICVECTYPE_UINT32))
			return false;
	}
	if (command.program.geometryShader)
		for (size_t outputNdx = 0; outputNdx < command.program.geometryShader->getOutputs().size(); ++outputNdx)
		{
			if (!command.program.geometryShader->getOutputs()[outputNdx].flatshade &&
				(command.program.geometryShader->getOutputs()[outputNdx].type == GENERICVECTYPE_INT32 ||
				 command.program.geometryShader->getOutputs()[outputNdx].type == GENERICVECTYPE_UINT32))
				return false;
		}

	// Draw primitive is valid for geometry shader
	if (command.program.geometryShader)
	{
		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_POINTS && command.primitives.getPrimitiveType() != PRIMITIVETYPE_POINTS)
			return false;

		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_LINES &&
			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINES &&
			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_STRIP &&
			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_LOOP))
			return false;

		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_TRIANGLES &&
			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLES &&
			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_STRIP &&
			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_FAN))
			return false;

		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_LINES_ADJACENCY &&
			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINES_ADJACENCY &&
			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_STRIP_ADJACENCY))
			return false;

		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_TRIANGLES_ADJACENCY &&
			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLES_ADJACENCY &&
			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY))
			return false;
	}

	return true;
}

} // anonymous

RenderTarget::RenderTarget (const MultisamplePixelBufferAccess& colorMultisampleBuffer,
							const MultisamplePixelBufferAccess& depthMultisampleBuffer,
							const MultisamplePixelBufferAccess& stencilMultisampleBuffer)
	: m_numColorBuffers	(1)
	, m_depthBuffer		(MultisamplePixelBufferAccess::fromMultisampleAccess(tcu::getEffectiveDepthStencilAccess(depthMultisampleBuffer.raw(), tcu::Sampler::MODE_DEPTH)))
	, m_stencilBuffer	(MultisamplePixelBufferAccess::fromMultisampleAccess(tcu::getEffectiveDepthStencilAccess(stencilMultisampleBuffer.raw(), tcu::Sampler::MODE_STENCIL)))
{
	m_colorBuffers[0] = colorMultisampleBuffer;
}

int RenderTarget::getNumSamples (void) const
{
	DE_ASSERT(m_numColorBuffers > 0);
	return m_colorBuffers[0].getNumSamples();
}

DrawIndices::DrawIndices (const deUint32* ptr, int baseVertex_)
	: indices	(ptr)
	, indexType	(INDEXTYPE_UINT32)
	, baseVertex(baseVertex_)
{
}

DrawIndices::DrawIndices (const deUint16* ptr, int baseVertex_)
	: indices	(ptr)
	, indexType	(INDEXTYPE_UINT16)
	, baseVertex(baseVertex_)
{
}

DrawIndices::DrawIndices (const deUint8* ptr, int baseVertex_)
	: indices	(ptr)
	, indexType	(INDEXTYPE_UINT8)
	, baseVertex(baseVertex_)
{
}

DrawIndices::DrawIndices (const void* ptr, IndexType type, int baseVertex_)
	: indices	(ptr)
	, indexType	(type)
	, baseVertex(baseVertex_)
{
}

PrimitiveList::PrimitiveList (PrimitiveType primitiveType, int numElements, const int firstElement)
	: m_primitiveType	(primitiveType)
	, m_numElements		(numElements)
	, m_indices			(DE_NULL)
	, m_indexType		(INDEXTYPE_LAST)
	, m_baseVertex		(firstElement)
{
	DE_ASSERT(numElements >= 0 && "Invalid numElements");
	DE_ASSERT(firstElement >= 0 && "Invalid firstElement");
}

PrimitiveList::PrimitiveList (PrimitiveType primitiveType, int numElements, const DrawIndices& indices)
	: m_primitiveType	(primitiveType)
	, m_numElements		((size_t)numElements)
	, m_indices			(indices.indices)
	, m_indexType		(indices.indexType)
	, m_baseVertex		(indices.baseVertex)
{
	DE_ASSERT(numElements >= 0 && "Invalid numElements");
}

size_t PrimitiveList::getIndex (size_t elementNdx) const
{
	// indices == DE_NULL interpreted as command.indices = [first (=baseVertex) + 0, first + 1, first + 2...]
	if (m_indices)
	{
		int index = m_baseVertex + (int)readIndexArray(m_indexType, m_indices, elementNdx);
		DE_ASSERT(index >= 0); // do not access indices < 0

		return (size_t)index;
	}
	else
		return (size_t)(m_baseVertex) + elementNdx;
}

bool PrimitiveList::isRestartIndex (size_t elementNdx, deUint32 restartIndex) const
{
	// implicit index or explicit index (without base vertex) equals restart
	if (m_indices)
		return readIndexArray(m_indexType, m_indices, elementNdx) == restartIndex;
	else
		return elementNdx == (size_t)restartIndex;
}

Renderer::Renderer (void)
{
}

Renderer::~Renderer (void)
{
}

void Renderer::draw (const DrawCommand& command) const
{
	drawInstanced(command, 1);
}

void Renderer::drawInstanced (const DrawCommand& command, int numInstances) const
{
	// Do not run bad commands
	{
		const bool validCommand = isValidCommand(command, numInstances);
		if (!validCommand)
		{
			DE_ASSERT(false);
			return;
		}
	}

	// Do not draw if nothing to draw
	{
		if (command.primitives.getNumElements() == 0 || numInstances == 0)
			return;
	}

	// Prepare transformation

	const size_t				numVaryings = command.program.vertexShader->getOutputs().size();
	VertexPacketAllocator		vpalloc(numVaryings);
	std::vector<VertexPacket*>	vertexPackets = vpalloc.allocArray(command.primitives.getNumElements());
	DrawContext					drawContext;

	for (int instanceID = 0; instanceID < numInstances; ++instanceID)
	{
		// Each instance has its own primitives
		drawContext.primitiveID = 0;

		for (size_t elementNdx = 0; elementNdx < command.primitives.getNumElements(); ++elementNdx)
		{
			int numVertexPackets = 0;

			// collect primitive vertices until restart

			while (elementNdx < command.primitives.getNumElements() &&
					!(command.state.restart.enabled && command.primitives.isRestartIndex(elementNdx, command.state.restart.restartIndex)))
			{
				// input
				vertexPackets[numVertexPackets]->instanceNdx	= instanceID;
				vertexPackets[numVertexPackets]->vertexNdx		= (int)command.primitives.getIndex(elementNdx);

				// output
				vertexPackets[numVertexPackets]->pointSize		= command.state.point.pointSize;	// default value from the current state
				vertexPackets[numVertexPackets]->position		= tcu::Vec4(0, 0, 0, 0);			// no undefined values

				++numVertexPackets;
				++elementNdx;
			}

			// Duplicated restart shade
			if (numVertexPackets == 0)
				continue;

			// \todo Vertex cache?

			// Transform vertices

			command.program.vertexShader->shadeVertices(command.vertexAttribs, &vertexPackets[0], numVertexPackets);

			// Draw primitives

			switch (command.primitives.getPrimitiveType())
			{
				case PRIMITIVETYPE_TRIANGLES:				{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLES>					(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				case PRIMITIVETYPE_TRIANGLE_STRIP:			{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP>			(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				case PRIMITIVETYPE_TRIANGLE_FAN:			{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_FAN>				(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				case PRIMITIVETYPE_LINES:					{ drawAsPrimitives<PRIMITIVETYPE_LINES>						(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				case PRIMITIVETYPE_LINE_STRIP:				{ drawAsPrimitives<PRIMITIVETYPE_LINE_STRIP>				(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				case PRIMITIVETYPE_LINE_LOOP:				{ drawAsPrimitives<PRIMITIVETYPE_LINE_LOOP>					(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				case PRIMITIVETYPE_POINTS:					{ drawAsPrimitives<PRIMITIVETYPE_POINTS>					(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				case PRIMITIVETYPE_LINES_ADJACENCY:			{ drawAsPrimitives<PRIMITIVETYPE_LINES_ADJACENCY>			(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				case PRIMITIVETYPE_LINE_STRIP_ADJACENCY:	{ drawAsPrimitives<PRIMITIVETYPE_LINE_STRIP_ADJACENCY>		(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				case PRIMITIVETYPE_TRIANGLES_ADJACENCY:		{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLES_ADJACENCY>		(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				case PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY:{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY>	(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
				default:
					DE_ASSERT(DE_FALSE);
			}
		}
	}
}

} // rr