// Copyright 2016 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "QuadRasterizer.hpp" #include "Primitive.hpp" #include "Renderer.hpp" #include "Pipeline/Constants.hpp" #include "System/Math.hpp" #include "Vulkan/VkDebug.hpp" namespace sw { extern bool veryEarlyDepthTest; extern bool complementaryDepthBuffer; extern bool fullPixelPositionRegister; extern int clusterCount; QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader) : state(state), shader(pixelShader) { } QuadRasterizer::~QuadRasterizer() { } void QuadRasterizer::generate() { #if PERF_PROFILE for(int i = 0; i < PERF_TIMERS; i++) { cycles[i] = 0; } Long pixelTime = Ticks(); #endif constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); occlusion = 0; int clusterCount = Renderer::getClusterCount(); Do { Int yMin = *Pointer<Int>(primitive + OFFSET(Primitive,yMin)); Int yMax = *Pointer<Int>(primitive + OFFSET(Primitive,yMax)); Int cluster2 = cluster + cluster; yMin += clusterCount * 2 - 2 - cluster2; yMin &= -clusterCount * 2; yMin += cluster2; If(yMin < yMax) { rasterize(yMin, yMax); } primitive += sizeof(Primitive) * state.multiSample; count--; } Until(count == 0) if(state.occlusionEnabled) { UInt clusterOcclusion = *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster); clusterOcclusion += occlusion; *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster) = clusterOcclusion; } #if PERF_PROFILE cycles[PERF_PIXEL] = Ticks() - pixelTime; for(int i = 0; i < PERF_TIMERS; i++) { *Pointer<Long>(data + OFFSET(DrawData,cycles[i]) + 8 * cluster) += cycles[i]; } #endif Return(); } void QuadRasterizer::rasterize(Int &yMin, Int &yMax) { Pointer<Byte> cBuffer[RENDERTARGETS]; Pointer<Byte> zBuffer; Pointer<Byte> sBuffer; for(int index = 0; index < RENDERTARGETS; index++) { if(state.colorWriteActive(index)) { cBuffer[index] = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); } } if(state.depthTestActive) { zBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); } if(state.stencilActive) { sBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)); } Int y = yMin; Do { Int x0a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span))); Int x0b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span))); Int x0 = Min(x0a, x0b); for(unsigned int q = 1; q < state.multiSample; q++) { x0a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span))); x0b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span))); x0 = Min(x0, Min(x0a, x0b)); } x0 &= 0xFFFFFFFE; Int x1a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span))); Int x1b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span))); Int x1 = Max(x1a, x1b); for(unsigned int q = 1; q < state.multiSample; q++) { x1a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span))); x1b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span))); x1 = Max(x1, Max(x1a, x1b)); } Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16); if(interpolateZ()) { for(unsigned int q = 0; q < state.multiSample; q++) { Float4 y = yyyy; if(state.multiSample > 1) { y -= *Pointer<Float4>(constants + OFFSET(Constants,Y) + q * sizeof(float4)); } Dz[q] = *Pointer<Float4>(primitive + OFFSET(Primitive,z.C), 16) + y * *Pointer<Float4>(primitive + OFFSET(Primitive,z.B), 16); } } if(veryEarlyDepthTest && state.multiSample == 1 && !state.depthOverride) { if(!state.stencilActive && state.depthTestActive && (state.depthCompareMode == VK_COMPARE_OP_LESS_OR_EQUAL || state.depthCompareMode == VK_COMPARE_OP_LESS)) // FIXME: Both modes ok? { Float4 xxxx = Float4(Float(x0)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16); Pointer<Byte> buffer; Int pitch; if(!state.quadLayoutDepthBuffer) { buffer = zBuffer + 4 * x0; pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); } else { buffer = zBuffer + 8 * x0; } For(Int x = x0, x < x1, x += 2) { Float4 z = interpolate(xxxx, Dz[0], z, primitive + OFFSET(Primitive,z), false, false, state.depthClamp); Float4 zValue; if(!state.quadLayoutDepthBuffer) { // FIXME: Properly optimizes? zValue.xy = *Pointer<Float4>(buffer); zValue.zw = *Pointer<Float4>(buffer + pitch - 8); } else { zValue = *Pointer<Float4>(buffer, 16); } Int4 zTest; if(complementaryDepthBuffer) { zTest = CmpLE(zValue, z); } else { zTest = CmpNLT(zValue, z); } Int zMask = SignMask(zTest); If(zMask == 0) { x0 += 2; } Else { x = x1; } xxxx += Float4(2); if(!state.quadLayoutDepthBuffer) { buffer += 8; } else { buffer += 16; } } } } If(x0 < x1) { if(interpolateW()) { Dw = *Pointer<Float4>(primitive + OFFSET(Primitive,w.C), 16) + yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,w.B), 16); } for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) { for(int component = 0; component < 4; component++) { if(state.interpolant[interpolant].component & (1 << component)) { Dv[interpolant][component] = *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].C), 16); if(!(state.interpolant[interpolant].flat & (1 << component))) { Dv[interpolant][component] += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].B), 16); } } } } if(state.fog.component) { Df = *Pointer<Float4>(primitive + OFFSET(Primitive,f.C), 16); if(!state.fog.flat) { Df += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,f.B), 16); } } Short4 xLeft[4]; Short4 xRight[4]; for(unsigned int q = 0; q < state.multiSample; q++) { xLeft[q] = *Pointer<Short4>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline) + y * sizeof(Primitive::Span)); xRight[q] = xLeft[q]; xLeft[q] = Swizzle(xLeft[q], 0xA0) - Short4(1, 2, 1, 2); xRight[q] = Swizzle(xRight[q], 0xF5) - Short4(0, 1, 0, 1); } For(Int x = x0, x < x1, x += 2) { Short4 xxxx = Short4(x); Int cMask[4]; for(unsigned int q = 0; q < state.multiSample; q++) { Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx); cMask[q] = SignMask(PackSigned(mask, mask)) & 0x0000000F; } quad(cBuffer, zBuffer, sBuffer, cMask, x, y); } } int clusterCount = Renderer::getClusterCount(); for(int index = 0; index < RENDERTARGETS; index++) { if(state.colorWriteActive(index)) { cBuffer[index] += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])) << (1 + sw::log2(clusterCount)); // FIXME: Precompute } } if(state.depthTestActive) { zBuffer += *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute } if(state.stencilActive) { sBuffer += *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute } y += 2 * clusterCount; } Until(y >= yMax) } Float4 QuadRasterizer::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective, bool clamp) { Float4 interpolant = D; if(!flat) { interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16); if(perspective) { interpolant *= rhw; } } if(clamp) { interpolant = Min(Max(interpolant, Float4(0.0f)), Float4(1.0f)); } return interpolant; } bool QuadRasterizer::interpolateZ() const { return state.depthTestActive || (shader && shader->isVPosDeclared() && fullPixelPositionRegister); } bool QuadRasterizer::interpolateW() const { return state.perspective || (shader && shader->isVPosDeclared() && fullPixelPositionRegister); } }