C++程序  |  318行  |  12.67 KB

/****************************************************************************
* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file archrast.cpp
*
* @brief Implementation for archrast.
*
******************************************************************************/
#include <atomic>

#include "common/os.h"
#include "archrast/archrast.h"
#include "archrast/eventmanager.h"
#include "gen_ar_eventhandlerfile.hpp"

namespace ArchRast
{
    //////////////////////////////////////////////////////////////////////////
    /// @brief struct that keeps track of depth and stencil event information
    struct DepthStencilStats
    {
        uint32_t earlyZTestPassCount = 0;
        uint32_t earlyZTestFailCount = 0;
        uint32_t lateZTestPassCount = 0;
        uint32_t lateZTestFailCount = 0;
        uint32_t earlyStencilTestPassCount = 0;
        uint32_t earlyStencilTestFailCount = 0;
        uint32_t lateStencilTestPassCount = 0;
        uint32_t lateStencilTestFailCount = 0;
    };

    struct CStats
    {
        uint32_t clippedVerts = 0;
    };

    struct TEStats
    {
        uint32_t inputPrims = 0;
        //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
    };

    struct GSStats
    {
        uint32_t inputPrimCount;
        uint32_t primGeneratedCount;
        uint32_t vertsInput;
    };

    //////////////////////////////////////////////////////////////////////////
    /// @brief Event handler that saves stat events to event files. This
    ///        handler filters out unwanted events.
    class EventHandlerStatsFile : public EventHandlerFile
    {
    public:
        EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) {}

        // These are events that we're not interested in saving in stats event files.
        virtual void Handle(const Start& event) {}
        virtual void Handle(const End& event) {}

        virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
        {
            //earlyZ test compute
            mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
            mDSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);

            //earlyStencil test compute
            mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
            mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
            mNeedFlush = true;
        }

        virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
        {
            //earlyZ test compute
            mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
            mDSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);

            //earlyStencil test compute
            mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
            mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
            mNeedFlush = true;
        }

        virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
        {
            //earlyZ test compute
            mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
            mDSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);

            //earlyStencil test compute
            mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
            mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
            mNeedFlush = true;
        }

        virtual void Handle(const LateDepthStencilInfoSingleSample& event)
        {
            //lateZ test compute
            mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
            mDSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);

            //lateStencil test compute
            mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
            mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
            mNeedFlush = true;
        }

        virtual void Handle(const LateDepthStencilInfoSampleRate& event)
        {
            //lateZ test compute
            mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
            mDSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);

            //lateStencil test compute
            mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
            mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
            mNeedFlush = true;
        }

        virtual void Handle(const LateDepthStencilInfoNullPS& event)
        {
            //lateZ test compute
            mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
            mDSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);

            //lateStencil test compute
            mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
            mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
            mNeedFlush = true;
        }

        virtual void Handle(const EarlyDepthInfoPixelRate& event)
        {
            //earlyZ test compute
            mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
            mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
            mNeedFlush = true;
        }


        virtual void Handle(const LateDepthInfoPixelRate& event)
        {
            //lateZ test compute
            mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
            mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
            mNeedFlush = true;
        }


        // Flush cached events for this draw
        virtual void FlushDraw(uint32_t drawId)
        {
            if (mNeedFlush == false) return;

            //singleSample
            EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
            EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
            EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
            EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));

            //sampleRate
            EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
            EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
            EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
            EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));

            //pixelRate
            EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
            EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));


            //NullPS
            EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
            EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));

            //Reset Internal Counters
            mDSSingleSample = {};
            mDSSampleRate = {};
            mDSPixelRate = {};
            mDSNullPS = {};

            mNeedFlush = false;
        }

        virtual void Handle(const FrontendDrawEndEvent& event)
        {
            //Clipper
            EventHandlerFile::Handle(VertsClipped(event.data.drawId, mClipper.clippedVerts));

            //Tesselator
            EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));

            //Geometry Shader
            EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
            EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
            EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));

            //Reset Internal Counters
            mClipper = {};
            mTS = {};
            mGS = {};
        }

        virtual void Handle(const GSPrimInfo& event)
        {
            mGS.inputPrimCount += event.data.inputPrimCount;
            mGS.primGeneratedCount += event.data.primGeneratedCount;
            mGS.vertsInput += event.data.vertsInput;
        }

        virtual void Handle(const ClipVertexCount& event)
        {
            mClipper.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim);
        }

        virtual void Handle(const TessPrimCount& event)
        {
            mTS.inputPrims += event.data.primCount;
        }

    protected:
        bool mNeedFlush;
        // Per draw stats
        DepthStencilStats mDSSingleSample = {};
        DepthStencilStats mDSSampleRate = {};
        DepthStencilStats mDSPixelRate = {};
        DepthStencilStats mDSNullPS = {};
        DepthStencilStats mDSOmZ = {};
        CStats mClipper = {};
        TEStats mTS = {};
        GSStats mGS = {};

    };

    static EventManager* FromHandle(HANDLE hThreadContext)
    {
        return reinterpret_cast<EventManager*>(hThreadContext);
    }

    // Construct an event manager and associate a handler with it.
    HANDLE CreateThreadContext(AR_THREAD type)
    {
        // Can we assume single threaded here?
        static std::atomic<uint32_t> counter(0);
        uint32_t id = counter.fetch_add(1);

        EventManager* pManager = new EventManager();
        EventHandlerFile* pHandler = new EventHandlerStatsFile(id);

        if (pManager && pHandler)
        {
            pManager->Attach(pHandler);

            if (type == AR_THREAD::API)
            {
                pHandler->Handle(ThreadStartApiEvent());
            }
            else
            {
                pHandler->Handle(ThreadStartWorkerEvent());
            }
            pHandler->MarkHeader();

            return pManager;
        }

        SWR_INVALID("Failed to register thread.");
        return nullptr;
    }

    void DestroyThreadContext(HANDLE hThreadContext)
    {
        EventManager* pManager = FromHandle(hThreadContext);
        SWR_ASSERT(pManager != nullptr);

        delete pManager;
    }

    // Dispatch event for this thread.
    void Dispatch(HANDLE hThreadContext, const Event& event)
    {
        EventManager* pManager = FromHandle(hThreadContext);
        SWR_ASSERT(pManager != nullptr);

        pManager->Dispatch(event);
    }

    // Flush for this thread.
    void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
    {
        EventManager* pManager = FromHandle(hThreadContext);
        SWR_ASSERT(pManager != nullptr);

        pManager->FlushDraw(drawId);
    }
}