/****************************************************************************
* Copyright (C) 2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
***************************************************************************/
#include "swr_context.h"
#include "swr_memory.h"
#include "swr_screen.h"
#include "swr_resource.h"
#include "swr_scratch.h"
#include "swr_query.h"
#include "swr_fence.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "api.h"
#include "backend.h"
#include "knobs.h"
static struct pipe_surface *
swr_create_surface(struct pipe_context *pipe,
struct pipe_resource *pt,
const struct pipe_surface *surf_tmpl)
{
struct pipe_surface *ps;
ps = CALLOC_STRUCT(pipe_surface);
if (ps) {
pipe_reference_init(&ps->reference, 1);
pipe_resource_reference(&ps->texture, pt);
ps->context = pipe;
ps->format = surf_tmpl->format;
if (pt->target != PIPE_BUFFER) {
assert(surf_tmpl->u.tex.level <= pt->last_level);
ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
ps->u.tex.level = surf_tmpl->u.tex.level;
ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
} else {
         /* For buffer surfaces, use the element count as the width so the
          * renderbuffer width is reported correctly. */
ps->width = surf_tmpl->u.buf.last_element
- surf_tmpl->u.buf.first_element + 1;
ps->height = pt->height0;
ps->u.buf.first_element = surf_tmpl->u.buf.first_element;
ps->u.buf.last_element = surf_tmpl->u.buf.last_element;
assert(ps->u.buf.first_element <= ps->u.buf.last_element);
assert(ps->u.buf.last_element < ps->width);
}
}
return ps;
}
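
/* Destroy a surface view, storing any dirty tiles back to its resource. */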
static void
swr_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf)
{
assert(surf->texture);
struct pipe_resource *resource = surf->texture;
/* If the resource has been drawn to, store tiles. */
swr_store_dirty_resource(pipe, resource, SWR_TILE_RESOLVED);
pipe_resource_reference(&resource, NULL);
FREE(surf);
}
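
/* Map a resource for CPU access. Dirty render-target tiles are stored out
 * first; unless an unsynchronized map is requested, the map waits for (or,
 * with DONTBLOCK, fails on) pending work that references the resource. */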
static void *
swr_transfer_map(struct pipe_context *pipe,
struct pipe_resource *resource,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **transfer)
{
   assert(resource);
   assert(level <= resource->last_level);

   struct swr_screen *screen = swr_screen(pipe->screen);
   struct swr_resource *spr = swr_resource(resource);
   struct pipe_transfer *pt;
   enum pipe_format format = resource->format;
/* If mapping an attached rendertarget, store tiles to surface and set
* postStoreTileState to SWR_TILE_INVALID so tiles get reloaded on next use
* and nothing needs to be done at unmap. */
swr_store_dirty_resource(pipe, resource, SWR_TILE_INVALID);
if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* If the resource is in use, wait on its fence before mapping. If a
       * non-blocking map was requested and the fence is still pending,
       * return NULL instead of waiting. */
if (usage & PIPE_TRANSFER_DONTBLOCK) {
if (swr_is_fence_pending(screen->flush_fence))
return NULL;
} else {
if (spr->status) {
            /* If no fence is pending yet, submit one so there is something
             * to wait on.
             * XXX: Remove once draw timestamps are finished. */
if (!swr_is_fence_pending(screen->flush_fence))
swr_fence_submit(swr_context(pipe), screen->flush_fence);
swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
swr_resource_unused(resource);
}
}
}
pt = CALLOC_STRUCT(pipe_transfer);
if (!pt)
return NULL;
pipe_resource_reference(&pt->resource, resource);
pt->usage = (pipe_transfer_usage)usage;
pt->level = level;
pt->box = *box;
pt->stride = spr->swr.pitch;
pt->layer_stride = spr->swr.qpitch * spr->swr.pitch;
   /* If mapping a combined depth/stencil resource for reading, interleave
    * the stencil bytes from the secondary surface into the depth surface
    * for the region being read. */
if (usage & PIPE_TRANSFER_READ && spr->has_depth && spr->has_stencil) {
size_t zbase, sbase;
for (int z = box->z; z < box->z + box->depth; z++) {
zbase = (z * spr->swr.qpitch + box->y) * spr->swr.pitch +
spr->mip_offsets[level];
sbase = (z * spr->secondary.qpitch + box->y) * spr->secondary.pitch +
spr->secondary_mip_offsets[level];
for (int y = box->y; y < box->y + box->height; y++) {
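            /* The stencil byte lives at offset 3 of each 32-bit Z24S8 texel
             * and at offset 4 of each 64-bit Z32F_S8X24 texel. */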
if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
for (int x = box->x; x < box->x + box->width; x++)
((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 4 * x + 3] =
((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x];
} else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
for (int x = box->x; x < box->x + box->width; x++)
((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 8 * x + 4] =
((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x];
}
zbase += spr->swr.pitch;
sbase += spr->secondary.pitch;
}
}
}
unsigned offset = box->z * pt->layer_stride +
util_format_get_nblocksy(format, box->y) * pt->stride +
util_format_get_stride(format, box->x);
*transfer = pt;
return (void*)(spr->swr.xpBaseAddress + offset + spr->mip_offsets[level]);
}
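
/* Copy the stencil bytes interleaved in the mapped depth/stencil surface
 * back out to the secondary stencil surface for the flushed region. */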
static void
swr_transfer_flush_region(struct pipe_context *pipe,
struct pipe_transfer *transfer,
const struct pipe_box *flush_box)
{
assert(transfer->resource);
assert(transfer->usage & PIPE_TRANSFER_WRITE);
struct swr_resource *spr = swr_resource(transfer->resource);
if (!spr->has_depth || !spr->has_stencil)
return;
size_t zbase, sbase;
struct pipe_box box = *flush_box;
box.x += transfer->box.x;
box.y += transfer->box.y;
box.z += transfer->box.z;
for (int z = box.z; z < box.z + box.depth; z++) {
zbase = (z * spr->swr.qpitch + box.y) * spr->swr.pitch +
spr->mip_offsets[transfer->level];
sbase = (z * spr->secondary.qpitch + box.y) * spr->secondary.pitch +
spr->secondary_mip_offsets[transfer->level];
for (int y = box.y; y < box.y + box.height; y++) {
if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
for (int x = box.x; x < box.x + box.width; x++)
((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x] =
((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 4 * x + 3];
} else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
for (int x = box.x; x < box.x + box.width; x++)
((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x] =
((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 8 * x + 4];
}
zbase += spr->swr.pitch;
sbase += spr->secondary.pitch;
}
}
}
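
/* Unmap a transfer; for writable depth/stencil maps without explicit
 * flushes, flush the whole mapped box back to the stencil surface. */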
static void
swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer)
{
assert(transfer->resource);
struct swr_resource *spr = swr_resource(transfer->resource);
   /* If a combined depth/stencil resource was mapped for writing without
    * explicit flushes, copy the interleaved stencil bytes back out to the
    * secondary surface for the written region. */
if (transfer->usage & PIPE_TRANSFER_WRITE &&
!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) &&
spr->has_depth && spr->has_stencil) {
struct pipe_box box;
u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height,
transfer->box.depth, &box);
swr_transfer_flush_region(pipe, transfer, &box);
}
pipe_resource_reference(&transfer->resource, NULL);
FREE(transfer);
}
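
/* Copy a region between resources. Any dirty render-target tiles are
 * stored out and pending work is fenced off before the CPU copy. */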
static void
swr_resource_copy(struct pipe_context *pipe,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx,
unsigned dsty,
unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct swr_screen *screen = swr_screen(pipe->screen);
/* If either the src or dst is a renderTarget, store tiles before copy */
swr_store_dirty_resource(pipe, src, SWR_TILE_RESOLVED);
swr_store_dirty_resource(pipe, dst, SWR_TILE_RESOLVED);
swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
swr_resource_unused(src);
swr_resource_unused(dst);
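   /* Only same-kind copies (buffer-to-buffer or texture-to-texture) are
    * handled; mixed buffer/texture copies fall through unimplemented. */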
if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER)
|| (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) {
util_resource_copy_region(
pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box);
return;
}
debug_printf("unhandled swr_resource_copy\n");
}
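
/* Blit between resources: resolve multisampled color sources, try a simple
 * copy-region first, then fall back to the util blitter. */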
static void
swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
{
struct swr_context *ctx = swr_context(pipe);
/* Make a copy of the const blit_info, so we can modify it */
struct pipe_blit_info info = *blit_info;
if (info.render_condition_enable && !swr_check_render_cond(pipe))
return;
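   /* Multisampled color sources are resolved as part of the surface store;
    * depth/stencil and pure-integer formats can't be resolved this way. */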
if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1
&& !util_format_is_depth_or_stencil(info.src.resource->format)
&& !util_format_is_pure_integer(info.src.resource->format)) {
debug_printf("swr_blit: color resolve : %d -> %d\n",
info.src.resource->nr_samples, info.dst.resource->nr_samples);
/* Resolve is done as part of the surface store. */
swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED);
struct pipe_resource *src_resource = info.src.resource;
struct pipe_resource *resolve_target =
swr_resource(src_resource)->resolve_target;
/* The resolve target becomes the new source for the blit. */
info.src.resource = resolve_target;
}
if (util_try_blit_via_copy_region(pipe, &info)) {
return; /* done */
}
if (info.mask & PIPE_MASK_S) {
debug_printf("swr: cannot blit stencil, skipping\n");
info.mask &= ~PIPE_MASK_S;
}
if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
debug_printf("swr: blit unsupported %s -> %s\n",
util_format_short_name(info.src.resource->format),
util_format_short_name(info.dst.resource->format));
return;
}
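   /* Turn off stats collection so the blitter's internal draws don't
    * pollute active query results; re-enabled after the blit. */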
if (ctx->active_queries) {
ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, FALSE);
ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, FALSE);
}
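   /* Save every piece of state the util blitter may clobber; the blitter
    * restores it when the blit completes. */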
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer);
util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems);
util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs);
util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);
util_blitter_save_so_targets(
ctx->blitter,
ctx->num_so_targets,
(struct pipe_stream_output_target **)ctx->so_targets);
util_blitter_save_rasterizer(ctx->blitter, (void *)ctx->rasterizer);
util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
util_blitter_save_fragment_shader(ctx->blitter, ctx->fs);
util_blitter_save_blend(ctx->blitter, (void *)ctx->blend);
util_blitter_save_depth_stencil_alpha(ctx->blitter,
(void *)ctx->depth_stencil);
util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer);
util_blitter_save_fragment_sampler_states(
ctx->blitter,
ctx->num_samplers[PIPE_SHADER_FRAGMENT],
(void **)ctx->samplers[PIPE_SHADER_FRAGMENT]);
util_blitter_save_fragment_sampler_views(
ctx->blitter,
ctx->num_sampler_views[PIPE_SHADER_FRAGMENT],
ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
util_blitter_save_render_condition(ctx->blitter,
ctx->render_cond_query,
ctx->render_cond_cond,
ctx->render_cond_mode);
util_blitter_blit(ctx->blitter, &info);
if (ctx->active_queries) {
ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, TRUE);
ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, TRUE);
}
}
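
/* Destroy the context: release framebuffer and sampler-view references,
 * idle and destroy the SWR core context, then free the context storage. */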
static void
swr_destroy(struct pipe_context *pipe)
{
struct swr_context *ctx = swr_context(pipe);
struct swr_screen *screen = swr_screen(pipe->screen);
if (ctx->blitter)
util_blitter_destroy(ctx->blitter);
for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
if (ctx->framebuffer.cbufs[i]) {
struct swr_resource *res = swr_resource(ctx->framebuffer.cbufs[i]->texture);
         /* Clear curr_pipe so the resource doesn't keep a dangling pointer
          * to the destroyed context. */
res->curr_pipe = NULL;
pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL);
}
}
if (ctx->framebuffer.zsbuf) {
struct swr_resource *res = swr_resource(ctx->framebuffer.zsbuf->texture);
      /* Clear curr_pipe so the resource doesn't keep a dangling pointer
       * to the destroyed context. */
res->curr_pipe = NULL;
pipe_surface_reference(&ctx->framebuffer.zsbuf, NULL);
}
   for (unsigned i = 0; i < ARRAY_SIZE(ctx->sampler_views[0]); i++) {
      pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL);
      pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_VERTEX][i], NULL);
   }
if (ctx->pipe.stream_uploader)
u_upload_destroy(ctx->pipe.stream_uploader);
   /* Idle the core after destroying buffer resources, but before deleting
    * the context. Destroying resources may have issued StoreTiles work. */
   if (ctx->swrContext) {
      ctx->api.pfnSwrWaitForIdle(ctx->swrContext);
      ctx->api.pfnSwrDestroyContext(ctx->swrContext);
   }
delete ctx->blendJIT;
swr_destroy_scratch_buffers(ctx);
/* Only update screen->pipe if current context is being destroyed */
assert(screen);
if (screen->pipe == pipe)
screen->pipe = NULL;
AlignedFree(ctx);
}
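
/* Record conditional-rendering state; swr_check_render_cond() consults it
 * at draw, clear, and blit time. */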
static void
swr_render_condition(struct pipe_context *pipe,
struct pipe_query *query,
boolean condition,
enum pipe_render_cond_flag mode)
{
struct swr_context *ctx = swr_context(pipe);
ctx->render_cond_query = query;
ctx->render_cond_mode = mode;
ctx->render_cond_cond = condition;
}
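
/* SWR core callback: accumulate backend statistics for a completed draw
 * into the query result bound to the draw context. */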
static void
swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)
{
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
if (!pDC)
return;
struct swr_query_result *pqr = pDC->pStats;
SWR_STATS *pSwrStats = &pqr->core;
pSwrStats->DepthPassCount += pStats->DepthPassCount;
pSwrStats->PsInvocations += pStats->PsInvocations;
pSwrStats->CsInvocations += pStats->CsInvocations;
}
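
/* SWR core callback: accumulate frontend statistics. Updates are atomic
 * because multiple frontend worker threads may report concurrently. */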
static void
swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)
{
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
if (!pDC)
return;
struct swr_query_result *pqr = pDC->pStats;
SWR_STATS_FE *pSwrStats = &pqr->coreFE;
p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices);
p_atomic_add(&pSwrStats->IaPrimitives, pStats->IaPrimitives);
p_atomic_add(&pSwrStats->VsInvocations, pStats->VsInvocations);
p_atomic_add(&pSwrStats->HsInvocations, pStats->HsInvocations);
p_atomic_add(&pSwrStats->DsInvocations, pStats->DsInvocations);
p_atomic_add(&pSwrStats->GsInvocations, pStats->GsInvocations);
p_atomic_add(&pSwrStats->CInvocations, pStats->CInvocations);
p_atomic_add(&pSwrStats->CPrimitives, pStats->CPrimitives);
p_atomic_add(&pSwrStats->GsPrimitives, pStats->GsPrimitives);
for (unsigned i = 0; i < 4; i++) {
p_atomic_add(&pSwrStats->SoPrimStorageNeeded[i],
pStats->SoPrimStorageNeeded[i]);
p_atomic_add(&pSwrStats->SoNumPrimsWritten[i],
pStats->SoNumPrimsWritten[i]);
}
}
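
/* Create the gallium context: allocate SIMD-aligned storage, create the
 * underlying SWR core context, and wire up the pipe_context vtable. */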
struct pipe_context *
swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
{
   struct swr_context *ctx = (struct swr_context *)
      AlignedMalloc(sizeof(struct swr_context), KNOB_SIMD_BYTES);
   if (!ctx)
      return NULL;
   memset(ctx, 0, sizeof(struct swr_context));
swr_screen(p_screen)->pfnSwrGetInterface(ctx->api);
ctx->swrDC.pAPI = &ctx->api;
ctx->blendJIT =
new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT;
SWR_CREATECONTEXT_INFO createInfo;
memset(&createInfo, 0, sizeof(createInfo));
createInfo.privateStateSize = sizeof(swr_draw_context);
createInfo.pfnLoadTile = swr_LoadHotTile;
createInfo.pfnStoreTile = swr_StoreHotTile;
createInfo.pfnClearTile = swr_StoreHotTileClear;
createInfo.pfnUpdateStats = swr_UpdateStats;
createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
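   /* Bound the SWR core's worker threading with the configured knobs. */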
SWR_THREADING_INFO threadingInfo {0};
threadingInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS;
threadingInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES;
threadingInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;
threadingInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE;
threadingInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED;
// Use non-standard settings for KNL
if (swr_screen(p_screen)->is_knl)
{
if (nullptr == getenv("KNOB_MAX_THREADS_PER_CORE"))
threadingInfo.MAX_THREADS_PER_CORE = 2;
if (nullptr == getenv("KNOB_MAX_DRAWS_IN_FLIGHT"))
{
ctx->max_draws_in_flight = 2048;
createInfo.MAX_DRAWS_IN_FLIGHT = ctx->max_draws_in_flight;
}
}
createInfo.pThreadInfo = &threadingInfo;
ctx->swrContext = ctx->api.pfnSwrCreateContext(&createInfo);
ctx->api.pfnSwrInit();
if (ctx->swrContext == NULL)
goto fail;
ctx->pipe.screen = p_screen;
ctx->pipe.destroy = swr_destroy;
ctx->pipe.priv = priv;
ctx->pipe.create_surface = swr_create_surface;
ctx->pipe.surface_destroy = swr_surface_destroy;
ctx->pipe.transfer_map = swr_transfer_map;
ctx->pipe.transfer_unmap = swr_transfer_unmap;
ctx->pipe.transfer_flush_region = swr_transfer_flush_region;
ctx->pipe.buffer_subdata = u_default_buffer_subdata;
ctx->pipe.texture_subdata = u_default_texture_subdata;
ctx->pipe.clear_texture = util_clear_texture;
ctx->pipe.resource_copy_region = swr_resource_copy;
ctx->pipe.render_condition = swr_render_condition;
swr_state_init(&ctx->pipe);
swr_clear_init(&ctx->pipe);
swr_draw_init(&ctx->pipe);
swr_query_init(&ctx->pipe);
ctx->pipe.stream_uploader = u_upload_create_default(&ctx->pipe);
if (!ctx->pipe.stream_uploader)
goto fail;
ctx->pipe.const_uploader = ctx->pipe.stream_uploader;
ctx->pipe.blit = swr_blit;
ctx->blitter = util_blitter_create(&ctx->pipe);
if (!ctx->blitter)
goto fail;
swr_init_scratch_buffers(ctx);
return &ctx->pipe;
fail:
/* Should really validate the init steps and fail gracefully */
swr_destroy(&ctx->pipe);
return NULL;
}