/*
* Mesa 3-D graphics library
*
* Copyright 2003 VMware, Inc.
* Copyright 2009 VMware, Inc.
* All Rights Reserved.
* Copyright (C) 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/glheader.h"
#include "main/context.h"
#include "main/varray.h"
#include "main/macros.h"
#include "main/sse_minmax.h"
#include "x86/common_x86_asm.h"
#include "util/hash_table.h"
/**
 * Lookup key for a buffer object's min/max index cache.
 *
 * A key identifies one scanned run of indices.  Keys are hashed over the
 * raw bytes of the whole structure (see vbo_minmax_cache_hash()) and
 * compared member by member (see vbo_minmax_cache_key_equal()).
 *
 * NOTE(review): because the hash covers sizeof(struct) bytes, any padding
 * inside this struct would have to be initialized consistently; the members
 * are assigned individually by the code that fills keys -- confirm there is
 * no padding on the supported ABIs before adding or reordering members.
 */
struct minmax_cache_key {
/* Start of the index run; callers pass ib->ptr + start * index_size. */
GLintptr offset;
/* Number of indices in the run. */
GLuint count;
/* Size of one index; the scanning code handles the values 1, 2 and 4. */
unsigned index_size;
};
/**
 * Value stored in a buffer object's min/max cache hash table.
 *
 * Entries are allocated with MALLOC_STRUCT() in vbo_minmax_cache_store(),
 * inserted with a pointer to their own embedded key as the table key, and
 * released with free() by vbo_minmax_cache_delete_entry().
 */
struct minmax_cache_entry {
/* The key this result was computed for; also serves as the table key. */
struct minmax_cache_key key;
/* Smallest index found in the run described by key. */
GLuint min;
/* Largest index found in the run described by key. */
GLuint max;
};
/**
 * Hash callback for the min/max cache table.
 *
 * The digest is computed over the raw bytes of the entire key structure.
 *
 * \param key  key to hash
 * \return 32-bit hash value produced by _mesa_hash_data()
 */
static uint32_t
vbo_minmax_cache_hash(const struct minmax_cache_key *key)
{
   const uint32_t digest = _mesa_hash_data(key, sizeof *key);

   return digest;
}
/**
 * Equality callback for the min/max cache table.
 *
 * \param a  first key
 * \param b  second key
 * \return true when offset, count and index_size all match
 */
static bool
vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
                           const struct minmax_cache_key *b)
{
   if (a->offset != b->offset)
      return false;

   if (a->count != b->count)
      return false;

   return a->index_size == b->index_size;
}
/**
 * Per-entry destructor passed to the hash table clear/destroy functions.
 *
 * Releases the data pointer attached to the table entry.  The key is not
 * freed separately: in this file it is embedded in the data allocation.
 *
 * \param entry  hash table entry being removed
 */
static void
vbo_minmax_cache_delete_entry(struct hash_entry *entry)
{
   void *payload = entry->data;

   free(payload);
}
/**
 * Decide whether min/max results for this buffer object may be cached.
 *
 * Caching is refused when the buffer's usage history contains any of the
 * usage flags tested below (including USAGE_DISABLE_MINMAX_CACHE), or when
 * the MAP_USER mapping has both GL_MAP_PERSISTENT_BIT and GL_MAP_WRITE_BIT
 * set in its access flags.
 *
 * NOTE(review): presumably these are the cases in which buffer contents can
 * change without the cache being marked dirty -- confirm.
 *
 * \param bufferObj  buffer object holding the index data
 * \return GL_TRUE if the cache may be used, GL_FALSE otherwise
 */
static GLboolean
vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
{
   const bool excluded_usage =
      (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
                                  USAGE_ATOMIC_COUNTER_BUFFER |
                                  USAGE_SHADER_STORAGE_BUFFER |
                                  USAGE_TRANSFORM_FEEDBACK_BUFFER |
                                  USAGE_PIXEL_PACK_BUFFER |
                                  USAGE_DISABLE_MINMAX_CACHE)) != 0;

   /* Only evaluated when the usage history did not already rule caching out. */
   const bool persistent_write_mapping =
      !excluded_usage &&
      (bufferObj->Mappings[MAP_USER].AccessFlags &
       (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
      (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT);

   return (excluded_usage || persistent_write_mapping) ? GL_FALSE : GL_TRUE;
}
/**
 * Destroy the min/max cache attached to a buffer object.
 *
 * The hash table is destroyed, with every entry's data released through
 * vbo_minmax_cache_delete_entry(), and bufferObj->MinMaxCache is reset to
 * NULL so that a new table can be created on the next store.
 *
 * This function does not lock bufferObj->Mutex itself; the call site in
 * vbo_get_minmax_cached() invokes it with that mutex already held.
 *
 * NOTE(review): calling this when MinMaxCache is already NULL relies on
 * _mesa_hash_table_destroy() accepting a NULL table -- confirm.
 *
 * \param bufferObj  buffer object whose cache is to be destroyed
 */
void
vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
{
_mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
bufferObj->MinMaxCache = NULL;
}
/**
 * Look up a previously computed min/max pair in a buffer object's cache.
 *
 * Returns immediately with GL_FALSE when the buffer has no cache table or
 * when vbo_use_minmax_cache() refuses caching.  Otherwise the lookup runs
 * with bufferObj->Mutex held.
 *
 * If the cache has been marked dirty, no lookup is performed: the cache is
 * either disabled permanently for this buffer (when misses dominate hits)
 * or emptied and the request counted as a miss.
 *
 * \param bufferObj   buffer object holding the index data
 * \param index_size  size of one index
 * \param offset      start of the index run (part of the cache key)
 * \param count       number of indices in the run
 * \param min_index   receives the cached minimum on a hit
 * \param max_index   receives the cached maximum on a hit
 * \return GL_TRUE on a cache hit (outputs written), GL_FALSE otherwise
 *         (outputs untouched)
 */
static GLboolean
vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
                      unsigned index_size, GLintptr offset, GLuint count,
                      GLuint *min_index, GLuint *max_index)
{
   GLboolean hit = GL_FALSE;

   if (!bufferObj->MinMaxCache || !vbo_use_minmax_cache(bufferObj))
      return GL_FALSE;

   simple_mtx_lock(&bufferObj->Mutex);

   if (bufferObj->MinMaxCacheDirty) {
      /* When hits are asymptotically outnumbered by misses, the application
       * is using this BO for streaming, so give up on caching for good.
       *
       * A buffer-size worth of initial optimism is granted so that
       * applications interleaving draw calls with glBufferSubData during
       * warmup are not penalized.
       */
      unsigned optimism = bufferObj->Size;
      const bool misses_dominate =
         bufferObj->MinMaxCacheMissIndices > optimism &&
         bufferObj->MinMaxCacheHitIndices <
            bufferObj->MinMaxCacheMissIndices - optimism;

      if (misses_dominate) {
         bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
         vbo_delete_minmax_cache(bufferObj);

         /* Disabling the cache leaves the hit/miss counters untouched. */
         simple_mtx_unlock(&bufferObj->Mutex);
         return GL_FALSE;
      }

      /* Stale contents: drop every entry and account for this as a miss. */
      _mesa_hash_table_clear(bufferObj->MinMaxCache,
                             vbo_minmax_cache_delete_entry);
      bufferObj->MinMaxCacheDirty = false;
   } else {
      struct minmax_cache_key probe;
      struct hash_entry *match;
      uint32_t probe_hash;

      probe.offset = offset;
      probe.count = count;
      probe.index_size = index_size;
      probe_hash = vbo_minmax_cache_hash(&probe);

      match = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
                                                 probe_hash, &probe);
      if (match) {
         const struct minmax_cache_entry *cached = match->data;

         *min_index = cached->min;
         *max_index = cached->max;
         hit = GL_TRUE;
      }
   }

   if (!hit) {
      bufferObj->MinMaxCacheMissIndices += count;
   } else {
      /* The hit counter saturates so that the cache is not accidentally
       * disabled in a long-running program.
       */
      const unsigned summed = bufferObj->MinMaxCacheHitIndices + count;

      bufferObj->MinMaxCacheHitIndices =
         (summed >= bufferObj->MinMaxCacheHitIndices) ? summed : ~(unsigned)0;
   }

   simple_mtx_unlock(&bufferObj->Mutex);
   return hit;
}
/**
 * Record a freshly computed min/max pair in a buffer object's cache.
 *
 * Does nothing when vbo_use_minmax_cache() refuses caching.  Otherwise,
 * with bufferObj->Mutex held, the cache table is created on first use and a
 * new entry is inserted.  Allocation failures are silently ignored (the
 * result is simply not cached).  If an entry with the same key already
 * exists, a debug message is emitted and the existing entry is kept.
 *
 * \param ctx         GL context, used for the debug message
 * \param bufferObj   buffer object holding the index data
 * \param index_size  size of one index
 * \param offset      start of the index run (part of the cache key)
 * \param count       number of indices in the run
 * \param min         minimum index found in the run
 * \param max         maximum index found in the run
 */
static void
vbo_minmax_cache_store(struct gl_context *ctx,
                       struct gl_buffer_object *bufferObj,
                       unsigned index_size, GLintptr offset, GLuint count,
                       GLuint min, GLuint max)
{
   struct minmax_cache_entry *fresh = NULL;

   if (!vbo_use_minmax_cache(bufferObj))
      return;

   simple_mtx_lock(&bufferObj->Mutex);

   /* Lazily create the table the first time something is stored. */
   if (!bufferObj->MinMaxCache) {
      bufferObj->MinMaxCache =
         _mesa_hash_table_create(NULL,
                                 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
                                 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
   }

   if (bufferObj->MinMaxCache)
      fresh = MALLOC_STRUCT(minmax_cache_entry);

   if (fresh) {
      uint32_t key_hash;

      fresh->min = min;
      fresh->max = max;
      fresh->key.index_size = index_size;
      fresh->key.count = count;
      fresh->key.offset = offset;
      key_hash = vbo_minmax_cache_hash(&fresh->key);

      if (_mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
                                             key_hash, &fresh->key)) {
         /* It seems like this could happen when two contexts are rendering
          * using the same buffer object from multiple threads.
          */
         _mesa_debug(ctx, "duplicate entry in minmax cache\n");
         free(fresh);
      } else if (!_mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
                                                     key_hash, &fresh->key,
                                                     fresh)) {
         /* Insertion failed, so the table did not take ownership. */
         free(fresh);
      }
   }

   simple_mtx_unlock(&bufferObj->Mutex);
}
/**
 * Compute min and max elements by scanning one contiguous run of the index
 * buffer for glDraw[Range]Elements() calls.
 *
 * The run starts prim->start indices into the data described by \p ib and
 * is \p count indices long.  When ib->obj is a buffer object, the result is
 * first looked up in that buffer's min/max cache; on a miss the range is
 * mapped for reading, scanned, stored in the cache and unmapped again.
 * Otherwise the memory at ib->ptr is scanned directly.
 *
 * If primitive restart is enabled, we need to ignore restart
 * indexes when computing min/max.
 *
 * When no index contributes to the result (count is 0, every index equals
 * the restart index, or the buffer could not be mapped), *min_index is ~0
 * and *max_index is 0.
 *
 * \param ctx        GL context
 * \param prim       first primitive of the run; only its start is used
 * \param ib         index buffer description (pointer/offset, object, size)
 * \param min_index  receives the smallest index found
 * \param max_index  receives the largest index found
 * \param count      number of indices to scan
 */
static void
vbo_get_minmax_index(struct gl_context *ctx,
                     const struct _mesa_prim *prim,
                     const struct _mesa_index_buffer *ib,
                     GLuint *min_index, GLuint *max_index,
                     const GLuint count)
{
   const GLboolean restart = ctx->Array._PrimitiveRestart;
   const GLuint restartIndex =
      _mesa_primitive_restart_index(ctx, ib->index_size);
   const char *indices;
   GLuint i;
   GLintptr offset = 0;

   /* Widen before multiplying so a large start cannot wrap in 32 bits. */
   indices = (char *) ib->ptr + (GLintptr) prim->start * ib->index_size;

   if (_mesa_is_bufferobj(ib->obj)) {
      /* Compute the byte length in pointer-sized arithmetic: a 32-bit
       * product could wrap and make the mapped range shorter than the
       * range scanned below.
       *
       * NOTE(review): the clamp uses the whole buffer size and does not
       * subtract the start offset -- confirm callers guarantee the run
       * lies inside the buffer.
       */
      GLsizeiptr size = MIN2((GLsizeiptr) count * ib->index_size,
                             ib->obj->Size);

      if (vbo_get_minmax_cached(ib->obj, ib->index_size, (GLintptr) indices,
                                count, min_index, max_index))
         return;

      /* For buffer objects "indices" is really an offset into the buffer. */
      offset = (GLintptr) indices;
      indices = ctx->Driver.MapBufferRange(ctx, offset, size,
                                           GL_MAP_READ_BIT, ib->obj,
                                           MAP_INTERNAL);
      if (!indices) {
         /* Mapping failed: report the empty range instead of dereferencing
          * NULL.  Nothing is cached and there is nothing to unmap.
          */
         *min_index = ~0U;
         *max_index = 0;
         return;
      }
   }

   switch (ib->index_size) {
   case 4: {
      const GLuint *ui_indices = (const GLuint *)indices;
      GLuint max_ui = 0;
      GLuint min_ui = ~0U;
      if (restart) {
         for (i = 0; i < count; i++) {
            if (ui_indices[i] != restartIndex) {
               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
            }
         }
      }
      else {
#if defined(USE_SSE41)
         if (cpu_has_sse4_1) {
            _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
         }
         else
#endif
         {
            for (i = 0; i < count; i++) {
               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
            }
         }
      }
      *min_index = min_ui;
      *max_index = max_ui;
      break;
   }
   case 2: {
      const GLushort *us_indices = (const GLushort *)indices;
      GLuint max_us = 0;
      GLuint min_us = ~0U;
      if (restart) {
         for (i = 0; i < count; i++) {
            if (us_indices[i] != restartIndex) {
               if (us_indices[i] > max_us) max_us = us_indices[i];
               if (us_indices[i] < min_us) min_us = us_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < count; i++) {
            if (us_indices[i] > max_us) max_us = us_indices[i];
            if (us_indices[i] < min_us) min_us = us_indices[i];
         }
      }
      *min_index = min_us;
      *max_index = max_us;
      break;
   }
   case 1: {
      const GLubyte *ub_indices = (const GLubyte *)indices;
      GLuint max_ub = 0;
      GLuint min_ub = ~0U;
      if (restart) {
         for (i = 0; i < count; i++) {
            if (ub_indices[i] != restartIndex) {
               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < count; i++) {
            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
         }
      }
      *min_index = min_ub;
      *max_index = max_ub;
      break;
   }
   default:
      unreachable("not reached");
   }

   if (_mesa_is_bufferobj(ib->obj)) {
      /* Remember the result under the same key the lookup above used. */
      vbo_minmax_cache_store(ctx, ib->obj, ib->index_size, offset,
                             count, *min_index, *max_index);
      ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
   }
}
/**
 * Compute min and max elements for nr_prims primitives.
 *
 * Primitives whose index ranges are back to back (one ends exactly where
 * the next starts) are merged into a single run so that each run is scanned
 * with one vbo_get_minmax_index() call.
 *
 * With nr_prims == 0 the outputs are left at their initial values:
 * *min_index = ~0 and *max_index = 0.
 *
 * \param ctx        GL context
 * \param prims      array of nr_prims primitives
 * \param ib         index buffer description
 * \param min_index  receives the smallest index over all primitives
 * \param max_index  receives the largest index over all primitives
 * \param nr_prims   number of entries in prims
 */
void
vbo_get_minmax_indices(struct gl_context *ctx,
                       const struct _mesa_prim *prims,
                       const struct _mesa_index_buffer *ib,
                       GLuint *min_index,
                       GLuint *max_index,
                       GLuint nr_prims)
{
   GLuint cur = 0;

   *min_index = ~0;
   *max_index = 0;

   while (cur < nr_prims) {
      const struct _mesa_prim *run_start = &prims[cur];
      GLuint run_count = run_start->count;
      GLuint run_min, run_max;

      /* Do combination if possible to reduce map/unmap count */
      for (; cur + 1 < nr_prims &&
             prims[cur].start + prims[cur].count == prims[cur + 1].start;
           cur++) {
         run_count += prims[cur + 1].count;
      }

      vbo_get_minmax_index(ctx, run_start, ib, &run_min, &run_max, run_count);

      if (run_min < *min_index)
         *min_index = run_min;
      if (run_max > *max_index)
         *max_index = run_max;

      /* Step past the last primitive folded into this run. */
      cur++;
   }
}