/*
 * Copyright 2008 Ben Skeggs
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_resource.h"
#include "nvc0/gm107_texture.xml.h"
#include "nvc0/nvc0_compute.xml.h"
#include "nv50/g80_texture.xml.h"
#include "nv50/g80_defs.xml.h"

#include "util/u_format.h"

#define NVE4_TIC_ENTRY_INVALID 0x000fffff
#define NVE4_TSC_ENTRY_INVALID 0xfff00000

static inline uint32_t
nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
{
   switch (swz) {
   case PIPE_SWIZZLE_X  : return fmt->tic.src_x;
   case PIPE_SWIZZLE_Y: return fmt->tic.src_y;
   case PIPE_SWIZZLE_Z : return fmt->tic.src_z;
   case PIPE_SWIZZLE_W: return fmt->tic.src_w;
   case PIPE_SWIZZLE_1:
      return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
   case PIPE_SWIZZLE_0:
   default:
      return G80_TIC_SOURCE_ZERO;
   }
}

struct pipe_sampler_view *
nvc0_create_sampler_view(struct pipe_context *pipe,
                         struct pipe_resource *res,
                         const struct pipe_sampler_view *templ)
{
   uint32_t flags = 0;

   if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)
      flags |= NV50_TEXVIEW_SCALED_COORDS;

   return nvc0_create_texture_view(pipe, res, templ, flags, templ->target);
}

static struct pipe_sampler_view *
gm107_create_texture_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ,
                          uint32_t flags,
                          enum pipe_texture_target target)
{
   const struct util_format_description *desc;
   const struct nvc0_format *fmt;
   uint64_t address;
   uint32_t *tic;
   uint32_t swz[4];
   uint32_t width, height;
   uint32_t depth;
   struct nv50_tic_entry *view;
   struct nv50_miptree *mt;
   bool tex_int;

   view = MALLOC_STRUCT(nv50_tic_entry);
   if (!view)
      return NULL;
   mt = nv50_miptree(texture);

   view->pipe = *templ;
   view->pipe.reference.count = 1;
   view->pipe.texture = NULL;
   view->pipe.context = pipe;

   view->id = -1;
   view->bindless = 0;

   pipe_resource_reference(&view->pipe.texture, texture);

   tic = &view->tic[0];

   desc = util_format_description(view->pipe.format);
   tex_int = util_format_is_pure_integer(view->pipe.format);

   fmt = &nvc0_format_table[view->pipe.format];
   swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
   swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
   swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
   swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);

   tic[0]  = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
   tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
   tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
   tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
   tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
   tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;

   address = mt->base.address;

   tic[3]  = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
   tic[4]  = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
   tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;

   if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
      tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
   else
      tic[5] = 0;

   /* check for linear storage type */
   if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
      if (texture->target == PIPE_BUFFER) {
         assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
         width = view->pipe.u.buf.size / (desc->block.bits / 8) - 1;
         address +=
            view->pipe.u.buf.offset;
         tic[2]  = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
         tic[3] |= width >> 16;
         tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
         tic[4] |= width & 0xffff;
      } else {
         assert(!(mt->level[0].pitch & 0x1f));
         /* must be 2D texture without mip maps */
         tic[2]  = GM107_TIC2_2_HEADER_VERSION_PITCH;
         tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
         tic[3] |= mt->level[0].pitch >> 5;
         tic[4] |= mt->base.base.width0 - 1;
         tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
         tic[5] |= mt->base.base.height0 - 1;
      }
      tic[1]  = address;
      tic[2] |= address >> 32;
      tic[6]  = 0;
      tic[7]  = 0;
      return &view->pipe;
   }

   tic[2]  = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
   tic[3] |=
      ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
      ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);

   depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);

   if (mt->base.base.array_size > 1) {
      /* there doesn't seem to be a base layer field in TIC */
      address += view->pipe.u.tex.first_layer * mt->layer_stride;
      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
   }
   tic[1]  = address;
   tic[2] |= address >> 32;

   switch (target) {
   case PIPE_TEXTURE_1D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
      break;
   case PIPE_TEXTURE_2D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_RECT:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_3D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
      break;
   case PIPE_TEXTURE_CUBE:
      depth /= 6;
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
      break;
   case PIPE_TEXTURE_1D_ARRAY:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
      break;
   case PIPE_TEXTURE_CUBE_ARRAY:
      depth /= 6;
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
      break;
   default:
      unreachable("unexpected/invalid texture target");
   }

   tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
             GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
             GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
             GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;

   if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
      width = mt->base.base.width0 << mt->ms_x;
      height = mt->base.base.height0 << mt->ms_y;
   } else {
      width = mt->base.base.width0;
      height = mt->base.base.height0;
   }

   tic[4] |= width - 1;

   tic[5] |= (height - 1) & 0xffff;
   tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
   tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;

   /* sampling points: (?) */
   if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
      tic[6]  = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
      tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
   } else {
      tic[6]  = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
      tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
   }

   tic[7]  = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
   tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;

   return &view->pipe;
}

struct pipe_sampler_view *
gm107_create_texture_view_from_image(struct pipe_context *pipe,
                                     const struct pipe_image_view *view)
{
   struct nv04_resource *res = nv04_resource(view->resource);
   struct pipe_sampler_view templ = {};
   enum pipe_texture_target target;
   uint32_t flags = 0;

   if (!res)
      return NULL;
   target = res->base.target;

   if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
      target = PIPE_TEXTURE_2D_ARRAY;

   templ.format = view->format;
   templ.swizzle_r = PIPE_SWIZZLE_X;
   templ.swizzle_g = PIPE_SWIZZLE_Y;
   templ.swizzle_b = PIPE_SWIZZLE_Z;
   templ.swizzle_a = PIPE_SWIZZLE_W;

   if (target == PIPE_BUFFER) {
      templ.u.buf.offset = view->u.buf.offset;
      templ.u.buf.size = view->u.buf.size;
   } else {
      templ.u.tex.first_layer = view->u.tex.first_layer;
      templ.u.tex.last_layer = view->u.tex.last_layer;
      templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
   }

   flags = NV50_TEXVIEW_SCALED_COORDS;

   return nvc0_create_texture_view(pipe, &res->base, &templ, flags, target);
}

static struct pipe_sampler_view *
gf100_create_texture_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ,
                          uint32_t flags,
                          enum pipe_texture_target target)
{
   const struct util_format_description *desc;
   const struct nvc0_format *fmt;
   uint64_t address;
   uint32_t *tic;
   uint32_t swz[4];
   uint32_t width, height;
   uint32_t depth;
   uint32_t tex_fmt;
   struct nv50_tic_entry *view;
   struct nv50_miptree *mt;
   bool tex_int;

   view = MALLOC_STRUCT(nv50_tic_entry);
   if (!view)
      return NULL;
   mt = nv50_miptree(texture);

   view->pipe = *templ;
   view->pipe.reference.count = 1;
   view->pipe.texture = NULL;
   view->pipe.context = pipe;

   view->id = -1;
   view->bindless = 0;

   pipe_resource_reference(&view->pipe.texture, texture);

   tic = &view->tic[0];

   desc = util_format_description(view->pipe.format);

   fmt = &nvc0_format_table[view->pipe.format];

   tex_int = util_format_is_pure_integer(view->pipe.format);
   tex_fmt = fmt->tic.format & 0x3f;

   swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
   swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
   swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
   swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
   tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
            (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
            (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
            (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
            (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
            (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
            (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
            (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
            (swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
            ((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));

   address = mt->base.address;

   tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      tic[2] |= G80_TIC_2_SRGB_CONVERSION;

   if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
      tic[2] |= G80_TIC_2_NORMALIZED_COORDS;

   /* check for linear storage type */
   if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
      if (texture->target == PIPE_BUFFER) {
         assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
         address +=
            view->pipe.u.buf.offset;
         tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
         tic[3] = 0;
         tic[4] = /* width */
            view->pipe.u.buf.size / (desc->block.bits / 8);
         tic[5] = 0;
      } else {
         /* must be 2D texture without mip maps */
         tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
         tic[3] = mt->level[0].pitch;
         tic[4] = mt->base.base.width0;
         tic[5] = (1 << 16) | mt->base.base.height0;
      }
      tic[6] =
      tic[7] = 0;
      tic[1] = address;
      tic[2] |= address >> 32;
      return &view->pipe;
   }

   tic[2] |=
      ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
      ((mt->level[0].tile_mode & 0xf00) << (25 - 8));

   depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);

   if (mt->base.base.array_size > 1) {
      /* there doesn't seem to be a base layer field in TIC */
      address += view->pipe.u.tex.first_layer * mt->layer_stride;
      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
   }
   tic[1] = address;
   tic[2] |= address >> 32;

   switch (target) {
   case PIPE_TEXTURE_1D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
      break;
   case PIPE_TEXTURE_2D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_RECT:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_3D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
      break;
   case PIPE_TEXTURE_CUBE:
      depth /= 6;
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
      break;
   case PIPE_TEXTURE_1D_ARRAY:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
      break;
   case PIPE_TEXTURE_CUBE_ARRAY:
      depth /= 6;
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
      break;
   default:
      unreachable("unexpected/invalid texture target");
   }

   tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;

   if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
      width = mt->base.base.width0 << mt->ms_x;
      height = mt->base.base.height0 << mt->ms_y;
   } else {
      width = mt->base.base.width0;
      height = mt->base.base.height0;
   }

   tic[4] = (1 << 31) | width;

   tic[5] = height & 0xffff;
   tic[5] |= depth << 16;
   tic[5] |= mt->base.base.last_level << 28;

   /* sampling points: (?) */
   if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
      tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
   else
      tic[6] = 0x03000000;

   tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
   tic[7] |= mt->ms_mode << 12;

   return &view->pipe;
}

struct pipe_sampler_view *
nvc0_create_texture_view(struct pipe_context *pipe,
                         struct pipe_resource *texture,
                         const struct pipe_sampler_view *templ,
                         uint32_t flags,
                         enum pipe_texture_target target)
{
   if (nvc0_context(pipe)->screen->tic.maxwell)
      return gm107_create_texture_view(pipe, texture, templ, flags, target);
   return gf100_create_texture_view(pipe, texture, templ, flags, target);
}

bool
nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
                struct nv04_resource *res)
{
   uint64_t address = res->address;
   if (res->base.target != PIPE_BUFFER)
      return false;
   address += tic->pipe.u.buf.offset;
   if (tic->tic[1] == (uint32_t)address &&
       (tic->tic[2] & 0xff) == address >> 32)
      return false;

   tic->tic[1] = address;
   tic->tic[2] &= 0xffffff00;
   tic->tic[2] |= address >> 32;

   if (tic->id >= 0) {
      nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                           NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                           tic->tic);
      return true;
   }

   return false;
}

bool
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
{
   uint32_t commands[32];
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   unsigned n = 0;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         if (dirty)
            commands[n++] = (i << 1) | 0;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
      need_flush |= nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                              NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                              tic->tic);
         need_flush = true;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         if (unlikely(s == 5))
            BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
         else
            BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, (tic->id << 4) | 1);
         NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
      }
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;

      if (!dirty)
         continue;
      commands[n++] = (tic->id << 9) | (i << 1) | 1;

      if (unlikely(s == 5))
         BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
      else
         BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
   }
   for (; i < nvc0->state.num_textures[s]; ++i)
      commands[n++] = (i << 1) | 0;

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   if (n) {
      if (unlikely(s == 5))
         BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
      else
         BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
      PUSH_DATAp(push, commands, n);
   }
   nvc0->textures_dirty[s] = 0;

   return need_flush;
}

static bool
nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
      need_flush |= nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                              NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                              tic->tic);
         need_flush = true;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, (tic->id << 4) | 1);
      }
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;

      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tic->id;
      if (dirty)
         BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
   }
   for (; i < nvc0->state.num_textures[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
      nvc0->textures_dirty[s] |= 1 << i;
   }

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   return need_flush;
}

void nvc0_validate_textures(struct nvc0_context *nvc0)
{
   bool need_flush = false;
   int i;

   for (i = 0; i < 5; i++) {
      if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
         need_flush |= nve4_validate_tic(nvc0, i);
      else
         need_flush |= nvc0_validate_tic(nvc0, i);
   }

   if (need_flush) {
      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

   /* Invalidate all CP textures because they are aliased. */
   for (int i = 0; i < nvc0->num_textures[5]; i++)
      nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(i));
   nvc0->textures_dirty[5] = ~0;
   nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
}

bool
nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
{
   uint32_t commands[16];
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   unsigned n = 0;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);

      if (!(nvc0->samplers_dirty[s] & (1 << i)))
         continue;
      if (!tsc) {
         commands[n++] = (i << 4) | 0;
         continue;
      }
      nvc0->seamless_cube_map = tsc->seamless_cube_map;
      if (tsc->id < 0) {
         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);

         nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                               65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
                               32, tsc->tsc);
         need_flush = true;
      }
      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      commands[n++] = (tsc->id << 12) | (i << 4) | 1;
   }
   for (; i < nvc0->state.num_samplers[s]; ++i)
      commands[n++] = (i << 4) | 0;

   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];

   if (n) {
      if (unlikely(s == 5))
         BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
      else
         BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
      PUSH_DATAp(push, commands, n);
   }
   nvc0->samplers_dirty[s] = 0;

   return need_flush;
}

bool
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
{
   unsigned i;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);

      if (!tsc) {
         nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
         continue;
      }
      if (tsc->id < 0) {
         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);

         nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                               65536 + tsc->id * 32,
                               NV_VRAM_DOMAIN(&nvc0->screen->base),
                               32, tsc->tsc);
         need_flush = true;
      }
      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tsc->id << 20;
   }
   for (; i < nvc0->state.num_samplers[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
      nvc0->samplers_dirty[s] |= 1 << i;
   }

   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];

   return need_flush;
}

void nvc0_validate_samplers(struct nvc0_context *nvc0)
{
   bool need_flush = false;
   int i;

   for (i = 0; i < 5; i++) {
      if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
         need_flush |= nve4_validate_tsc(nvc0, i);
      else
         need_flush |= nvc0_validate_tsc(nvc0, i);
   }

   if (need_flush) {
      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

   /* Invalidate all CP samplers because they are aliased. */
   nvc0->samplers_dirty[5] = ~0;
   nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
}

/* Upload the "diagonal" entries for the possible texture sources ($t == $s).
 * At some point we might want to get a list of the combinations used by a
 * shader and fill in those entries instead of having it extract the handles.
 */
void
nve4_set_tex_handles(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned s;

   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
      return;

   for (s = 0; s < 5; ++s) {
      uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
      if (!dirty)
         continue;
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      do {
         int i = ffs(dirty) - 1;
         dirty &= ~(1 << i);

         BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
         PUSH_DATA (push, (8 + i) * 4);
         PUSH_DATA (push, nvc0->tex_handles[s][i]);
      } while (dirty);

      nvc0->textures_dirty[s] = 0;
      nvc0->samplers_dirty[s] = 0;
   }
}

static uint64_t
nve4_create_texture_handle(struct pipe_context *pipe,
                           struct pipe_sampler_view *view,
                           const struct pipe_sampler_state *sampler)
{
   /* We have to create persistent handles that won't change for these objects
    * That means that we have to upload them into place and lock them so that
    * they can't be kicked out later.
    */
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nv50_tic_entry *tic = nv50_tic_entry(view);
   struct nv50_tsc_entry *tsc = pipe->create_sampler_state(pipe, sampler);
   struct pipe_sampler_view *v = NULL;

   tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
   if (tsc->id < 0)
      goto fail;

   if (tic->id < 0) {
      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
      if (tic->id < 0)
         goto fail;

      nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                            tic->tic);

      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
   }

   nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                         65536 + tsc->id * 32,
                         NV_VRAM_DOMAIN(&nvc0->screen->base),
                         32, tsc->tsc);

   IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);

   // Add an extra reference to this sampler view effectively held by this
   // texture handle. This is to deal with the sampler view being dereferenced
   // before the handle is. However we need the view to still be live until the
   // handle to it is deleted.
   pipe_sampler_view_reference(&v, view);
   p_atomic_inc(&tic->bindless);

   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
   nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

   return 0x100000000ULL | (tsc->id << 20) | tic->id;

fail:
   pipe->delete_sampler_state(pipe, tsc);
   return 0;
}

static bool
view_bound(struct nvc0_context *nvc0, struct pipe_sampler_view *view) {
   for (int s = 0; s < 6; s++) {
      for (int i = 0; i < nvc0->num_textures[s]; i++)
         if (nvc0->textures[s][i] == view)
            return true;
   }
   return false;
}

static void
nve4_delete_texture_handle(struct pipe_context *pipe, uint64_t handle)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   uint32_t tic = handle & NVE4_TIC_ENTRY_INVALID;
   uint32_t tsc = (handle & NVE4_TSC_ENTRY_INVALID) >> 20;
   struct nv50_tic_entry *entry = nvc0->screen->tic.entries[tic];

   if (entry) {
      struct pipe_sampler_view *view = &entry->pipe;
      assert(entry->bindless);
      p_atomic_dec(&entry->bindless);
      if (!view_bound(nvc0, view))
         nvc0_screen_tic_unlock(nvc0->screen, entry);
      pipe_sampler_view_reference(&view, NULL);
   }

   pipe->delete_sampler_state(pipe, nvc0->screen->tsc.entries[tsc]);
}

static void
nve4_make_texture_handle_resident(struct pipe_context *pipe,
                                  uint64_t handle, bool resident)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   if (resident) {
      struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
      struct nv50_tic_entry *tic =
         nvc0->screen->tic.entries[handle & NVE4_TIC_ENTRY_INVALID];
      assert(tic);
      assert(tic->bindless);

      res->handle = handle;
      res->buf = nv04_resource(tic->pipe.texture);
      res->flags = NOUVEAU_BO_RD;
      list_add(&res->list, &nvc0->tex_head);
   } else {
      list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->tex_head, list) {
         if (pos->handle == handle) {
            list_del(&pos->list);
            free(pos);
            break;
         }
      }
   }
}

static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];

static void
nvc0_get_surface_dims(const struct pipe_image_view *view,
                      int *width, int *height, int *depth)
{
   struct nv04_resource *res = nv04_resource(view->resource);
   int level;

   *width = *height = *depth = 1;
   if (res->base.target == PIPE_BUFFER) {
      *width = view->u.buf.size / util_format_get_blocksize(view->format);
      return;
   }

   level = view->u.tex.level;
   *width = u_minify(view->resource->width0, level);
   *height = u_minify(view->resource->height0, level);
   *depth = u_minify(view->resource->depth0, level);

   switch (res->base.target) {
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
      break;
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_3D:
      break;
   default:
      assert(!"unexpected texture target");
      break;
   }
}

void
nvc0_mark_image_range_valid(const struct pipe_image_view *view)
{
   struct nv04_resource *res = (struct nv04_resource *)view->resource;

   assert(view->resource->target == PIPE_BUFFER);

   util_range_add(&res->valid_buffer_range,
                  view->u.buf.offset,
                  view->u.buf.offset + view->u.buf.size);
}

void
nve4_set_surface_info(struct nouveau_pushbuf *push,
                      const struct pipe_image_view *view,
                      struct nvc0_context *nvc0)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nv04_resource *res;
   uint64_t address;
   uint32_t *const info = push->cur;
   int width, height, depth;
   uint8_t log2cpp;

   if (view && !nve4_su_format_map[view->format])
      NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");

   push->cur += 16;

   if (!view || !nve4_su_format_map[view->format]) {
      memset(info, 0, 16 * sizeof(*info));

      info[0] = 0xbadf0000;
      info[1] = 0x80004000;
      info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
         screen->lib_code->start;
      return;
   }
   res = nv04_resource(view->resource);

   address = res->address;

   /* get surface dimensions based on the target. */
   nvc0_get_surface_dims(view, &width, &height, &depth);

   info[8] = width;
   info[9] = height;
   info[10] = depth;
   switch (res->base.target) {
   case PIPE_TEXTURE_1D_ARRAY:
      info[11] = 1;
      break;
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
      info[11] = 2;
      break;
   case PIPE_TEXTURE_3D:
      info[11] = 3;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      info[11] = 4;
      break;
   default:
      info[11] = 0;
      break;
   }
   log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;

   /* Stick the blockwidth (ie. number of bytes per pixel) to check if the
    * format doesn't mismatch. */
   info[12] = util_format_get_blocksize(view->format);

   /* limit in bytes for raw access */
   info[13] = (0x06 << 22) | ((width << log2cpp) - 1);

   info[1] = nve4_su_format_map[view->format];

#if 0
   switch (util_format_get_blocksizebits(view->format)) {
   case  16: info[1] |= 1 << 16; break;
   case  32: info[1] |= 2 << 16; break;
   case  64: info[1] |= 3 << 16; break;
   case 128: info[1] |= 4 << 16; break;
   default:
      break;
   }
#else
   info[1] |= log2cpp << 16;
   info[1] |=  0x4000;
   info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);
#endif

   if (res->base.target == PIPE_BUFFER) {
      address += view->u.buf.offset;

      info[0]  = address >> 8;
      info[2]  = width - 1;
      info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
      info[3]  = 0;
      info[4]  = 0;
      info[5]  = 0;
      info[6]  = 0;
      info[7]  = 0;
      info[14] = 0;
      info[15] = 0;
   } else {
      struct nv50_miptree *mt = nv50_miptree(&res->base);
      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
      const unsigned z = view->u.tex.first_layer;

      if (z) {
         if (mt->layout_3d) {
            address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
            /* doesn't work if z passes z-tile boundary */
            if (depth > 1) {
               pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
                                  "3D images are not really supported!");
               debug_printf("3D images are not really supported!\n");
            }
         } else {
            address += mt->layer_stride * z;
         }
      }
      address += lvl->offset;

      info[0]  = address >> 8;
      info[2]  = (width << mt->ms_x) - 1;
      /* NOTE: this is really important: */
      info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
      info[3]  = (0x88 << 24) | (lvl->pitch / 64);
      info[4]  = (height << mt->ms_y) - 1;
      info[4] |= (lvl->tile_mode & 0x0f0) << 25;
      info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
      info[5]  = mt->layer_stride >> 8;
      info[6]  = depth - 1;
      info[6] |= (lvl->tile_mode & 0xf00) << 21;
      info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
      info[7]  = 0;
      info[14] = mt->ms_x;
      info[15] = mt->ms_y;
   }
}

static inline void
nvc0_set_surface_info(struct nouveau_pushbuf *push,
                      const struct pipe_image_view *view, uint64_t address,
                      int width, int height, int depth)
{
   struct nv04_resource *res;
   uint32_t *const info = push->cur;

   push->cur += 16;

   /* Make sure to always initialize the surface information area because it's
    * used to check if the given image is bound or not. */
   memset(info, 0, 16 * sizeof(*info));

   if (!view || !view->resource)
      return;
   res = nv04_resource(view->resource);

   /* Stick the image dimensions for the imageSize() builtin. */
   info[8] = width;
   info[9] = height;
   info[10] = depth;

   /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
    * offset and to check if the format doesn't mismatch. */
   info[12] = util_format_get_blocksize(view->format);

   if (res->base.target == PIPE_BUFFER) {
      info[0]  = address >> 8;
      info[2]  = width;
   } else {
      struct nv50_miptree *mt = nv50_miptree(&res->base);

      info[0]  = address >> 8;
      info[2]  = width;
      info[4]  = height;
      info[5]  = mt->layer_stride >> 8;
      info[6]  = depth;
      info[14] = mt->ms_x;
      info[15] = mt->ms_y;
   }
}

void
nvc0_validate_suf(struct nvc0_context *nvc0, int s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;

   for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
      struct pipe_image_view *view = &nvc0->images[s][i];
      int width, height, depth;
      uint64_t address = 0;

      if (s == 5)
         BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
      else
         BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);

      if (view->resource) {
         struct nv04_resource *res = nv04_resource(view->resource);
         unsigned rt = nvc0_format_table[view->format].rt;

         if (util_format_is_depth_or_stencil(view->format))
            rt = rt << 12;
         else
            rt = (rt << 4) | (0x14 << 12);

         /* get surface dimensions based on the target. */
         nvc0_get_surface_dims(view, &width, &height, &depth);

         address = res->address;
         if (res->base.target == PIPE_BUFFER) {
            unsigned blocksize = util_format_get_blocksize(view->format);

            address += view->u.buf.offset;
            assert(!(address & 0xff));

            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
               nvc0_mark_image_range_valid(view);

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            PUSH_DATA (push, align(width * blocksize, 0x100));
            PUSH_DATA (push, NVC0_3D_IMAGE_HEIGHT_LINEAR | 1);
            PUSH_DATA (push, rt);
            PUSH_DATA (push, 0);
         } else {
            struct nv50_miptree *mt = nv50_miptree(view->resource);
            struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
            const unsigned z = view->u.tex.first_layer;

            if (mt->layout_3d) {
               address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
               if (depth >= 1) {
                  pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
                                     "3D images are not supported!");
                  debug_printf("3D images are not supported!\n");
               }
            } else {
               address += mt->layer_stride * z;
            }
            address += lvl->offset;

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            PUSH_DATA (push, width << mt->ms_x);
            PUSH_DATA (push, height << mt->ms_y);
            PUSH_DATA (push, rt);
            PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
         }

         if (s == 5)
            BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
         else
            BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
      } else {
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0x14000);
         PUSH_DATA(push, 0);
      }

      /* stick surface information into the driver constant buffer */
      if (s == 5)
         BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
      else
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      if (s == 5)
         BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 16);
      else
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
      PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));

      nvc0_set_surface_info(push, view, address, width, height, depth);
   }
}

static inline void
nvc0_update_surface_bindings(struct nvc0_context *nvc0)
{
   nvc0_validate_suf(nvc0, 4);

   /* Invalidate all COMPUTE images because they are aliased with FRAGMENT. */
   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
   nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
   nvc0->images_dirty[5] |= nvc0->images_valid[5];
}

static void
gm107_validate_surfaces(struct nvc0_context *nvc0,
                        struct pipe_image_view *view, int stage, int slot)
{
   struct nv04_resource *res = nv04_resource(view->resource);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct nv50_tic_entry *tic;

   tic = nv50_tic_entry(nvc0->images_tic[stage][slot]);

   res = nv04_resource(tic->pipe.texture);
   nvc0_update_tic(nvc0, tic, res);

   if (tic->id < 0) {
      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

      /* upload the texture view */
      nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32, tic->tic);

      BEGIN_NVC0(push, NVC0_3D(TIC_FLUSH), 1);
      PUSH_DATA (push, 0);
   } else
   if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
      BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
      PUSH_DATA (push, (tic->id << 4) | 1);
   }
   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

   res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
   res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

   BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RD);

   /* upload the texture handle */
   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
   BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
   PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
   PUSH_DATA (push, tic->id);
}

static inline void
nve4_update_surface_bindings(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   int i, j, s;

   for (s = 0; s < 5; s++) {
      if (!nvc0->images_dirty[s])
         continue;

      for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
         struct pipe_image_view *view = &nvc0->images[s][i];

         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
         PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
         PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
         PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));

         if (view->resource) {
            struct nv04_resource *res = nv04_resource(view->resource);

            if (res->base.target == PIPE_BUFFER) {
               if (view->access & PIPE_IMAGE_ACCESS_WRITE)
                  nvc0_mark_image_range_valid(view);
            }

            nve4_set_surface_info(push, view, nvc0);
            BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);

            if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
               gm107_validate_surfaces(nvc0, view, s, i);
         } else {
            for (j = 0; j < 16; j++)
               PUSH_DATA(push, 0);
         }
      }
   }
}

void
nvc0_validate_surfaces(struct nvc0_context *nvc0)
{
   if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
      nve4_update_surface_bindings(nvc0);
   } else {
      nvc0_update_surface_bindings(nvc0);
   }
}

static uint64_t
nve4_create_image_handle(struct pipe_context *pipe,
                         const struct pipe_image_view *view)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   int i = screen->img.next, s;

   while (screen->img.entries[i]) {
      i = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
      if (i == screen->img.next)
         return 0;
   }

   screen->img.next = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
   screen->img.entries[i] = calloc(1, sizeof(struct pipe_image_view));
   *screen->img.entries[i] = *view;

   for (s = 0; s < 6; s++) {
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
      PUSH_DATA (push, NVC0_CB_AUX_BINDLESS_INFO(i));
      nve4_set_surface_info(push, view, nvc0);
   }

   return 0x100000000ULL | i;
}

static void
nve4_delete_image_handle(struct pipe_context *pipe, uint64_t handle)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_screen *screen = nvc0->screen;
   int i = handle & (NVE4_IMG_MAX_HANDLES - 1);

   free(screen->img.entries[i]);
   screen->img.entries[i] = NULL;
}

static void
nve4_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,
                                unsigned access, bool resident)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_screen *screen = nvc0->screen;

   if (resident) {
      struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
      struct pipe_image_view *view =
         screen->img.entries[handle & (NVE4_IMG_MAX_HANDLES - 1)];
      assert(view);

      if (view->resource->target == PIPE_BUFFER &&
          access & PIPE_IMAGE_ACCESS_WRITE)
         nvc0_mark_image_range_valid(view);
      res->handle = handle;
      res->buf = nv04_resource(view->resource);
      res->flags = (access & 3) << 8;
      list_add(&res->list, &nvc0->img_head);
   } else {
      list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {
         if (pos->handle == handle) {
            list_del(&pos->list);
            free(pos);
            break;
         }
      }
   }
}

void
nvc0_init_bindless_functions(struct pipe_context *pipe) {
   pipe->create_texture_handle = nve4_create_texture_handle;
   pipe->delete_texture_handle = nve4_delete_texture_handle;
   pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;

   pipe->create_image_handle = nve4_create_image_handle;
   pipe->delete_image_handle = nve4_delete_image_handle;
   pipe->make_image_handle_resident = nve4_make_image_handle_resident;
}


static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
   [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
   [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
   [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
   [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
   [PIPE_FORMAT_B8G8R8A8_UNORM] = GK104_IMAGE_FORMAT_BGRA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
   [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
   [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
   [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
   [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
   [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT,
   [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
   [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
   [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
   [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
   [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
   [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
   [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
   [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
   [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
   [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
   [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
   [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
   [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
   [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
   [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
   [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
   [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
   [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
   [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
   [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
   [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
   [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
   [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
   [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
};

/* Auxiliary format description values for surface instructions.
 * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
 */
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,

   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,

   [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
   [PIPE_FORMAT_R32G32_SINT] = 0x3433,
   [PIPE_FORMAT_R32G32_UINT] = 0x3433,

   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
   [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24,
   [PIPE_FORMAT_B8G8R8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
   [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,

   [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SINT] = 0x2524,
   [PIPE_FORMAT_R16G16_UINT] = 0x2524,
   [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,

   [PIPE_FORMAT_R32_SINT] = 0x2024,
   [PIPE_FORMAT_R32_UINT] = 0x2024,
   [PIPE_FORMAT_R32_FLOAT] = 0x2024,

   [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SINT] = 0x1615,
   [PIPE_FORMAT_R8G8_UINT] = 0x1615,

   [PIPE_FORMAT_R16_UNORM] = 0x1115,
   [PIPE_FORMAT_R16_SNORM] = 0x1115,
   [PIPE_FORMAT_R16_SINT] = 0x1115,
   [PIPE_FORMAT_R16_UINT] = 0x1115,
   [PIPE_FORMAT_R16_FLOAT] = 0x1115,

   [PIPE_FORMAT_R8_UNORM] = 0x0206,
   [PIPE_FORMAT_R8_SNORM] = 0x0206,
   [PIPE_FORMAT_R8_SINT] = 0x0206,
   [PIPE_FORMAT_R8_UINT] = 0x0206
};

/* NOTE: These are hardcoded offsets for the shader library.
 * TODO: Automate them.
 */
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
   [PIPE_FORMAT_R32G32B32A32_SINT]  = 0x218,
   [PIPE_FORMAT_R32G32B32A32_UINT]  = 0x218,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
   [PIPE_FORMAT_R16G16B16A16_SINT]  = 0x330,
   [PIPE_FORMAT_R16G16B16A16_UINT]  = 0x388,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
   [PIPE_FORMAT_R32G32_FLOAT]       = 0x428,
   [PIPE_FORMAT_R32G32_SINT]        = 0x468,
   [PIPE_FORMAT_R32G32_UINT]        = 0x468,
   [PIPE_FORMAT_R10G10B10A2_UNORM]  = 0x4a8,
   [PIPE_FORMAT_R10G10B10A2_UINT]   = 0x530,
   [PIPE_FORMAT_R8G8B8A8_UNORM]     = 0x588,
   [PIPE_FORMAT_R8G8B8A8_SNORM]     = 0x5f8,
   [PIPE_FORMAT_R8G8B8A8_SINT]      = 0x670,
   [PIPE_FORMAT_R8G8B8A8_UINT]      = 0x6c8,
   [PIPE_FORMAT_B5G6R5_UNORM]       = 0x718,
   [PIPE_FORMAT_B5G5R5X1_UNORM]     = 0x7a0,
   [PIPE_FORMAT_R16G16_UNORM]       = 0x828,
   [PIPE_FORMAT_R16G16_SNORM]       = 0x890,
   [PIPE_FORMAT_R16G16_SINT]        = 0x8f0,
   [PIPE_FORMAT_R16G16_UINT]        = 0x948,
   [PIPE_FORMAT_R16G16_FLOAT]       = 0x998,
   [PIPE_FORMAT_R32_FLOAT]          = 0x9e8,
   [PIPE_FORMAT_R32_SINT]           = 0xa30,
   [PIPE_FORMAT_R32_UINT]           = 0xa30,
   [PIPE_FORMAT_R8G8_UNORM]         = 0xa78,
   [PIPE_FORMAT_R8G8_SNORM]         = 0xae0,
   [PIPE_FORMAT_R8G8_UINT]          = 0xb48,
   [PIPE_FORMAT_R8G8_SINT]          = 0xb98,
   [PIPE_FORMAT_R16_UNORM]          = 0xbe8,
   [PIPE_FORMAT_R16_SNORM]          = 0xc48,
   [PIPE_FORMAT_R16_SINT]           = 0xca0,
   [PIPE_FORMAT_R16_UINT]           = 0xce8,
   [PIPE_FORMAT_R16_FLOAT]          = 0xd30,
   [PIPE_FORMAT_R8_UNORM]           = 0xd88,
   [PIPE_FORMAT_R8_SNORM]           = 0xde0,
   [PIPE_FORMAT_R8_SINT]            = 0xe38,
   [PIPE_FORMAT_R8_UINT]            = 0xe88,
   [PIPE_FORMAT_R11G11B10_FLOAT]    = 0xed0
};