C++程序  |  703行  |  20.13 KB


#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "translate/translate.h"

#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_resource.h"

#include "nvc0/nvc0_3d.xml.h"

struct push_context {
   struct nouveau_pushbuf *push;

   struct translate *translate;
   void *dest;
   const void *idxbuf;

   uint32_t vertex_size;
   uint32_t restart_index;
   uint32_t start_instance;
   uint32_t instance_id;

   bool prim_restart;
   bool need_vertex_id;

   struct {
      bool enabled;
      bool value;
      uint8_t width;
      unsigned stride;
      const uint8_t *data;
   } edgeflag;
};

static void nvc0_push_upload_vertex_ids(struct push_context *,
                                        struct nvc0_context *,
                                        const struct pipe_draw_info *);

static void
nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
{
   ctx->push = nvc0->base.pushbuf;

   ctx->translate = nvc0->vertex->translate;
   ctx->vertex_size = nvc0->vertex->size;
   ctx->instance_id = 0;

   ctx->need_vertex_id =
      nvc0->vertprog->vp.need_vertex_id && (nvc0->vertex->num_elements < 32);

   ctx->edgeflag.value = true;
   ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS;

   /* silence warnings */
   ctx->edgeflag.data = NULL;
   ctx->edgeflag.stride = 0;
   ctx->edgeflag.width = 0;
}

static inline void
nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias)
{
   struct translate *translate = nvc0->vertex->translate;
   unsigned i;

   for (i = 0; i < nvc0->num_vtxbufs; ++i) {
      const uint8_t *map;
      const struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];

      if (likely(vb->is_user_buffer))
         map = (const uint8_t *)vb->buffer.user;
      else
         map = nouveau_resource_map_offset(&nvc0->base,
            nv04_resource(vb->buffer.resource), vb->buffer_offset, NOUVEAU_BO_RD);

      if (index_bias && !unlikely(nvc0->vertex->instance_bufs & (1 << i)))
         map += (intptr_t)index_bias * vb->stride;

      translate->set_buffer(translate, i, map, vb->stride, ~0);
   }
}

static inline void
nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0,
                     const struct pipe_draw_info *info)
{
   if (!info->has_user_indices) {
      struct nv04_resource *buf = nv04_resource(info->index.resource);
      ctx->idxbuf = nouveau_resource_map_offset(
            &nvc0->base, buf, 0, NOUVEAU_BO_RD);
   } else {
      ctx->idxbuf = info->index.user;
   }
}

static inline void
nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
                       int32_t index_bias)
{
   unsigned attr = nvc0->vertprog->vp.edgeflag;
   struct pipe_vertex_element *ve = &nvc0->vertex->element[attr].pipe;
   struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
   struct nv04_resource *buf = nv04_resource(vb->buffer.resource);

   ctx->edgeflag.stride = vb->stride;
   ctx->edgeflag.width = util_format_get_blocksize(ve->src_format);
   if (!vb->is_user_buffer) {
      unsigned offset = vb->buffer_offset + ve->src_offset;
      ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base,
                           buf, offset, NOUVEAU_BO_RD);
   } else {
      ctx->edgeflag.data = (const uint8_t *)vb->buffer.user + ve->src_offset;
   }

   if (index_bias)
      ctx->edgeflag.data += (intptr_t)index_bias * vb->stride;
}

static inline unsigned
prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index)
{
   unsigned i;
   for (i = 0; i < push && elts[i] != index; ++i);
   return i;
}

static inline unsigned
prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index)
{
   unsigned i;
   for (i = 0; i < push && elts[i] != index; ++i);
   return i;
}

static inline unsigned
prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
{
   unsigned i;
   for (i = 0; i < push && elts[i] != index; ++i);
   return i;
}

static inline bool
ef_value_8(const struct push_context *ctx, uint32_t index)
{
   uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
   return !!*pf;
}

static inline bool
ef_value_32(const struct push_context *ctx, uint32_t index)
{
   uint32_t *pf = (uint32_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
   return !!*pf;
}

static inline bool
ef_toggle(struct push_context *ctx)
{
   ctx->edgeflag.value = !ctx->edgeflag.value;
   return ctx->edgeflag.value;
}

static inline unsigned
ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
{
   unsigned i;
   bool ef = ctx->edgeflag.value;
   if (ctx->edgeflag.width == 1)
      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
   else
      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
   return i;
}

static inline unsigned
ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
{
   unsigned i;
   bool ef = ctx->edgeflag.value;
   if (ctx->edgeflag.width == 1)
      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
   else
      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
   return i;
}

static inline unsigned
ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
{
   unsigned i;
   bool ef = ctx->edgeflag.value;
   if (ctx->edgeflag.width == 1)
      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
   else
      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
   return i;
}

static inline unsigned
ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
{
   unsigned i;
   bool ef = ctx->edgeflag.value;
   if (ctx->edgeflag.width == 1)
      for (i = 0; i < n && ef_value_8(ctx, start++) == ef; ++i);
   else
      for (i = 0; i < n && ef_value_32(ctx, start++) == ef; ++i);
   return i;
}

static inline void *
nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nouveau_bo *bo;
   uint64_t va;
   const unsigned size = count * nvc0->vertex->size;

   void *const dest = nouveau_scratch_get(&nvc0->base, size, &va, &bo);

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
   PUSH_DATAh(push, va);
   PUSH_DATA (push, va);
   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
   PUSH_DATAh(push, va + size - 1);
   PUSH_DATA (push, va + size - 1);

   BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
                bo);
   nouveau_pushbuf_validate(push);

   return dest;
}

static void
disp_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   const uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start;
   unsigned pos = 0;

   do {
      unsigned nR = count;

      if (unlikely(ctx->prim_restart))
         nR = prim_restart_search_i08(elts, nR, ctx->restart_index);

      translate->run_elts8(translate, elts, nR,
                           ctx->start_instance, ctx->instance_id, ctx->dest);
      count -= nR;
      ctx->dest += nR * ctx->vertex_size;

      while (nR) {
         unsigned nE = nR;

         if (unlikely(ctx->edgeflag.enabled))
            nE = ef_toggle_search_i08(ctx, elts, nR);

         PUSH_SPACE(push, 4);
         if (likely(nE >= 2)) {
            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
            PUSH_DATA (push, pos);
            PUSH_DATA (push, nE);
         } else
         if (nE) {
            if (pos <= 0xff) {
               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
            } else {
               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
               PUSH_DATA (push, pos);
            }
         }
         if (unlikely(nE != nR))
            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

         pos += nE;
         elts += nE;
         nR -= nE;
      }
      if (count) {
         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (push, 0xffffffff);
         ++elts;
         ctx->dest += ctx->vertex_size;
         ++pos;
         --count;
      }
   } while (count);
}

static void
disp_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   const uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start;
   unsigned pos = 0;

   do {
      unsigned nR = count;

      if (unlikely(ctx->prim_restart))
         nR = prim_restart_search_i16(elts, nR, ctx->restart_index);

      translate->run_elts16(translate, elts, nR,
                            ctx->start_instance, ctx->instance_id, ctx->dest);
      count -= nR;
      ctx->dest += nR * ctx->vertex_size;

      while (nR) {
         unsigned nE = nR;

         if (unlikely(ctx->edgeflag.enabled))
            nE = ef_toggle_search_i16(ctx, elts, nR);

         PUSH_SPACE(push, 4);
         if (likely(nE >= 2)) {
            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
            PUSH_DATA (push, pos);
            PUSH_DATA (push, nE);
         } else
         if (nE) {
            if (pos <= 0xff) {
               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
            } else {
               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
               PUSH_DATA (push, pos);
            }
         }
         if (unlikely(nE != nR))
            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

         pos += nE;
         elts += nE;
         nR -= nE;
      }
      if (count) {
         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (push, 0xffffffff);
         ++elts;
         ctx->dest += ctx->vertex_size;
         ++pos;
         --count;
      }
   } while (count);
}

static void
disp_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   const uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start;
   unsigned pos = 0;

   do {
      unsigned nR = count;

      if (unlikely(ctx->prim_restart))
         nR = prim_restart_search_i32(elts, nR, ctx->restart_index);

      translate->run_elts(translate, elts, nR,
                          ctx->start_instance, ctx->instance_id, ctx->dest);
      count -= nR;
      ctx->dest += nR * ctx->vertex_size;

      while (nR) {
         unsigned nE = nR;

         if (unlikely(ctx->edgeflag.enabled))
            nE = ef_toggle_search_i32(ctx, elts, nR);

         PUSH_SPACE(push, 4);
         if (likely(nE >= 2)) {
            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
            PUSH_DATA (push, pos);
            PUSH_DATA (push, nE);
         } else
         if (nE) {
            if (pos <= 0xff) {
               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
            } else {
               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
               PUSH_DATA (push, pos);
            }
         }
         if (unlikely(nE != nR))
            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

         pos += nE;
         elts += nE;
         nR -= nE;
      }
      if (count) {
         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (push, 0xffffffff);
         ++elts;
         ctx->dest += ctx->vertex_size;
         ++pos;
         --count;
      }
   } while (count);
}

static void
disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   unsigned pos = 0;

   /* XXX: This will read the data corresponding to the primitive restart index,
    *  maybe we should avoid that ?
    */
   translate->run(translate, start, count,
                  ctx->start_instance, ctx->instance_id, ctx->dest);
   do {
      unsigned nr = count;

      if (unlikely(ctx->edgeflag.enabled))
         nr = ef_toggle_search_seq(ctx, start + pos, nr);

      PUSH_SPACE(push, 4);
      if (likely(nr)) {
         BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
         PUSH_DATA (push, pos);
         PUSH_DATA (push, nr);
      }
      if (unlikely(nr != count))
         IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

      pos += nr;
      count -= nr;
   } while (count);
}


#define NVC0_PRIM_GL_CASE(n) \
   case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n

static inline unsigned
nvc0_prim_gl(unsigned prim)
{
   switch (prim) {
   NVC0_PRIM_GL_CASE(POINTS);
   NVC0_PRIM_GL_CASE(LINES);
   NVC0_PRIM_GL_CASE(LINE_LOOP);
   NVC0_PRIM_GL_CASE(LINE_STRIP);
   NVC0_PRIM_GL_CASE(TRIANGLES);
   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
   NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
   NVC0_PRIM_GL_CASE(QUADS);
   NVC0_PRIM_GL_CASE(QUAD_STRIP);
   NVC0_PRIM_GL_CASE(POLYGON);
   NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
   NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
   NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
   NVC0_PRIM_GL_CASE(PATCHES);
   default:
      return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
   }
}

void
nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
{
   struct push_context ctx;
   unsigned i, index_size;
   unsigned inst_count = info->instance_count;
   unsigned vert_count = info->count;
   unsigned prim;

   nvc0_push_context_init(nvc0, &ctx);

   nvc0_vertex_configure_translate(nvc0, info->index_bias);

   if (nvc0->state.index_bias) {
      /* this is already taken care of by translate */
      IMMED_NVC0(ctx.push, NVC0_3D(VB_ELEMENT_BASE), 0);
      nvc0->state.index_bias = 0;
   }

   if (unlikely(ctx.edgeflag.enabled))
      nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias);

   ctx.prim_restart = info->primitive_restart;
   ctx.restart_index = info->restart_index;

   if (info->primitive_restart) {
      /* NOTE: I hope we won't ever need that last index (~0).
       * If we do, we have to disable primitive restart here always and
       * use END,BEGIN to restart. (XXX: would that affect PrimitiveID ?)
       * We could also deactive PRIM_RESTART_WITH_DRAW_ARRAYS temporarily,
       * and add manual restart to disp_vertices_seq.
       */
      BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
      PUSH_DATA (ctx.push, 1);
      PUSH_DATA (ctx.push, info->index_size ? 0xffffffff : info->restart_index);
   } else
   if (nvc0->state.prim_restart) {
      IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
   }
   nvc0->state.prim_restart = info->primitive_restart;

   if (info->index_size) {
      nvc0_push_map_idxbuf(&ctx, nvc0, info);
      index_size = info->index_size;
   } else {
      if (unlikely(info->count_from_stream_output)) {
         struct pipe_context *pipe = &nvc0->base.pipe;
         struct nvc0_so_target *targ;
         targ = nvc0_so_target(info->count_from_stream_output);
         pipe->get_query_result(pipe, targ->pq, true, (void *)&vert_count);
         vert_count /= targ->stride;
      }
      ctx.idxbuf = NULL; /* shut up warnings */
      index_size = 0;
   }

   ctx.start_instance = info->start_instance;

   prim = nvc0_prim_gl(info->mode);
   do {
      PUSH_SPACE(ctx.push, 9);

      ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count);
      if (unlikely(!ctx.dest))
         break;

      if (unlikely(ctx.need_vertex_id))
         nvc0_push_upload_vertex_ids(&ctx, nvc0, info);

      if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
         IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1);
      PUSH_DATA (ctx.push, prim);
      switch (index_size) {
      case 1:
         disp_vertices_i08(&ctx, info->start, vert_count);
         break;
      case 2:
         disp_vertices_i16(&ctx, info->start, vert_count);
         break;
      case 4:
         disp_vertices_i32(&ctx, info->start, vert_count);
         break;
      default:
         assert(index_size == 0);
         disp_vertices_seq(&ctx, info->start, vert_count);
         break;
      }
      PUSH_SPACE(ctx.push, 1);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0);

      if (--inst_count) {
         prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
         ++ctx.instance_id;
      }
      nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
      nouveau_scratch_done(&nvc0->base);
   } while (inst_count);


   /* reset state and unmap buffers (no-op) */

   if (unlikely(!ctx.edgeflag.value)) {
      PUSH_SPACE(ctx.push, 1);
      IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1);
   }

   if (unlikely(ctx.need_vertex_id)) {
      PUSH_SPACE(ctx.push, 4);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0);
      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1);
      PUSH_DATA (ctx.push,
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST |
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0);
   }

   if (info->index_size && !info->has_user_indices)
      nouveau_resource_unmap(nv04_resource(info->index.resource));
   for (i = 0; i < nvc0->num_vtxbufs; ++i)
      nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer.resource));

   NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1);
}

static inline void
copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
{
   unsigned i;
   for (i = 0; i < n; ++i)
      dst[i] = elts[i] + bias;
}

static inline void
copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
{
   unsigned i;
   for (i = 0; i < n; ++i)
      dst[i] = elts[i] + bias;
}

static inline void
copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n)
{
   unsigned i;
   for (i = 0; i < n; ++i)
      dst[i] = elts[i] + bias;
}

static void
nvc0_push_upload_vertex_ids(struct push_context *ctx,
                            struct nvc0_context *nvc0,
                            const struct pipe_draw_info *info)

{
   struct nouveau_pushbuf *push = ctx->push;
   struct nouveau_bo *bo;
   uint64_t va;
   uint32_t *data;
   uint32_t format;
   unsigned index_size = info->index_size;
   unsigned i;
   unsigned a = nvc0->vertex->num_elements;

   if (!index_size || info->index_bias)
      index_size = 4;
   data = (uint32_t *)nouveau_scratch_get(&nvc0->base,
                                          info->count * index_size, &va, &bo);

   BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
                bo);
   nouveau_pushbuf_validate(push);

   if (info->index_size) {
      if (!info->index_bias) {
         memcpy(data, ctx->idxbuf, info->count * index_size);
      } else {
         switch (info->index_size) {
         case 1:
            copy_indices_u8(data, ctx->idxbuf, info->index_bias, info->count);
            break;
         case 2:
            copy_indices_u16(data, ctx->idxbuf, info->index_bias, info->count);
            break;
         default:
            copy_indices_u32(data, ctx->idxbuf, info->index_bias, info->count);
            break;
         }
      }
   } else {
      for (i = 0; i < info->count; ++i)
         data[i] = i + (info->start + info->index_bias);
   }

   format = (1 << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) |
      NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT;

   switch (index_size) {
   case 1:
      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8;
      break;
   case 2:
      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16;
      break;
   default:
      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32;
      break;
   }

   PUSH_SPACE(push, 12);

   if (unlikely(nvc0->state.instance_elts & 2)) {
      nvc0->state.instance_elts &= ~2;
      IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(1)), 0);
   }

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1);
   PUSH_DATA (push, format);

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 3);
   PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
   PUSH_DATAh(push, va);
   PUSH_DATA (push, va);
   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
   PUSH_DATAh(push, va + info->count * index_size - 1);
   PUSH_DATA (push, va + info->count * index_size - 1);

#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) \
   (((0x80 + (a) * 0x10) / 4) << NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT)

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_REPLACE), 1);
   PUSH_DATA (push, NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) | 1);
}