C++程序  |  264行  |  9.65 KB

/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * An implementation of the transform feedback driver hooks for Haswell
 * and later hardware.  This uses MI_MATH to compute the number of vertices
 * written (for use by DrawTransformFeedback()) without any CPU<->GPU
 * synchronization which could stall.
 */

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "main/transformfeedback.h"

/**
 * We store several values in obj->prim_count_bo:
 *
 * [4x 32-bit values]: Final Number of Vertices Written
 * [4x 32-bit values]: Tally of Primitives Written So Far
 * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots
 *
 * The first set of values is used by DrawTransformFeedback(), which
 * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs
 * an indirect draw.  The other values are just temporary storage.
 */

#define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t))
#define START_OFFSET (TALLY_OFFSET * 2)

/**
 * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
 * to prim_count_bo.
 */
static void
save_prim_start_values(struct brw_context *brw,
                       struct brw_transform_feedback_object *obj)
{
   /* Flush any drawing so that the counters have the right values. */
   brw_emit_mi_flush(brw);

   /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
   for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
      brw_store_register_mem64(brw, obj->prim_count_bo,
                               GEN7_SO_NUM_PRIMS_WRITTEN(i),
                               START_OFFSET + i * sizeof(uint64_t));
   }
}

/**
 * Compute the number of primitives written during our most recent
 * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value
 * minus the stashed "start" value), and add it to our running tally.
 *
 * If \p finalize is true, also compute the number of vertices written
 * (by multiplying by the number of vertices per primitive), and store
 * that to the "final" location.
 *
 * Otherwise, just overwrite the old tally with the new one.
 */
static void
tally_prims_written(struct brw_context *brw,
                    struct brw_transform_feedback_object *obj,
                    bool finalize)
{
   /* Flush any drawing so that the counters have the right values. */
   brw_emit_mi_flush(brw);

   for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
      /* GPR0 = Tally */
      brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0);
      brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo,
                            TALLY_OFFSET + i * sizeof(uint32_t));
      if (!obj->base.Paused) {
         /* GPR1 = Start Snapshot */
         brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo,
                                 START_OFFSET + i * sizeof(uint64_t));
         /* GPR2 = Ending Snapshot */
         brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2));

         BEGIN_BATCH(9);
         OUT_BATCH(HSW_MI_MATH | (9 - 2));
         /* GPR1 = GPR2 (End) - GPR1 (Start) */
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
         OUT_BATCH(MI_MATH_ALU0(SUB));
         OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
         /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
         OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
            OUT_BATCH(MI_MATH_ALU0(ADD));
         OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
         ADVANCE_BATCH();
      }

      if (!finalize) {
         /* Write back the new tally */
         brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
                                  TALLY_OFFSET + i * sizeof(uint32_t));
      } else {
         /* Convert the number of primitives to the number of vertices. */
         if (obj->primitive_mode == GL_LINES) {
            /* Double R0 (R0 = R0 + R0) */
            BEGIN_BATCH(5);
            OUT_BATCH(HSW_MI_MATH | (5 - 2));
            OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
            OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
            OUT_BATCH(MI_MATH_ALU0(ADD));
            OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
            ADVANCE_BATCH();
         } else if (obj->primitive_mode == GL_TRIANGLES) {
            /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */
            BEGIN_BATCH(9);
            OUT_BATCH(HSW_MI_MATH | (9 - 2));
            OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
            OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
            OUT_BATCH(MI_MATH_ALU0(ADD));
            OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
            OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
            OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
            OUT_BATCH(MI_MATH_ALU0(ADD));
            OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
            ADVANCE_BATCH();
         }
         /* Store it to the final result */
         brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
                                  i * sizeof(uint32_t));
      }
   }
}

/**
 * BeginTransformFeedback() driver hook.
 */
void
hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
                              struct gl_transform_feedback_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   brw_obj->primitive_mode = mode;

   /* Reset the SO buffer offsets to 0. */
   if (devinfo->gen >= 8) {
      brw_obj->zero_offsets = true;
   } else {
      BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2));
      for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
         OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
         OUT_BATCH(0);
      }
      ADVANCE_BATCH();
   }

   /* Zero out the initial tallies */
   brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET,     0ull);
   brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull);

   /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
   save_prim_start_values(brw, brw_obj);
}

/**
 * PauseTransformFeedback() driver hook.
 */
void
hsw_pause_transform_feedback(struct gl_context *ctx,
                              struct gl_transform_feedback_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->is_haswell) {
      /* Flush any drawing so that the counters have the right values. */
      brw_emit_mi_flush(brw);

      /* Save the SOL buffer offset register values. */
      for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
         OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
         OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t));
         ADVANCE_BATCH();
      }
   }

   /* Add any primitives written to our tally */
   tally_prims_written(brw, brw_obj, false);
}

/**
 * ResumeTransformFeedback() driver hook.
 */
void
hsw_resume_transform_feedback(struct gl_context *ctx,
                               struct gl_transform_feedback_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->is_haswell) {
      /* Reload the SOL buffer offset registers. */
      for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
         BEGIN_BATCH(3);
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
         OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
         OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t));
         ADVANCE_BATCH();
      }
   }

   /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
   save_prim_start_values(brw, brw_obj);
}

/**
 * EndTransformFeedback() driver hook.
 */
void
hsw_end_transform_feedback(struct gl_context *ctx,
			    struct gl_transform_feedback_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;

   /* Add any primitives written to our tally, convert it from the number
    * of primitives written to the number of vertices written, and store
    * it in the "final" location in the buffer which DrawTransformFeedback()
    * will use as the vertex count.
    */
   tally_prims_written(brw, brw_obj, true);
}