
/*
 * Copyright 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "grl_metakernel_new_sah_builder.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

/* We reserve :
 *    - GPR 14 for secondary command buffer returns
 *    - GPR 15 for conditional rendering
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 14
#define __gen_get_batch_dwords anv_batch_emit_dwords
#define __gen_address_offset anv_address_add
#define __gen_get_batch_address(b, a) anv_batch_address(b, a)
#include "common/mi_builder.h"

/* 32-bit registers addressed by offset through mi_reg32(). */
#define MI_PREDICATE_RESULT mi_reg32(0x2418)
/* The metakernels below store to DISPATCHDIM_X/Y/Z right before dispatches
 * that pass a NULL group size to genX(grl_dispatch).
 * NOTE(review): presumably these are the dispatch-dimension registers read
 * by such (indirect) dispatches -- confirm against genX(grl_dispatch).
 */
#define DISPATCHDIM_X mi_reg32(0x2500)
#define DISPATCHDIM_Y mi_reg32(0x2504)
#define DISPATCHDIM_Z mi_reg32(0x2508)
/* Thresholds compared against the primitive count in
 * genX(grl_new_sah_builder_new_sah_build): the DFS_TRIVIAL kernel is chosen
 * when DFS_MIN_PRIMREFS >= count, otherwise the DFS_SINGLE_WG kernel is
 * chosen when DFS_MAX_PRIMREFS >= count.
 */
static const uint64_t DFS_MIN_PRIMREFS = 6;
static const uint64_t DFS_MAX_PRIMREFS = 0x100u;
void
genX(grl_new_sah_builder_single_pass_binsah)(
    struct anv_cmd_buffer *cmd_buffer,
    uint64_t build_globals,
    uint64_t bvh_buffer,
    uint64_t build_primref_buffer,
    uint64_t build_primref_index_buffers,
    uint32_t alloc_backpointers)
{
    struct mi_builder b;
    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
    /* TODO: use anv_mocs? */
    const uint32_t mocs = isl_mocs(&cmd_buffer->device->isl_dev, 0, false);
    mi_builder_set_mocs(&b, mocs);
    
    UNUSED struct mi_value REG0 = mi_reserve_gpr(&b, 0);
    UNUSED struct mi_value REG1 = mi_reserve_gpr(&b, 1);
    UNUSED struct mi_value REG2 = mi_reserve_gpr(&b, 2);
    UNUSED struct mi_value REG4 = mi_reserve_gpr(&b, 3);
    UNUSED struct mi_value REG5 = mi_reserve_gpr(&b, 4);
    UNUSED struct mi_value REG6 = mi_reserve_gpr(&b, 5);
    UNUSED struct mi_value REG7 = mi_reserve_gpr(&b, 6);
    
    
    {
        const uint32_t _group_size[3] = { 1, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_globals },
            { .u64 = bvh_buffer },
            { .u64 = build_primref_buffer },
            { .u64 = build_primref_index_buffers },
            { .u64 = alloc_backpointers },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_DFS_DFS,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
}
/*
 * Record the command sequence for one SAH BVH build described by build_args.
 *
 * This C function contains no host-side branches: every path below is
 * emitted unconditionally and the selection between them is expressed with
 * mi_goto()/mi_goto_if() on values loaded through the MI builder.
 *
 * Paths, keyed on the 32-bit primitive count at build_args.p_num_primitives:
 *   - l_dispatch_trivial   (DFS_MIN_PRIMREFS >= count): one DFS_TRIVIAL
 *     dispatch, then l_done.
 *   - l_dispatch_single_wg (DFS_MAX_PRIMREFS >= count): one DFS_SINGLE_WG
 *     dispatch, then l_done.
 *   - l_full_build: BFS_BEGIN, the two *_INITIAL passes, the l_build_loop
 *     scheduler loop, then the l_qnode_loop amplification loop.
 *
 * Every "pending_pipe_bits |= ...; cmd_buffer_apply_pipe_flushes()" pair
 * requests a CS stall plus data-cache and untyped-dataport-cache flushes and
 * applies them at that point in the command stream.
 *
 * NOTE(review): mi_value_half(v, false/true) presumably selects the low/high
 * 32 bits of v, and a NULL group size passed to genX(grl_dispatch)
 * presumably means "take the dimensions from DISPATCHDIM_X/Y/Z" -- confirm
 * against mi_builder.h and genX(grl_dispatch).
 */
void
genX(grl_new_sah_builder_new_sah_build)(
    struct anv_cmd_buffer *cmd_buffer,
    struct grl_new_sah_builder_SAHBuildArgs build_args)
{
    struct mi_builder b;
    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
    /* TODO: use anv_mocs? */
    const uint32_t mocs = isl_mocs(&cmd_buffer->device->isl_dev, 0, false);
    mi_builder_set_mocs(&b, mocs);
    
    /* Note the naming gap: REG4..REG7 live in GPRs 3..6, there is no REG3. */
    UNUSED struct mi_value REG0 = mi_reserve_gpr(&b, 0);
    UNUSED struct mi_value REG1 = mi_reserve_gpr(&b, 1);
    UNUSED struct mi_value REG2 = mi_reserve_gpr(&b, 2);
    UNUSED struct mi_value REG4 = mi_reserve_gpr(&b, 3);
    UNUSED struct mi_value REG5 = mi_reserve_gpr(&b, 4);
    UNUSED struct mi_value REG6 = mi_reserve_gpr(&b, 5);
    UNUSED struct mi_value REG7 = mi_reserve_gpr(&b, 6);
    
    struct mi_goto_target _goto_target_l_dispatch_trivial = MI_GOTO_TARGET_INIT;
    struct mi_goto_target _goto_target_l_dispatch_single_wg = MI_GOTO_TARGET_INIT;
    struct mi_goto_target _goto_target_l_full_build = MI_GOTO_TARGET_INIT;
    struct mi_goto_target _goto_target_l_build_loop = MI_GOTO_TARGET_INIT;
    struct mi_goto_target _goto_target_l_build_qnodes = MI_GOTO_TARGET_INIT;
    struct mi_goto_target _goto_target_l_qnode_loop = MI_GOTO_TARGET_INIT;
    struct mi_goto_target _goto_target_l_done = MI_GOTO_TARGET_INIT;
    
    /* REG0 = primitive count.  _tmp0 is consumed three times below (one
     * store, two compares), hence the two extra references.
     */
    struct mi_value _tmp0 = mi_mem32(anv_address_from_u64(build_args.p_num_primitives));
    mi_value_add_refs(&b, _tmp0, 2);
    mi_store(&b, REG0, _tmp0);
    /* REG1/REG2 receive the thresholds, but the compares below use the
     * immediates directly.
     */
    mi_store(&b, REG1, mi_imm(DFS_MIN_PRIMREFS));
    mi_store(&b, REG2, mi_imm(DFS_MAX_PRIMREFS));
    /* NOTE(review): mi_uge(a, b) is presumably unsigned a >= b, i.e. these
     * test count <= threshold -- confirm against mi_builder.h.
     */
    struct mi_value _tmp1 = mi_uge(&b, mi_imm(DFS_MIN_PRIMREFS), _tmp0);
    struct mi_value _tmp2 = mi_uge(&b, mi_imm(DFS_MAX_PRIMREFS), _tmp0);
    mi_store(&b, REG4, _tmp2);
    /* Three-way branch: trivial, single workgroup, or full build. */
    struct mi_value _tmp3 = mi_value_half(_tmp1, false);
    mi_goto_if(&b, _tmp3, &_goto_target_l_dispatch_trivial);
    struct mi_value _tmp4 = mi_value_half(REG4, false);
    mi_goto_if(&b, _tmp4, &_goto_target_l_dispatch_single_wg);
    mi_goto(&b, &_goto_target_l_full_build);
    /* --- l_dispatch_trivial: one DFS_TRIVIAL dispatch, flush, done. --- */
    mi_goto_target(&b, &_goto_target_l_dispatch_trivial);
    {
        const uint32_t _group_size[3] = { 1, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_globals },
            { .u64 = build_args.p_bvh_base },
            { .u64 = build_args.p_primref_buffer },
            { .u64 = build_args.p_primref_index_buffers },
            { .u64 = build_args.sah_build_flags },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_DFS_DFS_TRIVIAL,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    mi_goto(&b, &_goto_target_l_done);
    /* --- l_dispatch_single_wg: one DFS_SINGLE_WG dispatch, flush, done. --- */
    mi_goto_target(&b, &_goto_target_l_dispatch_single_wg);
    {
        const uint32_t _group_size[3] = { 1, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_globals },
            { .u64 = build_args.p_bvh_base },
            { .u64 = build_args.p_primref_buffer },
            { .u64 = build_args.p_primref_index_buffers },
            { .u64 = build_args.sah_build_flags },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_DFS_DFS_SINGLE_WG,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    mi_goto(&b, &_goto_target_l_done);
    /* --- l_full_build --- */
    mi_goto_target(&b, &_goto_target_l_full_build);
    /* Address of Scheduler::scheduler_postsync inside the scheduler buffer. */
    uint64_t _tmp6 = (build_args.p_scheduler + offsetof(struct grl_new_sah_builder_Scheduler, scheduler_postsync));
    mi_store(&b, REG2, mi_imm(8));
    mi_store(&b, REG1, mi_imm(0));
    uint32_t _tmp7 = 0;
    /* scheduler_postsync is zeroed twice: first as a 32-bit store, then
     * (below) as a 64-bit store to the same address.
     */
    mi_store(&b, mi_mem32(anv_address_from_u64(_tmp6)), mi_imm(_tmp7));
    mi_store(&b, REG4, mi_imm(0x1ffu));
    mi_store(&b, REG5, mi_imm(1));
    mi_store(&b, mi_mem64(anv_address_from_u64(_tmp6)), mi_imm(0));
    /* REG0 = (count + 0x1ff) >> 8 >> 1, i.e. count / 512 rounded up, which
     * becomes DISPATCHDIM_X for the *_INITIAL passes below.
     */
    struct mi_value _tmp8 = mi_iadd(&b, REG0, mi_imm(0x1ffu));
    struct mi_value _tmp9 = mi_ushr_imm(&b, _tmp8, 8);
    struct mi_value _tmp10 = mi_ushr_imm(&b, _tmp9, 1);
    mi_value_add_refs(&b, _tmp10, 1);
    mi_store(&b, REG0, _tmp10);
    struct mi_value _tmp11 = mi_value_half(_tmp10, false);
    mi_store(&b, DISPATCHDIM_X, _tmp11);
    mi_store(&b, DISPATCHDIM_Y, mi_imm(1));
    mi_store(&b, DISPATCHDIM_Z, mi_imm(1));
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* BFS_BEGIN with explicit { 1, 1, 1 } dimensions. */
    {
        const uint32_t _group_size[3] = { 1, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.leaf_size },
            { .u64 = build_args.leaf_type },
            { .u64 = build_args.p_primref_index_buffers },
            { .u64 = build_args.p_primref_buffer },
            { .u64 = build_args.p_bvh2 },
            { .u64 = build_args.p_bvh_base },
            { .u64 = build_args.p_globals },
            { .u64 = build_args.p_sah_globals },
            { .u64 = build_args.p_qnode_child_buffer },
            { .u64 = build_args.sah_build_flags },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BEGIN,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* PASS1_INITIAL then PASS2_INITIAL, both with a NULL group size, with a
     * flush between them.
     */
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BFS_PASS1_INITIAL,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BFS_PASS2_INITIAL,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    /* --- l_build_loop: flush, run the scheduler kernel, flush, then act on
     * the qword it leaves at the start of the scheduler buffer. ---
     */
    mi_goto_target(&b, &_goto_target_l_build_loop);
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    {
        const uint32_t _group_size[3] = { 1, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_SCHEDULER,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* REG2 = scheduler address; it is not read again before being
     * overwritten ahead of l_qnode_loop.
     */
    mi_store(&b, REG2, mi_imm(build_args.p_scheduler));
    /* REG0 = qword at the start of the scheduler buffer.  Its high half is
     * DISPATCHDIM_X for the BFS_DFS dispatch; its low half is the loop exit
     * test (zero -> l_build_qnodes) and DISPATCHDIM_X for the *_INDEXED
     * passes.
     */
    struct mi_value _tmp12 = mi_mem64(anv_address_from_u64(build_args.p_scheduler));
    mi_value_add_refs(&b, _tmp12, 2);
    mi_store(&b, REG0, _tmp12);
    struct mi_value _tmp13 = mi_iand(&b, _tmp12, mi_imm(0xffffffffu));
    struct mi_value _tmp14 = mi_ieq(&b, _tmp13, mi_imm(0));
    mi_value_add_refs(&b, _tmp14, 1);
    mi_store(&b, REG1, _tmp14);
    struct mi_value _tmp15 = mi_value_half(_tmp12, true);
    mi_store(&b, DISPATCHDIM_X, _tmp15);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_DFS,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    /* Leave the loop once the low half of the scheduler qword is zero. */
    mi_goto_if(&b, _tmp14, &_goto_target_l_build_qnodes);
    struct mi_value _tmp16 = mi_value_half(REG0, false);
    mi_store(&b, DISPATCHDIM_X, _tmp16);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BFS_PASS1_INDEXED,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BFS_PASS2_INDEXED,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    mi_goto(&b, &_goto_target_l_build_loop);
    /* --- l_build_qnodes: kick off qnode construction. --- */
    mi_goto_target(&b, &_goto_target_l_build_qnodes);
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    {
        const uint32_t _group_size[3] = { 1, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_sah_globals },
            { .u64 = build_args.p_qnode_child_buffer },
            { .u64 = build_args.sah_build_flags },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BUILD_QNODES_PC_KICKOFF,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
    /* Loop constants and state for l_qnode_loop:
     *   REG7 = 1, REG5 = 8, REG6 = 16 (constants),
     *   REG0 = address of the root buffer counters,
     *   REG2 = running total of entries handed out so far (starts at 0).
     */
    mi_store(&b, REG7, mi_imm(1));
    mi_store(&b, REG5, mi_imm(8));
    mi_store(&b, REG6, mi_imm(16));
    mi_store(&b, REG0, mi_imm(build_args.p_root_buffer_counters));
    mi_store(&b, REG2, mi_imm(0));
    /* --- l_qnode_loop --- */
    mi_goto_target(&b, &_goto_target_l_qnode_loop);
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* REG1 = qword at [REG0]; REG0 then advances by 8.
     * NOTE(review): the mi_*_mem64_offset helpers are presumably
     * (base address, offset value[, data]) -- confirm against mi_builder.h.
     */
    struct mi_value _tmp17 = mi_load_mem64_offset(&b, anv_address_from_u64(0), REG0);
    mi_value_add_refs(&b, _tmp17, 1);
    mi_store(&b, REG1, _tmp17);
    struct mi_value _tmp18 = mi_iadd(&b, REG0, REG5);
    mi_store(&b, REG0, _tmp18);
    /* REG4 = loaded counter - REG2; leave the loop unless 0 < REG4. */
    struct mi_value _tmp19 = mi_isub(&b, _tmp17, REG2);
    mi_value_add_refs(&b, _tmp19, 1);
    mi_store(&b, REG4, _tmp19);
    struct mi_value _tmp20 = mi_ult(&b, mi_imm(0), _tmp19);
    struct mi_value _tmp21 = mi_value_half(_tmp20, false);
    mi_goto_if(&b, mi_inot(&b, _tmp21), &_goto_target_l_done);
    /* Write REG2 to counters + 8 and REG1 to counters + 16, then rewind REG0
     * by 16 so it points at the counters base again.
     */
    mi_store_mem64_offset(&b, anv_address_from_u64(0), REG0, REG2);
    struct mi_value _tmp22 = mi_iadd(&b, REG0, REG5);
    mi_value_add_refs(&b, _tmp22, 1);
    mi_store_mem64_offset(&b, anv_address_from_u64(0), _tmp22, REG1);
    struct mi_value _tmp23 = mi_isub(&b, _tmp22, REG6);
    mi_store(&b, REG0, _tmp23);
    /* REG2 += REG4, then DISPATCHDIM_X = (REG4 + 1) >> 1. */
    struct mi_value _tmp24 = mi_iadd(&b, REG2, REG4);
    mi_store(&b, REG2, _tmp24);
    struct mi_value _tmp25 = mi_iadd(&b, REG4, REG7);
    struct mi_value _tmp26 = mi_ushr(&b, _tmp25, REG7);
    mi_value_add_refs(&b, _tmp26, 1);
    mi_store(&b, REG4, _tmp26);
    struct mi_value _tmp27 = mi_value_half(_tmp26, false);
    mi_store(&b, DISPATCHDIM_X, _tmp27);
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_sah_globals },
            { .u64 = build_args.p_qnode_child_buffer },
            { .u64 = build_args.sah_build_flags },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BUILD_QNODES_PC_AMPLIFY,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    mi_goto(&b, &_goto_target_l_qnode_loop);
    /* --- l_done: common exit for all three paths. --- */
    mi_goto_target(&b, &_goto_target_l_done);
}
/*
 * Record the command sequence for a batch of build_args.num_builds SAH BVH
 * builds that share one scheduler buffer.
 *
 * As in genX(grl_new_sah_builder_new_sah_build), there are no host-side
 * branches: all paths are emitted and selection happens through
 * mi_goto()/mi_goto_if().
 *
 * Sequence:
 *   1. Zero Scheduler::scheduler_postsync and dispatch the categorization /
 *      scheduler-init kernel.
 *   2. Read the qword at Scheduler::num_trivial_builds: its low half sizes
 *      the DFS_TRIVIAL_BATCHABLE dispatch, its high half the
 *      DFS_SINGLE_WG_BATCHABLE dispatch.  If num_builds minus both halves is
 *      zero, jump to l_done.
 *   3. l_build_outer_loop: BFS_BEGIN_BATCHABLE, the two *_INITIAL_BATCHABLE
 *      passes, then the inner l_build_loop scheduler loop.  At
 *      l_continue_outer_loop the high half of REG4 decides whether to run
 *      the outer loop again.
 *   4. Qnode phase: scheduler init, BEGIN_BATCHABLE, then l_qnode_loop.
 *   5. l_done: final flush.
 *
 * Every "pending_pipe_bits |= ...; cmd_buffer_apply_pipe_flushes()" pair
 * requests a CS stall plus data-cache and untyped-dataport-cache flushes and
 * applies them at that point in the command stream.
 *
 * NOTE(review): mi_value_half(v, false/true) presumably selects the low/high
 * 32 bits of v, and a NULL group size passed to genX(grl_dispatch)
 * presumably means "take the dimensions from DISPATCHDIM_X/Y/Z" -- confirm
 * against mi_builder.h and genX(grl_dispatch).
 */
void
genX(grl_new_sah_builder_new_sah_build_batchable)(
    struct anv_cmd_buffer *cmd_buffer,
    struct grl_new_sah_builder_SAHBuildArgsBatchable build_args)
{
    struct mi_builder b;
    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
    /* TODO: use anv_mocs? */
    const uint32_t mocs = isl_mocs(&cmd_buffer->device->isl_dev, 0, false);
    mi_builder_set_mocs(&b, mocs);
    
    /* Note the naming gap: REG4..REG7 live in GPRs 3..6, there is no REG3. */
    UNUSED struct mi_value REG0 = mi_reserve_gpr(&b, 0);
    UNUSED struct mi_value REG1 = mi_reserve_gpr(&b, 1);
    UNUSED struct mi_value REG2 = mi_reserve_gpr(&b, 2);
    UNUSED struct mi_value REG4 = mi_reserve_gpr(&b, 3);
    UNUSED struct mi_value REG5 = mi_reserve_gpr(&b, 4);
    UNUSED struct mi_value REG6 = mi_reserve_gpr(&b, 5);
    UNUSED struct mi_value REG7 = mi_reserve_gpr(&b, 6);
    
    struct mi_goto_target _goto_target_l_build_outer_loop = MI_GOTO_TARGET_INIT;
    struct mi_goto_target _goto_target_l_build_loop = MI_GOTO_TARGET_INIT;
    struct mi_goto_target _goto_target_l_continue_outer_loop = MI_GOTO_TARGET_INIT;
    struct mi_goto_target _goto_target_l_qnode_loop = MI_GOTO_TARGET_INIT;
    struct mi_goto_target _goto_target_l_done = MI_GOTO_TARGET_INIT;
    
    /* Zero the 32-bit Scheduler::scheduler_postsync field. */
    uint64_t _tmp28 = (build_args.p_scheduler + offsetof(struct grl_new_sah_builder_Scheduler, scheduler_postsync));
    uint32_t _tmp30 = 0;
    mi_store(&b, mi_mem32(anv_address_from_u64(_tmp28)), mi_imm(_tmp30));
    /* Categorize the builds and initialize the scheduler; note the
     * { 2, 1, 1 } dimensions, unlike the { 1, 1, 1 } used elsewhere.
     */
    {
        const uint32_t _group_size[3] = { 2, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_globals_ptrs },
            { .u64 = build_args.p_buffers_info },
            { .u64 = build_args.p_sah_globals },
            { .u64 = build_args.num_builds },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_CATEGORIZE_BUILDS_AND_INIT_SCHEDULER,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* REG0 = qword at Scheduler::num_trivial_builds.  _tmp32 is consumed
     * five times below (REG0 store, two DISPATCHDIM_X stores, _tmp35 and
     * _tmp36), hence the four extra references.
     */
    uint64_t _tmp31 = (build_args.p_scheduler + offsetof(struct grl_new_sah_builder_Scheduler, num_trivial_builds));
    struct mi_value _tmp32 = mi_mem64(anv_address_from_u64(_tmp31));
    mi_value_add_refs(&b, _tmp32, 4);
    mi_store(&b, REG0, _tmp32);
    /* Low half sizes the trivial-build dispatch. */
    struct mi_value _tmp33 = mi_value_half(_tmp32, false);
    mi_store(&b, DISPATCHDIM_X, _tmp33);
    mi_store(&b, DISPATCHDIM_Y, mi_imm(1));
    mi_store(&b, DISPATCHDIM_Z, mi_imm(1));
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_DFS_DFS_TRIVIAL_BATCHABLE,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* High half sizes the single-workgroup dispatch. */
    struct mi_value _tmp34 = mi_value_half(_tmp32, true);
    mi_store(&b, DISPATCHDIM_X, _tmp34);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_sah_globals },
            { .u64 = build_args.p_scheduler },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_DFS_DFS_SINGLE_WG_BATCHABLE,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    /* REG1 = low half, REG2 = high half,
     * REG5 = num_builds - (high + low), REG4 = (REG5 == 0).
     * When nothing is left after the two dispatches above, skip to l_done.
     */
    struct mi_value _tmp35 = mi_value_half(_tmp32, false);
    mi_value_add_refs(&b, _tmp35, 1);
    mi_store(&b, REG1, _tmp35);
    struct mi_value _tmp36 = mi_value_half(_tmp32, true);
    mi_value_add_refs(&b, _tmp36, 1);
    mi_store(&b, REG2, _tmp36);
    struct mi_value _tmp37 = mi_iadd(&b, _tmp36, _tmp35);
    struct mi_value _tmp38 = mi_isub(&b, mi_imm(build_args.num_builds), _tmp37);
    mi_value_add_refs(&b, _tmp38, 1);
    mi_store(&b, REG5, _tmp38);
    struct mi_value _tmp39 = mi_ieq(&b, _tmp38, mi_imm(0));
    mi_value_add_refs(&b, _tmp39, 1);
    mi_store(&b, REG4, _tmp39);
    struct mi_value _tmp40 = mi_value_half(_tmp39, false);
    mi_goto_if(&b, _tmp40, &_goto_target_l_done);
    /* --- l_build_outer_loop --- */
    mi_goto_target(&b, &_goto_target_l_build_outer_loop);
    {
        const uint32_t _group_size[3] = { 1, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BEGIN_BATCHABLE,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* REG0 = address of Scheduler::batched_build_wg_count and REG4 = the
     * qword stored there.  REG4's low half sizes the *_INITIAL_BATCHABLE
     * passes; its high half is tested at l_continue_outer_loop.
     */
    uint64_t _tmp41 = (build_args.p_scheduler + offsetof(struct grl_new_sah_builder_Scheduler, batched_build_wg_count));
    mi_store(&b, REG0, mi_imm(_tmp41));
    struct mi_value _tmp42 = mi_mem64(anv_address_from_u64(_tmp41));
    mi_value_add_refs(&b, _tmp42, 1);
    mi_store(&b, REG4, _tmp42);
    struct mi_value _tmp43 = mi_value_half(_tmp42, false);
    mi_store(&b, DISPATCHDIM_X, _tmp43);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BFS_PASS1_INITIAL_BATCHABLE,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BFS_PASS2_INITIAL_BATCHABLE,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    /* --- l_build_loop: flush, run the scheduler kernel, flush, then act on
     * the qword it leaves at the start of the scheduler buffer. ---
     */
    mi_goto_target(&b, &_goto_target_l_build_loop);
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    {
        const uint32_t _group_size[3] = { 1, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_SCHEDULER,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    mi_store(&b, REG2, mi_imm(build_args.p_scheduler));
    /* REG0 = qword at the start of the scheduler buffer.  Its high half is
     * DISPATCHDIM_X for the BFS_DFS dispatch; its low half is the inner-loop
     * exit test (zero -> l_continue_outer_loop) and DISPATCHDIM_X for the
     * *_INDEXED_BATCHABLE passes.
     */
    struct mi_value _tmp44 = mi_mem64(anv_address_from_u64(build_args.p_scheduler));
    mi_value_add_refs(&b, _tmp44, 2);
    mi_store(&b, REG0, _tmp44);
    struct mi_value _tmp45 = mi_iand(&b, _tmp44, mi_imm(0xffffffffu));
    struct mi_value _tmp46 = mi_ieq(&b, _tmp45, mi_imm(0));
    mi_value_add_refs(&b, _tmp46, 1);
    mi_store(&b, REG1, _tmp46);
    struct mi_value _tmp47 = mi_value_half(_tmp44, true);
    mi_store(&b, DISPATCHDIM_X, _tmp47);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_DFS,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    /* Leave the inner loop once the low half of the scheduler qword is zero. */
    mi_goto_if(&b, _tmp46, &_goto_target_l_continue_outer_loop);
    struct mi_value _tmp48 = mi_value_half(REG0, false);
    mi_store(&b, DISPATCHDIM_X, _tmp48);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BFS_PASS1_INDEXED_BATCHABLE,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BFS_PASS2_INDEXED_BATCHABLE,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    mi_goto(&b, &_goto_target_l_build_loop);
    /* --- l_continue_outer_loop: run the outer loop again when the high half
     * of REG4 (loaded from batched_build_wg_count in the outer loop) says
     * so. ---
     */
    mi_goto_target(&b, &_goto_target_l_continue_outer_loop);
    struct mi_value _tmp49 = mi_value_half(REG4, true);
    mi_goto_if(&b, _tmp49, &_goto_target_l_build_outer_loop);
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* --- Qnode phase: initialize the batched qnode scheduler. --- */
    {
        const uint32_t _group_size[3] = { 1, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.num_builds },
            { .u64 = build_args.num_max_qnode_global_root_buffer_entries },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BUILD_QNODES_INIT_SCHEDULER_BATCHED,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
    /* REG2 = scheduler address; l_qnode_loop loads through it. */
    mi_store(&b, REG2, mi_imm(build_args.p_scheduler));
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* REG1 = qword at the start of the scheduler buffer; its low half sizes
     * the BEGIN_BATCHABLE dispatch.
     */
    struct mi_value _tmp50 = mi_mem64(anv_address_from_u64(build_args.p_scheduler));
    mi_value_add_refs(&b, _tmp50, 1);
    mi_store(&b, REG1, _tmp50);
    struct mi_value _tmp51 = mi_value_half(_tmp50, false);
    mi_store(&b, DISPATCHDIM_X, _tmp51);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
            { .u64 = build_args.p_sah_globals },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BUILD_QNODES_BEGIN_BATCHABLE,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    /* REG5 = qword at Scheduler::batched_build_wg_count; its low half later
     * sizes the TRY_TO_FILL_GRB_BATCHED dispatch.
     */
    uint64_t _tmp52 = (build_args.p_scheduler + offsetof(struct grl_new_sah_builder_Scheduler, batched_build_wg_count));
    mi_store(&b, REG0, mi_imm(_tmp52));
    struct mi_value _tmp53 = mi_mem64(anv_address_from_u64(_tmp52));
    mi_store(&b, REG5, _tmp53);
    /* --- l_qnode_loop --- */
    mi_goto_target(&b, &_goto_target_l_qnode_loop);
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    {
        const uint32_t _group_size[3] = { 1, 1, 1 };
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_scheduler },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BUILD_QNODES_SCHEDULER,
                           _group_size, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* REG1 = qword at [REG2] (start of the scheduler buffer); leave the loop
     * unless 0 < REG1, otherwise its low half sizes the amplify dispatch.
     */
    struct mi_value _tmp55 = mi_load_mem64_offset(&b, anv_address_from_u64(0), REG2);
    mi_value_add_refs(&b, _tmp55, 1);
    mi_store(&b, REG1, _tmp55);
    struct mi_value _tmp56 = mi_ult(&b, mi_imm(0), _tmp55);
    struct mi_value _tmp57 = mi_value_half(_tmp56, false);
    mi_goto_if(&b, mi_inot(&b, _tmp57), &_goto_target_l_done);
    struct mi_value _tmp58 = mi_value_half(REG1, false);
    mi_store(&b, DISPATCHDIM_X, _tmp58);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_sah_globals },
            { .u64 = build_args.p_scheduler },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BUILD_QNODES_PC_AMPLIFY_BATCHED,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    /* NOTE(review): REG1 is reloaded from [REG2] here but is not read again
     * before the loop top overwrites it, while the test below is on REG4,
     * whose last write in this function is the batched_build_wg_count load
     * in the outer build loop.  Confirm against the metakernel source that
     * REG4 (and not the freshly loaded value) is the intended operand.
     */
    struct mi_value _tmp59 = mi_load_mem64_offset(&b, anv_address_from_u64(0), REG2);
    mi_store(&b, REG1, _tmp59);
    struct mi_value _tmp61 = mi_ult(&b, mi_imm(0), REG4);
    struct mi_value _tmp62 = mi_value_half(_tmp61, false);
    mi_goto_if(&b, mi_inot(&b, _tmp62), &_goto_target_l_qnode_loop);
    struct mi_value _tmp63 = mi_value_half(REG5, false);
    mi_store(&b, DISPATCHDIM_X, _tmp63);
    {
        const struct anv_kernel_arg _args[] = {
            { .u64 = build_args.p_sah_globals },
            { .u64 = build_args.p_scheduler },
        };
        genX(grl_dispatch)(cmd_buffer, GRL_CL_KERNEL_BVH_BUILD_BFS_BUILD_QNODES_TRY_TO_FILL_GRB_BATCHED,
                           NULL, ARRAY_SIZE(_args), _args);
    }
    mi_goto(&b, &_goto_target_l_qnode_loop);
    /* --- l_done: final flush before returning to the caller. --- */
    mi_goto_target(&b, &_goto_target_l_done);
    cmd_buffer->state.pending_pipe_bits |=
        ANV_PIPE_CS_STALL_BIT |
        ANV_PIPE_DATA_CACHE_FLUSH_BIT |
        ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}