// groupshared unsigned-integer operands for the Interlocked* calls in the
// functions below. There is one a/b/c triple per width: scalar, uint2, uint3
// and uint4. In every call the "a" variable is the first argument and "b" the
// second; "c" is only passed to InterlockedCompareExchange.
groupshared uint gs_ua;
groupshared uint gs_ub;
groupshared uint gs_uc;
groupshared uint2 gs_ua2;
groupshared uint2 gs_ub2;
groupshared uint2 gs_uc2;
groupshared uint3 gs_ua3;
groupshared uint3 gs_ub3;
groupshared uint3 gs_uc3;
groupshared uint4 gs_ua4;
groupshared uint4 gs_ub4;
groupshared uint4 gs_uc4;

// Scalar variant of the compute-shader intrinsic test.
//
// Calls all() once, then issues each Interlocked* atomic on the scalar
// groupshared uints (gs_ua, gs_ub, gs_uc). Add/And/Max/Min/Or/Xor are each
// called twice: once with two arguments and once with a trailing out_u1
// argument. CompareExchange and Exchange are called only in the form that
// takes the trailing out argument.
//
// Parameters: inF0 is passed to all(); inF1, inF2, inU0 and inU1 are not
// referenced in the body.
// Returns: always 0.0.
float ComputeShaderFunctionS(float inF0, float inF1, float inF2, uint inU0, uint inU1)
{
    // Passed as the trailing argument of the atomics below (the original-value
    // output according to the HLSL documentation); never read afterwards.
    uint out_u1;

    // Don't repeat all the pixel/vertex fns - just one for sanity.
    // The result of all() is discarded; only the call itself matters here.
    all(inF0);

    // Test atomics
    InterlockedAdd(gs_ua, gs_ub);
    InterlockedAdd(gs_ua, gs_ub, out_u1);
    InterlockedAnd(gs_ua, gs_ub);
    InterlockedAnd(gs_ua, gs_ub, out_u1);
    // Compare-exchange is the only call that uses the third shared variable.
    InterlockedCompareExchange(gs_ua, gs_ub, gs_uc, out_u1);
    InterlockedExchange(gs_ua, gs_ub, out_u1);
    InterlockedMax(gs_ua, gs_ub);
    InterlockedMax(gs_ua, gs_ub, out_u1);
    InterlockedMin(gs_ua, gs_ub);
    InterlockedMin(gs_ua, gs_ub, out_u1);
    InterlockedOr(gs_ua, gs_ub);
    InterlockedOr(gs_ua, gs_ub, out_u1);
    InterlockedXor(gs_ua, gs_ub);
    InterlockedXor(gs_ua, gs_ub, out_u1);

    // CheckAccessFullyMapped(3);  // TODO: ...

    return 0.0;
}

// float1 variant: currently a placeholder with no intrinsic or atomic calls.
//
// Parameters: inF0, inF1 and inF2 are not referenced in the body. Unlike the
// other variants, this one takes no uint parameters.
// Returns: always 0.0.
float1 ComputeShaderFunction1(float1 inF0, float1 inF1, float1 inF2)
{
    // TODO: ... add when float1 prototypes are generated
    return 0.0;
}

// Two-component variant of the compute-shader intrinsic test.
//
// Calls all() once, then issues each Interlocked* atomic on the uint2
// groupshared variables (gs_ua2, gs_ub2, gs_uc2). Add/And/Max/Min/Or/Xor are
// each called with and without a trailing out_u2 argument; CompareExchange
// and Exchange are called only with it.
//
// Parameters: inF0 is passed to all(); inF1, inF2, inU0 and inU1 are not
// referenced in the body.
// Returns: the constant float2(1,2).
float2 ComputeShaderFunction2(float2 inF0, float2 inF1, float2 inF2, uint2 inU0, uint2 inU1)
{
    // Passed as the trailing argument of the atomics below; never read afterwards.
    uint2 out_u2;

    // Don't repeat all the pixel/vertex fns - just one for sanity.
    // The result of all() is discarded; only the call itself matters here.
    all(inF0);

    // Test atomics
    // NOTE(review): operands here are uint2 vectors; presumably this exercises
    // the compiler's handling of vector arguments to the atomics - confirm.
    InterlockedAdd(gs_ua2, gs_ub2);
    InterlockedAdd(gs_ua2, gs_ub2, out_u2);
    InterlockedAnd(gs_ua2, gs_ub2);
    InterlockedAnd(gs_ua2, gs_ub2, out_u2);
    // Compare-exchange is the only call that uses the third shared variable.
    InterlockedCompareExchange(gs_ua2, gs_ub2, gs_uc2, out_u2);
    InterlockedExchange(gs_ua2, gs_ub2, out_u2);
    InterlockedMax(gs_ua2, gs_ub2);
    InterlockedMax(gs_ua2, gs_ub2, out_u2);
    InterlockedMin(gs_ua2, gs_ub2);
    InterlockedMin(gs_ua2, gs_ub2, out_u2);
    InterlockedOr(gs_ua2, gs_ub2);
    InterlockedOr(gs_ua2, gs_ub2, out_u2);
    InterlockedXor(gs_ua2, gs_ub2);
    InterlockedXor(gs_ua2, gs_ub2, out_u2);

    // TODO: ... add when float1 prototypes are generated
    // NOTE(review): the TODO above mentions float1 although this is the float2
    // variant; it looks copied from ComputeShaderFunction1 - confirm it applies.
    return float2(1,2);
}

// Three-component variant of the compute-shader intrinsic test.
//
// Calls all() once, then issues each Interlocked* atomic on the uint3
// groupshared variables (gs_ua3, gs_ub3, gs_uc3). Add/And/Max/Min/Or/Xor are
// each called with and without a trailing out_u3 argument; CompareExchange
// and Exchange are called only with it.
//
// Parameters: inF0 is passed to all(); inF1, inF2, inU0 and inU1 are not
// referenced in the body.
// Returns: the constant float3(1,2,3).
float3 ComputeShaderFunction3(float3 inF0, float3 inF1, float3 inF2, uint3 inU0, uint3 inU1)
{
    // Passed as the trailing argument of the atomics below; never read afterwards.
    uint3 out_u3;
    
    // Don't repeat all the pixel/vertex fns - just one for sanity.
    // The result of all() is discarded; only the call itself matters here.
    all(inF0);

    // Test atomics
    // NOTE(review): operands here are uint3 vectors; presumably this exercises
    // the compiler's handling of vector arguments to the atomics - confirm.
    InterlockedAdd(gs_ua3, gs_ub3);
    InterlockedAdd(gs_ua3, gs_ub3, out_u3);
    InterlockedAnd(gs_ua3, gs_ub3);
    InterlockedAnd(gs_ua3, gs_ub3, out_u3);
    // Compare-exchange is the only call that uses the third shared variable.
    InterlockedCompareExchange(gs_ua3, gs_ub3, gs_uc3, out_u3);
    InterlockedExchange(gs_ua3, gs_ub3, out_u3);
    InterlockedMax(gs_ua3, gs_ub3);
    InterlockedMax(gs_ua3, gs_ub3, out_u3);
    InterlockedMin(gs_ua3, gs_ub3);
    InterlockedMin(gs_ua3, gs_ub3, out_u3);
    InterlockedOr(gs_ua3, gs_ub3);
    InterlockedOr(gs_ua3, gs_ub3, out_u3);
    InterlockedXor(gs_ua3, gs_ub3);
    InterlockedXor(gs_ua3, gs_ub3, out_u3);

    // TODO: ... add when float1 prototypes are generated
    // NOTE(review): the TODO above mentions float1 although this is the float3
    // variant; it looks copied from ComputeShaderFunction1 - confirm it applies.
    return float3(1,2,3);
}

// Four-component variant of the compute-shader intrinsic test.
//
// Calls all() once, then issues each Interlocked* atomic on the uint4
// groupshared variables (gs_ua4, gs_ub4, gs_uc4). Add/And/Max/Min/Or/Xor are
// each called with and without a trailing out_u4 argument; CompareExchange
// and Exchange are called only with it.
//
// Parameters: inF0 is passed to all(); inF1, inF2, inU0 and inU1 are not
// referenced in the body.
// Returns: the constant float4(1,2,3,4).
// NOTE(review): this is the only variant without a width suffix in its name;
// presumably it is the shader entry point - confirm against the build/test setup.
float4 ComputeShaderFunction(float4 inF0, float4 inF1, float4 inF2, uint4 inU0, uint4 inU1)
{
    // Passed as the trailing argument of the atomics below; never read afterwards.
    uint4 out_u4;

    // Don't repeat all the pixel/vertex fns - just one for sanity.
    // The result of all() is discarded; only the call itself matters here.
    all(inF0);

    // Test atomics
    // NOTE(review): operands here are uint4 vectors; presumably this exercises
    // the compiler's handling of vector arguments to the atomics - confirm.
    InterlockedAdd(gs_ua4, gs_ub4);
    InterlockedAdd(gs_ua4, gs_ub4, out_u4);
    InterlockedAnd(gs_ua4, gs_ub4);
    InterlockedAnd(gs_ua4, gs_ub4, out_u4);
    // Compare-exchange is the only call that uses the third shared variable.
    InterlockedCompareExchange(gs_ua4, gs_ub4, gs_uc4, out_u4);
    InterlockedExchange(gs_ua4, gs_ub4, out_u4);
    InterlockedMax(gs_ua4, gs_ub4);
    InterlockedMax(gs_ua4, gs_ub4, out_u4);
    InterlockedMin(gs_ua4, gs_ub4);
    InterlockedMin(gs_ua4, gs_ub4, out_u4);
    InterlockedOr(gs_ua4, gs_ub4);
    InterlockedOr(gs_ua4, gs_ub4, out_u4);
    InterlockedXor(gs_ua4, gs_ub4);
    InterlockedXor(gs_ua4, gs_ub4, out_u4);

    // TODO: ... add when float1 prototypes are generated
    // NOTE(review): the TODO above mentions float1 although this is the float4
    // variant; it looks copied from ComputeShaderFunction1 - confirm it applies.
    return float4(1,2,3,4);
}