error X3504: literal loop terminated early due to out of bounds array access #4381

TobTobXX · 2022-04-27T16:06:45Z

Hi there, I wrote this wgpu application on Linux (Wayland) and it worked just fine. Now that someone wanted to compile it on Windows, it doesn't work:

noisy log output

2022-04-27T15:20:32.196Z WARN  [wgpu_hal::dx12::instance] Unable to enable D3D12 debug interface: 0x887A002D
2022-04-27T15:20:32.197Z WARN  [wgpu_hal::dx12::instance] Unable to enable DXGI debug interface: 0x887A002D
2022-04-27T15:21:29.310Z WARN  [wgpu_hal::dx12::device] Naga generated shader for "fs_main" at Fragment:
struct NagaConstants { // Constants block emitted by Naga; none of its fields are read anywhere in this shader.
    int base_vertex;
    int base_instance;
    uint other;
};
ConstantBuffer<NagaConstants> _NagaConstants: register(b2); // Bound at register b2.
static const int PATHS_BUF_SIZE = 200; // Loop bound used by fs_main; equals the length of Paths::paths.

struct CameraUniform { // Layout of the `camera` cbuffer.
    row_major float4x4 view_proj; // Multiplied with the vertex position in vs_main.
    float zoom; // fs_main divides the final distance by this value.
};

struct Path { // One entry of the Paths array that fs_main iterates over.
    uint kind; // Switched on in fs_main: 1u, 2u and 3u are handled, any other value is ignored.
    float2 p0_; // p0_..p3_: points passed to the distance functions; fs_main reads 2 of them for kind 1u, 3 for 2u, 4 for 3u.
    float2 p1_;
    float2 p2_;
    float2 p3_;
};

struct Paths { // Layout of the `paths` cbuffer.
    Path paths[200]; // Fixed-size array; fs_main walks indices 0..PATHS_BUF_SIZE-1.
};

struct Bezier { // Coefficients that cubic_bezier evaluates as a*t^3 + b*t^2 + c*t + d.
    float2 a;
    float2 b;
    float2 c;
    float2 d;
};

struct Poly5_ { // Coefficients of a degree-5 polynomial, highest power first (cN_ multiplies x^N in poly5_mult).
    float c5_;
    float c4_;
    float c3_;
    float c2_;
    float c1_;
    float c0_;
};

struct VertexInput { // Parameter type of vs_main.
    linear float2 pos : LOC0; // Extended to (x, y, 0, 1) and transformed by camera.view_proj in vs_main.
    linear float2 tex_coord : LOC1; // Copied through to the vertex output unchanged.
};

struct VertexOutput { // Working struct: filled in by vs_main and rebuilt from FragmentInput_fs_main in fs_main.
    float4 projected : SV_Position;
    linear float2 tex_coord : LOC0;
};

cbuffer camera : register(b0) { CameraUniform camera; } // Read by vs_main (view_proj) and fs_main (zoom).
cbuffer paths : register(b1) { Paths paths; } // Read by fs_main.

struct VertexOutput_vs_main { // Return type of vs_main; same two fields as VertexOutput, in the opposite order.
    float2 tex_coord : LOC0;
    float4 projected : SV_Position;
};

struct FragmentInput_fs_main { // Parameter type of fs_main; uses the same semantics (LOC0, SV_Position) as VertexOutput_vs_main.
    float2 tex_coord_1 : LOC0;
    float4 projected_1 : SV_Position;
};

float2 cubic_bezier(Bezier b, float t) // Evaluates b.a*t^3 + b.b*t^2 + b.c*t + b.d and returns the resulting 2D point.
{
    return ((((b.a * pow(t, 3.0)) + (b.b * pow(t, 2.0))) + (b.c * t)) + b.d);
}

float4 poly5_mult(Poly5_ p, float4 x) // Evaluates the degree-5 polynomial p at each of the four components of x.
{
    return ((((((p.c5_ * pow(x, float4(5.0.xxxx))) + (p.c4_ * pow(x, float4(4.0.xxxx)))) + (p.c3_ * pow(x, float4(3.0.xxxx)))) + (p.c2_ * pow(x, float4(2.0.xxxx)))) + (p.c1_ * x)) + float4(p.c0_.xxxx));
}

float4 poly5_d1_mult(Poly5_ p_1, float4 x_1) // Evaluates the first derivative of p_1 (5*c5*x^4 + 4*c4*x^3 + 3*c3*x^2 + 2*c2*x + c1) at each component of x_1.
{
    return ((((((5.0 * p_1.c5_) * pow(x_1, float4(4.0.xxxx))) + ((4.0 * p_1.c4_) * pow(x_1, float4(3.0.xxxx)))) + ((3.0 * p_1.c3_) * pow(x_1, float4(2.0.xxxx)))) + ((2.0 * p_1.c2_) * x_1)) + float4(p_1.c1_.xxxx));
}

float4 find_roots4_newton(Poly5_ poly) // Runs three Newton iterations on four starting guesses at once; returns the four results clamped to [0, 1].
{
    float4 approx = (float4)0;
    int i_1 = 0;

    approx = float4(0.0, 0.33000001311302185, 0.6600000262260437, 1.0); // Four initial guesses, one per vector component.
    bool loop_init = true; // Makes the increment below be skipped on the first pass only.
    while(true) {
        if (!loop_init) {
        int _expr15 = i_1;
        i_1 = (_expr15 + 1);
        }
        loop_init = false;
        int _expr13 = i_1;
        if ((_expr13 < 3)) { // Keep going while i_1 is 0, 1 or 2.
        } else {
            break;
        }
        float4 _expr18 = approx;
        float4 _expr19 = approx;
        const float4 _e20 = poly5_mult(poly, _expr19); // p(x)
        float4 _expr21 = approx;
        const float4 _e22 = poly5_d1_mult(poly, _expr21); // p'(x)
        approx = (_expr18 - (_e20 / _e22)); // Newton step x - p(x) / p'(x), applied component-wise.
    }
    float4 _expr25 = approx;
    return clamp(_expr25, float4(0.0.xxxx), float4(1.0.xxxx));
}

Bezier ConstructBezier(float2 arg0, float2 arg1, float2 arg2, float2 arg3) { // Builds a Bezier from its four fields, in declaration order (a, b, c, d).
    Bezier ret;
    ret.a = arg0;
    ret.b = arg1;
    ret.c = arg2;
    ret.d = arg3;
    return ret;
}

Poly5_ ConstructPoly5_(float arg0, float arg1, float arg2, float arg3, float arg4, float arg5) { // Builds a Poly5_ from its six fields, in declaration order (c5_ down to c0_).
    Poly5_ ret;
    ret.c5_ = arg0;
    ret.c4_ = arg1;
    ret.c3_ = arg2;
    ret.c2_ = arg3;
    ret.c1_ = arg4;
    ret.c0_ = arg5;
    return ret;
}

float cubic_bezier_sd(float2 p0_, float2 p1_, float2 p2_, float2 p3_, float2 p_2, float R) // Returns the smallest distance from p_2 to the curve points at the solver's candidate parameters, minus R.
{
    float dist_1 = (float)0;
    int i_2 = 1; // The loop below starts at 1 because candidate 0 is handled before the loop.

    float2 a_1 = ((((-1.0 * p0_) + (3.0 * p1_)) - (3.0 * p2_)) + (1.0 * p3_)); // a_1, b_2, c, d: coefficients of t^3, t^2, t, 1 as consumed by cubic_bezier.
    float2 b_2 = (((3.0 * p0_) - (6.0 * p1_)) + (3.0 * p2_));
    float2 c = ((-3.0 * p0_) + (3.0 * p1_));
    float2 d = (1.0 * p0_);
    Bezier curve = ConstructBezier(a_1, b_2, c, d);
    float2 v_c5_ = ((-3.0 * a_1) * a_1); // Each v_cN_ is a component-wise product; summing .x and .y below gives the scalar coefficient cN_.
    float c5_ = (v_c5_.x + v_c5_.y);
    float2 v_c4_ = ((-5.0 * a_1) * b_2);
    float c4_ = (v_c4_.x + v_c4_.y);
    float2 v_c3_ = (((-4.0 * a_1) * c) - ((2.0 * b_2) * b_2));
    float c3_ = (v_c3_.x + v_c3_.y);
    float2 v_c2_ = ((((-3.0 * a_1) * d) - ((3.0 * b_2) * c)) + ((3.0 * a_1) * p_2));
    float c2_ = (v_c2_.x + v_c2_.y);
    float2 v_c1_ = ((((-2.0 * b_2) * d) - (c * c)) + ((2.0 * b_2) * p_2));
    float c1_ = (v_c1_.x + v_c1_.y);
    float2 v_c0_ = ((-c * d) + (c * p_2));
    float c0_ = (v_c0_.x + v_c0_.y);
    Poly5_ poly_1 = ConstructPoly5_(c5_, c4_, c3_, c2_, c1_, c0_); // NOTE(review): presumably the derivative of the squared distance to p_2 (up to scale) - confirm against shader.wgsl.
    const float4 _e94 = find_roots4_newton(poly_1); // Four candidate curve parameters, already clamped to [0, 1].
    const float2 _e97 = cubic_bezier(curve, _e94.x);
    dist_1 = distance(_e97, p_2); // Seed the minimum with candidate 0.
    bool loop_init_1 = true; // Makes the increment below be skipped on the first pass only.
    while(true) {
        if (!loop_init_1) {
        int _expr105 = i_2;
        i_2 = (_expr105 + 1);
        }
        loop_init_1 = false;
        int _expr102 = i_2;
        if ((_expr102 < 5)) { // i_2 takes the values 1, 2, 3 and 4.
        } else {
            break;
        }
        int _expr108 = i_2;
        const float2 _e110 = cubic_bezier(curve, _e94[_expr108]); // _e94 is a float4 (valid indices 0..3), so i_2 == 4 is out of bounds; this is Shader(168,50-63) in the X3504 error.
        float this_dist = distance(_e110, p_2);
        float _expr112 = dist_1;
        dist_1 = min(_expr112, this_dist);
    }
    float _expr114 = dist_1;
    return (_expr114 - R);
}

float line_sd(float2 a, float2 b_1, float2 p_3, float R_1) // Returns the distance from p_3 to the nearest point on the segment a..b_1, minus R_1.
{
    float2 r = (b_1 - a);
    float t_1 = clamp((dot(r, (p_3 - a)) / length(r)), 0.0, length(r)); // Length of the projection of (p_3 - a) onto r, clamped to the segment. NOTE(review): a == b_1 makes length(r) zero in this division.
    float2 n = ((t_1 * normalize(r)) + a); // Point on the segment at that clamped length from a.
    return (distance(n, p_3) - R_1);
}

VertexOutput_vs_main vs_main(VertexInput v_in) // Vertex entry point: transforms the 2D position by camera.view_proj and passes tex_coord through.
{
    VertexOutput v_out = (VertexOutput)0;

    float4x4 _expr7 = camera.view_proj;
    v_out.projected = mul(float4(v_in.pos, 0.0, 1.0), _expr7); // Position extended to (x, y, 0, 1) before the multiply.
    v_out.tex_coord = v_in.tex_coord;
    VertexOutput _expr15 = v_out;
    const VertexOutput vertexoutput = _expr15;
    const VertexOutput_vs_main vertexoutput_1 = { vertexoutput.tex_coord, vertexoutput.projected }; // Fields are swapped here to match VertexOutput_vs_main's declaration order.
    return vertexoutput_1;
}

float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 // Fragment entry point: takes the minimum distance from tex_coord over all path entries, divides it by camera.zoom and outputs it as a grey value.
{
    VertexOutput f_in = { fragmentinput_fs_main.projected_1, fragmentinput_fs_main.tex_coord_1 };
    float dist = 1000000.0; // Starting value for the running minimum.
    int i = 0;

    bool loop_init_2 = true; // Makes the increment below be skipped on the first pass only.
    while(true) {
        if (!loop_init_2) {
        int _expr11 = i;
        i = (_expr11 + 1);
        }
        loop_init_2 = false;
        int _expr9 = i;
        if ((_expr9 < PATHS_BUF_SIZE)) { // Visit every one of the 200 array slots.
        } else {
            break;
        }
        int _expr15 = i;
        Path path = paths.paths[_expr15];
        switch(path.kind) {
            case 1u: { // Segment from p0_ to p1_.
                const float _e22 = line_sd(path.p0_, path.p1_, f_in.tex_coord, 3.0);
                float _expr23 = dist;
                dist = min(_expr23, _e22);
                break;
            }
            case 2u: { // Three-point path: four points are derived from p0_, p1_, p2_ and passed to the cubic routine.
                float2 p0_1 = path.p0_;
                float2 p1_1 = (path.p0_ + ((2.0 / 3.0) * (path.p1_ - path.p0_)));
                float2 p2_1 = (path.p2_ + ((2.0 / 3.0) * (path.p1_ - path.p2_)));
                float2 p3_1 = path.p2_;
                const float _e46 = cubic_bezier_sd(p0_1, p1_1, p2_1, p3_1, f_in.tex_coord, 3.0);
                float _expr47 = dist;
                dist = min(_expr47, _e46);
                break;
            }
            case 3u: { // Four-point path: p0_..p3_ are passed to the cubic routine as they are.
                const float _e54 = cubic_bezier_sd(path.p0_, path.p1_, path.p2_, path.p3_, f_in.tex_coord, 3.0);
                float _expr55 = dist;
                dist = min(_expr55, _e54);
                break;
            }
            default: { // Any other kind leaves dist unchanged.
                break;
            }
        }
    }
    float _expr57 = dist;
    float _expr59 = camera.zoom;
    float adj_distance = (_expr57 / _expr59);
    return float4(float3(adj_distance.xxx), 1.0); // Same value in r, g and b; alpha fixed at 1.
}

2022-04-27T15:21:29.310Z WARN  [wgpu::backend::direct] Shader translation error for stage FRAGMENT: D3DCompile error (0x80004005): C:\Users\Tobias\Downloads\font-renderer-master.tar\font-renderer-master\Shader(168,50-63): error X3504: literal loop terminated early due to out of bounds array access

(Note that this is not the stderr output, since the Windows shell SUUUCKS to copy-paste from. But stderr doesn't have any more info.)

Here's the important part (I think, line breaks and indentation mine):

2022-04-27T15:21:29.310Z WARN  [wgpu::backend::direct] Shader translation error for stage FRAGMENT:
  D3DCompile error (0x80004005):
    C:\Users\Tobias\Downloads\font-renderer-master.tar\font-renderer-master\Shader(168,50-63):
      error X3504: literal loop terminated early due to out of bounds array access

This is the shader I used: shader.wgsl (pls don't laugh. I know these algorithms can be done better, but I had to do it without copy-pasting.)

And this is a link to the project source. It's just standard cargo run, in case anyone wants to try: sources.tar.gz.

Is there a way to work around this?

The text was updated successfully, but these errors were encountered:

cwfitzgerald · 2022-04-27T16:28:12Z

Thanks for filing!

This is ultimately an issue on our end, as this is valid wgsl, but the HLSL compiler isn't happy with the output.

That being said, the loop on line 209 indexes the vector out of bounds: it indexes it at 1, 2, 3, 4, and 4 isn't an in-bounds index into a vec4. If you were to keep those in bounds, it should compile.

TobTobXX · 2022-04-27T16:54:42Z

Yes, it compiles now. Sorry for the inconvenience. I didn't catch it because I guess Vulkan just ignores it...

(at least now someone searching for it should find this ;) )

jimblandy · 2022-04-30T19:31:12Z

Could we get a reduced version of the test case here?

Here's the loop @cwfitzgerald called out at line 209:

// i takes the values 1, 2, 3 and 4. roots_n is described in this thread as a vec4,
// so the final iteration reads one element past its end.
for (var i = 1; i < 5; i=i+1) {
	let this_dist = distance(cubic_bezier(curve, roots_n[i]), p);
	// And accept any smaller distance
	dist = min(dist, this_dist);
}

This could be a problem for us. roots_n[i] is a dynamic access, and it's apparently only because the HLSL compiler is unrolling the loop that it recognizes that this expression will definitely perform an out-of-bounds access. We don't want to have to do this level of analysis in Naga.

If that's what's going on, then Naga may just have to accept that HLSL will reject shaders that we pass, because it analyzes them more thoroughly.

jimblandy · 2022-04-30T19:32:02Z

@TobTobXX Would you be able to put together a reduced version of the test case, based on the loop at line 209?

cwfitzgerald · 2022-04-30T20:21:33Z

I think a simple example would be an explicit vector_4[4]. I think this might be a limitation in DXBC or FXC where it doesn't allow non-constant indexing into vectors.

jimblandy · 2022-04-30T23:35:52Z

> I think a simple example would be an explicit vector_4[4]. I think this might be a limitation in DXBC or FXC where it doesn't allow non-constant indexing into vectors.

We already detect this:

// Constant index 4 on a vec4 (valid indices are 0..3).
fn f(v: vec4<f32>) -> f32 {
   return v[4];
}

That elicits:

error: Index 4 is out of bounds for expression [1]

Could not parse WGSL

jimblandy · 2022-04-30T23:53:18Z

But, for example, Naga validates this and I suspect the HLSL compiler will reject it:

// Sums v[1] through v[4]; the index is a loop variable rather than a constant,
// and it reaches 4 (out of bounds for a vec4) on the last iteration.
fn f(v: vec4<f32>) -> f32 {
   var sum: f32 = 0.0;
   for (var i = 1; i <= 4; i = i + 1) {
       sum = sum + v[i];
   }
   return sum;
}

TobTobXX · 2022-05-01T13:30:35Z

Yes, as @jimblandy has guessed, this shader does work on Linux, but doesn't on Windows (tested):

// Four-component constant that the fragment shader indexes with a loop variable.
let foo = vec4<f32>(0.1, 0.2, 0.3, -0.6);

// Computes a clip-space position from the vertex index alone; takes no vertex buffer input.
[[stage(vertex)]]
fn vs_main(
    [[builtin(vertex_index)]] in_vertex_index: u32,
) -> [[builtin(position)]] vec4<f32> {
    let x = f32(1 - i32(in_vertex_index)) * 0.5;
    let y = f32(i32(in_vertex_index & 1u) * 2 - 1) * 0.5;
    return vec4<f32>(x, y, 0.0, 1.0);
}

// Sums foo[0] through foo[4] and writes the sum to the green channel.
[[stage(fragment)]]
fn fs_main() -> [[location(0)]] vec4<f32> {
	var sum = 0.0;
	// Note the off-by-one error
	// (i reaches 4, but foo only has indices 0..3)
	for (var i = 0; i < 5; i=i+1) {
		sum = sum + foo[i];
	}
	return vec4<f32>(1.0, sum, 1.0, 1.0);
}

jimblandy · 2022-05-01T13:51:20Z

Unless @kvark has some clever way out for us, I think that leaves us at

> If that's what's going on, then Naga may just have to accept that HLSL will reject shaders that we pass, because it analyzes them more thoroughly.

teoxoy · 2022-05-02T18:15:04Z

This only seems to be an issue with FXC. DXC seems to compile it just fine.
See https://shader-playground.timjones.io/70cb272b067ff17ae1ab1dc6ee12a513
I also found this related issue microsoft/DirectXShaderCompiler#1879 (comment)

teoxoy · 2022-05-14T15:34:43Z

I think a simple example would be an explicit vector_4[4]. I think this might be a limitation in DXBC or FXC where it doesn't allow non-constant indexing into vectors.

We already detect this:
// Constant index 4 on a vec4 (valid indices are 0..3).
fn f(v: vec4<f32>) -> f32 {
   return v[4];
}
That elicits:
error: Index 4 is out of bounds for expression [1]

Could not parse WGSL

@jimblandy according to the WGSL spec this should work though.

Implementing the OOB behavior above would fix the issue.

jimblandy · 2022-05-14T17:03:42Z

> Implementing the OOB behavior above would fix the issue.

The OOB behavior is already implemented, actually. We're just being over-eager about detecting errors at compile time.

teoxoy · 2022-05-14T17:46:45Z

So, solving both #4389 and #4390 should hopefully fix this issue.

TobTobXX closed this as completed Apr 27, 2022

cwfitzgerald reopened this Apr 27, 2022

cwfitzgerald added area: naga back-end Outputs of naga shader conversion lang: HLSL D3D Shading Language labels Apr 27, 2022

teoxoy mentioned this issue Oct 25, 2023

Handle vector, matrix and array OOB accesses for HLSL #4389

Open

teoxoy mentioned this issue May 14, 2022

Relax vector, matrix and array OOB validation #4390

Open

cwfitzgerald added the naga Shader Translator label Oct 25, 2023

cwfitzgerald transferred this issue from gfx-rs/naga Oct 25, 2023

teoxoy added this to the WebGPU Specification V1 milestone Nov 3, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

error X3504: literal loop terminated early due to out of bounds array access #4381

error X3504: literal loop terminated early due to out of bounds array access #4381

TobTobXX commented Apr 27, 2022 •

edited

Loading

cwfitzgerald commented Apr 27, 2022

TobTobXX commented Apr 27, 2022 •

edited

Loading

jimblandy commented Apr 30, 2022

jimblandy commented Apr 30, 2022

cwfitzgerald commented Apr 30, 2022

jimblandy commented Apr 30, 2022 •

edited

Loading

jimblandy commented Apr 30, 2022

TobTobXX commented May 1, 2022

jimblandy commented May 1, 2022

teoxoy commented May 2, 2022

teoxoy commented May 14, 2022

jimblandy commented May 14, 2022

teoxoy commented May 14, 2022

error X3504: literal loop terminated early due to out of bounds array access #4381

error X3504: literal loop terminated early due to out of bounds array access #4381

Comments

TobTobXX commented Apr 27, 2022 • edited Loading

cwfitzgerald commented Apr 27, 2022

TobTobXX commented Apr 27, 2022 • edited Loading

jimblandy commented Apr 30, 2022

jimblandy commented Apr 30, 2022

cwfitzgerald commented Apr 30, 2022

jimblandy commented Apr 30, 2022 • edited Loading

jimblandy commented Apr 30, 2022

TobTobXX commented May 1, 2022

jimblandy commented May 1, 2022

teoxoy commented May 2, 2022

teoxoy commented May 14, 2022

jimblandy commented May 14, 2022

teoxoy commented May 14, 2022

TobTobXX commented Apr 27, 2022 •

edited

Loading

TobTobXX commented Apr 27, 2022 •

edited

Loading

jimblandy commented Apr 30, 2022 •

edited

Loading