Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

error X3504: literal loop terminated early due to out of bounds array access #4381

Open
TobTobXX opened this issue Apr 27, 2022 · 13 comments
Open
Labels
area: naga back-end Outputs of naga shader conversion lang: HLSL D3D Shading Language naga Shader Translator

Comments

@TobTobXX
Copy link

TobTobXX commented Apr 27, 2022

Hi there, I wrote this wgpu application on Linux (Wayland) and it worked just fine. Now that someone wanted to compile it on Windows, it doesn't work:

noisy log output
2022-04-27T15:20:32.196Z WARN  [wgpu_hal::dx12::instance] Unable to enable D3D12 debug interface: 0x887A002D
2022-04-27T15:20:32.197Z WARN  [wgpu_hal::dx12::instance] Unable to enable DXGI debug interface: 0x887A002D
2022-04-27T15:21:29.310Z WARN  [wgpu_hal::dx12::device] Naga generated shader for "fs_main" at Fragment:
struct NagaConstants { // Two ints and one uint; instantiated just below as the constant buffer at register b2.
    int base_vertex;
    int base_instance;
    uint other;
};
ConstantBuffer<NagaConstants> _NagaConstants: register(b2); // Not referenced by any function in this listing.
static const int PATHS_BUF_SIZE = 200; // Loop bound in fs_main; same value as the literal length of Paths::paths.

struct CameraUniform { // Payload of the `camera` cbuffer (register b0).
    row_major float4x4 view_proj; // Multiplied with the vertex position in vs_main.
    float zoom; // Divisor applied to the final distance in fs_main.
};

struct Path { // One element of Paths::paths, consumed per iteration by fs_main.
    uint kind; // fs_main switches on this: 1u -> line_sd, 2u and 3u -> cubic_bezier_sd, any other value is skipped.
    float2 p0_; // Read for kinds 1u, 2u and 3u.
    float2 p1_; // Read for kinds 1u, 2u and 3u.
    float2 p2_; // Read for kinds 2u and 3u.
    float2 p3_; // Read for kind 3u only.
};

struct Paths { // Payload of the `paths` cbuffer (register b1).
    Path paths[200]; // Length literal equals PATHS_BUF_SIZE.
};

struct Bezier { // Coefficients of the cubic evaluated by cubic_bezier: a*t^3 + b*t^2 + c*t + d.
    float2 a; // t^3 coefficient.
    float2 b; // t^2 coefficient.
    float2 c; // t^1 coefficient.
    float2 d; // Constant term.
};

struct Poly5_ { // Scalar coefficients of a degree-5 polynomial, as used by poly5_mult and poly5_d1_mult.
    float c5_; // x^5 coefficient.
    float c4_; // x^4 coefficient.
    float c3_; // x^3 coefficient.
    float c2_; // x^2 coefficient.
    float c1_; // x^1 coefficient.
    float c0_; // Constant term.
};

struct VertexInput { // Parameter type of vs_main.
    linear float2 pos : LOC0; // Extended to (pos, 0, 1) and multiplied by camera.view_proj in vs_main.
    linear float2 tex_coord : LOC1; // Copied through unchanged by vs_main.
};

struct VertexOutput { // Working struct inside vs_main and fs_main; note the field order (projected first).
    float4 projected : SV_Position;
    linear float2 tex_coord : LOC0;
};

cbuffer camera : register(b0) { CameraUniform camera; } // Read by vs_main (view_proj) and fs_main (zoom).
cbuffer paths : register(b1) { Paths paths; } // Read by fs_main, one array element per loop iteration.

struct VertexOutput_vs_main { // Return type of vs_main; same two fields as VertexOutput but in the opposite order.
    float2 tex_coord : LOC0;
    float4 projected : SV_Position;
};

struct FragmentInput_fs_main { // Parameter type of fs_main; repacked into a VertexOutput on fs_main's first line.
    float2 tex_coord_1 : LOC0;
    float4 projected_1 : SV_Position;
};

float2 cubic_bezier(Bezier b, float t) // Returns the point b.a*t^3 + b.b*t^2 + b.c*t + b.d for parameter t.
{
    return ((((b.a * pow(t, 3.0)) + (b.b * pow(t, 2.0))) + (b.c * t)) + b.d);
}

float4 poly5_mult(Poly5_ p, float4 x) // Evaluates the degree-5 polynomial p at each of the four components of x.
{
    return ((((((p.c5_ * pow(x, float4(5.0.xxxx))) + (p.c4_ * pow(x, float4(4.0.xxxx)))) + (p.c3_ * pow(x, float4(3.0.xxxx)))) + (p.c2_ * pow(x, float4(2.0.xxxx)))) + (p.c1_ * x)) + float4(p.c0_.xxxx));
}

float4 poly5_d1_mult(Poly5_ p_1, float4 x_1) // Evaluates 5*c5*x^4 + 4*c4*x^3 + 3*c3*x^2 + 2*c2*x + c1 (the first derivative of p_1) per component of x_1.
{
    return ((((((5.0 * p_1.c5_) * pow(x_1, float4(4.0.xxxx))) + ((4.0 * p_1.c4_) * pow(x_1, float4(3.0.xxxx)))) + ((3.0 * p_1.c3_) * pow(x_1, float4(2.0.xxxx)))) + ((2.0 * p_1.c2_) * x_1)) + float4(p_1.c1_.xxxx));
}

float4 find_roots4_newton(Poly5_ poly) // Refines four starting guesses with three Newton steps on poly and returns them clamped to [0, 1].
{
    float4 approx = (float4)0;
    int i_1 = 0; // Iteration counter for the loop below.

    approx = float4(0.0, 0.33000001311302185, 0.6600000262260437, 1.0); // The four starting guesses, refined in parallel.
    bool loop_init = true; // True only on the first pass, so the increment below is skipped exactly once.
    while(true) { // Together with the guard below this behaves like: for (i_1 = 0; i_1 < 3; i_1++).
        if (!loop_init) {
        int _expr15 = i_1;
        i_1 = (_expr15 + 1);
        }
        loop_init = false;
        int _expr13 = i_1;
        if ((_expr13 < 3)) {
        } else {
            break;
        }
        float4 _expr18 = approx;
        float4 _expr19 = approx;
        const float4 _e20 = poly5_mult(poly, _expr19); // Polynomial value at the current guesses.
        float4 _expr21 = approx;
        const float4 _e22 = poly5_d1_mult(poly, _expr21); // Derivative value at the current guesses.
        approx = (_expr18 - (_e20 / _e22)); // Newton update; there is no guard against a zero derivative.
    }
    float4 _expr25 = approx;
    return clamp(_expr25, float4(0.0.xxxx), float4(1.0.xxxx)); // Each component limited to [0, 1].
}

Bezier ConstructBezier(float2 arg0, float2 arg1, float2 arg2, float2 arg3) { // Field-by-field constructor: arg0..arg3 map to a, b, c, d in order.
    Bezier ret;
    ret.a = arg0;
    ret.b = arg1;
    ret.c = arg2;
    ret.d = arg3;
    return ret;
}

Poly5_ ConstructPoly5_(float arg0, float arg1, float arg2, float arg3, float arg4, float arg5) { // Field-by-field constructor: arg0..arg5 map to c5_ down to c0_ (highest degree first).
    Poly5_ ret;
    ret.c5_ = arg0;
    ret.c4_ = arg1;
    ret.c3_ = arg2;
    ret.c2_ = arg3;
    ret.c1_ = arg4;
    ret.c0_ = arg5;
    return ret;
}

float cubic_bezier_sd(float2 p0_, float2 p1_, float2 p2_, float2 p3_, float2 p_2, float R) // Smallest distance from p_2 to the curve at the candidate parameters from find_roots4_newton, minus R.
{
    float dist_1 = (float)0;
    int i_2 = 1; // The loop below starts at component 1; component 0 is handled before the loop.

    float2 a_1 = ((((-1.0 * p0_) + (3.0 * p1_)) - (3.0 * p2_)) + (1.0 * p3_)); // t^3 coefficient built from the four control points.
    float2 b_2 = (((3.0 * p0_) - (6.0 * p1_)) + (3.0 * p2_)); // t^2 coefficient.
    float2 c = ((-3.0 * p0_) + (3.0 * p1_)); // t^1 coefficient.
    float2 d = (1.0 * p0_); // Constant term.
    Bezier curve = ConstructBezier(a_1, b_2, c, d);
    float2 v_c5_ = ((-3.0 * a_1) * a_1); // Each v_cN_ is a componentwise product; the matching cN_ sums its x and y parts.
    float c5_ = (v_c5_.x + v_c5_.y);
    float2 v_c4_ = ((-5.0 * a_1) * b_2);
    float c4_ = (v_c4_.x + v_c4_.y);
    float2 v_c3_ = (((-4.0 * a_1) * c) - ((2.0 * b_2) * b_2));
    float c3_ = (v_c3_.x + v_c3_.y);
    float2 v_c2_ = ((((-3.0 * a_1) * d) - ((3.0 * b_2) * c)) + ((3.0 * a_1) * p_2));
    float c2_ = (v_c2_.x + v_c2_.y);
    float2 v_c1_ = ((((-2.0 * b_2) * d) - (c * c)) + ((2.0 * b_2) * p_2));
    float c1_ = (v_c1_.x + v_c1_.y);
    float2 v_c0_ = ((-c * d) + (c * p_2));
    float c0_ = (v_c0_.x + v_c0_.y);
    Poly5_ poly_1 = ConstructPoly5_(c5_, c4_, c3_, c2_, c1_, c0_); // NOTE(review): presumably the polynomial whose roots are the closest-point parameters - confirm against shader.wgsl.
    const float4 _e94 = find_roots4_newton(poly_1); // Four candidate curve parameters, each in [0, 1].
    const float2 _e97 = cubic_bezier(curve, _e94.x);
    dist_1 = distance(_e97, p_2); // Seed the minimum with the first candidate.
    bool loop_init_1 = true; // True only on the first pass, so the increment below is skipped exactly once.
    while(true) { // Together with the guard below this behaves like: for (i_2 = 1; i_2 < 5; i_2++).
        if (!loop_init_1) {
        int _expr105 = i_2;
        i_2 = (_expr105 + 1);
        }
        loop_init_1 = false;
        int _expr102 = i_2;
        if ((_expr102 < 5)) { // Bound of 5 lets i_2 reach 4.
        } else {
            break;
        }
        int _expr108 = i_2;
        const float2 _e110 = cubic_bezier(curve, _e94[_expr108]); // _e94 is a float4 (valid indices 0..3) but i_2 reaches 4 here. If the listing starts at `struct NagaConstants {`, this is Shader line 168, columns 50-63: the span named by error X3504.
        float this_dist = distance(_e110, p_2);
        float _expr112 = dist_1;
        dist_1 = min(_expr112, this_dist); // Keep the smallest distance seen so far.
    }
    float _expr114 = dist_1;
    return (_expr114 - R);
}

float line_sd(float2 a, float2 b_1, float2 p_3, float R_1) // Distance from p_3 to the segment a..b_1, minus R_1.
{
    float2 r = (b_1 - a); // NOTE(review): if a == b_1 then length(r) is 0 and the next line divides by it - confirm callers never pass a degenerate segment.
    float t_1 = clamp((dot(r, (p_3 - a)) / length(r)), 0.0, length(r)); // Projection length of (p_3 - a) along r, limited to the segment's extent.
    float2 n = ((t_1 * normalize(r)) + a); // Point on the segment at that projection length.
    return (distance(n, p_3) - R_1);
}

VertexOutput_vs_main vs_main(VertexInput v_in) // Vertex entry point: transforms the position by camera.view_proj and forwards tex_coord.
{
    VertexOutput v_out = (VertexOutput)0;

    float4x4 _expr7 = camera.view_proj;
    v_out.projected = mul(float4(v_in.pos, 0.0, 1.0), _expr7); // Position extended with z = 0 and w = 1 before the multiply.
    v_out.tex_coord = v_in.tex_coord;
    VertexOutput _expr15 = v_out;
    const VertexOutput vertexoutput = _expr15;
    const VertexOutput_vs_main vertexoutput_1 = { vertexoutput.tex_coord, vertexoutput.projected }; // Reordered to match VertexOutput_vs_main's field order.
    return vertexoutput_1;
}

float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 // Fragment entry point: minimum distance over all paths, divided by camera.zoom, written to the RGB channels.
{
    VertexOutput f_in = { fragmentinput_fs_main.projected_1, fragmentinput_fs_main.tex_coord_1 }; // Reordered to match VertexOutput's field order.
    float dist = 1000000.0; // Starting value for the running minimum.
    int i = 0;

    bool loop_init_2 = true; // True only on the first pass, so the increment below is skipped exactly once.
    while(true) { // Together with the guard below this behaves like: for (i = 0; i < PATHS_BUF_SIZE; i++).
        if (!loop_init_2) {
        int _expr11 = i;
        i = (_expr11 + 1);
        }
        loop_init_2 = false;
        int _expr9 = i;
        if ((_expr9 < PATHS_BUF_SIZE)) {
        } else {
            break;
        }
        int _expr15 = i;
        Path path = paths.paths[_expr15]; // Index stays within 0..199, the declared array length.
        switch(path.kind) {
            case 1u: { // Straight segment from p0_ to p1_.
                const float _e22 = line_sd(path.p0_, path.p1_, f_in.tex_coord, 3.0);
                float _expr23 = dist;
                dist = min(_expr23, _e22);
                break;
            }
            case 2u: { // Three stored points expanded to four control points before calling the cubic routine.
                float2 p0_1 = path.p0_;
                float2 p1_1 = (path.p0_ + ((2.0 / 3.0) * (path.p1_ - path.p0_)));
                float2 p2_1 = (path.p2_ + ((2.0 / 3.0) * (path.p1_ - path.p2_)));
                float2 p3_1 = path.p2_;
                const float _e46 = cubic_bezier_sd(p0_1, p1_1, p2_1, p3_1, f_in.tex_coord, 3.0);
                float _expr47 = dist;
                dist = min(_expr47, _e46);
                break;
            }
            case 3u: { // All four stored points passed straight to the cubic routine.
                const float _e54 = cubic_bezier_sd(path.p0_, path.p1_, path.p2_, path.p3_, f_in.tex_coord, 3.0);
                float _expr55 = dist;
                dist = min(_expr55, _e54);
                break;
            }
            default: { // Any other kind leaves dist unchanged.
                break;
            }
        }
    }
    float _expr57 = dist;
    float _expr59 = camera.zoom;
    float adj_distance = (_expr57 / _expr59);
    return float4(float3(adj_distance.xxx), 1.0); // Same value in R, G and B; alpha fixed at 1.
}

2022-04-27T15:21:29.310Z WARN  [wgpu::backend::direct] Shader translation error for stage FRAGMENT: D3DCompile error (0x80004005): C:\Users\Tobias\Downloads\font-renderer-master.tar\font-renderer-master\Shader(168,50-63): error X3504: literal loop terminated early due to out of bounds array access


(Note that this is not the stderr output, since the Windows shell SUUUCKS to copy-paste from. But stderr doesn't have any more information.)

Here's the important part (I think, line breaks and indentation mine):

2022-04-27T15:21:29.310Z WARN  [wgpu::backend::direct] Shader translation error for stage FRAGMENT:
  D3DCompile error (0x80004005):
    C:\Users\Tobias\Downloads\font-renderer-master.tar\font-renderer-master\Shader(168,50-63):
      error X3504: literal loop terminated early due to out of bounds array access

This is the shader I used: shader.wgsl (pls don't laugh. I know these algorithms can be done better, but I had to do it without copy-pasting.)

And this is a link to the project source. It's just standard cargo run, in case anyone wants to try: sources.tar.gz.

Is there a way to work around this?

@cwfitzgerald
Copy link
Member

Thanks for filing!

This is ultimately an issue on our end, as this is valid wgsl, but the HLSL compiler isn't happy with the output.

That being said, the loop on line 209 indexes the vector out of bounds: it indexes it at 1, 2, 3, 4, and 4 isn't an in-bounds index into a vec4. If you were to keep those in bounds, it should compile.

@TobTobXX
Copy link
Author

TobTobXX commented Apr 27, 2022

Yes, it compiles now. Sorry for the inconvenience. I didn't catch it because I guess Vulkan just ignores it...

(at least now someone searching for it should find this ;) )

@cwfitzgerald cwfitzgerald reopened this Apr 27, 2022
@cwfitzgerald cwfitzgerald added area: naga back-end Outputs of naga shader conversion lang: HLSL D3D Shading Language labels Apr 27, 2022
@jimblandy
Copy link
Member

Could we get a reduced version of the test case here?

Here's the loop @cwfitzgerald called out at line 209:

for (var i = 1; i < 5; i=i+1) { // i takes the values 1, 2, 3 and 4.
	let this_dist = distance(cubic_bezier(curve, roots_n[i]), p); // Dynamic index into roots_n; the value 4 is the one reported as out of bounds.
	// And accept any smaller distance
	dist = min(dist, this_dist);
}

This could be a problem for us. roots_n[i] is a dynamic access, and it's apparently only because the HLSL compiler is unrolling the loop that it recognizes that this expression will definitely perform an out-of-bounds access. We don't want to have to do this level of analysis in Naga.

If that's what's going on, then Naga may just have to accept that HLSL will reject shaders that we pass, because it analyzes them more thoroughly.

@jimblandy
Copy link
Member

@TobTobXX Would you be able to put together a reduced version of the test case, based on the loop at line 209?

@cwfitzgerald
Copy link
Member

I think a simple example would be an explicit vector_4[4]. I think this might be a limitation in DXBC or FXC where it doesn't allow non-constant indexing into vectors.

@jimblandy
Copy link
Member

jimblandy commented Apr 30, 2022

I think a simple example would be an explicit vector_4[4]. I think this might be a limitation in DXBC or FXC where it doesn't allow non-constant indexing into vectors.

We already detect this:

fn f(v: vec4<f32>) -> f32 { // Minimal case: constant index 4 into a four-component vector.
   return v[4];
}

That elicits:

error: Index 4 is out of bounds for expression [1]

Could not parse WGSL

@jimblandy
Copy link
Member

But, for example, Naga validates this and I suspect the HLSL compiler will reject it:

fn f(v: vec4<f32>) -> f32 { // Same out-of-range access as a constant v[4], but reached through a loop variable.
   var sum: f32 = 0.0;
   for (var i = 1; i <= 4; i = i + 1) { // i takes the values 1, 2, 3 and 4.
       sum = sum + v[i]; // Dynamic index; equals 4 on the last iteration.
   }
   return sum;
}

@TobTobXX
Copy link
Author

TobTobXX commented May 1, 2022

Yes, as @jimblandy has guessed, this shader does work on Linux, but doesn't on Windows (tested):

let foo = vec4<f32>(0.1, 0.2, 0.3, -0.6); // Module-scope four-component constant indexed by the loop in fs_main.

[[stage(vertex)]]
fn vs_main( // Derives a clip-space position purely from the vertex index; no vertex buffers are read.
    [[builtin(vertex_index)]] in_vertex_index: u32,
) -> [[builtin(position)]] vec4<f32> {
    let x = f32(1 - i32(in_vertex_index)) * 0.5;
    let y = f32(i32(in_vertex_index & 1u) * 2 - 1) * 0.5;
    return vec4<f32>(x, y, 0.0, 1.0);
}

[[stage(fragment)]]
fn fs_main() -> [[location(0)]] vec4<f32> { // Sums foo[0] through foo[4] and writes the sum to the green channel.
	var sum = 0.0;
	// Note the off-by-one error
	for (var i = 0; i < 5; i=i+1) { // i takes the values 0 through 4; foo has only indices 0..3.
		sum = sum + foo[i];
	}
	return vec4<f32>(1.0, sum, 1.0, 1.0);
}

@jimblandy
Copy link
Member

Unless @kvark has some clever way out for us, I think that leaves us at

If that's what's going on, then Naga may just have to accept that HLSL will reject shaders that we pass, because it analyzes them more thoroughly

@teoxoy
Copy link
Member

teoxoy commented May 2, 2022

This only seems to be an issue with FXC. DXC seems to compile it just fine.
See https://shader-playground.timjones.io/70cb272b067ff17ae1ab1dc6ee12a513
I also found this related issue microsoft/DirectXShaderCompiler#1879 (comment)

@teoxoy
Copy link
Member

teoxoy commented May 14, 2022

I think a simple example would be an explicit vector_4[4]. I think this might be a limitation in DXBC or FXC where it doesn't allow non-constant indexing into vectors.

We already detect this:

fn f(v: vec4<f32>) -> f32 { // Minimal case: constant index 4 into a four-component vector.
   return v[4];
}

That elicits:

error: Index 4 is out of bounds for expression [1]

Could not parse WGSL

@jimblandy according to the WGSL spec this should work though.

image

Implementing the OOB behavior above would fix the issue.

@jimblandy
Copy link
Member

Implementing the OOB behavior above would fix the issue.

The OOB behavior is already implemented, actually. We're just being over-eager about detecting errors at compile time.

@teoxoy
Copy link
Member

teoxoy commented May 14, 2022

So, solving both #4389 and #4390 should hopefully fix this issue.

@cwfitzgerald cwfitzgerald added the naga Shader Translator label Oct 25, 2023
@cwfitzgerald cwfitzgerald transferred this issue from gfx-rs/naga Oct 25, 2023
@teoxoy teoxoy added this to the WebGPU Specification V1 milestone Nov 3, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
area: naga back-end Outputs of naga shader conversion lang: HLSL D3D Shading Language naga Shader Translator
Projects
Status: No status
Development

No branches or pull requests

4 participants