module name=sample_weighting_forward_cuda, target=x86-64-linux-avx-cuda-cuda_capability_50-debug-f16c-sse41 {
func sample_weighting_forward_cuda(samples, coordinates, projections, nsize, output)
{
return_second(halide_print(stringify("Entering Pipeline sample_weighting_forward_cuda", "\n")), "Entering Pipeline sample_weighting_forward_cuda")
return_second(halide_print(stringify(" Input Buffer samples:", " ", samples.buffer, "\n")), " Input Buffer samples:")
return_second(halide_print(stringify(" Input Buffer coordinates:", " ", coordinates.buffer, "\n")), " Input Buffer coordinates:")
return_second(halide_print(stringify(" Input Buffer projections:", " ", projections.buffer, "\n")), " Input Buffer projections:")
return_second(halide_print(stringify(" Input int32 nsize:", " ", nsize, "\n")), " Input int32 nsize:")
return_second(halide_print(stringify(" Output Buffer output:", " ", output.buffer, "\n")), " Output Buffer output:")
assert((reinterpret(samples.buffer) != (uint64)0), halide_error_buffer_argument_is_null("samples"))
assert((reinterpret(projections.buffer) != (uint64)0), halide_error_buffer_argument_is_null("projections"))
assert((reinterpret(output.buffer) != (uint64)0), halide_error_buffer_argument_is_null("output"))
assert((reinterpret(coordinates.buffer) != (uint64)0), halide_error_buffer_argument_is_null("coordinates"))

let coordinates = _halide_buffer_get_host(coordinates.buffer)

let coordinates.type.code = _halide_buffer_get_type_code(coordinates.buffer)

let coordinates.type.bits = _halide_buffer_get_type_bits(coordinates.buffer)

let coordinates.type.lanes = _halide_buffer_get_type_lanes(coordinates.buffer)

let coordinates.min.0 = _halide_buffer_get_min(coordinates.buffer, 0)

let coordinates.extent.0 = _halide_buffer_get_extent(coordinates.buffer, 0)

let coordinates.stride.0 = _halide_buffer_get_stride(coordinates.buffer, 0)

let coordinates.min.1 = _halide_buffer_get_min(coordinates.buffer, 1)

let coordinates.extent.1 = _halide_buffer_get_extent(coordinates.buffer, 1)

let coordinates.stride.1 = _halide_buffer_get_stride(coordinates.buffer, 1)

let coordinates.min.2 = _halide_buffer_get_min(coordinates.buffer, 2)

let coordinates.extent.2 = _halide_buffer_get_extent(coordinates.buffer, 2)

let coordinates.stride.2 = _halide_buffer_get_stride(coordinates.buffer, 2)

let coordinates.min.3 = _halide_buffer_get_min(coordinates.buffer, 3)

let coordinates.extent.3 = _halide_buffer_get_extent(coordinates.buffer, 3)

let coordinates.stride.3 = _halide_buffer_get_stride(coordinates.buffer, 3)

let coordinates.min.4 = _halide_buffer_get_min(coordinates.buffer, 4)

let coordinates.extent.4 = _halide_buffer_get_extent(coordinates.buffer, 4)

let coordinates.stride.4 = _halide_buffer_get_stride(coordinates.buffer, 4)

let output = _halide_buffer_get_host(output.buffer)

let output.type.code = _halide_buffer_get_type_code(output.buffer)

let output.type.bits = _halide_buffer_get_type_bits(output.buffer)

let output.type.lanes = _halide_buffer_get_type_lanes(output.buffer)

let output.min.0 = _halide_buffer_get_min(output.buffer, 0)

let output.extent.0 = _halide_buffer_get_extent(output.buffer, 0)

let output.stride.0 = _halide_buffer_get_stride(output.buffer, 0)

let output.min.1 = _halide_buffer_get_min(output.buffer, 1)

let output.extent.1 = _halide_buffer_get_extent(output.buffer, 1)

let output.stride.1 = _halide_buffer_get_stride(output.buffer, 1)

let output.min.2 = _halide_buffer_get_min(output.buffer, 2)

let output.extent.2 = _halide_buffer_get_extent(output.buffer, 2)

let output.stride.2 = _halide_buffer_get_stride(output.buffer, 2)

let output.min.3 = _halide_buffer_get_min(output.buffer, 3)

let output.extent.3 = _halide_buffer_get_extent(output.buffer, 3)

let output.stride.3 = _halide_buffer_get_stride(output.buffer, 3)

let projections = _halide_buffer_get_host(projections.buffer)

let projections.type.code = _halide_buffer_get_type_code(projections.buffer)

let projections.type.bits = _halide_buffer_get_type_bits(projections.buffer)

let projections.type.lanes = _halide_buffer_get_type_lanes(projections.buffer)

let projections.min.0 = _halide_buffer_get_min(projections.buffer, 0)

let projections.extent.0 = _halide_buffer_get_extent(projections.buffer, 0)

let projections.stride.0 = _halide_buffer_get_stride(projections.buffer, 0)

let projections.min.1 = _halide_buffer_get_min(projections.buffer, 1)

let projections.extent.1 = _halide_buffer_get_extent(projections.buffer, 1)

let projections.stride.1 = _halide_buffer_get_stride(projections.buffer, 1)

let projections.min.2 = _halide_buffer_get_min(projections.buffer, 2)

let projections.extent.2 = _halide_buffer_get_extent(projections.buffer, 2)

let projections.stride.2 = _halide_buffer_get_stride(projections.buffer, 2)

let projections.min.3 = _halide_buffer_get_min(projections.buffer, 3)

let projections.extent.3 = _halide_buffer_get_extent(projections.buffer, 3)

let projections.stride.3 = _halide_buffer_get_stride(projections.buffer, 3)

let projections.min.4 = _halide_buffer_get_min(projections.buffer, 4)

let projections.extent.4 = _halide_buffer_get_extent(projections.buffer, 4)

let projections.stride.4 = _halide_buffer_get_stride(projections.buffer, 4)

let samples = _halide_buffer_get_host(samples.buffer)

let samples.type.code = _halide_buffer_get_type_code(samples.buffer)

let samples.type.bits = _halide_buffer_get_type_bits(samples.buffer)

let samples.type.lanes = _halide_buffer_get_type_lanes(samples.buffer)

let samples.min.0 = _halide_buffer_get_min(samples.buffer, 0)

let samples.extent.0 = _halide_buffer_get_extent(samples.buffer, 0)

let samples.stride.0 = _halide_buffer_get_stride(samples.buffer, 0)

let samples.min.1 = _halide_buffer_get_min(samples.buffer, 1)

let samples.extent.1 = _halide_buffer_get_extent(samples.buffer, 1)

let samples.stride.1 = _halide_buffer_get_stride(samples.buffer, 1)

let samples.min.2 = _halide_buffer_get_min(samples.buffer, 2)

let samples.extent.2 = _halide_buffer_get_extent(samples.buffer, 2)

let samples.stride.2 = _halide_buffer_get_stride(samples.buffer, 2)

let samples.min.3 = _halide_buffer_get_min(samples.buffer, 3)

let samples.extent.3 = _halide_buffer_get_extent(samples.buffer, 3)

let samples.stride.3 = _halide_buffer_get_stride(samples.buffer, 3)

let samples.min.4 = _halide_buffer_get_min(samples.buffer, 4)

let samples.extent.4 = _halide_buffer_get_extent(samples.buffer, 4)

let samples.stride.4 = _halide_buffer_get_stride(samples.buffer, 4)

let filtered.s1.k.max = select((0 < samples.extent.3), (((output.min.2 + output.extent.2) + -1) / samples.extent.3), (output.min.2 / samples.extent.3))

let filtered.s1.k.min = select((0 < samples.extent.3), (output.min.2 / samples.extent.3), (((output.min.2 + output.extent.2) + -1) / samples.extent.3))

let filtered.s1.ci.max.s = int32(abs(samples.extent.3))

let coordinates.extent.0.required.s = (max((min(samples.extent.0, (coordinates.min.0 + coordinates.extent.0)) + -1), coordinates.min.0) - max(min(((coordinates.min.0 + coordinates.extent.0) + -1), 0), coordinates.min.0))

let coordinates.min.0.required = max(min(((coordinates.min.0 + coordinates.extent.0) + -1), 0), coordinates.min.0)

let coordinates.extent.1.required.s = (max(min(((nsize / 2) + 63), ((coordinates.min.1 + coordinates.extent.1) + -1)), coordinates.min.1) - max(min((((0 - nsize) / 2) + 1), ((coordinates.min.1 + coordinates.extent.1) + -1)), coordinates.min.1))

let coordinates.min.1.required = max(min((((0 - nsize) / 2) + 1), ((coordinates.min.1 + coordinates.extent.1) + -1)), coordinates.min.1)

let coordinates.extent.2.required.s = (max(min(((nsize / 2) + 63), ((coordinates.min.2 + coordinates.extent.2) + -1)), coordinates.min.2) - max(min((((0 - nsize) / 2) + 1), ((coordinates.min.2 + coordinates.extent.2) + -1)), coordinates.min.2))

let coordinates.min.2.required = max(min((((0 - nsize) / 2) + 1), ((coordinates.min.2 + coordinates.extent.2) + -1)), coordinates.min.2)

let coordinates.stride.2.required = ((coordinates.extent.0.required.s + 1) * (coordinates.extent.1.required.s + 1))

let coordinates.extent.3.required.s = (max((min(coordinates.extent.3, (coordinates.min.3 + coordinates.extent.3)) + -1), coordinates.min.3) - max(min(((coordinates.min.3 + coordinates.extent.3) + -1), 0), coordinates.min.3))

let coordinates.min.3.required = max(min(((coordinates.min.3 + coordinates.extent.3) + -1), 0), coordinates.min.3)

let coordinates.stride.3.required = (coordinates.stride.2.required * (coordinates.extent.2.required.s + 1))

let coordinates.extent.4.required.s = (max(min(((coordinates.min.4 + coordinates.extent.4) + -1), 1), coordinates.min.4) - max(min(((coordinates.min.4 + coordinates.extent.4) + -1), 0), coordinates.min.4))

let coordinates.min.4.required = max(min(((coordinates.min.4 + coordinates.extent.4) + -1), 0), coordinates.min.4)

let coordinates.stride.4.required = (coordinates.stride.3.required * (coordinates.extent.3.required.s + 1))

let output.extent.0.required.s = (((min(((((((output.extent.3 * output.extent.2) * output.extent.1) * output.extent.0) + -1) / 8) * 8), ((((output.extent.3 * output.extent.2) * output.extent.1) * output.extent.0) + -8)) + 7) / max(((output.extent.3 * output.extent.2) * output.extent.1), 1)) - (min(((((output.extent.3 * output.extent.2) * output.extent.1) * output.extent.0) + -8), 0) / max(((output.extent.3 * output.extent.2) * output.extent.1), 1)))

let output.min.0.required.s = (min(((((output.extent.3 * output.extent.2) * output.extent.1) * output.extent.0) + -8), 0) / max(((output.extent.3 * output.extent.2) * output.extent.1), 1))

let output.extent.1.required.s = ((int32(abs(max(((output.extent.3 * output.extent.2) * output.extent.1), 1))) + -1) / max((output.extent.3 * output.extent.2), 1))

let output.extent.2.required.s = ((int32(abs(max((output.extent.3 * output.extent.2), 1))) + -1) / max(output.extent.3, 1))

let output.stride.2.required = ((output.extent.0.required.s + 1) * (output.extent.1.required.s + 1))

let output.extent.3.required = int32(abs(max(output.extent.3, 1)))

let output.stride.3.required = (output.stride.2.required * (output.extent.2.required.s + 1))

let projections.extent.0.required.s = (max(min(((projections.min.0 + projections.extent.0) + -1), 63), projections.min.0) - max(min(((projections.min.0 + projections.extent.0) + -1), 0), projections.min.0))

let projections.min.0.required = max(min(((projections.min.0 + projections.extent.0) + -1), 0), projections.min.0)

let projections.extent.1.required.s = (max(min(((projections.min.1 + projections.extent.1) + -1), 63), projections.min.1) - max(min(((projections.min.1 + projections.extent.1) + -1), 0), projections.min.1))

let projections.min.1.required = max(min(((projections.min.1 + projections.extent.1) + -1), 0), projections.min.1)

let projections.extent.2.required.s = (max((min(coordinates.extent.3, (projections.min.2 + projections.extent.2)) + -1), projections.min.2) - max(min(((projections.min.2 + projections.extent.2) + -1), 0), projections.min.2))

let projections.min.2.required = max(min(((projections.min.2 + projections.extent.2) + -1), 0), projections.min.2)

let projections.stride.2.required = ((projections.extent.0.required.s + 1) * (projections.extent.1.required.s + 1))

let projections.extent.3.required.s = (max(min(((projections.min.3 + projections.extent.3) + -1), 31), projections.min.3) - max(min(((projections.min.3 + projections.extent.3) + -1), 0), projections.min.3))

let projections.min.3.required = max(min(((projections.min.3 + projections.extent.3) + -1), 0), projections.min.3)

let projections.stride.3.required = (projections.stride.2.required * (projections.extent.2.required.s + 1))

let projections.extent.4.required.s = (max(min(((projections.min.4 + projections.extent.4) + -1), 1), projections.min.4) - max(min(((projections.min.4 + projections.extent.4) + -1), 0), projections.min.4))

let projections.min.4.required = max(min(((projections.min.4 + projections.extent.4) + -1), 0), projections.min.4)

let projections.stride.4.required = (projections.stride.3.required * (projections.extent.3.required.s + 1))

let samples.extent.0.required.s = (max((min(samples.extent.0, (samples.min.0 + samples.extent.0)) + -1), samples.min.0) - max(min(((samples.min.0 + samples.extent.0) + -1), 0), samples.min.0))

let samples.min.0.required = max(min(((samples.min.0 + samples.extent.0) + -1), 0), samples.min.0)

let samples.extent.1.required.s = (max(min(((nsize / 2) + 63), ((samples.min.1 + samples.extent.1) + -1)), samples.min.1) - max(min((((0 - nsize) / 2) + 1), ((samples.min.1 + samples.extent.1) + -1)), samples.min.1))

let samples.min.1.required = max(min((((0 - nsize) / 2) + 1), ((samples.min.1 + samples.extent.1) + -1)), samples.min.1)

let samples.extent.2.required.s = (max(min(((nsize / 2) + 63), ((samples.min.2 + samples.extent.2) + -1)), samples.min.2) - max(min((((0 - nsize) / 2) + 1), ((samples.min.2 + samples.extent.2) + -1)), samples.min.2))

let samples.min.2.required = max(min((((0 - nsize) / 2) + 1), ((samples.min.2 + samples.extent.2) + -1)), samples.min.2)

let samples.stride.2.required = ((samples.extent.0.required.s + 1) * (samples.extent.1.required.s + 1))

let samples.extent.3.required.s = (max(min(((samples.min.3 + samples.extent.3) + -1), 2), samples.min.3) - max(min(((samples.min.3 + samples.extent.3) + -1), 0), samples.min.3))

let samples.min.3.required = max(min(((samples.min.3 + samples.extent.3) + -1), 0), samples.min.3)

let samples.stride.3.required = (samples.stride.2.required * (samples.extent.2.required.s + 1))

let samples.extent.4.required.s = (max(min(((samples.min.4 + samples.extent.4) + -1), 1), samples.min.4) - max(min(((samples.min.4 + samples.extent.4) + -1), 0), samples.min.4))

let samples.min.4.required = max(min(((samples.min.4 + samples.extent.4) + -1), 0), samples.min.4)

let samples.stride.4.required = (samples.stride.3.required * (samples.extent.3.required.s + 1))

if (_halide_buffer_is_bounds_query(coordinates.buffer))
{
_halide_buffer_init(coordinates.buffer, _halide_buffer_get_shape(coordinates.buffer), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 5, make_struct(coordinates.min.0.required, (coordinates.extent.0.required.s + 1), 1, 0, coordinates.min.1.required, (coordinates.extent.1.required.s + 1), (coordinates.extent.0.required.s + 1), 0, coordinates.min.2.required, (coordinates.extent.2.required.s + 1), coordinates.stride.2.required, 0, coordinates.min.3.required, (coordinates.extent.3.required.s + 1), coordinates.stride.3.required, 0, coordinates.min.4.required, (coordinates.extent.4.required.s + 1), coordinates.stride.4.required, 0), (uint64)0)
}
if (_halide_buffer_is_bounds_query(output.buffer))
{
_halide_buffer_init(output.buffer, _halide_buffer_get_shape(output.buffer), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 4, make_struct((output.min.0.required.s + output.min.0), (output.extent.0.required.s + 1), 1, 0, output.min.1, (output.extent.1.required.s + 1), (output.extent.0.required.s + 1), 0, output.min.2, (output.extent.2.required.s + 1), output.stride.2.required, 0, output.min.3, output.extent.3.required, output.stride.3.required, 0), (uint64)0)
}
if (_halide_buffer_is_bounds_query(projections.buffer))
{
_halide_buffer_init(projections.buffer, _halide_buffer_get_shape(projections.buffer), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 5, make_struct(projections.min.0.required, (projections.extent.0.required.s + 1), 1, 0, projections.min.1.required, (projections.extent.1.required.s + 1), (projections.extent.0.required.s + 1), 0, projections.min.2.required, (projections.extent.2.required.s + 1), projections.stride.2.required, 0, projections.min.3.required, (projections.extent.3.required.s + 1), projections.stride.3.required, 0, projections.min.4.required, (projections.extent.4.required.s + 1), projections.stride.4.required, 0), (uint64)0)
}
if (_halide_buffer_is_bounds_query(samples.buffer))
{
_halide_buffer_init(samples.buffer, _halide_buffer_get_shape(samples.buffer), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 5, make_struct(samples.min.0.required, (samples.extent.0.required.s + 1), 1, 0, samples.min.1.required, (samples.extent.1.required.s + 1), (samples.extent.0.required.s + 1), 0, samples.min.2.required, (samples.extent.2.required.s + 1), samples.stride.2.required, 0, samples.min.3.required, (samples.extent.3.required.s + 1), samples.stride.3.required, 0, samples.min.4.required, (samples.extent.4.required.s + 1), samples.stride.4.required, 0), (uint64)0)
}
if (!(((_halide_buffer_is_bounds_query(coordinates.buffer) || _halide_buffer_is_bounds_query(output.buffer)) || _halide_buffer_is_bounds_query(projections.buffer)) || _halide_buffer_is_bounds_query(samples.buffer)))
{
assert((((coordinates.type.code == (uint8)2) && (coordinates.type.bits == (uint8)32)) && (coordinates.type.lanes == (uint16)1)), halide_error_bad_type("Input buffer coordinates", coordinates.type.code, (uint8)2, coordinates.type.bits, (uint8)32, coordinates.type.lanes, (uint16)1))
assert((((output.type.code == (uint8)2) && (output.type.bits == (uint8)32)) && (output.type.lanes == (uint16)1)), halide_error_bad_type("Output buffer output", output.type.code, (uint8)2, output.type.bits, (uint8)32, output.type.lanes, (uint16)1))
assert((((projections.type.code == (uint8)2) && (projections.type.bits == (uint8)32)) && (projections.type.lanes == (uint16)1)), halide_error_bad_type("Input buffer projections", projections.type.code, (uint8)2, projections.type.bits, (uint8)32, projections.type.lanes, (uint16)1))
assert((((samples.type.code == (uint8)2) && (samples.type.bits == (uint8)32)) && (samples.type.lanes == (uint16)1)), halide_error_bad_type("Input buffer samples", samples.type.code, (uint8)2, samples.type.bits, (uint8)32, samples.type.lanes, (uint16)1))
assert(((coordinates.min.0 <= coordinates.min.0.required) && ((((coordinates.min.0.required + coordinates.extent.0.required.s) - coordinates.extent.0) + 1) <= coordinates.min.0)), halide_error_access_out_of_bounds("Input buffer coordinates", 0, coordinates.min.0.required, (coordinates.min.0.required + coordinates.extent.0.required.s), coordinates.min.0, ((coordinates.min.0 + coordinates.extent.0) + -1)))
assert((0 <= coordinates.extent.0), halide_error_buffer_extents_negative("Input buffer coordinates", 0, coordinates.extent.0))
assert(((coordinates.min.1 <= coordinates.min.1.required) && ((((coordinates.min.1.required + coordinates.extent.1.required.s) - coordinates.extent.1) + 1) <= coordinates.min.1)), halide_error_access_out_of_bounds("Input buffer coordinates", 1, coordinates.min.1.required, (coordinates.min.1.required + coordinates.extent.1.required.s), coordinates.min.1, ((coordinates.min.1 + coordinates.extent.1) + -1)))
assert((0 <= coordinates.extent.1), halide_error_buffer_extents_negative("Input buffer coordinates", 1, coordinates.extent.1))
assert(((coordinates.min.2 <= coordinates.min.2.required) && ((((coordinates.min.2.required + coordinates.extent.2.required.s) - coordinates.extent.2) + 1) <= coordinates.min.2)), halide_error_access_out_of_bounds("Input buffer coordinates", 2, coordinates.min.2.required, (coordinates.min.2.required + coordinates.extent.2.required.s), coordinates.min.2, ((coordinates.min.2 + coordinates.extent.2) + -1)))
assert((0 <= coordinates.extent.2), halide_error_buffer_extents_negative("Input buffer coordinates", 2, coordinates.extent.2))
assert(((coordinates.min.3 <= coordinates.min.3.required) && ((((coordinates.min.3.required + coordinates.extent.3.required.s) - coordinates.extent.3) + 1) <= coordinates.min.3)), halide_error_access_out_of_bounds("Input buffer coordinates", 3, coordinates.min.3.required, (coordinates.min.3.required + coordinates.extent.3.required.s), coordinates.min.3, ((coordinates.min.3 + coordinates.extent.3) + -1)))
assert((0 <= coordinates.extent.3), halide_error_buffer_extents_negative("Input buffer coordinates", 3, coordinates.extent.3))
assert(((coordinates.min.4 <= coordinates.min.4.required) && ((((coordinates.min.4.required + coordinates.extent.4.required.s) - coordinates.extent.4) + 1) <= coordinates.min.4)), halide_error_access_out_of_bounds("Input buffer coordinates", 4, coordinates.min.4.required, (coordinates.min.4.required + coordinates.extent.4.required.s), coordinates.min.4, ((coordinates.min.4 + coordinates.extent.4) + -1)))
assert((0 <= coordinates.extent.4), halide_error_buffer_extents_negative("Input buffer coordinates", 4, coordinates.extent.4))
assert(((0 <= output.min.0.required.s) && (((((output.min.0.required.s + output.min.0) + output.extent.0.required.s) - output.extent.0) + 1) <= output.min.0)), halide_error_access_out_of_bounds("Output buffer output", 0, (output.min.0.required.s + output.min.0), ((output.min.0.required.s + output.min.0) + output.extent.0.required.s), output.min.0, ((output.min.0 + output.extent.0) + -1)))
assert((0 <= output.extent.0), halide_error_buffer_extents_negative("Output buffer output", 0, output.extent.0))
assert(((((output.min.1 + output.extent.1.required.s) - output.extent.1) + 1) <= output.min.1), halide_error_access_out_of_bounds("Output buffer output", 1, output.min.1, (output.min.1 + output.extent.1.required.s), output.min.1, ((output.min.1 + output.extent.1) + -1)))
assert((0 <= output.extent.1), halide_error_buffer_extents_negative("Output buffer output", 1, output.extent.1))
assert(((((output.min.2 + output.extent.2.required.s) - output.extent.2) + 1) <= output.min.2), halide_error_access_out_of_bounds("Output buffer output", 2, output.min.2, (output.min.2 + output.extent.2.required.s), output.min.2, ((output.min.2 + output.extent.2) + -1)))
assert((0 <= output.extent.2), halide_error_buffer_extents_negative("Output buffer output", 2, output.extent.2))
assert((((output.min.3 + output.extent.3.required) - output.extent.3) <= output.min.3), halide_error_access_out_of_bounds("Output buffer output", 3, output.min.3, ((output.min.3 + output.extent.3.required) + -1), output.min.3, ((output.min.3 + output.extent.3) + -1)))
assert((0 <= output.extent.3), halide_error_buffer_extents_negative("Output buffer output", 3, output.extent.3))
assert(((projections.min.0 <= projections.min.0.required) && ((((projections.min.0.required + projections.extent.0.required.s) - projections.extent.0) + 1) <= projections.min.0)), halide_error_access_out_of_bounds("Input buffer projections", 0, projections.min.0.required, (projections.min.0.required + projections.extent.0.required.s), projections.min.0, ((projections.min.0 + projections.extent.0) + -1)))
assert((0 <= projections.extent.0), halide_error_buffer_extents_negative("Input buffer projections", 0, projections.extent.0))
assert(((projections.min.1 <= projections.min.1.required) && ((((projections.min.1.required + projections.extent.1.required.s) - projections.extent.1) + 1) <= projections.min.1)), halide_error_access_out_of_bounds("Input buffer projections", 1, projections.min.1.required, (projections.min.1.required + projections.extent.1.required.s), projections.min.1, ((projections.min.1 + projections.extent.1) + -1)))
assert((0 <= projections.extent.1), halide_error_buffer_extents_negative("Input buffer projections", 1, projections.extent.1))
assert(((projections.min.2 <= projections.min.2.required) && ((((projections.min.2.required + projections.extent.2.required.s) - projections.extent.2) + 1) <= projections.min.2)), halide_error_access_out_of_bounds("Input buffer projections", 2, projections.min.2.required, (projections.min.2.required + projections.extent.2.required.s), projections.min.2, ((projections.min.2 + projections.extent.2) + -1)))
assert((0 <= projections.extent.2), halide_error_buffer_extents_negative("Input buffer projections", 2, projections.extent.2))
assert(((projections.min.3 <= projections.min.3.required) && ((((projections.min.3.required + projections.extent.3.required.s) - projections.extent.3) + 1) <= projections.min.3)), halide_error_access_out_of_bounds("Input buffer projections", 3, projections.min.3.required, (projections.min.3.required + projections.extent.3.required.s), projections.min.3, ((projections.min.3 + projections.extent.3) + -1)))
assert((0 <= projections.extent.3), halide_error_buffer_extents_negative("Input buffer projections", 3, projections.extent.3))
assert(((projections.min.4 <= projections.min.4.required) && ((((projections.min.4.required + projections.extent.4.required.s) - projections.extent.4) + 1) <= projections.min.4)), halide_error_access_out_of_bounds("Input buffer projections", 4, projections.min.4.required, (projections.min.4.required + projections.extent.4.required.s), projections.min.4, ((projections.min.4 + projections.extent.4) + -1)))
assert((0 <= projections.extent.4), halide_error_buffer_extents_negative("Input buffer projections", 4, projections.extent.4))
assert(((samples.min.0 <= samples.min.0.required) && ((((samples.min.0.required + samples.extent.0.required.s) - samples.extent.0) + 1) <= samples.min.0)), halide_error_access_out_of_bounds("Input buffer samples", 0, samples.min.0.required, (samples.min.0.required + samples.extent.0.required.s), samples.min.0, ((samples.min.0 + samples.extent.0) + -1)))
assert((0 <= samples.extent.0), halide_error_buffer_extents_negative("Input buffer samples", 0, samples.extent.0))
assert(((samples.min.1 <= samples.min.1.required) && ((((samples.min.1.required + samples.extent.1.required.s) - samples.extent.1) + 1) <= samples.min.1)), halide_error_access_out_of_bounds("Input buffer samples", 1, samples.min.1.required, (samples.min.1.required + samples.extent.1.required.s), samples.min.1, ((samples.min.1 + samples.extent.1) + -1)))
assert((0 <= samples.extent.1), halide_error_buffer_extents_negative("Input buffer samples", 1, samples.extent.1))
assert(((samples.min.2 <= samples.min.2.required) && ((((samples.min.2.required + samples.extent.2.required.s) - samples.extent.2) + 1) <= samples.min.2)), halide_error_access_out_of_bounds("Input buffer samples", 2, samples.min.2.required, (samples.min.2.required + samples.extent.2.required.s), samples.min.2, ((samples.min.2 + samples.extent.2) + -1)))
assert((0 <= samples.extent.2), halide_error_buffer_extents_negative("Input buffer samples", 2, samples.extent.2))
assert(((samples.min.3 <= samples.min.3.required) && ((((samples.min.3.required + samples.extent.3.required.s) - samples.extent.3) + 1) <= samples.min.3)), halide_error_access_out_of_bounds("Input buffer samples", 3, samples.min.3.required, (samples.min.3.required + samples.extent.3.required.s), samples.min.3, ((samples.min.3 + samples.extent.3) + -1)))
assert((0 <= samples.extent.3), halide_error_buffer_extents_negative("Input buffer samples", 3, samples.extent.3))
assert(((samples.min.4 <= samples.min.4.required) && ((((samples.min.4.required + samples.extent.4.required.s) - samples.extent.4) + 1) <= samples.min.4)), halide_error_access_out_of_bounds("Input buffer samples", 4, samples.min.4.required, (samples.min.4.required + samples.extent.4.required.s), samples.min.4, ((samples.min.4 + samples.extent.4) + -1)))
assert((0 <= samples.extent.4), halide_error_buffer_extents_negative("Input buffer samples", 4, samples.extent.4))
assert((coordinates.stride.0 == 1), halide_error_constraint_violated("coordinates.stride.0", coordinates.stride.0, "1", 1))
assert((output.stride.0 == 1), halide_error_constraint_violated("output.stride.0", output.stride.0, "1", 1))
assert((projections.stride.0 == 1), halide_error_constraint_violated("projections.stride.0", projections.stride.0, "1", 1))
assert((samples.stride.0 == 1), halide_error_constraint_violated("samples.stride.0", samples.stride.0, "1", 1))

let coordinates.total_extent.1 = (int64(coordinates.extent.1) * int64(coordinates.extent.0))

let coordinates.total_extent.3 = (int64(coordinates.extent.3) * (int64(coordinates.extent.2) * coordinates.total_extent.1))

let output.total_extent.1 = (int64(output.extent.1) * int64(output.extent.0))

let output.total_extent.3 = (int64(output.extent.3) * (int64(output.extent.2) * output.total_extent.1))

let projections.total_extent.1 = (int64(projections.extent.1) * int64(projections.extent.0))

let projections.total_extent.3 = (int64(projections.extent.3) * (int64(projections.extent.2) * projections.total_extent.1))

let samples.total_extent.1 = (int64(samples.extent.1) * int64(samples.extent.0))

let samples.total_extent.3 = (int64(samples.extent.3) * (int64(samples.extent.2) * samples.total_extent.1))

assert((abs(int64(coordinates.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("coordinates", abs(int64(coordinates.extent.0)), (uint64)2147483647))
assert((abs((int64(coordinates.extent.1) * int64(coordinates.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("coordinates", abs((int64(coordinates.extent.1) * int64(coordinates.stride.1))), (uint64)2147483647))
assert((coordinates.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("coordinates", coordinates.total_extent.1, (int64)2147483647))
assert((abs((int64(coordinates.extent.2) * int64(coordinates.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("coordinates", abs((int64(coordinates.extent.2) * int64(coordinates.stride.2))), (uint64)2147483647))
assert(((int64(coordinates.extent.2) * coordinates.total_extent.1) <= (int64)2147483647), halide_error_buffer_extents_too_large("coordinates", (int64(coordinates.extent.2) * coordinates.total_extent.1), (int64)2147483647))
assert((abs((int64(coordinates.extent.3) * int64(coordinates.stride.3))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("coordinates", abs((int64(coordinates.extent.3) * int64(coordinates.stride.3))), (uint64)2147483647))
assert((coordinates.total_extent.3 <= (int64)2147483647), halide_error_buffer_extents_too_large("coordinates", coordinates.total_extent.3, (int64)2147483647))
assert((abs((int64(coordinates.extent.4) * int64(coordinates.stride.4))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("coordinates", abs((int64(coordinates.extent.4) * int64(coordinates.stride.4))), (uint64)2147483647))
assert(((int64(coordinates.extent.4) * coordinates.total_extent.3) <= (int64)2147483647), halide_error_buffer_extents_too_large("coordinates", (int64(coordinates.extent.4) * coordinates.total_extent.3), (int64)2147483647))
assert((abs(int64(output.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output", abs(int64(output.extent.0)), (uint64)2147483647))
assert((abs((int64(output.extent.1) * int64(output.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output", abs((int64(output.extent.1) * int64(output.stride.1))), (uint64)2147483647))
assert((output.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("output", output.total_extent.1, (int64)2147483647))
assert((abs((int64(output.extent.2) * int64(output.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output", abs((int64(output.extent.2) * int64(output.stride.2))), (uint64)2147483647))
assert(((int64(output.extent.2) * output.total_extent.1) <= (int64)2147483647), halide_error_buffer_extents_too_large("output", (int64(output.extent.2) * output.total_extent.1), (int64)2147483647))
assert((abs((int64(output.extent.3) * int64(output.stride.3))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output", abs((int64(output.extent.3) * int64(output.stride.3))), (uint64)2147483647))
assert((output.total_extent.3 <= (int64)2147483647), halide_error_buffer_extents_too_large("output", output.total_extent.3, (int64)2147483647))
assert((abs(int64(projections.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("projections", abs(int64(projections.extent.0)), (uint64)2147483647))
assert((abs((int64(projections.extent.1) * int64(projections.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("projections", abs((int64(projections.extent.1) * int64(projections.stride.1))), (uint64)2147483647))
assert((projections.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("projections", projections.total_extent.1, (int64)2147483647))
assert((abs((int64(projections.extent.2) * int64(projections.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("projections", abs((int64(projections.extent.2) * int64(projections.stride.2))), (uint64)2147483647))
assert(((int64(projections.extent.2) * projections.total_extent.1) <= (int64)2147483647), halide_error_buffer_extents_too_large("projections", (int64(projections.extent.2) * projections.total_extent.1), (int64)2147483647))
assert((abs((int64(projections.extent.3) * int64(projections.stride.3))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("projections", abs((int64(projections.extent.3) * int64(projections.stride.3))), (uint64)2147483647))
assert((projections.total_extent.3 <= (int64)2147483647), halide_error_buffer_extents_too_large("projections", projections.total_extent.3, (int64)2147483647))
assert((abs((int64(projections.extent.4) * int64(projections.stride.4))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("projections", abs((int64(projections.extent.4) * int64(projections.stride.4))), (uint64)2147483647))
assert(((int64(projections.extent.4) * projections.total_extent.3) <= (int64)2147483647), halide_error_buffer_extents_too_large("projections", (int64(projections.extent.4) * projections.total_extent.3), (int64)2147483647))
assert((abs(int64(samples.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("samples", abs(int64(samples.extent.0)), (uint64)2147483647))
assert((abs((int64(samples.extent.1) * int64(samples.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("samples", abs((int64(samples.extent.1) * int64(samples.stride.1))), (uint64)2147483647))
assert((samples.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("samples", samples.total_extent.1, (int64)2147483647))
assert((abs((int64(samples.extent.2) * int64(samples.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("samples", abs((int64(samples.extent.2) * int64(samples.stride.2))), (uint64)2147483647))
assert(((int64(samples.extent.2) * samples.total_extent.1) <= (int64)2147483647), halide_error_buffer_extents_too_large("samples", (int64(samples.extent.2) * samples.total_extent.1), (int64)2147483647))
assert((abs((int64(samples.extent.3) * int64(samples.stride.3))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("samples", abs((int64(samples.extent.3) * int64(samples.stride.3))), (uint64)2147483647))
assert((samples.total_extent.3 <= (int64)2147483647), halide_error_buffer_extents_too_large("samples", samples.total_extent.3, (int64)2147483647))
assert((abs((int64(samples.extent.4) * int64(samples.stride.4))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("samples", abs((int64(samples.extent.4) * int64(samples.stride.4))), (uint64)2147483647))
assert(((int64(samples.extent.4) * samples.total_extent.3) <= (int64)2147483647), halide_error_buffer_extents_too_large("samples", (int64(samples.extent.4) * samples.total_extent.3), (int64)2147483647))
assert(((0 <= output.min.3) && ((output.min.3 + output.extent.3) <= 2)), halide_error_explicit_bounds_too_small("n", "filtered", 0, 1, output.min.3, ((output.min.3 + output.extent.3) + -1)))
assert(((0 <= filtered.s1.k.min) && (filtered.s1.k.max <= 15)), halide_error_explicit_bounds_too_small("k", "filtered", 0, 15, filtered.s1.k.min, filtered.s1.k.max))
assert((filtered.s1.ci.max.s <= 3), halide_error_explicit_bounds_too_small("ci", "filtered", 0, 2, 0, (filtered.s1.ci.max.s + -1)))
assert(((0 <= output.min.1) && ((output.min.1 + output.extent.1) <= 64)), halide_error_explicit_bounds_too_small("y", "filtered", 0, 63, output.min.1, ((output.min.1 + output.extent.1) + -1)))
assert(((0 <= output.min.0) && ((output.min.0 + output.extent.0) <= 64)), halide_error_explicit_bounds_too_small("x", "filtered", 0, 63, output.min.0, ((output.min.0 + output.extent.0) + -1)))
assert(((output.extent.3.required + output.min.3) <= 2), halide_error_explicit_bounds_too_small("n", "filtered", 0, 1, min(output.min.3, 0), max((max((output.extent.3.required + output.min.3), 2) + -1), 1)))
assert((0 <= output.min.3), halide_error_explicit_bounds_too_small("n", "filtered", 0, 1, min(output.min.3, 0), max((max((output.extent.3.required + output.min.3), 2) + -1), 1)))
assert((select((0 < samples.extent.3), ((output.extent.2.required.s + output.min.2) / samples.extent.3), (output.min.2 / samples.extent.3)) <= 15), (let t4112 = (0 < samples.extent.3) in (let t4113 = (output.min.2 / samples.extent.3) in (let t4114 = ((output.extent.2.required.s + output.min.2) / samples.extent.3) in halide_error_explicit_bounds_too_small("k", "filtered", 0, 15, min(select(t4112, t4113, t4114), 0), max(select(t4112, t4114, t4113), 15))))))
assert((0 <= select((0 < samples.extent.3), (output.min.2 / samples.extent.3), ((output.extent.2.required.s + output.min.2) / samples.extent.3))), (let t4115 = (0 < samples.extent.3) in (let t4116 = (output.min.2 / samples.extent.3) in (let t4117 = ((output.extent.2.required.s + output.min.2) / samples.extent.3) in halide_error_explicit_bounds_too_small("k", "filtered", 0, 15, min(select(t4115, t4116, t4117), 0), max(select(t4115, t4117, t4116), 15))))))
assert((filtered.s1.ci.max.s <= 3), halide_error_explicit_bounds_too_small("ci", "filtered", 0, 2, 0, max((max(filtered.s1.ci.max.s, 3) + -1), 2)))
assert(((output.extent.1.required.s + output.min.1) <= 63), halide_error_explicit_bounds_too_small("y", "filtered", 0, 63, min(output.min.1, 0), max((output.extent.1.required.s + output.min.1), 63)))
assert((0 <= output.min.1), halide_error_explicit_bounds_too_small("y", "filtered", 0, 63, min(output.min.1, 0), max((output.extent.1.required.s + output.min.1), 63)))
assert(((((min(((((((output.extent.3 * output.extent.2) * output.extent.1) * output.extent.0) + -1) / 8) * 8), ((((output.extent.3 * output.extent.2) * output.extent.1) * output.extent.0) + -8)) + 7) / max(((output.extent.3 * output.extent.2) * output.extent.1), 1)) + output.min.0) <= 63), halide_error_explicit_bounds_too_small("x", "filtered", 0, 63, min((output.min.0.required.s + output.min.0), 0), max((((min(((((((output.extent.3 * output.extent.2) * output.extent.1) * output.extent.0) + -1) / 8) * 8), ((((output.extent.3 * output.extent.2) * output.extent.1) * output.extent.0) + -8)) + 7) / max(((output.extent.3 * output.extent.2) * output.extent.1), 1)) + output.min.0), 63)))
assert(((0 - output.min.0) <= output.min.0.required.s), halide_error_explicit_bounds_too_small("x", "filtered", 0, 63, min((output.min.0.required.s + output.min.0), 0), max((((min(((((((output.extent.3 * output.extent.2) * output.extent.1) * output.extent.0) + -1) / 8) * 8), ((((output.extent.3 * output.extent.2) * output.extent.1) * output.extent.0) + -8)) + 7) / max(((output.extent.3 * output.extent.2) * output.extent.1), 1)) + output.min.0), 63)))
allocate filtered[float32 * 64 * 64 * 3 * 16 * 2] if (uint1)0

let filtered.buffer = _halide_buffer_init(alloca(size_of_halide_buffer_t()), make_struct(0, 64, 1, 0, 0, 64, 64, 0, 0, 3, 4096, 0, 0, 16, 12288, 0, 0, 2, 196608, 0), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 5, make_struct(0, 64, 1, 0, 0, 64, 64, 0, 0, 3, 4096, 0, 0, 16, 12288, 0, 0, 2, 196608, 0), (uint64)0)

register_destructor("halide_device_free_as_destructor", filtered.buffer)
produce filtered
{

let halide_device_malloc_result = halide_device_malloc(filtered.buffer, halide_cuda_device_interface())

assert((halide_device_malloc_result == 0), halide_device_malloc_result)
gpu_block (filtered.s0.n.gpu_tile.gpu_tile.gpu_tile.gpu_tile.gpu_tile.__block_id_x, 0, 49152)
{
gpu_thread (.__thread_id_x, 0, 8)
{
filtered[(((((((filtered.s0.n.gpu_tile.gpu_tile.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + .__thread_id_x) / 6144) + (((((filtered.s0.n.gpu_tile.gpu_tile.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + .__thread_id_x) % 6144) / 96) * 64)) + ((((((filtered.s0.n.gpu_tile.gpu_tile.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + .__thread_id_x) % 6144) % 96) / 32) * 4096)) + (((((((filtered.s0.n.gpu_tile.gpu_tile.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + .__thread_id_x) % 6144) % 96) % 32) / 2) * 12288)) + (((((((filtered.s0.n.gpu_tile.gpu_tile.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + .__thread_id_x) % 6144) % 96) % 32) % 2) * 196608))] = 0.000000f
}
}
_halide_buffer_set_device_dirty(filtered.buffer, (uint1)1)

let filtered.s1.r$z.loop_extent = ((nsize / 2) - ((0 - nsize) / 2))

let halide_copy_to_device_result = halide_copy_to_device(coordinates.buffer, halide_cuda_device_interface())

assert((halide_copy_to_device_result == 0), halide_copy_to_device_result)

let halide_copy_to_device_result$2 = halide_copy_to_device(projections.buffer, halide_cuda_device_interface())

assert((halide_copy_to_device_result$2 == 0), halide_copy_to_device_result$2)

let halide_copy_to_device_result$3 = halide_copy_to_device(samples.buffer, halide_cuda_device_interface())

assert((halide_copy_to_device_result$3 == 0), halide_copy_to_device_result$3)

let filtered.s1.n.prologue.s = max(max(coordinates.min.4, projections.min.4), samples.min.4)

let filtered.s1.n.epilogue.s = max(min(min((coordinates.min.4 + coordinates.extent.4), (projections.min.4 + projections.extent.4)), (samples.min.4 + samples.extent.4)), max(max(max(coordinates.min.4, projections.min.4), samples.min.4), 0))

let t4443 = ((0 - nsize) / 2)

let t4444 = max(filtered.s1.r$z.loop_extent, 1)

let t4445 = (nsize / 2)

let t4446 = (max((max(filtered.s1.r$z.loop_extent, int32(abs(t4444))) + t4443), t4445) - t4443)

let t4447 = (filtered.s1.r$z.loop_extent * filtered.s1.r$z.loop_extent)

let t4448 = max(t4447, 1)

let t4449 = (max(max((filtered.s1.r$z.loop_extent + t4443), ((((int32(abs(t4448)) + -1) / t4444) + t4443) + 1)), t4445) - t4443)

let t4450 = (t4447 * samples.extent.0)

let t4451 = float32(nsize)

let t4452 = max(samples.extent.0, (((t4450 + -1) / t4448) + 1))

let t4453 = ((t4452 * t4449) * t4446)

let t4413 = (max(t4453, (t4453 * 2)) * 128)

let t4417 = max(t4450, 8)

let t4412 = min(max(filtered.s1.n.prologue.s, 0), 2)

let t4441 = (1.000000f / ((t4451 * t4451) * float32(samples.extent.0)))

let t4433 = ((nsize + -1) / 2)

let t4435 = (t4451 * 0.500000f)

let t4421 = (t4443 + 1)

let t4442 = ((((samples.min.0 + (samples.min.1 * samples.stride.1)) + (samples.min.2 * samples.stride.2)) + (samples.min.3 * samples.stride.3)) + (samples.min.4 * samples.stride.4))

let t4434 = ((((projections.min.0 + (projections.min.1 * projections.stride.1)) + (projections.min.2 * projections.stride.2)) + (projections.min.3 * projections.stride.3)) + (projections.min.4 * projections.stride.4))

let t4427 = ((((coordinates.min.0 + (coordinates.min.1 * coordinates.stride.1)) + (coordinates.min.2 * coordinates.stride.2)) + (coordinates.min.3 * coordinates.stride.3)) + (coordinates.min.4 * coordinates.stride.4))

let t4440 = (samples.min.4 + samples.extent.4)

let t4439 = (samples.min.3 + samples.extent.3)

let t4438 = (samples.min.2 + samples.extent.2)

let t4437 = (samples.min.1 + samples.extent.1)

let t4432 = (projections.min.4 + projections.extent.4)

let t4431 = (projections.min.3 + projections.extent.3)

let t4430 = (projections.min.2 + projections.extent.2)

let t4429 = (projections.min.1 + projections.extent.1)

let t4428 = (projections.min.0 + projections.extent.0)

let t4426 = (coordinates.min.4 + coordinates.extent.4)

let t4424 = (coordinates.min.2 + coordinates.extent.2)

let t4423 = (coordinates.min.1 + coordinates.extent.1)

let t4422 = (coordinates.min.0 + coordinates.extent.0)

for (filtered.s1.n, 0, t4412)
{

let t4459 = (filtered.s1.n < samples.min.4)

let t4456 = (filtered.s1.n < projections.min.4)

let t4455 = (filtered.s1.n < coordinates.min.4)

for (filtered.s1.k, 0, 16)
{

let t545 = t4413

let t546 = t4446

let t547 = t4449

let t548 = t4452

let t4467 = (t548 * t547)

let t4466 = (((filtered.s1.n * 16) + filtered.s1.k) * 12288)

let t4460 = (t545 * 4)

let t4462 = (filtered.s1.k * 2)

gpu_block (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x, 0, 1536)
{
allocate __shared[uint8 * t4460]
gpu_thread (.__thread_id_x, 0, t4417)
{

let projected_coord.x.extent_realized.s = ((((filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + 7) / 192) - (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x / 24))

let projected_coord.stride.4 = ((t4467 * t546) * (projected_coord.x.extent_realized.s + 1))

produce projected_coord
{

let t4470 = (.__thread_id_x < t4450)

let t4468 = (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x / 24)

let t4469 = (projected_coord.x.extent_realized.s + 1)

for (projected_coord.s0.k, t4462, 2)
{

let t4471 = (((projected_coord.s0.k - t4462) * projected_coord.stride.4) * 64)

for (projected_coord.s0.y, 0, 64)
{

let t4472 = (projected_coord.s0.y * projected_coord.stride.4)

for (projected_coord.s0.x, t4468, t4469)
{
if (t4470)
{
__shared[(let t4134 = (.__thread_id_x % t4448) in ((((((.__thread_id_x / t4448) + ((t4134 / t4444) * t548)) + ((t4134 % t4444) * t4467)) + t4471) + t4472) + ((projected_coord.s0.x - t4468) * (t4467 * t546))))] = 0.000000f
}
}
}
}
halide_gpu_thread_barrier()
if ((.__thread_id_x < 1))
{

let t4473 = (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x / 24)

let t4474 = (projected_coord.x.extent_realized.s + 1)

for (projected_coord.s1.k, t4462, 2)
{

let t4478 = (projected_coord.s1.k < projections.min.3)

let t4477 = (((projected_coord.s1.k - t4462) * projected_coord.stride.4) * 64)

for (projected_coord.s1.y, 0, 64)
{

let t4481 = (projected_coord.s1.y < projections.min.1)

let t4480 = (t4477 + (projected_coord.s1.y * projected_coord.stride.4))

for (projected_coord.s1.x, t4473, t4474)
{

let t4484 = (projected_coord.s1.x < projections.min.0)

let t4483 = (projected_coord.s1.x - t4473)

for (projected_coord.s1.dy, t4421, filtered.s1.r$z.loop_extent)
{

let t4486 = (projected_coord.s1.y < (coordinates.min.2 - projected_coord.s1.dy))

let t4489 = float32(projected_coord.s1.dy)

for (projected_coord.s1.dx, t4421, filtered.s1.r$z.loop_extent)
{

let t4491 = (projected_coord.s1.x < (coordinates.min.1 - projected_coord.s1.dx))

let t4495 = ((t4433 + projected_coord.s1.dx) * t548)

let t4494 = (t4433 + projected_coord.s1.dy)

let t4496 = float32(projected_coord.s1.dx)

for (projected_coord.s1.s, 0, samples.extent.0)
{

let t4498 = (projected_coord.s1.s < coordinates.min.0)

let t4501 = ((((t4480 + (t4483 * (t4467 * t546))) + (t4494 * t4467)) + t4495) + projected_coord.s1.s)

for (projected_coord.s1.r_coord$x, 0, coordinates.extent.3)
{
__shared[t4501] = (let t4142 = select((((((((((t4498 || (t4422 <= projected_coord.s1.s)) || t4491) || ((t4423 - projected_coord.s1.dx) <= projected_coord.s1.x)) || t4486) || ((t4424 - projected_coord.s1.dy) <= projected_coord.s1.y)) || (projected_coord.s1.r_coord$x < coordinates.min.3)) || ((coordinates.min.3 + coordinates.extent.3) <= projected_coord.s1.r_coord$x)) || t4455) || (t4426 <= filtered.s1.n)), 0.000000f, coordinates[((max(min(projected_coord.s1.s, (t4422 + -1)), coordinates.min.0) + ((((max(min(filtered.s1.n, (t4426 + -1)), coordinates.min.4) * coordinates.stride.4) - t4427) + (max(min((projected_coord.s1.y + projected_coord.s1.dy), (t4424 + -1)), coordinates.min.2) * coordinates.stride.2)) + (max(min((projected_coord.s1.x + projected_coord.s1.dx), (t4423 + -1)), coordinates.min.1) * coordinates.stride.1))) + (max(min(projected_coord.s1.r_coord$x, ((coordinates.min.3 + coordinates.extent.3) + -1)), coordinates.min.3) * coordinates.stride.3))]) in (__shared[((((t4480 + (t4483 * (t4467 * t546))) + (t4494 * t4467)) + t4495) + projected_coord.s1.s)] + (select((((((((((t4484 || (t4428 <= projected_coord.s1.x)) || t4481) || (t4429 <= projected_coord.s1.y)) || (projected_coord.s1.r_coord$x < projections.min.2)) || (t4430 <= projected_coord.s1.r_coord$x)) || t4478) || (t4431 <= projected_coord.s1.k)) || t4456) || (t4432 <= filtered.s1.n)), 0.000000f, projections[((max(min(projected_coord.s1.x, (t4428 + -1)), projections.min.0) + ((((max(min(filtered.s1.n, (t4432 + -1)), projections.min.4) * projections.stride.4) - t4434) + (max(min(projected_coord.s1.k, (t4431 + -1)), projections.min.3) * projections.stride.3)) + (max(min(projected_coord.s1.y, (t4429 + -1)), projections.min.1) * projections.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4430 + -1)), projections.min.2) * projections.stride.2))]) * select((projected_coord.s1.r_coord$x == 0), (((t4142 + t4496) + -0.500000f) / t4435), select((projected_coord.s1.r_coord$x == 1), (((t4142 + t4489) + -0.500000f) / t4435), t4142)))))
}
}
}
}
}
}
}
}
}
halide_gpu_thread_barrier()
consume projected_coord
{
if ((.__thread_id_x < 8))
{

let t4507 = ((filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + .__thread_id_x)

let t4508 = (t4507 % 192)

let t4503 = (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x / 24)

let t4506 = (((t4466 + (t4507 / 192)) + ((t4508 / 3) * 64)) + ((t4508 % 3) * 4096))

for (filtered.s1.r$z, t4421, filtered.s1.r$z.loop_extent)
{
for (filtered.s1.r$y, t4421, filtered.s1.r$z.loop_extent)
{

let t4514 = ((t4433 + filtered.s1.r$y) * t548)

let t4513 = (t4433 + filtered.s1.r$z)

for (filtered.s1.r$x, 0, samples.extent.0)
{
filtered[t4506] = (let t4153.s = (((((t4507 / 192) - t4503) * (t4467 * t546)) + (((t4507 % 192) / 3) * projected_coord.stride.4)) + (t4513 * t4467)) in (let t4154 = __shared[((t4153.s + t4514) + filtered.s1.r$x)] in (let t4155 = __shared[((projected_coord.stride.4 * 64) + ((t4153.s + t4514) + filtered.s1.r$x))] in (filtered[(((t4466 + (t4507 / 192)) + (((t4507 % 192) / 3) * 64)) + (((t4507 % 192) % 3) * 4096))] + ((t4441 * exp_f32((((t4154 * t4154) + (t4155 * t4155)) * -0.500000f))) * select(((((((((((filtered.s1.r$x < samples.min.0) || ((samples.min.0 + samples.extent.0) <= filtered.s1.r$x)) || ((t4507 / 192) < (samples.min.1 - filtered.s1.r$y))) || ((t4437 - filtered.s1.r$y) <= (t4507 / 192))) || (((t4507 % 192) / 3) < (samples.min.2 - filtered.s1.r$z))) || ((t4438 - filtered.s1.r$z) <= ((t4507 % 192) / 3))) || (((t4507 % 192) % 3) < samples.min.3)) || (t4439 <= ((t4507 % 192) % 3))) || t4459) || (t4440 <= filtered.s1.n)), 0.000000f, samples[(max(min(filtered.s1.r$x, ((samples.min.0 + samples.extent.0) + -1)), samples.min.0) + (((((max(min(filtered.s1.n, (t4440 + -1)), samples.min.4) * samples.stride.4) - t4442) + (max(min(((t4507 % 192) % 3), (t4439 + -1)), samples.min.3) * samples.stride.3)) + (max(min((((t4507 % 192) / 3) + filtered.s1.r$z), (t4438 + -1)), samples.min.2) * samples.stride.2)) + (max(min(((t4507 / 192) + filtered.s1.r$y), (t4437 + -1)), samples.min.1) * samples.stride.1)))]))))))
}
}
}
}
}
}
free __shared
}
}
}

let t4564 = (coordinates.min.0 + coordinates.extent.0)

let t4565 = (coordinates.min.1 + coordinates.extent.1)

let t4566 = (coordinates.min.2 + coordinates.extent.2)

let t4567 = (coordinates.min.3 + coordinates.extent.3)

let t4568 = (projections.min.1 + projections.extent.1)

let t4569 = (projections.min.2 + projections.extent.2)

let t4570 = (samples.min.0 + samples.extent.0)

let t4571 = ((0 - nsize) / 2)

let t4572 = (filtered.s1.r$z.loop_extent + t4571)

let t4573 = min(max(filtered.s1.n.prologue.s, 0), 2)

let t4574 = max(filtered.s1.r$z.loop_extent, 1)

let t4575 = (nsize / 2)

let t4576 = (max((max(filtered.s1.r$z.loop_extent, int32(abs(t4574))) + t4571), t4575) - t4571)

let t4577 = (filtered.s1.r$z.loop_extent * filtered.s1.r$z.loop_extent)

let t4578 = max(t4577, 1)

let t4579 = (max(max(t4572, ((((int32(abs(t4578)) + -1) / t4574) + t4571) + 1)), t4575) - t4571)

let t4580 = (t4577 * samples.extent.0)

let t4581 = float32(nsize)

let t4582 = max(coordinates.min.0, 0)

let t4583 = max(samples.min.0, 0)

let t4584 = max(max(coordinates.min.3, projections.min.2), 0)

let t4585 = max(projections.min.1, ((coordinates.min.2 - t4571) + -1))

let t4586 = max(samples.extent.0, (((t4580 + -1) / t4578) + 1))

let t4587 = ((t4586 * t4579) * t4576)

let t4588 = (t4566 - t4571)

let t4527 = max(min(max(t4585, 0), 64), (min(min((min((t4588 - filtered.s1.r$z.loop_extent), t4568) + -1), (t4588 + -2)), 63) + 1))

let t4521 = (max(t4587, (t4587 * 2)) * 128)

let t4534 = max(t4580, 8)

let t4552 = max(projections.min.0, ((coordinates.min.1 - t4571) + -1))

let t4530 = min(t4584, coordinates.extent.3)

let t4531 = min(max(min(t4567, t4569), t4584), coordinates.extent.3)

let t4533 = min(max(t4570, t4583), samples.extent.0)

let t4529 = min(max(t4564, t4582), samples.extent.0)

let t4532 = min(t4583, samples.extent.0)

let t4528 = min(t4582, samples.extent.0)

let t4562 = (1.000000f / ((t4581 * t4581) * float32(samples.extent.0)))

let t4548 = ((nsize + -1) / 2)

let t4551 = (t4581 * 0.500000f)

let t4520 = (min(filtered.s1.n.epilogue.s, 2) - t4573)

let t4553 = (t4565 - t4571)

let t4538 = (t4571 + 1)

let t4558 = ((t4571 + filtered.s1.r$z.loop_extent) + 1)

let t4563 = ((((samples.min.0 + (samples.min.1 * samples.stride.1)) + (samples.min.2 * samples.stride.2)) + (samples.min.3 * samples.stride.3)) + (samples.min.4 * samples.stride.4))

let t4550 = ((((projections.min.0 + (projections.min.1 * projections.stride.1)) + (projections.min.2 * projections.stride.2)) + (projections.min.3 * projections.stride.3)) + (projections.min.4 * projections.stride.4))

let t4544 = ((((coordinates.min.0 + (coordinates.min.1 * coordinates.stride.1)) + (coordinates.min.2 * coordinates.stride.2)) + (coordinates.min.3 * coordinates.stride.3)) + (coordinates.min.4 * coordinates.stride.4))

let t4549 = ((projections.min.4 + projections.extent.4) + -1)

let t4556 = (t4572 + 1)

let t4543 = ((coordinates.min.4 + coordinates.extent.4) + -1)

let t4561 = (samples.min.3 + samples.extent.3)

let t4557 = (samples.min.2 + samples.extent.2)

let t4560 = (samples.min.1 + samples.extent.1)

let t4525 = (projections.min.3 + projections.extent.3)

let t4545 = (projections.min.0 + projections.extent.0)

for (filtered.s1.n, t4573, t4520)
{

let t4593 = (filtered.s1.n * 196608)

let t4590 = ((max(min(filtered.s1.n, t4549), projections.min.4) * projections.stride.4) - t4550)

let t4589 = ((max(min(filtered.s1.n, t4543), coordinates.min.4) * coordinates.stride.4) - t4544)

let t4594 = ((filtered.s1.n * samples.stride.4) - t4563)

for (filtered.s1.k, 0, 16)
{

let t549 = t4521

let t550 = t4576

let t551 = t4579

let t552 = t4586

let t553 = min(max(projections.min.3, (filtered.s1.k * 2)), ((filtered.s1.k * 2) + 2))

let t554 = min(max(t4525, max(projections.min.3, (filtered.s1.k * 2))), ((filtered.s1.k * 2) + 2))

let t555.s = t4585

let t556 = t4527

let t557 = t4528

let t558 = t4529

let t559 = t4530

let t560 = t4531

let t561 = t4532

let t562 = t4533

let t4614 = (filtered.s1.k * 2)

let t4615 = min(max(t555.s, 0), 64)

let t4616 = (t552 * t551)

let t4595 = (t549 * 4)

let t4610 = ((t4614 - t554) + 2)

let t4611 = (t4593 + (filtered.s1.k * 12288))

gpu_block (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x, 0, 1536)
{
allocate __shared[uint8 * t4595]
gpu_thread (.__thread_id_x, 0, t4534)
{

let projected_coord.x.extent_realized.s = ((((filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + 7) / 192) - (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x / 24))

let projected_coord.stride.4 = ((t4616 * t550) * (projected_coord.x.extent_realized.s + 1))

produce projected_coord
{

let t4619 = (.__thread_id_x < t4580)

let t4617 = (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x / 24)

let t4618 = (projected_coord.x.extent_realized.s + 1)

for (projected_coord.s0.k, t4614, 2)
{

let t4620 = (((projected_coord.s0.k - t4614) * projected_coord.stride.4) * 64)

for (projected_coord.s0.y, 0, 64)
{

let t4621 = (projected_coord.s0.y * projected_coord.stride.4)

for (projected_coord.s0.x, t4617, t4618)
{
if (t4619)
{
__shared[(let t4172 = (.__thread_id_x % t4578) in ((((((.__thread_id_x / t4578) + ((t4172 / t4574) * t552)) + ((t4172 % t4574) * t4616)) + t4620) + t4621) + ((projected_coord.s0.x - t4617) * (t4616 * t550))))] = 0.000000f
}
}
}
}
halide_gpu_thread_barrier()
if ((.__thread_id_x < 1))
{

let t4623 = (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x / 24)

let t4622 = (t553 - t4614)

let t4624 = (projected_coord.x.extent_realized.s + 1)

for (projected_coord.s1.k, t4614, t4622)
{

let t4626 = (projected_coord.s1.k < projections.min.3)

let t4625 = (((projected_coord.s1.k - t4614) * projected_coord.stride.4) * 64)

for (projected_coord.s1.y, 0, 64)
{

let t4628 = (projected_coord.s1.y < projections.min.1)

let t4627 = (t4625 + (projected_coord.s1.y * projected_coord.stride.4))

for (projected_coord.s1.x, t4623, t4624)
{

let t4630 = (projected_coord.s1.x < projections.min.0)

let t4629 = (projected_coord.s1.x - t4623)

for (projected_coord.s1.dy, t4538, filtered.s1.r$z.loop_extent)
{

let t4631 = (projected_coord.s1.y < (coordinates.min.2 - projected_coord.s1.dy))

let t4634 = float32(projected_coord.s1.dy)

for (projected_coord.s1.dx, t4538, filtered.s1.r$z.loop_extent)
{

let t4635 = (projected_coord.s1.x < (coordinates.min.1 - projected_coord.s1.dx))

let t4639 = ((t4548 + projected_coord.s1.dx) * t552)

let t4638 = (t4548 + projected_coord.s1.dy)

let t4640 = float32(projected_coord.s1.dx)

for (projected_coord.s1.s, 0, samples.extent.0)
{

let t4641 = (projected_coord.s1.s < coordinates.min.0)

let t4644 = ((((t4627 + (t4629 * (t4616 * t550))) + (t4638 * t4616)) + t4639) + projected_coord.s1.s)

for (projected_coord.s1.r_coord$x, 0, coordinates.extent.3)
{
__shared[t4644] = (let t4179 = select((((((((t4641 || (t4564 <= projected_coord.s1.s)) || t4635) || ((t4565 - projected_coord.s1.dx) <= projected_coord.s1.x)) || t4631) || ((t4566 - projected_coord.s1.dy) <= projected_coord.s1.y)) || (projected_coord.s1.r_coord$x < coordinates.min.3)) || (t4567 <= projected_coord.s1.r_coord$x)), 0.000000f, coordinates[((max(min(projected_coord.s1.s, (t4564 + -1)), coordinates.min.0) + ((t4589 + (max(min((projected_coord.s1.y + projected_coord.s1.dy), (t4566 + -1)), coordinates.min.2) * coordinates.stride.2)) + (max(min((projected_coord.s1.x + projected_coord.s1.dx), (t4565 + -1)), coordinates.min.1) * coordinates.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4567 + -1)), coordinates.min.3) * coordinates.stride.3))]) in (__shared[((((t4627 + (t4629 * (t4616 * t550))) + (t4638 * t4616)) + t4639) + projected_coord.s1.s)] + (select((((((((t4630 || (t4545 <= projected_coord.s1.x)) || t4628) || (t4568 <= projected_coord.s1.y)) || (projected_coord.s1.r_coord$x < projections.min.2)) || (t4569 <= projected_coord.s1.r_coord$x)) || t4626) || (t4525 <= projected_coord.s1.k)), 0.000000f, projections[((max(min(projected_coord.s1.x, (t4545 + -1)), projections.min.0) + ((t4590 + (max(min(projected_coord.s1.k, (t4525 + -1)), projections.min.3) * projections.stride.3)) + (max(min(projected_coord.s1.y, (t4568 + -1)), projections.min.1) * projections.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4569 + -1)), projections.min.2) * projections.stride.2))]) * select((projected_coord.s1.r_coord$x == 0), (((t4179 + t4640) + -0.500000f) / t4551), select((projected_coord.s1.r_coord$x == 1), (((t4179 + t4634) + -0.500000f) / t4551), t4179)))))
}
}
}
}
}
}
}

let t4645 = (t554 - t553)
let t4647 = (projected_coord.x.extent_realized.s + 1)
let t4659 = (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x / 24)
let t4660 = (t4659 + projected_coord.x.extent_realized.s)
let t4657 = (t4660 + 1)
let t4661 = min(max(t4552, t4659), ((projected_coord.x.extent_realized.s + t4659) + 1))
let t4650 = max(t4661, (min(min((min((t4553 - filtered.s1.r$z.loop_extent), t4545) + -1), (t4553 + -2)), t4660) + 1))

for (projected_coord.s1.k, t553, t4645)
{
  let t4663 = (t4590 + (projected_coord.s1.k * projections.stride.3))
  let t4662 = (((projected_coord.s1.k - t4614) * projected_coord.stride.4) * 64)

  for (projected_coord.s1.y, 0, t4615)
  {
    let t4664 = (t4662 + (projected_coord.s1.y * projected_coord.stride.4))
    let t4665 = (projected_coord.s1.y < projections.min.1)

    for (projected_coord.s1.x, t4659, t4647)
    {
      let t4666 = (projected_coord.s1.x - t4659)
      let t4667 = (projected_coord.s1.x < projections.min.0)

      for (projected_coord.s1.dy, t4538, filtered.s1.r$z.loop_extent)
      {
        let t4671 = float32(projected_coord.s1.dy)
        let t4668 = (projected_coord.s1.y < (coordinates.min.2 - projected_coord.s1.dy))

        for (projected_coord.s1.dx, t4538, filtered.s1.r$z.loop_extent)
        {
          let t4675 = (t4548 + projected_coord.s1.dy)
          let t4676 = ((t4548 + projected_coord.s1.dx) * t552)
          let t4677 = float32(projected_coord.s1.dx)
          let t4672 = (projected_coord.s1.x < (coordinates.min.1 - projected_coord.s1.dx))

          for (projected_coord.s1.s, 0, samples.extent.0)
          {
            let t4681 = ((((t4664 + (t4666 * (t4616 * t550))) + (t4675 * t4616)) + t4676) + projected_coord.s1.s)
            let t4678 = (projected_coord.s1.s < coordinates.min.0)

            for (projected_coord.s1.r_coord$x, 0, coordinates.extent.3)
            {
              __shared[t4681] = (let t4190 = select((((((((t4678 || (t4564 <= projected_coord.s1.s)) || t4672) || ((t4565 - projected_coord.s1.dx) <= projected_coord.s1.x)) || t4668) || ((t4566 - projected_coord.s1.dy) <= projected_coord.s1.y)) || (projected_coord.s1.r_coord$x < coordinates.min.3)) || (t4567 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                coordinates[((max(min(projected_coord.s1.s, (t4564 + -1)), coordinates.min.0) + ((t4589 + (max(min((projected_coord.s1.y + projected_coord.s1.dy), (t4566 + -1)), coordinates.min.2) * coordinates.stride.2)) + (max(min((projected_coord.s1.x + projected_coord.s1.dx), (t4565 + -1)), coordinates.min.1) * coordinates.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4567 + -1)), coordinates.min.3) * coordinates.stride.3))])
                in (__shared[t4681] + (select((((((t4667 || (t4545 <= projected_coord.s1.x)) || t4665) || (t4568 <= projected_coord.s1.y)) || (projected_coord.s1.r_coord$x < projections.min.2)) || (t4569 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                projections[((max(min(projected_coord.s1.x, (t4545 + -1)), projections.min.0) + (t4663 + (max(min(projected_coord.s1.y, (t4568 + -1)), projections.min.1) * projections.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4569 + -1)), projections.min.2) * projections.stride.2))])
                * select((projected_coord.s1.r_coord$x == 0), (((t4190 + t4677) + -0.500000f) / t4551),
                select((projected_coord.s1.r_coord$x == 1), (((t4190 + t4671) + -0.500000f) / t4551), t4190)))))
            }
          }
        }
      }
    }
  }

  let t4682 = (t556 - t4615)
  let t4683 = (((projected_coord.s1.k - t4614) * projected_coord.stride.4) * 64)
  let t4684 = (t4590 + (projected_coord.s1.k * projections.stride.3))

  for (projected_coord.s1.y, t4615, t4682)
  {
    let t4692 = (t4683 + (projected_coord.s1.y * projected_coord.stride.4))
    let t4693 = (t4684 + (projected_coord.s1.y * projections.stride.1))
    let projected_coord.s1.x.prologue = t4661
    let t4691 = (projected_coord.s1.x.prologue - t4659)
    let projected_coord.s1.x.epilogue = t4650

    for (projected_coord.s1.x, t4659, t4691)
    {
      let t4694 = (projected_coord.s1.x - t4659)
      let t4695 = (projected_coord.s1.x < projections.min.0)

      for (projected_coord.s1.dy, t4538, filtered.s1.r$z.loop_extent)
      {
        let t4697 = (t4548 + projected_coord.s1.dy)
        let t4698 = float32(projected_coord.s1.dy)
        let t4696 = (t4589 + ((projected_coord.s1.y + projected_coord.s1.dy) * coordinates.stride.2))

        for (projected_coord.s1.dx, t4538, filtered.s1.r$z.loop_extent)
        {
          let t4701 = ((t4548 + projected_coord.s1.dx) * t552)
          let t4702 = float32(projected_coord.s1.dx)
          let t4699 = (projected_coord.s1.x < (coordinates.min.1 - projected_coord.s1.dx))

          for (projected_coord.s1.s, 0, samples.extent.0)
          {
            let t4705 = ((((t4692 + (t4694 * (t4616 * t550))) + (t4697 * t4616)) + t4701) + projected_coord.s1.s)
            let t4703 = (projected_coord.s1.s < coordinates.min.0)

            for (projected_coord.s1.r_coord$x, 0, coordinates.extent.3)
            {
              __shared[t4705] = (let t4201 = select((((((t4703 || (t4564 <= projected_coord.s1.s)) || t4699) || ((t4565 - projected_coord.s1.dx) <= projected_coord.s1.x)) || (projected_coord.s1.r_coord$x < coordinates.min.3)) || (t4567 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                coordinates[((max(min(projected_coord.s1.s, (t4564 + -1)), coordinates.min.0) + (t4696 + (max(min((projected_coord.s1.x + projected_coord.s1.dx), (t4565 + -1)), coordinates.min.1) * coordinates.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4567 + -1)), coordinates.min.3) * coordinates.stride.3))])
                in (__shared[t4705] + (select((((t4695 || (t4545 <= projected_coord.s1.x)) || (projected_coord.s1.r_coord$x < projections.min.2)) || (t4569 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                projections[((max(min(projected_coord.s1.x, (t4545 + -1)), projections.min.0) + t4693) + (max(min(projected_coord.s1.r_coord$x, (t4569 + -1)), projections.min.2) * projections.stride.2))])
                * select((projected_coord.s1.r_coord$x == 0), (((t4201 + t4702) + -0.500000f) / t4551),
                select((projected_coord.s1.r_coord$x == 1), (((t4201 + t4698) + -0.500000f) / t4551), t4201)))))
            }
          }
        }
      }
    }

    let t4706 = (projected_coord.s1.x.epilogue - projected_coord.s1.x.prologue)
    let t4707 = (t4683 + (projected_coord.s1.y * projected_coord.stride.4))
    let t4708 = (t4684 + (projected_coord.s1.y * projections.stride.1))
    let t4714 = (samples.extent.0 - t558)

    for (projected_coord.s1.x, projected_coord.s1.x.prologue, t4706)
    {
      let t4716 = (t4708 + projected_coord.s1.x)
      let t4715 = (projected_coord.s1.x - t4659)

      for (projected_coord.s1.dy, t4538, filtered.s1.r$z.loop_extent)
      {
        let t4723 = (t4548 + projected_coord.s1.dy)
        let t4724 = float32(projected_coord.s1.dy)
        let t4722 = (t4589 + ((projected_coord.s1.y + projected_coord.s1.dy) * coordinates.stride.2))

        for (projected_coord.s1.dx, t4538, filtered.s1.r$z.loop_extent)
        {
          let t4730 = (t4722 + ((projected_coord.s1.x + projected_coord.s1.dx) * coordinates.stride.1))
          let t4731 = ((t4548 + projected_coord.s1.dx) * t552)
          let t4732 = float32(projected_coord.s1.dx)

          for (projected_coord.s1.s, 0, t557)
          {
            let t4734 = ((((t4707 + (t4715 * (t4616 * t550))) + (t4723 * t4616)) + t4731) + projected_coord.s1.s)
            let t4733 = (projected_coord.s1.s < coordinates.min.0)

            for (projected_coord.s1.r_coord$x, 0, coordinates.extent.3)
            {
              __shared[t4734] = (let t4208 = select((((t4733 || (t4564 <= projected_coord.s1.s)) || (projected_coord.s1.r_coord$x < coordinates.min.3)) || (t4567 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                coordinates[((max(min(projected_coord.s1.s, (t4564 + -1)), coordinates.min.0) + t4730) + (max(min(projected_coord.s1.r_coord$x, (t4567 + -1)), coordinates.min.3) * coordinates.stride.3))])
                in (__shared[t4734] + (select(((projected_coord.s1.r_coord$x < projections.min.2) || (t4569 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                projections[(t4716 + (max(min(projected_coord.s1.r_coord$x, (t4569 + -1)), projections.min.2) * projections.stride.2))])
                * select((projected_coord.s1.r_coord$x == 0), (((t4208 + t4732) + -0.500000f) / t4551),
                select((projected_coord.s1.r_coord$x == 1), (((t4208 + t4724) + -0.500000f) / t4551), t4208)))))
            }
          }

          let t4735 = (t558 - t557)
          let t4736 = (t4722 + ((projected_coord.s1.x + projected_coord.s1.dx) * coordinates.stride.1))
          let t4737 = ((t4548 + projected_coord.s1.dx) * t552)
          let t4738 = float32(projected_coord.s1.dx)
          let t4742 = (coordinates.extent.3 - t560)

          for (projected_coord.s1.s, t557, t4735)
          {
            let t4743 = (t4736 + projected_coord.s1.s)
            let t4744 = ((((t4707 + (t4715 * (t4616 * t550))) + (t4723 * t4616)) + t4737) + projected_coord.s1.s)

            for (projected_coord.s1.r_coord$x, 0, t559)
            {
              __shared[t4744] = (let t4213 = select(((projected_coord.s1.r_coord$x < coordinates.min.3) || (t4567 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                coordinates[(t4743 + (max(min(projected_coord.s1.r_coord$x, (t4567 + -1)), coordinates.min.3) * coordinates.stride.3))])
                in (__shared[t4744] + (select(((projected_coord.s1.r_coord$x < projections.min.2) || (t4569 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                projections[(t4716 + (max(min(projected_coord.s1.r_coord$x, (t4569 + -1)), projections.min.2) * projections.stride.2))])
                * select((projected_coord.s1.r_coord$x == 0), (((t4213 + t4738) + -0.500000f) / t4551),
                select((projected_coord.s1.r_coord$x == 1), (((t4213 + t4724) + -0.500000f) / t4551), t4213)))))
            }

            let t4745 = (t560 - t559)
            let t4746 = (t4736 + projected_coord.s1.s)
            let t4747 = (t4567 + -1)
            let t4748 = (t4569 + -1)
            let t4749 = ((((t4707 + (t4715 * (t4616 * t550))) + (t4723 * t4616)) + t4737) + projected_coord.s1.s)

            for (projected_coord.s1.r_coord$x, t559, t4745)
            {
              __shared[t4749] = (let t4217 = coordinates[(t4746 + (max(min(projected_coord.s1.r_coord$x, t4747), coordinates.min.3) * coordinates.stride.3))]
                in (__shared[t4749] + (projections[(t4716 + (max(min(projected_coord.s1.r_coord$x, t4748), projections.min.2) * projections.stride.2))]
                * select((projected_coord.s1.r_coord$x == 0), (((t4217 + t4738) + -0.500000f) / t4551),
                select((projected_coord.s1.r_coord$x == 1), (((t4217 + t4724) + -0.500000f) / t4551), t4217)))))
            }

            let t4750 = (t4736 + projected_coord.s1.s)
            let t4751 = ((((t4707 + (t4715 * (t4616 * t550))) + (t4723 * t4616)) + t4737) + projected_coord.s1.s)

            for (projected_coord.s1.r_coord$x, t560, t4742)
            {
              __shared[t4751] = (let t4221 = select((projected_coord.s1.r_coord$x < t4567),
                coordinates[(t4750 + (max(min(projected_coord.s1.r_coord$x, (t4567 + -1)), coordinates.min.3) * coordinates.stride.3))],
                0.000000f)
                in (__shared[t4751] + (select((projected_coord.s1.r_coord$x < t4569),
                projections[(t4716 + (max(min(projected_coord.s1.r_coord$x, (t4569 + -1)), projections.min.2) * projections.stride.2))],
                0.000000f)
                * select((projected_coord.s1.r_coord$x == 0), (((t4221 + t4738) + -0.500000f) / t4551),
                select((projected_coord.s1.r_coord$x == 1), (((t4221 + t4724) + -0.500000f) / t4551), t4221)))))
            }
          }

          let t4752 = (t4722 + ((projected_coord.s1.x + projected_coord.s1.dx) * coordinates.stride.1))
          let t4753 = ((t4548 + projected_coord.s1.dx) * t552)
          let t4754 = float32(projected_coord.s1.dx)

          for (projected_coord.s1.s, t558, t4714)
          {
            let t4755 = ((((t4707 + (t4715 * (t4616 * t550))) + (t4723 * t4616)) + t4753) + projected_coord.s1.s)

            for (projected_coord.s1.r_coord$x, 0, coordinates.extent.3)
            {
              __shared[t4755] = (let t4227 = select((((t4564 <= projected_coord.s1.s) || (projected_coord.s1.r_coord$x < coordinates.min.3)) || (t4567 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                coordinates[((max(min(projected_coord.s1.s, (t4564 + -1)), coordinates.min.0) + t4752) + (max(min(projected_coord.s1.r_coord$x, (t4567 + -1)), coordinates.min.3) * coordinates.stride.3))])
                in (__shared[t4755] + (select(((projected_coord.s1.r_coord$x < projections.min.2) || (t4569 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                projections[(t4716 + (max(min(projected_coord.s1.r_coord$x, (t4569 + -1)), projections.min.2) * projections.stride.2))])
                * select((projected_coord.s1.r_coord$x == 0), (((t4227 + t4754) + -0.500000f) / t4551),
                select((projected_coord.s1.r_coord$x == 1), (((t4227 + t4724) + -0.500000f) / t4551), t4227)))))
            }
          }
        }
      }
    }

    let t4756 = (t4657 - projected_coord.s1.x.epilogue)
    let t4757 = (t4683 + (projected_coord.s1.y * projected_coord.stride.4))
    let t4758 = (t4684 + (projected_coord.s1.y * projections.stride.1))

    for (projected_coord.s1.x, projected_coord.s1.x.epilogue, t4756)
    {
      let t4759 = (projected_coord.s1.x - t4659)

      for (projected_coord.s1.dy, t4538, filtered.s1.r$z.loop_extent)
      {
        let t4761 = (t4548 + projected_coord.s1.dy)
        let t4762 = float32(projected_coord.s1.dy)
        let t4760 = (t4589 + ((projected_coord.s1.y + projected_coord.s1.dy) * coordinates.stride.2))

        for (projected_coord.s1.dx, t4538, filtered.s1.r$z.loop_extent)
        {
          let t4765 = float32(projected_coord.s1.dx)
          let t4764 = ((t4548 + projected_coord.s1.dx) * t552)

          for (projected_coord.s1.s, 0, samples.extent.0)
          {
            let t4768 = ((((t4757 + (t4759 * (t4616 * t550))) + (t4761 * t4616)) + t4764) + projected_coord.s1.s)
            let t4766 = (projected_coord.s1.s < coordinates.min.0)

            for (projected_coord.s1.r_coord$x, 0, coordinates.extent.3)
            {
              __shared[t4768] = (let t4234 = select(((((t4766 || (t4564 <= projected_coord.s1.s)) || ((t4565 - projected_coord.s1.dx) <= projected_coord.s1.x)) || (projected_coord.s1.r_coord$x < coordinates.min.3)) || (t4567 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                coordinates[((max(min(projected_coord.s1.s, (t4564 + -1)), coordinates.min.0) + (t4760 + (max(min((projected_coord.s1.x + projected_coord.s1.dx), (t4565 + -1)), coordinates.min.1) * coordinates.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4567 + -1)), coordinates.min.3) * coordinates.stride.3))])
                in (__shared[t4768] + (select((((t4545 <= projected_coord.s1.x) || (projected_coord.s1.r_coord$x < projections.min.2)) || (t4569 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                projections[((max(min(projected_coord.s1.x, (t4545 + -1)), projections.min.0) + t4758) + (max(min(projected_coord.s1.r_coord$x, (t4569 + -1)), projections.min.2) * projections.stride.2))])
                * select((projected_coord.s1.r_coord$x == 0), (((t4234 + t4765) + -0.500000f) / t4551),
                select((projected_coord.s1.r_coord$x == 1), (((t4234 + t4762) + -0.500000f) / t4551), t4234)))))
            }
          }
        }
      }
    }
  }

  let t4771 = (t4590 + (projected_coord.s1.k * projections.stride.3))
  let t4770 = (((projected_coord.s1.k - t4614) * projected_coord.stride.4) * 64)

  for (projected_coord.s1.y, t556, (64 - t556))
  {
    let t4772 = (t4770 + (projected_coord.s1.y * projected_coord.stride.4))

    for (projected_coord.s1.x, t4659, t4647)
    {
      let t4773 = (projected_coord.s1.x - t4659)
      let t4774 = (projected_coord.s1.x < projections.min.0)

      for (projected_coord.s1.dy, t4538, filtered.s1.r$z.loop_extent)
      {
        let t4777 = float32(projected_coord.s1.dy)

        for (projected_coord.s1.dx, t4538, filtered.s1.r$z.loop_extent)
        {
          let t4781 = (t4548 + projected_coord.s1.dy)
          let t4782 = ((t4548 + projected_coord.s1.dx) * t552)
          let t4783 = float32(projected_coord.s1.dx)
          let t4778 = (projected_coord.s1.x < (coordinates.min.1 - projected_coord.s1.dx))

          for (projected_coord.s1.s, 0, samples.extent.0)
          {
            let t4787 = ((((t4772 + (t4773 * (t4616 * t550))) + (t4781 * t4616)) + t4782) + projected_coord.s1.s)
            let t4784 = (projected_coord.s1.s < coordinates.min.0)

            for (projected_coord.s1.r_coord$x, 0, coordinates.extent.3)
            {
              __shared[t4787] = (let t4243 = select(((((((t4784 || (t4564 <= projected_coord.s1.s)) || t4778) || ((t4565 - projected_coord.s1.dx) <= projected_coord.s1.x)) || ((t4566 - projected_coord.s1.dy) <= projected_coord.s1.y)) || (projected_coord.s1.r_coord$x < coordinates.min.3)) || (t4567 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                coordinates[((max(min(projected_coord.s1.s, (t4564 + -1)), coordinates.min.0) + ((t4589 + (max(min((projected_coord.s1.y + projected_coord.s1.dy), (t4566 + -1)), coordinates.min.2) * coordinates.stride.2)) + (max(min((projected_coord.s1.x + projected_coord.s1.dx), (t4565 + -1)), coordinates.min.1) * coordinates.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4567 + -1)), coordinates.min.3) * coordinates.stride.3))])
                in (__shared[t4787] + (select(((((t4774 || (t4545 <= projected_coord.s1.x)) || (t4568 <= projected_coord.s1.y)) || (projected_coord.s1.r_coord$x < projections.min.2)) || (t4569 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                projections[((max(min(projected_coord.s1.x, (t4545 + -1)), projections.min.0) + (t4771 + (max(min(projected_coord.s1.y, (t4568 + -1)), projections.min.1) * projections.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4569 + -1)), projections.min.2) * projections.stride.2))])
                * select((projected_coord.s1.r_coord$x == 0), (((t4243 + t4783) + -0.500000f) / t4551),
                select((projected_coord.s1.r_coord$x == 1), (((t4243 + t4777) + -0.500000f) / t4551), t4243)))))
            }
          }
        }
      }
    }
  }
}

let t4789 = (projected_coord.x.extent_realized.s + 1)
let t4788 = (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x / 24)

for (projected_coord.s1.k, t554, t4610)
{
  let t4790 = (((projected_coord.s1.k - t4614) * projected_coord.stride.4) * 64)

  for (projected_coord.s1.y, 0, 64)
  {
    let t4791 = (t4790 + (projected_coord.s1.y * projected_coord.stride.4))
    let t4792 = (projected_coord.s1.y < projections.min.1)

    for (projected_coord.s1.x, t4788, t4789)
    {
      let t4793 = (projected_coord.s1.x - t4788)
      let t4794 = (projected_coord.s1.x < projections.min.0)

      for (projected_coord.s1.dy, t4538, filtered.s1.r$z.loop_extent)
      {
        let t4798 = float32(projected_coord.s1.dy)
        let t4795 = (projected_coord.s1.y < (coordinates.min.2 - projected_coord.s1.dy))

        for (projected_coord.s1.dx, t4538, filtered.s1.r$z.loop_extent)
        {
          let t4802 = (t4548 + projected_coord.s1.dy)
          let t4803 = ((t4548 + projected_coord.s1.dx) * t552)
          let t4804 = float32(projected_coord.s1.dx)
          let t4799 = (projected_coord.s1.x < (coordinates.min.1 - projected_coord.s1.dx))

          for (projected_coord.s1.s, 0, samples.extent.0)
          {
            let t4808 = ((((t4791 + (t4793 * (t4616 * t550))) + (t4802 * t4616)) + t4803) + projected_coord.s1.s)
            let t4805 = (projected_coord.s1.s < coordinates.min.0)

            for (projected_coord.s1.r_coord$x, 0, coordinates.extent.3)
            {
              __shared[t4808] = (let t4253 = select((((((((t4805 || (t4564 <= projected_coord.s1.s)) || t4799) || ((t4565 - projected_coord.s1.dx) <= projected_coord.s1.x)) || t4795) || ((t4566 - projected_coord.s1.dy) <= projected_coord.s1.y)) || (projected_coord.s1.r_coord$x < coordinates.min.3)) || (t4567 <= projected_coord.s1.r_coord$x)),
                0.000000f,
                coordinates[((max(min(projected_coord.s1.s, (t4564 + -1)), coordinates.min.0) + ((t4589 + (max(min((projected_coord.s1.y + projected_coord.s1.dy), (t4566 + -1)), coordinates.min.2) * coordinates.stride.2)) + (max(min((projected_coord.s1.x + projected_coord.s1.dx), (t4565 + -1)), coordinates.min.1) * coordinates.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4567 + -1)), coordinates.min.3) * coordinates.stride.3))])
                in (__shared[t4808] + (select(((((((t4794 || (t4545 <= projected_coord.s1.x)) || t4792) || (t4568 <= projected_coord.s1.y)) || (projected_coord.s1.r_coord$x < projections.min.2)) || (t4569 <= projected_coord.s1.r_coord$x)) || (t4525 <= projected_coord.s1.k)),
                0.000000f,
                projections[((max(min(projected_coord.s1.x, (t4545 + -1)), projections.min.0) + ((t4590 + (max(min(projected_coord.s1.k, (t4525 + -1)), projections.min.3) * projections.stride.3)) + (max(min(projected_coord.s1.y, (t4568 + -1)), projections.min.1) * projections.stride.1))) + (max(min(projected_coord.s1.r_coord$x, (t4569 + -1)), projections.min.2) * projections.stride.2))])
                * select((projected_coord.s1.r_coord$x == 0), (((t4253 + t4804) + -0.500000f) / t4551),
                select((projected_coord.s1.r_coord$x == 1), (((t4253 + t4798) + -0.500000f) / t4551), t4253)))))
            }
          }
        }
      }
    }
  }
}
}
}
halide_gpu_thread_barrier()
consume projected_coord
{
if ((.__thread_id_x < 8))
{

let filtered.s1.r$z.prologue = min(max((samples.min.2 - ((((filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + .__thread_id_x) % 192) / 3)), t4538), t4556)

let filtered.s1.r$z.epilogue = max(min((t4557 - ((((filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + .__thread_id_x) % 192) / 3)), t4558), min(max((samples.min.2 - ((((filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + .__thread_id_x) % 192) / 3)), t4538), t4556))

let t4814 = ((filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x * 8) + .__thread_id_x)

let t4815 = (t4814 % 192)

let t4811 = (filtered.s1.ci.gpu_tile.gpu_tile.gpu_tile.__block_id_x / 24)

let t4813 = (((t4611 + (t4814 / 192)) + ((t4815 / 3) * 64)) + ((t4815 % 3) * 4096))

let t4809 = (t4548 + filtered.s1.r$z.prologue)