module name=learnable_demosaick_forward_cuda, target=x86-64-linux-avx-cuda-f16c-profile-sse41 {
func learnable_demosaick_forward_cuda(mosaick, selection_filters, green_filters, output)
{
register_destructor("halide_profiler_pipeline_end", halide_profiler_get_state())
allocate profiling_func_names[(void *) * 6]
profiling_func_names[2] = "selection"
profiling_func_names[0] = "overhead"
profiling_func_names[5] = "output"
profiling_func_names[3] = "normalizer"
profiling_func_names[4] = "interpolated_green"
profiling_func_names[1] = "interp_g"

let profiler_token = halide_profiler_pipeline_start("learnable_demosaick_forward_cuda", 6, profiling_func_names)

assert((0 <= profiler_token), profiler_token)

let profiler_state = halide_profiler_get_state()

let profiler_pipeline_state = halide_profiler_get_pipeline_state("learnable_demosaick_forward_cuda")

halide_profiler_incr_active_threads(profiler_state)
assert((reinterpret(selection_filters.buffer) != (uint64)0), halide_error_buffer_argument_is_null("selection_filters"))
assert((reinterpret(output.buffer) != (uint64)0), halide_error_buffer_argument_is_null("output"))
assert((reinterpret(mosaick.buffer) != (uint64)0), halide_error_buffer_argument_is_null("mosaick"))
assert((reinterpret(green_filters.buffer) != (uint64)0), halide_error_buffer_argument_is_null("green_filters"))

let green_filters = _halide_buffer_get_host(green_filters.buffer)

let green_filters.type.code = _halide_buffer_get_type_code(green_filters.buffer)

let green_filters.type.bits = _halide_buffer_get_type_bits(green_filters.buffer)

let green_filters.type.lanes = _halide_buffer_get_type_lanes(green_filters.buffer)

let green_filters.min.0 = _halide_buffer_get_min(green_filters.buffer, 0)

let green_filters.extent.0 = _halide_buffer_get_extent(green_filters.buffer, 0)

let green_filters.stride.0 = _halide_buffer_get_stride(green_filters.buffer, 0)

let green_filters.min.1 = _halide_buffer_get_min(green_filters.buffer, 1)

let green_filters.extent.1 = _halide_buffer_get_extent(green_filters.buffer, 1)

let green_filters.stride.1 = _halide_buffer_get_stride(green_filters.buffer, 1)

let green_filters.min.2 = _halide_buffer_get_min(green_filters.buffer, 2)

let green_filters.extent.2 = _halide_buffer_get_extent(green_filters.buffer, 2)

let green_filters.stride.2 = _halide_buffer_get_stride(green_filters.buffer, 2)

let mosaick = _halide_buffer_get_host(mosaick.buffer)

let mosaick.type.code = _halide_buffer_get_type_code(mosaick.buffer)

let mosaick.type.bits = _halide_buffer_get_type_bits(mosaick.buffer)

let mosaick.type.lanes = _halide_buffer_get_type_lanes(mosaick.buffer)

let mosaick.min.0 = _halide_buffer_get_min(mosaick.buffer, 0)

let mosaick.extent.0 = _halide_buffer_get_extent(mosaick.buffer, 0)

let mosaick.stride.0 = _halide_buffer_get_stride(mosaick.buffer, 0)

let mosaick.min.1 = _halide_buffer_get_min(mosaick.buffer, 1)

let mosaick.extent.1 = _halide_buffer_get_extent(mosaick.buffer, 1)

let mosaick.stride.1 = _halide_buffer_get_stride(mosaick.buffer, 1)

let mosaick.min.2 = _halide_buffer_get_min(mosaick.buffer, 2)

let mosaick.extent.2 = _halide_buffer_get_extent(mosaick.buffer, 2)

let mosaick.stride.2 = _halide_buffer_get_stride(mosaick.buffer, 2)

let output = _halide_buffer_get_host(output.buffer)

let output.type.code = _halide_buffer_get_type_code(output.buffer)

let output.type.bits = _halide_buffer_get_type_bits(output.buffer)

let output.type.lanes = _halide_buffer_get_type_lanes(output.buffer)

let output.min.0 = _halide_buffer_get_min(output.buffer, 0)

let output.extent.0 = _halide_buffer_get_extent(output.buffer, 0)

let output.stride.0 = _halide_buffer_get_stride(output.buffer, 0)

let output.min.1 = _halide_buffer_get_min(output.buffer, 1)

let output.extent.1 = _halide_buffer_get_extent(output.buffer, 1)

let output.stride.1 = _halide_buffer_get_stride(output.buffer, 1)

let output.min.2 = _halide_buffer_get_min(output.buffer, 2)

let output.extent.2 = _halide_buffer_get_extent(output.buffer, 2)

let output.stride.2 = _halide_buffer_get_stride(output.buffer, 2)

let output.min.3 = _halide_buffer_get_min(output.buffer, 3)

let output.extent.3 = _halide_buffer_get_extent(output.buffer, 3)

let output.stride.3 = _halide_buffer_get_stride(output.buffer, 3)

let selection_filters = _halide_buffer_get_host(selection_filters.buffer)

let selection_filters.type.code = _halide_buffer_get_type_code(selection_filters.buffer)

let selection_filters.type.bits = _halide_buffer_get_type_bits(selection_filters.buffer)

let selection_filters.type.lanes = _halide_buffer_get_type_lanes(selection_filters.buffer)

let selection_filters.min.0 = _halide_buffer_get_min(selection_filters.buffer, 0)

let selection_filters.extent.0 = _halide_buffer_get_extent(selection_filters.buffer, 0)

let selection_filters.stride.0 = _halide_buffer_get_stride(selection_filters.buffer, 0)

let selection_filters.min.1 = _halide_buffer_get_min(selection_filters.buffer, 1)

let selection_filters.extent.1 = _halide_buffer_get_extent(selection_filters.buffer, 1)

let selection_filters.stride.1 = _halide_buffer_get_stride(selection_filters.buffer, 1)

let selection_filters.min.2 = _halide_buffer_get_min(selection_filters.buffer, 2)

let selection_filters.extent.2 = _halide_buffer_get_extent(selection_filters.buffer, 2)

let selection_filters.stride.2 = _halide_buffer_get_stride(selection_filters.buffer, 2)

let green_filters.extent.2.required.s = (let t1342 = ((output.extent.0 + 2) * (output.extent.1 + 2)) in ((int32(abs(max((t1342 * green_filters.extent.2), 1))) + -1) / max(t1342, 1)))

let mosaick.extent.0.required.s = (let t1345 = min((output.min.0 + output.extent.0), ((mosaick.min.0 + mosaick.extent.0) + -1)) in (max(max(t1345, max(max((min((output.min.0 + output.extent.0), (mosaick.min.0 + mosaick.extent.0)) + -1), max(t1345, (min(max(max((output.min.0 + output.extent.0), max((((output.min.0 + output.extent.0) + selection_filters.extent.0) - (selection_filters.extent.0 / 2)), ((((int32(abs(max((output.extent.0 + 2), 1))) + output.min.0) + green_filters.extent.0) - (green_filters.extent.0 / 2)) + -2))), ((output.min.0 + output.extent.0) + -1)), (mosaick.min.0 + mosaick.extent.0)) + -1))), (min(((output.min.0 + output.extent.0) + -1), (mosaick.min.0 + mosaick.extent.0)) + -1))), mosaick.min.0) - max(min(min(min(min(min((min((output.min.0 - (green_filters.extent.0 / 2)), (output.min.0 - (selection_filters.extent.0 / 2))) + -1), output.min.0), ((mosaick.min.0 + mosaick.extent.0) + -1)), (min(output.min.0, (mosaick.min.0 + mosaick.extent.0)) + -1)), min((output.min.0 + 1), ((mosaick.min.0 + mosaick.extent.0) + -1))), min(output.min.0, ((mosaick.min.0 + mosaick.extent.0) + -1))), mosaick.min.0)))

let mosaick.min.0.required = max(min(min(min(min(min((min((output.min.0 - (green_filters.extent.0 / 2)), (output.min.0 - (selection_filters.extent.0 / 2))) + -1), output.min.0), ((mosaick.min.0 + mosaick.extent.0) + -1)), (min(output.min.0, (mosaick.min.0 + mosaick.extent.0)) + -1)), min((output.min.0 + 1), ((mosaick.min.0 + mosaick.extent.0) + -1))), min(output.min.0, ((mosaick.min.0 + mosaick.extent.0) + -1))), mosaick.min.0)

let mosaick.extent.1.required.s = (let t1349 = min((output.min.1 + output.extent.1), ((mosaick.min.1 + mosaick.extent.1) + -1)) in (let t1350 = min(((output.min.1 + output.extent.1) + -1), (mosaick.min.1 + mosaick.extent.1)) in (max(max(t1349, max(max(t1349, max(max(t1349, max(min(max(max(((((((int32(abs(max(((output.extent.0 + 2) * (output.extent.1 + 2)), 1))) + -1) / max((output.extent.0 + 2), 1)) + output.min.1) + green_filters.extent.1) - (green_filters.extent.1 / 2)) + -2), ((((output.min.1 + output.extent.1) + selection_filters.extent.1) - (selection_filters.extent.1 / 2)) + -1)), ((output.min.1 + output.extent.1) + -1)), ((mosaick.min.1 + mosaick.extent.1) + -1)), (t1350 + -1))), (t1350 + -1))), (t1350 + -1))), mosaick.min.1) - max(min(min(min(min((min((output.min.1 - (green_filters.extent.1 / 2)), (output.min.1 - (selection_filters.extent.1 / 2))) + -1), output.min.1), ((mosaick.min.1 + mosaick.extent.1) + -1)), (min(output.min.1, (mosaick.min.1 + mosaick.extent.1)) + -1)), min((output.min.1 + 1), ((mosaick.min.1 + mosaick.extent.1) + -1))), mosaick.min.1))))

let mosaick.min.1.required = max(min(min(min(min((min((output.min.1 - (green_filters.extent.1 / 2)), (output.min.1 - (selection_filters.extent.1 / 2))) + -1), output.min.1), ((mosaick.min.1 + mosaick.extent.1) + -1)), (min(output.min.1, (mosaick.min.1 + mosaick.extent.1)) + -1)), min((output.min.1 + 1), ((mosaick.min.1 + mosaick.extent.1) + -1))), mosaick.min.1)

let mosaick.extent.2.required.s = (let t1352.s = ((output.extent.0 + 2) * (output.extent.1 + 2)) in (max(max(min(((((((((t1352.s * green_filters.extent.2) * output.extent.3) + -1) / 8) * 8) + 7) / max((t1352.s * green_filters.extent.2), 1)) + output.min.3), ((mosaick.min.2 + mosaick.extent.2) + -1)), (min((output.min.3 + output.extent.3), (mosaick.min.2 + mosaick.extent.2)) + -1)), mosaick.min.2) - max(min(output.min.3, ((mosaick.min.2 + mosaick.extent.2) + -1)), mosaick.min.2)))

let mosaick.min.2.required = max(min(output.min.3, ((mosaick.min.2 + mosaick.extent.2) + -1)), mosaick.min.2)

let mosaick.stride.2.required = ((mosaick.extent.0.required.s + 1) * (mosaick.extent.1.required.s + 1))

if (_halide_buffer_is_bounds_query(green_filters.buffer))
{
_halide_buffer_init(green_filters.buffer, _halide_buffer_get_shape(green_filters.buffer), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 3, make_struct(0, green_filters.extent.0, 1, 0, 0, green_filters.extent.1, green_filters.extent.0, 0, 0, (green_filters.extent.2.required.s + 1), (green_filters.extent.0 * green_filters.extent.1), 0), (uint64)0)
}
if (_halide_buffer_is_bounds_query(mosaick.buffer))
{
_halide_buffer_init(mosaick.buffer, _halide_buffer_get_shape(mosaick.buffer), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 3, make_struct(mosaick.min.0.required, (mosaick.extent.0.required.s + 1), 1, 0, mosaick.min.1.required, (mosaick.extent.1.required.s + 1), (mosaick.extent.0.required.s + 1), 0, mosaick.min.2.required, (mosaick.extent.2.required.s + 1), mosaick.stride.2.required, 0), (uint64)0)
}
if (_halide_buffer_is_bounds_query(output.buffer))
{
_halide_buffer_init(output.buffer, _halide_buffer_get_shape(output.buffer), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 4, make_struct(output.min.0, output.extent.0, 1, 0, output.min.1, output.extent.1, output.extent.0, 0, output.min.2, output.extent.2, (output.extent.0 * output.extent.1), 0, output.min.3, output.extent.3, ((output.extent.0 * output.extent.1) * output.extent.2), 0), (uint64)0)
}
if (_halide_buffer_is_bounds_query(selection_filters.buffer))
{
_halide_buffer_init(selection_filters.buffer, _halide_buffer_get_shape(selection_filters.buffer), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 3, make_struct(0, selection_filters.extent.0, 1, 0, 0, selection_filters.extent.1, selection_filters.extent.0, 0, 0, green_filters.extent.2, (selection_filters.extent.0 * selection_filters.extent.1), 0), (uint64)0)
}
if (!(((_halide_buffer_is_bounds_query(green_filters.buffer) || _halide_buffer_is_bounds_query(mosaick.buffer)) || _halide_buffer_is_bounds_query(output.buffer)) || _halide_buffer_is_bounds_query(selection_filters.buffer)))
{
assert((((green_filters.type.code == (uint8)2) && (green_filters.type.bits == (uint8)32)) && (green_filters.type.lanes == (uint16)1)), halide_error_bad_type("Input buffer green_filters", green_filters.type.code, (uint8)2, green_filters.type.bits, (uint8)32, green_filters.type.lanes, (uint16)1))
assert((((mosaick.type.code == (uint8)2) && (mosaick.type.bits == (uint8)32)) && (mosaick.type.lanes == (uint16)1)), halide_error_bad_type("Input buffer mosaick", mosaick.type.code, (uint8)2, mosaick.type.bits, (uint8)32, mosaick.type.lanes, (uint16)1))
assert((((output.type.code == (uint8)2) && (output.type.bits == (uint8)32)) && (output.type.lanes == (uint16)1)), halide_error_bad_type("Output buffer output", output.type.code, (uint8)2, output.type.bits, (uint8)32, output.type.lanes, (uint16)1))
assert((((selection_filters.type.code == (uint8)2) && (selection_filters.type.bits == (uint8)32)) && (selection_filters.type.lanes == (uint16)1)), halide_error_bad_type("Input buffer selection_filters", selection_filters.type.code, (uint8)2, selection_filters.type.bits, (uint8)32, selection_filters.type.lanes, (uint16)1))
assert(((green_filters.min.0 <= 0) && (0 <= green_filters.min.0)), halide_error_access_out_of_bounds("Input buffer green_filters", 0, 0, (green_filters.extent.0 + -1), green_filters.min.0, ((green_filters.min.0 + green_filters.extent.0) + -1)))
assert((0 <= green_filters.extent.0), halide_error_buffer_extents_negative("Input buffer green_filters", 0, green_filters.extent.0))
assert(((green_filters.min.1 <= 0) && (0 <= green_filters.min.1)), halide_error_access_out_of_bounds("Input buffer green_filters", 1, 0, (green_filters.extent.1 + -1), green_filters.min.1, ((green_filters.min.1 + green_filters.extent.1) + -1)))
assert((0 <= green_filters.extent.1), halide_error_buffer_extents_negative("Input buffer green_filters", 1, green_filters.extent.1))
assert(((green_filters.min.2 <= 0) && (((green_filters.extent.2.required.s - green_filters.extent.2) + 1) <= green_filters.min.2)), halide_error_access_out_of_bounds("Input buffer green_filters", 2, 0, green_filters.extent.2.required.s, green_filters.min.2, ((green_filters.min.2 + green_filters.extent.2) + -1)))
assert((0 <= green_filters.extent.2), halide_error_buffer_extents_negative("Input buffer green_filters", 2, green_filters.extent.2))
assert(((mosaick.min.0 <= mosaick.min.0.required) && ((((mosaick.min.0.required + mosaick.extent.0.required.s) - mosaick.extent.0) + 1) <= mosaick.min.0)), halide_error_access_out_of_bounds("Input buffer mosaick", 0, mosaick.min.0.required, (mosaick.min.0.required + mosaick.extent.0.required.s), mosaick.min.0, ((mosaick.min.0 + mosaick.extent.0) + -1)))
assert((0 <= mosaick.extent.0), halide_error_buffer_extents_negative("Input buffer mosaick", 0, mosaick.extent.0))
assert(((mosaick.min.1 <= mosaick.min.1.required) && ((((mosaick.min.1.required + mosaick.extent.1.required.s) - mosaick.extent.1) + 1) <= mosaick.min.1)), halide_error_access_out_of_bounds("Input buffer mosaick", 1, mosaick.min.1.required, (mosaick.min.1.required + mosaick.extent.1.required.s), mosaick.min.1, ((mosaick.min.1 + mosaick.extent.1) + -1)))
assert((0 <= mosaick.extent.1), halide_error_buffer_extents_negative("Input buffer mosaick", 1, mosaick.extent.1))
assert(((mosaick.min.2 <= mosaick.min.2.required) && ((((mosaick.min.2.required + mosaick.extent.2.required.s) - mosaick.extent.2) + 1) <= mosaick.min.2)), halide_error_access_out_of_bounds("Input buffer mosaick", 2, mosaick.min.2.required, (mosaick.min.2.required + mosaick.extent.2.required.s), mosaick.min.2, ((mosaick.min.2 + mosaick.extent.2) + -1)))
assert((0 <= mosaick.extent.2), halide_error_buffer_extents_negative("Input buffer mosaick", 2, mosaick.extent.2))
assert((0 <= output.extent.0), halide_error_buffer_extents_negative("Output buffer output", 0, output.extent.0))
assert((0 <= output.extent.1), halide_error_buffer_extents_negative("Output buffer output", 1, output.extent.1))
assert((0 <= output.extent.2), halide_error_buffer_extents_negative("Output buffer output", 2, output.extent.2))
assert((0 <= output.extent.3), halide_error_buffer_extents_negative("Output buffer output", 3, output.extent.3))
assert(((selection_filters.min.0 <= 0) && (0 <= selection_filters.min.0)), halide_error_access_out_of_bounds("Input buffer selection_filters", 0, 0, (selection_filters.extent.0 + -1), selection_filters.min.0, ((selection_filters.min.0 + selection_filters.extent.0) + -1)))
assert((0 <= selection_filters.extent.0), halide_error_buffer_extents_negative("Input buffer selection_filters", 0, selection_filters.extent.0))
assert(((selection_filters.min.1 <= 0) && (0 <= selection_filters.min.1)), halide_error_access_out_of_bounds("Input buffer selection_filters", 1, 0, (selection_filters.extent.1 + -1), selection_filters.min.1, ((selection_filters.min.1 + selection_filters.extent.1) + -1)))
assert((0 <= selection_filters.extent.1), halide_error_buffer_extents_negative("Input buffer selection_filters", 1, selection_filters.extent.1))
assert(((selection_filters.min.2 <= 0) && ((green_filters.extent.2 - selection_filters.extent.2) <= selection_filters.min.2)), halide_error_access_out_of_bounds("Input buffer selection_filters", 2, 0, (green_filters.extent.2 + -1), selection_filters.min.2, ((selection_filters.min.2 + selection_filters.extent.2) + -1)))
assert((0 <= selection_filters.extent.2), halide_error_buffer_extents_negative("Input buffer selection_filters", 2, selection_filters.extent.2))
assert((green_filters.stride.0 == 1), halide_error_constraint_violated("green_filters.stride.0", green_filters.stride.0, "1", 1))
assert((mosaick.stride.0 == 1), halide_error_constraint_violated("mosaick.stride.0", mosaick.stride.0, "1", 1))
assert((output.stride.0 == 1), halide_error_constraint_violated("output.stride.0", output.stride.0, "1", 1))
assert((selection_filters.stride.0 == 1), halide_error_constraint_violated("selection_filters.stride.0", selection_filters.stride.0, "1", 1))

let green_filters.total_extent.1 = (int64(green_filters.extent.1) * int64(green_filters.extent.0))

let mosaick.total_extent.1 = (int64(mosaick.extent.1) * int64(mosaick.extent.0))

let output.total_extent.1 = (int64(output.extent.1) * int64(output.extent.0))

let output.total_extent.3 = (int64(output.extent.3) * (int64(output.extent.2) * output.total_extent.1))

let selection_filters.total_extent.1 = (int64(selection_filters.extent.1) * int64(selection_filters.extent.0))

assert((abs(int64(green_filters.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("green_filters", abs(int64(green_filters.extent.0)), (uint64)2147483647))
assert((abs((int64(green_filters.extent.1) * int64(green_filters.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("green_filters", abs((int64(green_filters.extent.1) * int64(green_filters.stride.1))), (uint64)2147483647))
assert((green_filters.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("green_filters", green_filters.total_extent.1, (int64)2147483647))
assert((abs((int64(green_filters.extent.2) * int64(green_filters.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("green_filters", abs((int64(green_filters.extent.2) * int64(green_filters.stride.2))), (uint64)2147483647))
assert(((int64(green_filters.extent.2) * green_filters.total_extent.1) <= (int64)2147483647), halide_error_buffer_extents_too_large("green_filters", (int64(green_filters.extent.2) * green_filters.total_extent.1), (int64)2147483647))
assert((abs(int64(mosaick.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("mosaick", abs(int64(mosaick.extent.0)), (uint64)2147483647))
assert((abs((int64(mosaick.extent.1) * int64(mosaick.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("mosaick", abs((int64(mosaick.extent.1) * int64(mosaick.stride.1))), (uint64)2147483647))
assert((mosaick.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("mosaick", mosaick.total_extent.1, (int64)2147483647))
assert((abs((int64(mosaick.extent.2) * int64(mosaick.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("mosaick", abs((int64(mosaick.extent.2) * int64(mosaick.stride.2))), (uint64)2147483647))
assert(((int64(mosaick.extent.2) * mosaick.total_extent.1) <= (int64)2147483647), halide_error_buffer_extents_too_large("mosaick", (int64(mosaick.extent.2) * mosaick.total_extent.1), (int64)2147483647))
assert((abs(int64(output.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output", abs(int64(output.extent.0)), (uint64)2147483647))
assert((abs((int64(output.extent.1) * int64(output.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output", abs((int64(output.extent.1) * int64(output.stride.1))), (uint64)2147483647))
assert((output.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("output", output.total_extent.1, (int64)2147483647))
assert((abs((int64(output.extent.2) * int64(output.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output", abs((int64(output.extent.2) * int64(output.stride.2))), (uint64)2147483647))
assert(((int64(output.extent.2) * output.total_extent.1) <= (int64)2147483647), halide_error_buffer_extents_too_large("output", (int64(output.extent.2) * output.total_extent.1), (int64)2147483647))
assert((abs((int64(output.extent.3) * int64(output.stride.3))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output", abs((int64(output.extent.3) * int64(output.stride.3))), (uint64)2147483647))
assert((output.total_extent.3 <= (int64)2147483647), halide_error_buffer_extents_too_large("output", output.total_extent.3, (int64)2147483647))
assert((abs(int64(selection_filters.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("selection_filters", abs(int64(selection_filters.extent.0)), (uint64)2147483647))
assert((abs((int64(selection_filters.extent.1) * int64(selection_filters.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("selection_filters", abs((int64(selection_filters.extent.1) * int64(selection_filters.stride.1))), (uint64)2147483647))
assert((selection_filters.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("selection_filters", selection_filters.total_extent.1, (int64)2147483647))
assert((abs((int64(selection_filters.extent.2) * int64(selection_filters.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("selection_filters", abs((int64(selection_filters.extent.2) * int64(selection_filters.stride.2))), (uint64)2147483647))
assert(((int64(selection_filters.extent.2) * selection_filters.total_extent.1) <= (int64)2147483647), halide_error_buffer_extents_too_large("selection_filters", (int64(selection_filters.extent.2) * selection_filters.total_extent.1), (int64)2147483647))
assert((mosaick != reinterpret((uint64)0)), halide_error_host_is_null("Input buffer mosaick"))
assert((output != reinterpret((uint64)0)), halide_error_host_is_null("Output buffer output"))
assert((selection_filters != reinterpret((uint64)0)), halide_error_host_is_null("Input buffer selection_filters"))

let interp_g.n.min_realized = (let t1355.s = ((output.extent.0 + 2) * (output.extent.1 + 2)) in min(((min((((t1355.s * green_filters.extent.2) * output.extent.3) + -8), 0) / max((t1355.s * green_filters.extent.2), 1)) + output.min.3), output.min.3))

let interp_g.n.extent_realized.s.s = (let t1356 = ((output.extent.0 + 2) * (output.extent.1 + 2)) in (max(max(((min((((((t1356 * green_filters.extent.2) * output.extent.3) + -1) / 8) * 8), (((t1356 * green_filters.extent.2) * output.extent.3) + -8)) + 7) / max((t1356 * green_filters.extent.2), 1)), ((((((t1356 * output.extent.3) + -1) / 8) * 8) + 7) / max(t1356, 1))), (((((((t1356 * green_filters.extent.2) * output.extent.3) + -1) / 8) * 8) + 7) / max((t1356 * green_filters.extent.2), 1))) + output.min.3))

let interp_g.k.extent_realized.s = (let t1360 = ((output.extent.0 + 2) * (output.extent.1 + 2)) in max(((int32(abs(max((t1360 * green_filters.extent.2), 1))) + -1) / max(t1360, 1)), (green_filters.extent.2 + -1)))

let interp_g.y.extent_realized.s = ((int32(abs(max(((output.extent.0 + 2) * (output.extent.1 + 2)), 1))) + -1) / max((output.extent.0 + 2), 1))

let interp_g.x.extent_realized = int32(abs(max((output.extent.0 + 2), 1)))

let interp_g.stride.2 = (interp_g.x.extent_realized * (interp_g.y.extent_realized.s + 1))

let interp_g.stride.3 = (interp_g.stride.2 * (interp_g.k.extent_realized.s + 1))

allocate interp_g[float32 * interp_g.x.extent_realized * (interp_g.y.extent_realized.s + 1) * (interp_g.k.extent_realized.s + 1) * ((interp_g.n.extent_realized.s.s - interp_g.n.min_realized) + 1)] if (uint1)0

let interp_g.buffer = _halide_buffer_init(alloca(size_of_halide_buffer_t()), make_struct((output.min.0 + -1), interp_g.x.extent_realized, 1, 0, (output.min.1 + -1), (interp_g.y.extent_realized.s + 1), interp_g.x.extent_realized, 0, 0, (interp_g.k.extent_realized.s + 1), interp_g.stride.2, 0, interp_g.n.min_realized, ((interp_g.n.extent_realized.s.s - interp_g.n.min_realized) + 1), interp_g.stride.3, 0), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 4, make_struct((output.min.0 + -1), interp_g.x.extent_realized, 1, 0, (output.min.1 + -1), (interp_g.y.extent_realized.s + 1), interp_g.x.extent_realized, 0, 0, (interp_g.k.extent_realized.s + 1), interp_g.stride.2, 0, interp_g.n.min_realized, ((interp_g.n.extent_realized.s.s - interp_g.n.min_realized) + 1), interp_g.stride.3, 0), (uint64)0)

register_destructor("halide_device_free_as_destructor", interp_g.buffer)
produce interp_g
{
halide_profiler_set_current_func(profiler_state, profiler_token, 1)

let t632 = ((output.extent.0 + 2) * (output.extent.1 + 2))

let halide_device_malloc_result$3 = halide_device_malloc(interp_g.buffer, halide_cuda_device_interface())

assert((halide_device_malloc_result$3 == 0), halide_device_malloc_result$3)
halide_profiler_decr_active_threads(profiler_state)

let t1714 = (t632 * green_filters.extent.2)

let t1715 = (t1714 * output.extent.3)

let t1708 = max((t1715 / 8), 0)

let t1709 = max(t1714, 1)

let t1711 = max((output.extent.0 + 2), 1)

let t1710 = max(t632, 1)

let t1707 = ((t1715 + 7) / 8)

let t1712 = (output.min.3 - interp_g.n.min_realized)

gpu_block (interp_g.s0.x.xy.xyk.xykn.xykn.__block_id_x, 0, t1707)
{
gpu_thread (.__thread_id_x, 0, 8)
{
if ((interp_g.s0.x.xy.xyk.xykn.xykn.__block_id_x < t1708))
{
interp_g[(let t1364 = (((interp_g.s0.x.xy.xyk.xykn.xykn.__block_id_x * 8) + .__thread_id_x) % t1709) in (let t1366 = (t1364 % t1710) in ((((t1366 % t1711) + ((t1366 / t1711) * interp_g.x.extent_realized)) + ((t1364 / t1710) * interp_g.stride.2)) + ((t1712 + (((interp_g.s0.x.xy.xyk.xykn.xykn.__block_id_x * 8) + .__thread_id_x) / t1709)) * interp_g.stride.3))))] = 0.000000f
}
else
{
interp_g[(let t1371 = ((((t1714 * output.extent.3) + .__thread_id_x) + -8) % max(t1714, 1)) in (let t1373 = (t1371 % t1710) in ((((t1373 % t1711) + ((t1373 / t1711) * interp_g.x.extent_realized)) + ((t1371 / t1710) * interp_g.stride.2)) + ((t1712 + ((((t1714 * output.extent.3) + .__thread_id_x) + -8) / max(t1714, 1))) * interp_g.stride.3))))] = 0.000000f
}
}
}
halide_profiler_incr_active_threads(profiler_state)
_halide_buffer_set_device_dirty(interp_g.buffer, (uint1)1)

let halide_copy_to_device_result$2 = halide_copy_to_device(green_filters.buffer, halide_cuda_device_interface())

assert((halide_copy_to_device_result$2 == 0), halide_copy_to_device_result$2)

let halide_copy_to_device_result$3 = halide_copy_to_device(mosaick.buffer, halide_cuda_device_interface())

assert((halide_copy_to_device_result$3 == 0), halide_copy_to_device_result$3)
halide_profiler_decr_active_threads(profiler_state)

let t1738 = (mosaick.min.0 + mosaick.extent.0)

let t1739 = (mosaick.min.1 + mosaick.extent.1)

let t1740 = (mosaick.min.1 * mosaick.stride.1)

let t1741 = (mosaick.min.2 * mosaick.stride.2)

let t1742 = (green_filters.extent.1 / 2)

let t1743 = (output.min.1 - t1742)

let t1744 = (green_filters.extent.0 / 2)

let t1745 = (output.min.0 - t1744)

let t1746 = (t632 * green_filters.extent.2)

let t1718 = max(t1746, 1)

let t1720 = max((output.extent.0 + 2), 1)

let t1719 = max(t632, 1)

let t1716 = (((t1746 * output.extent.3) + 7) / 8)

let t1721 = (t1742 - output.min.1)

let t1733 = (t1744 - output.min.0)

let t1724 = (output.min.3 - interp_g.n.min_realized)

let t1737 = ((((t1745 - mosaick.min.0) - t1740) - t1741) + -1)

let t1717 = (((mosaick.min.1 + t1742) - output.min.1) + 1)

let t1732 = (((mosaick.min.0 + t1744) - output.min.0) + 1)

let t1725 = ((mosaick.min.2 + mosaick.extent.2) + -1)

let t1726 = ((mosaick.min.0 + t1740) + t1741)

let t1731 = ((green_filters.min.0 + (green_filters.min.1 * green_filters.stride.1)) + (green_filters.min.2 * green_filters.stride.2))

gpu_block (interp_g.s1.x.xy.xyk.xykn.xykn.__block_id_x, 0, t1716)
{
gpu_thread (.__thread_id_x, 0, 8)
{

let interp_g.s1.r14$y.prologue = min(max((t1717 - (((((interp_g.s1.x.xy.xyk.xykn.xykn.__block_id_x * 8) + .__thread_id_x) % t1718) % t1719) / t1720)), 0), green_filters.extent.1)

let interp_g.s1.r14$y.epilogue = (let t1375 = (t1721 - (((((interp_g.s1.x.xy.xyk.xykn.xykn.__block_id_x * 8) + .__thread_id_x) % t1718) % t1719) / t1720)) in min(max(((t1739 + t1375) + 1), max(((mosaick.min.1 + t1375) + 1), 0)), green_filters.extent.1))

let t1749 = ((interp_g.s1.x.xy.xyk.xykn.xykn.__block_id_x * 8) + .__thread_id_x)

let t1750 = (t1749 % t1718)

let t1751 = (t1750 % t1719)

let t1748 = ((((t1751 % t1720) + ((t1751 / t1720) * interp_g.x.extent_realized)) + ((t1750 / t1719) * interp_g.stride.2)) + ((t1724 + (t1749 / t1718)) * interp_g.stride.3))

for (interp_g.s1.r14$y, 0, interp_g.s1.r14$y.prologue)
{

let t1752 = (interp_g.s1.r14$y * green_filters.stride.1)

for (interp_g.s1.r14$x, 0, green_filters.extent.0)
{
interp_g[t1748] = (let t1378 = (t1749 % t1718) in (let t1380 = (t1378 % t1719) in (let t1382 = (t1380 % t1720) in (let t1383 = (t1380 / t1720) in (let t1384 = (t1378 / t1719) in (let t1385 = (t1749 / t1718) in (interp_g[(((t1382 + (t1383 * interp_g.x.extent_realized)) + (t1384 * interp_g.stride.2)) + ((t1724 + t1385) * interp_g.stride.3))] + (mosaick[(max((min(((t1745 + t1382) + interp_g.s1.r14$x), t1738) + -1), mosaick.min.0) + (((max(min((output.min.3 + t1385), t1725), mosaick.min.2) * mosaick.stride.2) - t1726) + (max((min(((t1743 + t1383) + interp_g.s1.r14$y), t1739) + -1), mosaick.min.1) * mosaick.stride.1)))] * green_filters[((((t1384 * green_filters.stride.2) - t1731) + t1752) + interp_g.s1.r14$x)]))))))))
}
}

let t1761 = ((interp_g.s1.x.xy.xyk.xykn.xykn.__block_id_x * 8) + .__thread_id_x)

let t1762 = (t1761 % t1718)

let t1763 = (t1762 % t1719)

let t1764 = (t1763 % t1720)

let t1765 = (t1733 - t1764)

let t1766 = (((t1764 + ((t1763 / t1720) * interp_g.x.extent_realized)) + ((t1762 / t1719) * interp_g.stride.2)) + ((t1724 + (t1761 / t1718)) * interp_g.stride.3))

let t1755 = min(max(((t1738 + t1765) + 1), max(((mosaick.min.0 + t1765) + 1), 0)), green_filters.extent.0)

let t1754 = min(max((t1732 - t1764), 0), green_filters.extent.0)

let t1753 = (interp_g.s1.r14$y.epilogue - interp_g.s1.r14$y.prologue)

let t1757 = (t1743 + -1)

for (interp_g.s1.r14$y, interp_g.s1.r14$y.prologue, t1753)
{

let interp_g.s1.r14$x.prologue = t1754

let interp_g.s1.r14$x.epilogue = t1755

let t1767 = (interp_g.s1.r14$y * green_filters.stride.1)

for (interp_g.s1.r14$x, 0, interp_g.s1.r14$x.prologue)
{
interp_g[t1766] = (let t1395 = (t1761 % t1718) in (let t1397 = (t1395 % t1719) in (let t1399 = (t1397 % t1720) in (let t1400 = (t1397 / t1720) in (let t1401 = (t1395 / t1719) in (let t1402 = (t1761 / t1718) in (interp_g[(((t1399 + (t1400 * interp_g.x.extent_realized)) + (t1401 * interp_g.stride.2)) + ((t1724 + t1402) * interp_g.stride.3))] + (mosaick[(max((min(((t1745 + t1399) + interp_g.s1.r14$x), t1738) + -1), mosaick.min.0) + (((max(min((output.min.3 + t1402), t1725), mosaick.min.2) * mosaick.stride.2) - t1726) + (((t1757 + t1400) + interp_g.s1.r14$y) * mosaick.stride.1)))] * green_filters[((((t1401 * green_filters.stride.2) - t1731) + t1767) + interp_g.s1.r14$x)]))))))))
}

let t1769 = (interp_g.s1.r14$y * green_filters.stride.1)

let t1768 = (interp_g.s1.r14$x.epilogue - interp_g.s1.r14$x.prologue)

for (interp_g.s1.r14$x, interp_g.s1.r14$x.prologue, t1768)
{
interp_g[t1766] = (let t1411 = (t1761 % t1718) in (let t1413 = (t1411 % t1719) in (let t1415 = (t1413 % t1720) in (let t1416 = (t1413 / t1720) in (let t1417 = (t1411 / t1719) in (let t1418 = (t1761 / t1718) in (interp_g[(((t1415 + (t1416 * interp_g.x.extent_realized)) + (t1417 * interp_g.stride.2)) + ((t1724 + t1418) * interp_g.stride.3))] + (mosaick[((((t1737 + t1415) + (max(min((output.min.3 + t1418), t1725), mosaick.min.2) * mosaick.stride.2)) + (((t1757 + t1416) + interp_g.s1.r14$y) * mosaick.stride.1)) + interp_g.s1.r14$x)] * green_filters[((((t1417 * green_filters.stride.2) - t1731) + t1769) + interp_g.s1.r14$x)]))))))))
}

let t1771 = (interp_g.s1.r14$y * green_filters.stride.1)

let t1770 = (green_filters.extent.0 - interp_g.s1.r14$x.epilogue)

for (interp_g.s1.r14$x, interp_g.s1.r14$x.epilogue, t1770)
{
interp_g[t1766] = (let t1427 = (t1761 % t1718) in (let t1429 = (t1427 % t1719) in (let t1431 = (t1429 % t1720) in (let t1432 = (t1429 / t1720) in (let t1433 = (t1427 / t1719) in (let t1434 = (t1761 / t1718) in (interp_g[(((t1431 + (t1432 * interp_g.x.extent_realized)) + (t1433 * interp_g.stride.2)) + ((t1724 + t1434) * interp_g.stride.3))] + (mosaick[(max((min(((t1745 + t1431) + interp_g.s1.r14$x), t1738) + -1), mosaick.min.0) + (((max(min((output.min.3 + t1434), t1725), mosaick.min.2) * mosaick.stride.2) - t1726) + (((t1757 + t1432) + interp_g.s1.r14$y) * mosaick.stride.1)))] * green_filters[((((t1433 * green_filters.stride.2) - t1731) + t1771) + interp_g.s1.r14$x)]))))))))
}
}

let t1775 = ((interp_g.s1.x.xy.xyk.xykn.xykn.__block_id_x * 8) + .__thread_id_x)

let t1776 = (t1775 % t1718)

let t1777 = (t1776 % t1719)

let t1774 = ((((t1777 % t1720) + ((t1777 / t1720) * interp_g.x.extent_realized)) + ((t1776 / t1719) * interp_g.stride.2)) + ((t1724 + (t1775 / t1718)) * interp_g.stride.3))

let t1772 = (green_filters.extent.1 - interp_g.s1.r14$y.epilogue)

for (interp_g.s1.r14$y, interp_g.s1.r14$y.epilogue, t1772)
{

let t1778 = (interp_g.s1.r14$y * green_filters.stride.1)

for (interp_g.s1.r14$x, 0, green_filters.extent.0)
{
interp_g[t1774] = (let t1443 = (t1775 % t1718) in (let t1445 = (t1443 % t1719) in (let t1447 = (t1445 % t1720) in (let t1448 = (t1445 / t1720) in (let t1449 = (t1443 / t1719) in (let t1450 = (t1775 / t1718) in (interp_g[(((t1447 + (t1448 * interp_g.x.extent_realized)) + (t1449 * interp_g.stride.2)) + ((t1724 + t1450) * interp_g.stride.3))] + (mosaick[(max((min(((t1745 + t1447) + interp_g.s1.r14$x), t1738) + -1), mosaick.min.0) + (((max(min((output.min.3 + t1450), t1725), mosaick.min.2) * mosaick.stride.2) - t1726) + (max((min(((t1743 + t1448) + interp_g.s1.r14$y), t1739) + -1), mosaick.min.1) * mosaick.stride.1)))] * green_filters[((((t1449 * green_filters.stride.2) - t1731) + t1778) + interp_g.s1.r14$x)]))))))))
}
}
}
}
halide_profiler_incr_active_threads(profiler_state)
}
consume interp_g
{
halide_profiler_set_current_func(profiler_state, profiler_token, 0)

let selection.n.extent_realized.s = (let t1457 = ((output.extent.0 + 2) * (output.extent.1 + 2)) in max(((output.min.3 + output.extent.3) + -1), (((((((t1457 * output.extent.3) + -1) / 8) * 8) + 7) / max(t1457, 1)) + output.min.3)))

let selection.y.extent_realized.s = (max(output.extent.1, (interp_g.y.extent_realized.s + -1)) - min((output.extent.1 + -7), -1))

let selection.x.extent_realized.s = (max(output.extent.0, (interp_g.x.extent_realized + -2)) - min((output.extent.0 + -7), -1))

let selection.stride.2 = ((selection.x.extent_realized.s + 1) * (selection.y.extent_realized.s + 1))

let selection.buffer = _halide_buffer_init(alloca(size_of_halide_buffer_t()), make_struct((min((output.extent.0 + -7), -1) + output.min.0), (selection.x.extent_realized.s + 1), 1, 0, (min((output.extent.1 + -7), -1) + output.min.1), (selection.y.extent_realized.s + 1), (selection.x.extent_realized.s + 1), 0, 0, green_filters.extent.2, selection.stride.2, 0, output.min.3, ((selection.n.extent_realized.s - output.min.3) + 1), (selection.stride.2 * green_filters.extent.2), 0), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 4, make_struct((min((output.extent.0 + -7), -1) + output.min.0), (selection.x.extent_realized.s + 1), 1, 0, (min((output.extent.1 + -7), -1) + output.min.1), (selection.y.extent_realized.s + 1), (selection.x.extent_realized.s + 1), 0, 0, green_filters.extent.2, selection.stride.2, 0, output.min.3, ((selection.n.extent_realized.s - output.min.3) + 1), (selection.stride.2 * green_filters.extent.2), 0), (uint64)0)

let halide_device_and_host_malloc_result$2 = halide_device_and_host_malloc(selection.buffer, halide_cuda_device_interface())

assert((halide_device_and_host_malloc_result$2 == 0), halide_device_and_host_malloc_result$2)
register_destructor("halide_device_and_host_free_as_destructor", selection.buffer)
halide_profiler_memory_allocate(profiler_pipeline_state, 2, ((((uint64((selection.x.extent_realized.s + 1)) * uint64((selection.y.extent_realized.s + 1))) * uint64(green_filters.extent.2)) * uint64(((selection.n.extent_realized.s - output.min.3) + 1))) * (uint64)4))
allocate selection[float32 * (selection.x.extent_realized.s + 1) * (selection.y.extent_realized.s + 1) * green_filters.extent.2 * ((selection.n.extent_realized.s - output.min.3) + 1)]custom_new{_halide_buffer_get_host(selection.buffer)}custom_delete{ halide_device_host_nop_free(); }
produce selection
{
halide_profiler_set_current_func(profiler_state, profiler_token, 2)

let halide_device_malloc_result$2 = halide_device_malloc(selection.buffer, halide_cuda_device_interface())

assert((halide_device_malloc_result$2 == 0), halide_device_malloc_result$2)

let t1789 = min((output.extent.0 + -7), -1)

let t1783 = max(((output.extent.0 + 2) / 8), 0)

let t1779 = ((output.extent.1 + 9) / 8)

let t1780 = ((output.extent.0 + 9) / 8)

let t1786 = (min((output.extent.1 + -7), -1) + output.min.1)

let t1788 = ((((output.min.0 + output.extent.0) - t1789) - output.min.0) + -7)

let t1784 = (((output.min.0 - t1789) - output.min.0) + -1)

let t1782 = ((output.min.1 + output.extent.1) + -7)

let t1787 = (selection.x.extent_realized.s + 1)

let t1781 = (output.min.1 + -1)

for (selection.s0.n, output.min.3, output.extent.3)
{

let t1792 = ((selection.s0.n - output.min.3) * (selection.stride.2 * green_filters.extent.2))

let t1791 = (t1788 + t1792)

let t1790 = (t1784 + t1792)

for (selection.s0.k, 0, green_filters.extent.2)
{
halide_profiler_decr_active_threads(profiler_state)

let t1795 = (selection.s0.k * selection.stride.2)

let t1794 = (t1791 + t1795)

let t1793 = (t1790 + t1795)

gpu_block (selection.s0.y.y.__block_id_y, 0, t1779)
{
gpu_block (selection.s0.x.x.__block_id_x, 0, t1780)
{
gpu_thread (.__thread_id_y, 0, 8)
{
gpu_thread (.__thread_id_x, 0, 8)
{

let selection.s0.y.yi.base = min((t1781 + (selection.s0.y.y.__block_id_y * 8)), t1782)

if ((selection.s0.x.x.__block_id_x < t1783))
{
selection[(((t1793 + (selection.s0.x.x.__block_id_x * 8)) + (((selection.s0.y.yi.base - t1786) + .__thread_id_y) * t1787)) + .__thread_id_x)] = 0.000000f
}
else
{
selection[((t1794 + (((selection.s0.y.yi.base - t1786) + .__thread_id_y) * t1787)) + .__thread_id_x)] = 0.000000f
}
}
}
}
}
halide_profiler_incr_active_threads(profiler_state)
}
}
_halide_buffer_set_device_dirty(selection.buffer, (uint1)1)

let halide_copy_to_host_result$2 = halide_copy_to_host(selection.buffer)

assert((halide_copy_to_host_result$2 == 0), halide_copy_to_host_result$2)

let halide_copy_to_host_result$3 = halide_copy_to_host(mosaick.buffer)

assert((halide_copy_to_host_result$3 == 0), halide_copy_to_host_result$3)

let halide_copy_to_host_result$5 = halide_copy_to_host(selection_filters.buffer)

assert((halide_copy_to_host_result$5 == 0), halide_copy_to_host_result$5)

let selection.s1.n.prologue = min(max(mosaick.min.2, output.min.3), (output.extent.3 + output.min.3))

let selection.s1.n.epilogue = max(min((mosaick.min.2 + mosaick.extent.2), (output.min.3 + output.extent.3)), min(max(mosaick.min.2, output.min.3), (output.extent.3 + output.min.3)))

let t1807 = (selection_filters.extent.1 / 2)

let t1809 = (selection_filters.extent.0 / 2)

let t1796 = (selection.s1.n.prologue - output.min.3)

let t1803 = (min((output.extent.1 + -7), -1) + output.min.1)

let t1802 = (min((output.extent.0 + -7), -1) + output.min.0)

let t1811 = ((selection_filters.min.0 + (selection_filters.min.1 * selection_filters.stride.1)) + (selection_filters.min.2 * selection_filters.stride.2))

let t1805 = ((mosaick.min.2 + mosaick.extent.2) + -1)

let t1808 = ((mosaick.min.1 + mosaick.extent.1) + -1)

let t1806 = ((mosaick.min.0 + (mosaick.min.1 * mosaick.stride.1)) + (mosaick.min.2 * mosaick.stride.2))

let t1810 = ((mosaick.min.0 + mosaick.extent.0) + -1)

let t1804 = (selection.x.extent_realized.s + 1)

let t1797 = (output.min.1 + -1)

let t1799 = (output.min.0 + -1)

let t1798 = (output.extent.1 + 2)

let t1800 = (output.extent.0 + 2)

for (selection.s1.n, output.min.3, t1796)
{

let t1813 = ((max(min(selection.s1.n, t1805), mosaick.min.2) * mosaick.stride.2) - t1806)

let t1812 = (((selection.s1.n - output.min.3) * (selection.stride.2 * green_filters.extent.2)) - t1802)

for (selection.s1.k, 0, green_filters.extent.2)
{

let t1815 = ((selection.s1.k * selection_filters.stride.2) - t1811)

let t1814 = (t1812 + (selection.s1.k * selection.stride.2))

for (selection.s1.y, t1797, t1798)
{

let t1817 = (selection.s1.y - t1807)

let t1816 = (t1814 + ((selection.s1.y - t1803) * t1804))

for (selection.s1.x, t1799, t1800)
{

let t1819 = (selection.s1.x - t1809)

let t1818 = (t1816 + selection.s1.x)

for (selection.s1.r4$y, 0, selection_filters.extent.1)
{

let t1821 = (t1815 + (selection.s1.r4$y * selection_filters.stride.1))

let t1820 = (t1813 + (max(min((t1817 + selection.s1.r4$y), t1808), mosaick.min.1) * mosaick.stride.1))

for (selection.s1.r4$x, 0, selection_filters.extent.0)
{
selection[t1818] = (selection[t1818] + (mosaick[(max(min((t1819 + selection.s1.r4$x), t1810), mosaick.min.0) + t1820)] * selection_filters[(t1821 + selection.s1.r4$x)]))
}
}
}
}
}
}

let t1843 = (output.min.0 + -1)

let t1844 = (output.min.0 + output.extent.0)

let t1845 = (output.min.1 + -1)

let t1846 = (output.min.1 + output.extent.1)

let t1847 = (mosaick.min.0 + mosaick.extent.0)

let t1848 = (mosaick.min.1 * mosaick.stride.1)

let t1849 = (mosaick.min.2 * mosaick.stride.2)

let t1850 = (mosaick.min.1 + mosaick.extent.1)

let t1851 = (selection_filters.extent.0 / 2)

let t1852 = (selection_filters.extent.1 / 2)

let t1853 = min(max((mosaick.min.0 + t1851), t1843), ((output.extent.0 + output.min.0) + 1))

let t1854 = min(max((mosaick.min.1 + t1852), t1845), ((output.extent.1 + output.min.1) + 1))

let t1855 = (t1850 + t1852)

let t1856 = (t1847 + t1851)

let t1839 = max(t1853, (min(min((t1856 - selection_filters.extent.0), (t1856 + -1)), t1844) + 1))

let t1824 = max(t1854, (min(min((t1855 - selection_filters.extent.1), (t1855 + -1)), t1846) + 1))

let t1822 = (selection.s1.n.epilogue - selection.s1.n.prologue)

let t1830 = (min((output.extent.1 + -7), -1) + output.min.1)

let t1829 = (min((output.extent.0 + -7), -1) + output.min.0)

let t1840 = (((t1851 + mosaick.min.0) + t1848) + t1849)

let t1837 = ((selection_filters.min.0 + (selection_filters.min.1 * selection_filters.stride.1)) + (selection_filters.min.2 * selection_filters.stride.2))

let t1834 = (t1850 + -1)

let t1832 = ((mosaick.min.0 + t1848) + t1849)

let t1836 = (t1847 + -1)

let t1831 = (selection.x.extent_realized.s + 1)

let t1827 = (output.extent.0 + 2)

for (selection.s1.n, selection.s1.n.prologue, t1822)
{

let t1860 = (selection.s1.n * mosaick.stride.2)

let t1857 = (((selection.s1.n - output.min.3) * (selection.stride.2 * green_filters.extent.2)) - t1829)

let t1859 = (t1860 - t1840)

let t1858 = (t1860 - t1832)

for (selection.s1.k, 0, green_filters.extent.2)
{

let selection.s1.y.prologue = t1854

let selection.s1.y.epilogue = t1824

let t1863 = ((selection.s1.k * selection_filters.stride.2) - t1837)

let t1861 = ((selection.s1.y.prologue - output.min.1) + 1)

let t1862 = (t1857 + (selection.s1.k * selection.stride.2))

for (selection.s1.y, t1845, t1861)
{

let t1865 = (selection.s1.y - t1852)

let t1864 = (t1862 + ((selection.s1.y - t1830) * t1831))

for (selection.s1.x, t1843, t1827)
{

let t1867 = (selection.s1.x - t1851)

let t1866 = (t1864 + selection.s1.x)

for (selection.s1.r4$y, 0, selection_filters.extent.1)
{

let t1869 = (t1863 + (selection.s1.r4$y * selection_filters.stride.1))

let t1868 = (t1858 + (max(min((t1865 + selection.s1.r4$y), t1834), mosaick.min.1) * mosaick.stride.1))

for (selection.s1.r4$x, 0, selection_filters.extent.0)
{
selection[t1866] = (selection[t1866] + (mosaick[(max(min((t1867 + selection.s1.r4$x), t1836), mosaick.min.0) + t1868)] * selection_filters[(t1869 + selection.s1.r4$x)]))
}
}
}
}

let t1872 = ((selection.s1.k * selection_filters.stride.2) - t1837)

let t1870 = (selection.s1.y.epilogue - selection.s1.y.prologue)

let t1871 = (t1857 + (selection.s1.k * selection.stride.2))

for (selection.s1.y, selection.s1.y.prologue, t1870)
{

let selection.s1.x.prologue = t1853

let selection.s1.x.epilogue = t1839

let t1875 = (selection.s1.y - t1852)

let t1873 = ((selection.s1.x.prologue - output.min.0) + 1)

let t1874 = (t1871 + ((selection.s1.y - t1830) * t1831))

for (selection.s1.x, t1843, t1873)
{

let t1877 = (selection.s1.x - t1851)

let t1876 = (t1874 + selection.s1.x)

for (selection.s1.r4$y, 0, selection_filters.extent.1)
{

let t1879 = (t1872 + (selection.s1.r4$y * selection_filters.stride.1))

let t1878 = (t1858 + ((t1875 + selection.s1.r4$y) * mosaick.stride.1))

for (selection.s1.r4$x, 0, selection_filters.extent.0)
{
selection[t1876] = (selection[t1876] + (mosaick[(max(min((t1877 + selection.s1.r4$x), t1836), mosaick.min.0) + t1878)] * selection_filters[(t1879 + selection.s1.r4$x)]))
}
}
}

let t1882 = (selection.s1.y - t1852)

let t1880 = (selection.s1.x.epilogue - selection.s1.x.prologue)

let t1881 = (t1871 + ((selection.s1.y - t1830) * t1831))

for (selection.s1.x, selection.s1.x.prologue, t1880)
{

let t1883 = (t1881 + selection.s1.x)

let t1884 = (t1859 + selection.s1.x)

for (selection.s1.r4$y, 0, selection_filters.extent.1)
{

let t1885 = (t1884 + ((t1882 + selection.s1.r4$y) * mosaick.stride.1))

let t1886 = (t1872 + (selection.s1.r4$y * selection_filters.stride.1))

for (selection.s1.r4$x, 0, selection_filters.extent.0)
{
selection[t1883] = (selection[t1883] + (mosaick[(t1885 + selection.s1.r4$x)] * selection_filters[(t1886 + selection.s1.r4$x)]))
}
}
}

let t1889 = (selection.s1.y - t1852)

let t1887 = ((t1844 - selection.s1.x.epilogue) + 1)

let t1888 = (t1871 + ((selection.s1.y - t1830) * t1831))

for (selection.s1.x, selection.s1.x.epilogue, t1887)
{

let t1891 = (selection.s1.x - t1851)

let t1890 = (t1888 + selection.s1.x)

for (selection.s1.r4$y, 0, selection_filters.extent.1)
{

let t1893 = (t1872 + (selection.s1.r4$y * selection_filters.stride.1))

let t1892 = (t1858 + ((t1889 + selection.s1.r4$y) * mosaick.stride.1))

for (selection.s1.r4$x, 0, selection_filters.extent.0)
{
selection[t1890] = (selection[t1890] + (mosaick[(max(min((t1891 + selection.s1.r4$x), t1836), mosaick.min.0) + t1892)] * selection_filters[(t1893 + selection.s1.r4$x)]))
}
}
}
}

let t1896 = ((selection.s1.k * selection_filters.stride.2) - t1837)

let t1894 = ((t1846 - selection.s1.y.epilogue) + 1)

let t1895 = (t1857 + (selection.s1.k * selection.stride.2))

for (selection.s1.y, selection.s1.y.epilogue, t1894)
{

let t1898 = (selection.s1.y - t1852)

let t1897 = (t1895 + ((selection.s1.y - t1830) * t1831))

for (selection.s1.x, t1843, t1827)
{

let t1900 = (selection.s1.x - t1851)

let t1899 = (t1897 + selection.s1.x)

for (selection.s1.r4$y, 0, selection_filters.extent.1)
{

let t1902 = (t1896 + (selection.s1.r4$y * selection_filters.stride.1))

let t1901 = (t1858 + (max(min((t1898 + selection.s1.r4$y), t1834), mosaick.min.1) * mosaick.stride.1))

for (selection.s1.r4$x, 0, selection_filters.extent.0)
{
selection[t1899] = (selection[t1899] + (mosaick[(max(min((t1900 + selection.s1.r4$x), t1836), mosaick.min.0) + t1901)] * selection_filters[(t1902 + selection.s1.r4$x)]))
}
}
}
}
}
}

let t1914 = (selection_filters.extent.1 / 2)

let t1916 = (selection_filters.extent.0 / 2)

let t1903 = ((output.min.3 + output.extent.3) - selection.s1.n.epilogue)

let t1910 = (min((output.extent.1 + -7), -1) + output.min.1)

let t1909 = (min((output.extent.0 + -7), -1) + output.min.0)

let t1918 = ((selection_filters.min.0 + (selection_filters.min.1 * selection_filters.stride.1)) + (selection_filters.min.2 * selection_filters.stride.2))

let t1912 = ((mosaick.min.2 + mosaick.extent.2) + -1)

let t1915 = ((mosaick.min.1 + mosaick.extent.1) + -1)

let t1913 = ((mosaick.min.0 + (mosaick.min.1 * mosaick.stride.1)) + (mosaick.min.2 * mosaick.stride.2))

let t1917 = ((mosaick.min.0 + mosaick.extent.0) + -1)

let t1911 = (selection.x.extent_realized.s + 1)

let t1904 = (output.min.1 + -1)

let t1906 = (output.min.0 + -1)

let t1905 = (output.extent.1 + 2)

let t1907 = (output.extent.0 + 2)

for (selection.s1.n, selection.s1.n.epilogue, t1903)
{

let t1920 = ((max(min(selection.s1.n, t1912), mosaick.min.2) * mosaick.stride.2) - t1913)

let t1919 = (((selection.s1.n - output.min.3) * (selection.stride.2 * green_filters.extent.2)) - t1909)

for (selection.s1.k, 0, green_filters.extent.2)
{

let t1922 = ((selection.s1.k * selection_filters.stride.2) - t1918)

let t1921 = (t1919 + (selection.s1.k * selection.stride.2))

for (selection.s1.y, t1904, t1905)
{

let t1924 = (selection.s1.y - t1914)

let t1923 = (t1921 + ((selection.s1.y - t1910) * t1911))

for (selection.s1.x, t1906, t1907)
{

let t1926 = (selection.s1.x - t1916)

let t1925 = (t1923 + selection.s1.x)

for (selection.s1.r4$y, 0, selection_filters.extent.1)
{

let t1928 = (t1922 + (selection.s1.r4$y * selection_filters.stride.1))

let t1927 = (t1920 + (max(min((t1924 + selection.s1.r4$y), t1915), mosaick.min.1) * mosaick.stride.1))

for (selection.s1.r4$x, 0, selection_filters.extent.0)
{
selection[t1925] = (selection[t1925] + (mosaick[(max(min((t1926 + selection.s1.r4$x), t1917), mosaick.min.0) + t1927)] * selection_filters[(t1928 + selection.s1.r4$x)]))
}
}
}
}
}
}
_halide_buffer_set_host_dirty(selection.buffer, (uint1)1)
}
consume selection
{
halide_profiler_set_current_func(profiler_state, profiler_token, 0)

let normalizer.n.extent_realized.s = (let t1464 = ((output.extent.0 + 2) * (output.extent.1 + 2)) in max(((output.min.3 + output.extent.3) + -1), (((((((t1464 * output.extent.3) + -1) / 8) * 8) + 7) / max(t1464, 1)) + output.min.3)))

let normalizer.y.extent_realized.s = ((max(max((min(((((output.extent.1 + 1) / 8) * 8) + 6), output.extent.1) + output.min.1), ((interp_g.y.extent_realized.s + output.min.1) + -1)), (output.min.1 + output.extent.1)) - min((output.extent.1 + -7), -1)) - output.min.1)

let normalizer.x.extent_realized.s = ((max(max((min(((((output.extent.0 + 1) / 8) * 8) + 6), output.extent.0) + output.min.0), ((interp_g.x.extent_realized + output.min.0) + -2)), (output.min.0 + output.extent.0)) - min((output.extent.0 + -7), -1)) - output.min.0)

let normalizer.stride.2 = ((normalizer.x.extent_realized.s + 1) * (normalizer.y.extent_realized.s + 1))

let normalizer.buffer = _halide_buffer_init(alloca(size_of_halide_buffer_t()), make_struct((min((output.extent.0 + -7), -1) + output.min.0), (normalizer.x.extent_realized.s + 1), 1, 0, (min((output.extent.1 + -7), -1) + output.min.1), (normalizer.y.extent_realized.s + 1), (normalizer.x.extent_realized.s + 1), 0, output.min.3, ((normalizer.n.extent_realized.s - output.min.3) + 1), normalizer.stride.2, 0), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 3, make_struct((min((output.extent.0 + -7), -1) + output.min.0), (normalizer.x.extent_realized.s + 1), 1, 0, (min((output.extent.1 + -7), -1) + output.min.1), (normalizer.y.extent_realized.s + 1), (normalizer.x.extent_realized.s + 1), 0, output.min.3, ((normalizer.n.extent_realized.s - output.min.3) + 1), normalizer.stride.2, 0), (uint64)0)

let halide_device_and_host_malloc_result$1 = halide_device_and_host_malloc(normalizer.buffer, halide_cuda_device_interface())

assert((halide_device_and_host_malloc_result$1 == 0), halide_device_and_host_malloc_result$1)
register_destructor("halide_device_and_host_free_as_destructor", normalizer.buffer)
halide_profiler_memory_allocate(profiler_pipeline_state, 3, (((uint64((normalizer.x.extent_realized.s + 1)) * uint64((normalizer.y.extent_realized.s + 1))) * uint64(((normalizer.n.extent_realized.s - output.min.3) + 1))) * (uint64)4))
allocate normalizer[float32 * (normalizer.x.extent_realized.s + 1) * (normalizer.y.extent_realized.s + 1) * ((normalizer.n.extent_realized.s - output.min.3) + 1)]custom_new{_halide_buffer_get_host(normalizer.buffer)}custom_delete{ halide_device_host_nop_free(); }
produce normalizer
{
halide_profiler_set_current_func(profiler_state, profiler_token, 3)

let halide_device_malloc_result$1 = halide_device_malloc(normalizer.buffer, halide_cuda_device_interface())

assert((halide_device_malloc_result$1 == 0), halide_device_malloc_result$1)

let t1938 = min((output.extent.0 + -7), -1)

let t1933 = max(((output.extent.0 + 2) / 8), 0)

let t1929 = ((output.extent.1 + 9) / 8)

let t1930 = ((output.extent.0 + 9) / 8)

let t1935 = (min((output.extent.1 + -7), -1) + output.min.1)

let t1937 = ((((output.min.0 + output.extent.0) - t1938) - output.min.0) + -7)

let t1934 = (((output.min.0 - t1938) - output.min.0) + -1)

let t1932 = ((output.min.1 + output.extent.1) + -7)

let t1931 = (output.min.1 + -1)

let t1936 = (normalizer.x.extent_realized.s + 1)

for (normalizer.s0.n, output.min.3, output.extent.3)
{
halide_profiler_decr_active_threads(profiler_state)

let t1941 = ((normalizer.s0.n - output.min.3) * normalizer.stride.2)

let t1940 = (t1937 + t1941)

let t1939 = (t1934 + t1941)

gpu_block (normalizer.s0.y.y.__block_id_y, 0, t1929)
{
gpu_block (normalizer.s0.x.x.__block_id_x, 0, t1930)
{
gpu_thread (.__thread_id_y, 0, 8)
{
gpu_thread (.__thread_id_x, 0, 8)
{

let normalizer.s0.y.yi.base = min((t1931 + (normalizer.s0.y.y.__block_id_y * 8)), t1932)

if ((normalizer.s0.x.x.__block_id_x < t1933))
{
normalizer[(((t1939 + (normalizer.s0.x.x.__block_id_x * 8)) + (((normalizer.s0.y.yi.base - t1935) + .__thread_id_y) * t1936)) + .__thread_id_x)] = 0.000000f
}
else
{
normalizer[((t1940 + (((normalizer.s0.y.yi.base - t1935) + .__thread_id_y) * t1936)) + .__thread_id_x)] = 0.000000f
}
}
}
}
}
halide_profiler_incr_active_threads(profiler_state)
}
_halide_buffer_set_device_dirty(normalizer.buffer, (uint1)1)

let halide_copy_to_host_result$1 = halide_copy_to_host(normalizer.buffer)

assert((halide_copy_to_host_result$1 == 0), halide_copy_to_host_result$1)

let t1947 = (min((output.extent.1 + -7), -1) + output.min.1)

let t1946 = (min((output.extent.0 + -7), -1) + output.min.0)

let t1950 = (selection.x.extent_realized.s + 1)

let t1942 = (output.min.1 + -1)

let t1944 = (output.min.0 + -1)

let t1943 = (output.extent.1 + 2)

let t1945 = (output.extent.0 + 2)

let t1948 = (normalizer.x.extent_realized.s + 1)

for (normalizer.s1.n, output.min.3, output.extent.3)
{

let t1954 = (normalizer.s1.n - output.min.3)

let t1953 = ((t1954 * normalizer.stride.2) - t1946)

for (normalizer.s1.y, t1942, t1943)
{

let t1958 = (normalizer.s1.y - t1947)

let t1957 = (t1953 + (t1958 * t1948))

for (normalizer.s1.x, t1944, t1945)
{

let t1959 = (normalizer.s1.x - t1946)

let t1961 = (t1957 + normalizer.s1.x)

for (normalizer.s1.r9$x, 0, green_filters.extent.2)
{
normalizer[t1961] = (normalizer[(((t1954 * normalizer.stride.2) + (t1958 * t1948)) + t1959)] + exp_f32((0.000000f - selection[((((t1954 * (selection.stride.2 * green_filters.extent.2)) + (t1958 * t1950)) + t1959) + (normalizer.s1.r9$x * selection.stride.2))])))
}
}
}
}
_halide_buffer_set_host_dirty(normalizer.buffer, (uint1)1)
}
consume normalizer
{
halide_profiler_set_current_func(profiler_state, profiler_token, 0)

let interpolated_green.n.min_realized = (let t1471 = ((output.extent.0 + 2) * (output.extent.1 + 2)) in min(output.min.3, ((min(((t1471 * output.extent.3) + -8), 0) / max(t1471, 1)) + output.min.3)))

let interpolated_green.n.extent_realized.s.s = (let t1472 = ((output.extent.0 + 2) * (output.extent.1 + 2)) in (let t1475 = ((((((t1472 * output.extent.3) + -1) / 8) * 8) + 7) / max(t1472, 1)) in max(max((t1475 + output.min.3), ((output.min.3 + output.extent.3) + -1)), (max(((min(((((t1472 * output.extent.3) + -1) / 8) * 8), ((t1472 * output.extent.3) + -8)) + 7) / max(t1472, 1)), t1475) + output.min.3))))

let interpolated_green.y.extent_realized.s = max(max(max(((max(interp_g.y.extent_realized.s, output.extent.1) + output.min.1) + -1), ((output.min.1 + output.extent.1) + -2)), (output.min.1 + output.extent.1)), ((interp_g.y.extent_realized.s + output.min.1) + -1))

let interpolated_green.x.extent_realized.s = max(max(max(max(((interp_g.x.extent_realized + output.min.0) + -2), ((output.min.0 + output.extent.0) + -1)), ((output.min.0 + output.extent.0) + -2)), (output.min.0 + output.extent.0)), ((interp_g.x.extent_realized + output.min.0) + -2))

let interpolated_green.stride.2 = (((interpolated_green.x.extent_realized.s - output.min.0) + 2) * ((interpolated_green.y.extent_realized.s - output.min.1) + 2))

let interpolated_green.buffer = _halide_buffer_init(alloca(size_of_halide_buffer_t()), make_struct((output.min.0 + -1), ((interpolated_green.x.extent_realized.s - output.min.0) + 2), 1, 0, (output.min.1 + -1), ((interpolated_green.y.extent_realized.s - output.min.1) + 2), ((interpolated_green.x.extent_realized.s - output.min.0) + 2), 0, interpolated_green.n.min_realized, ((interpolated_green.n.extent_realized.s.s - interpolated_green.n.min_realized) + 1), interpolated_green.stride.2, 0), reinterpret((uint64)0), (uint64)0, reinterpret((uint64)0), 2, 32, 3, make_struct((output.min.0 + -1), ((interpolated_green.x.extent_realized.s - output.min.0) + 2), 1, 0, (output.min.1 + -1), ((interpolated_green.y.extent_realized.s - output.min.1) + 2), ((interpolated_green.x.extent_realized.s - output.min.0) + 2), 0, interpolated_green.n.min_realized, ((interpolated_green.n.extent_realized.s.s - interpolated_green.n.min_realized) + 1), interpolated_green.stride.2, 0), (uint64)0)

let halide_device_and_host_malloc_result = halide_device_and_host_malloc(interpolated_green.buffer, halide_cuda_device_interface())

assert((halide_device_and_host_malloc_result == 0), halide_device_and_host_malloc_result)
register_destructor("halide_device_and_host_free_as_destructor", interpolated_green.buffer)
halide_profiler_memory_allocate(profiler_pipeline_state, 4, (((uint64(((interpolated_green.x.extent_realized.s - output.min.0) + 2)) * uint64(((interpolated_green.y.extent_realized.s - output.min.1) + 2))) * uint64(((interpolated_green.n.extent_realized.s.s - interpolated_green.n.min_realized) + 1))) * (uint64)4))
allocate interpolated_green[float32 * ((interpolated_green.x.extent_realized.s - output.min.0) + 2) * ((interpolated_green.y.extent_realized.s - output.min.1) + 2) * ((interpolated_green.n.extent_realized.s.s - interpolated_green.n.min_realized) + 1)]custom_new{_halide_buffer_get_host(interpolated_green.buffer)}custom_delete{ halide_device_host_nop_free(); }
produce interpolated_green
{
halide_profiler_set_current_func(profiler_state, profiler_token, 4)

let t633 = ((output.extent.0 + 2) * (output.extent.1 + 2))

let halide_device_malloc_result = halide_device_malloc(interpolated_green.buffer, halide_cuda_device_interface())

assert((halide_device_malloc_result == 0), halide_device_malloc_result)
halide_profiler_decr_active_threads(profiler_state)

let t1969 = (t633 * output.extent.3)

let t1963 = max((t1969 / 8), 0)

let t1965 = max((output.extent.0 + 2), 1)

let t1964 = max(t633, 1)

let t1962 = ((t1969 + 7) / 8)

let t1967 = (output.min.3 - interpolated_green.n.min_realized)

let t1968 = (t1969 + -8)

let t1966 = ((interpolated_green.x.extent_realized.s - output.min.0) + 2)

gpu_block (interpolated_green.s0.x.xy.xyn.xyn.__block_id_x, 0, t1962)
{
gpu_thread (.__thread_id_x, 0, 8)
{
if ((interpolated_green.s0.x.xy.xyn.xyn.__block_id_x < t1963))
{
interpolated_green[(let t1484 = (((interpolated_green.s0.x.xy.xyn.xyn.__block_id_x * 8) + .__thread_id_x) % t1964) in (((t1484 % t1965) + ((t1484 / t1965) * t1966)) + ((t1967 + (((interpolated_green.s0.x.xy.xyn.xyn.__block_id_x * 8) + .__thread_id_x) / t1964)) * interpolated_green.stride.2)))] = 0.000000f
}
else
{
interpolated_green[(let t1488 = ((t1968 + .__thread_id_x) % t1964) in (((t1488 % t1965) + ((t1488 / t1965) * t1966)) + ((t1967 + ((t1968 + .__thread_id_x) / t1964)) * interpolated_green.stride.2)))] = 0.000000f
}
}
}
halide_profiler_incr_active_threads(profiler_state)
_halide_buffer_set_device_dirty(interpolated_green.buffer, (uint1)1)

let halide_copy_to_device_result = halide_copy_to_device(normalizer.buffer, halide_cuda_device_interface())

assert((halide_copy_to_device_result == 0), halide_copy_to_device_result)

let halide_copy_to_device_result$1 = halide_copy_to_device(selection.buffer, halide_cuda_device_interface())

assert((halide_copy_to_device_result$1 == 0), halide_copy_to_device_result$1)
halide_profiler_decr_active_threads(profiler_state)

let t1972 = max((output.extent.0 + 2), 1)

let t1971 = max(t633, 1)

let t1974 = min((output.extent.1 + -7), -1)

let t1973 = min((output.extent.0 + -7), -1)

let t1970 = (((t633 * output.extent.3) + 7) / 8)

let t1976 = (output.min.3 - interpolated_green.n.min_realized)

let t1977 = (output.min.3 - interp_g.n.min_realized)

let t1975 = ((interpolated_green.x.extent_realized.s - output.min.0) + 2)

let t1978 = (selection.x.extent_realized.s + 1)

let t1980 = (normalizer.x.extent_realized.s + 1)

gpu_block (interpolated_green.s1.x.xy.xyn.xyn.__block_id_x, 0, t1970)
{
gpu_thread (.__thread_id_x, 0, 8)
{

let t1984 = ((interpolated_green.s1.x.xy.xyn.xyn.__block_id_x * 8) + .__thread_id_x)

let t1985 = (t1984 % t1971)

let t1983 = (((t1985 % t1972) + ((t1985 / t1972) * t1975)) + ((t1976 + (t1984 / t1971)) * interpolated_green.stride.2))

for (interpolated_green.s1.r9$x, 0, green_filters.extent.2)
{
interpolated_green[t1983] = (let t1492 = (t1984 % t1971) in (let t1494 = (t1492 % t1972) in (let t1495 = (t1492 / t1972) in (let t1496 = (t1984 / t1971) in (interpolated_green[((t1494 + (t1495 * t1975)) + ((t1976 + t1496) * interpolated_green.stride.2))] + (interp_g[(((t1494 + (t1495 * interp_g.x.extent_realized)) + ((t1977 + t1496) * interp_g.stride.3)) + (interpolated_green.s1.r9$x * interp_g.stride.2))] * (exp_f32((0.000000f - selection[(((((t1494 - t1973) + (((t1495 - t1974) + -1) * t1978)) + (t1496 * (selection.stride.2 * green_filters.extent.2))) + (interpolated_green.s1.r9$x * selection.stride.2)) + -1)])) / normalizer[((((t1494 - t1973) + (((t1495 - t1974) + -1) * t1980)) + (t1496 * normalizer.stride.2)) + -1)])))))))
}
}
}
halide_profiler_incr_active_threads(profiler_state)
halide_profiler_memory_free(profiler_pipeline_state, 2, ((((uint64((selection.x.extent_realized.s + 1)) * uint64((selection.y.extent_realized.s + 1))) * uint64(green_filters.extent.2)) * uint64(((selection.n.extent_realized.s - output.min.3) + 1))) * (uint64)4))
free selection

let halide_device_free_result = halide_device_free(interp_g.buffer)

assert((halide_device_free_result == 0), halide_device_free_result)
free interp_g

let halide_device_and_host_free_result$1 = halide_device_and_host_free(normalizer.buffer)

assert((halide_device_and_host_free_result$1 == 0), halide_device_and_host_free_result$1)
halide_profiler_memory_free(profiler_pipeline_state, 3, (((uint64((normalizer.x.extent_realized.s + 1)) * uint64((normalizer.y.extent_realized.s + 1))) * uint64(((normalizer.n.extent_realized.s - output.min.3) + 1))) * (uint64)4))
free normalizer
}
consume interpolated_green
{
halide_profiler_set_current_func(profiler_state, profiler_token, 0)
produce output
{
halide_profiler_set_current_func(profiler_state, profiler_token, 5)

let halide_copy_to_host_result = halide_copy_to_host(interpolated_green.buffer)

assert((halide_copy_to_host_result == 0), halide_copy_to_host_result)

let halide_copy_to_host_result$4 = halide_copy_to_host(output.buffer)

assert((halide_copy_to_host_result$4 == 0), halide_copy_to_host_result$4)

let output.s0.n.prologue = min(max(mosaick.min.2, output.min.3), (output.extent.3 + output.min.3))

let output.s0.n.epilogue = max(min((mosaick.min.2 + mosaick.extent.2), (output.min.3 + output.extent.3)), min(max(mosaick.min.2, output.min.3), (output.extent.3 + output.min.3)))

let t1986 = (output.s0.n.prologue - output.min.3)

let t1993 = (output.min.0 - interpolated_green.x.extent_realized.s)

let t1991 = (interpolated_green.x.extent_realized.s - output.min.0)

let t1994 = (((output.min.0 + (output.min.1 * output.stride.1)) + (output.min.2 * output.stride.2)) + (output.min.3 * output.stride.3))

let t1989 = ((mosaick.min.2 + mosaick.extent.2) + -1)

let t1990 = ((mosaick.min.0 + (mosaick.min.1 * mosaick.stride.1)) + (mosaick.min.2 * mosaick.stride.2))

let t1988 = (mosaick.min.1 + mosaick.extent.1)

let t1987 = (mosaick.min.0 + mosaick.extent.0)

for (output.s0.n, output.min.3, t1986)
{

let t1995 = ((max(min(output.s0.n, t1989), mosaick.min.2) * mosaick.stride.2) - t1990)

let t1996 = (((output.s0.n - interpolated_green.n.min_realized) * interpolated_green.stride.2) - output.min.0)

let t1998 = ((output.s0.n * output.stride.3) - t1994)

for (output.s0.c, output.min.2, output.extent.2)
{

let t2001 = (output.s0.c == 1)

let t2000 = (output.s0.c == 0)

let t2002 = (t1998 + (output.s0.c * output.stride.2))

for (output.s0.y, output.min.1, output.extent.1)
{

let t2009 = (output.s0.y + 1)

let t2006 = (t2009 % 2)

let t2005 = ((output.s0.y + -1) % 2)

let t2003 = (output.s0.y % 2)

let t2004 = ((output.s0.y - output.min.1) + 1)

let t2008 = (t2002 + (output.s0.y * output.stride.1))

for (output.s0.x, output.min.0, output.extent.0)
{
output[(t2008 + output.s0.x)] = (let t1504 = ((output.s0.x % 2) == 0) in (let t1506 = (t2003 == 1) in (let t1507 = (t2003 == 0) in (let t1508 = (t1504 && t1507) in (let t1510 = max(min(output.s0.x, (t1987 + -1)), mosaick.min.0) in (let t1513 = (t1995 + (max(min(output.s0.y, (t1988 + -1)), mosaick.min.1) * mosaick.stride.1)) in (let t1514 = mosaick[(t1513 + t1510)] in (let t1516.s = (t1996 + (t2004 * (t1991 + 2))) in (let t1517 = select(((output.s0.x % 2) == t2003), t1514, interpolated_green[((t1516.s + output.s0.x) + 1)]) in (let t1518 = max((min(output.s0.x, t1987) + -1), mosaick.min.0) in (let t1519 = max(min((output.s0.x + 1), (t1987 + -1)), mosaick.min.0) in (let t1520 = ((output.s0.x % 2) == 1) in (let t1521 = (t1520 && t1506) in (let t1522 = (t1993 + (t1516.s + output.s0.x)) in (let t1523 = (t1991 + (t1516.s + output.s0.x)) in (let t1527 = (t1995 + (max((min(output.s0.y, t1988) + -1), mosaick.min.1) * mosaick.stride.1)) in (let t1529 = (t1995 + (max(min(t2009, (t1988 + -1)), mosaick.min.1) * mosaick.stride.1)) in (let t1530.s = (select((((output.s0.x + -1) % 2) == t2003), 0.000000f, (mosaick[(t1513 + t1518)] - interpolated_green[(t1516.s + output.s0.x)])) + select((((output.s0.x + 1) % 2) == t2003), 0.000000f, (mosaick[(t1513 + t1519)] - interpolated_green[((t1516.s + output.s0.x) + 2)]))) in (let t1531.s = (select(((output.s0.x % 2) == t2005), 0.000000f, (mosaick[(t1527 + t1510)] - interpolated_green[(t1522 + -1)])) + select(((output.s0.x % 2) == t2006), 0.000000f, (mosaick[(t1529 + t1510)] - interpolated_green[(t1523 + 3)]))) in (let t1532.s = (((select((((output.s0.x + -1) % 2) == t2005), 0.000000f, (mosaick[(t1527 + t1518)] - interpolated_green[(t1522 + -2)])) + select((((output.s0.x + -1) % 2) == t2006), 0.000000f, (mosaick[(t1529 + t1518)] - interpolated_green[(t1523 + 2)]))) + select((((output.s0.x + 1) % 2) == t2005), 0.000000f, (mosaick[(t1527 + t1519)] - interpolated_green[t1522]))) + select((((output.s0.x + 1) % 2) == t2006), 0.000000f, (mosaick[(t1529 + t1519)] - interpolated_green[(t1523 + 4)]))) in select(t2000, select(t1508, ((t1530.s * 0.500000f) + t1517), select(t1521, ((t1531.s * 0.500000f) + t1517), select((t1504 && t1506), ((t1532.s * 0.250000f) + t1517), t1514))), select(t2001, t1517, select(t1508, ((t1531.s * 0.500000f) + t1517), select(t1521, ((t1530.s * 0.500000f) + t1517), select((t1520 && t1507), ((t1532.s * 0.250000f) + t1517), t1514)))))))))))))))))))))))))
}
}
}
}

let t2024 = (mosaick.min.0 + mosaick.extent.0)

let t2025 = (mosaick.min.1 + mosaick.extent.1)

let t2026 = (output.min.0 + output.extent.0)

let t2027 = (output.min.1 + output.extent.1)

let t2028 = min(max((mosaick.min.0 + 1), output.min.0), (output.extent.0 + output.min.0))

let t2029 = min(max((mosaick.min.1 + 1), output.min.1), (output.extent.1 + output.min.1))

let t2012 = max(min((t2025 + -1), t2027), t2029)

let t2021 = max(min((t2024 + -1), t2026), t2028)

let t2010 = (output.s0.n.epilogue - output.s0.n.prologue)

let t2018 = (output.min.0 - interpolated_green.x.extent_realized.s)

let t2016 = (interpolated_green.x.extent_realized.s - output.min.0)

let t2019 = (((output.min.0 + (output.min.1 * output.stride.1)) + (output.min.2 * output.stride.2)) + (output.min.3 * output.stride.3))

let t2015 = ((mosaick.min.0 + (mosaick.min.1 * mosaick.stride.1)) + (mosaick.min.2 * mosaick.stride.2))

for (output.s0.n, output.s0.n.prologue, t2010)
{

let t2035 = (output.s0.n * mosaick.stride.2)

let t2031 = (((output.s0.n - interpolated_green.n.min_realized) * interpolated_green.stride.2) - output.min.0)

let t2033 = ((output.s0.n * output.stride.3) - t2019)

for (output.s0.c, output.min.2, output.extent.2)
{

let output.s0.y.prologue = t2029

let output.s0.y.epilogue = t2012

let t2040 = (output.s0.c == 1)

let t2039 = (output.s0.c == 0)

let t2037 = (t2035 - t2015)

let t2036 = (output.s0.y.prologue - output.min.1)

let t2041 = (t2033 + (output.s0.c * output.stride.2))

for (output.s0.y, output.min.1, t2036)
{

let t2048 = (output.s0.y + 1)

let t2045 = (t2048 % 2)

let t2044 = ((output.s0.y + -1) % 2)

let t2042 = (output.s0.y % 2)

let t2043 = ((output.s0.y - output.min.1) + 1)

let t2047 = (t2041 + (output.s0.y * output.stride.1))

for (output.s0.x, output.min.0, output.extent.0)
{
output[(t2047 + output.s0.x)] = (let t1534 = ((output.s0.x % 2) == 0) in (let t1536 = (t2042 == 1) in (let t1537 = (t2042 == 0) in (let t1538 = (t1534 && t1537) in (let t1540 = max(min(output.s0.x, (t2024 + -1)), mosaick.min.0) in (let t1543 = (t2037 + (max(min(output.s0.y, (t2025 + -1)), mosaick.min.1) * mosaick.stride.1)) in (let t1544 = mosaick[(t1543 + t1540)] in (let t1546.s = (t2031 + (t2043 * (t2016 + 2))) in (let t1547 = select(((output.s0.x % 2) == t2042), t1544, interpolated_green[((t1546.s + output.s0.x) + 1)]) in (let t1548 = max((min(output.s0.x, t2024) + -1), mosaick.min.0) in (let t1549 = max(min((output.s0.x + 1), (t2024 + -1)), mosaick.min.0) in (let t1550 = ((output.s0.x % 2) == 1) in (let t1551 = (t1550 && t1536) in (let t1552 = (t2018 + (t1546.s + output.s0.x)) in (let t1553 = (t2016 + (t1546.s + output.s0.x)) in (let t1557 = (t2037 + (max((min(output.s0.y, t2025) + -1), mosaick.min.1) * mosaick.stride.1)) in (let t1559 = (t2037 + (max(min(t2048, (t2025 + -1)), mosaick.min.1) * mosaick.stride.1)) in (let t1560.s = (select((((output.s0.x + -1) % 2) == t2042), 0.000000f, (mosaick[(t1543 + t1548)] - interpolated_green[(t1546.s + output.s0.x)])) + select((((output.s0.x + 1) % 2) == t2042), 0.000000f, (mosaick[(t1543 + t1549)] - interpolated_green[((t1546.s + output.s0.x) + 2)]))) in (let t1561.s = (select(((output.s0.x % 2) == t2044), 0.000000f, (mosaick[(t1557 + t1540)] - interpolated_green[(t1552 + -1)])) + select(((output.s0.x % 2) == t2045), 0.000000f, (mosaick[(t1559 + t1540)] - interpolated_green[(t1553 + 3)]))) in (let t1562.s = (((select((((output.s0.x + -1) % 2) == t2044), 0.000000f, (mosaick[(t1557 + t1548)] - interpolated_green[(t1552 + -2)])) + select((((output.s0.x + -1) % 2) == t2045), 0.000000f, (mosaick[(t1559 + t1548)] - interpolated_green[(t1553 + 2)]))) + select((((output.s0.x + 1) % 2) == t2044), 0.000000f, (mosaick[(t1557 + t1549)] - interpolated_green[t1552]))) + select((((output.s0.x + 1) % 2) == t2045), 0.000000f, (mosaick[(t1559 + t1549)] - interpolated_green[(t1553 + 4)]))) in select(t2039, select(t1538, ((t1560.s * 0.500000f) + t1547), select(t1551, ((t1561.s * 0.500000f) + t1547), select((t1534 && t1536), ((t1562.s * 0.250000f) + t1547), t1544))), select(t2040, t1547, select(t1538, ((t1561.s * 0.500000f) + t1547), select(t1551, ((t1560.s * 0.500000f) + t1547), select((t1550 && t1537), ((t1562.s * 0.250000f) + t1547), t1544)))))))))))))))))))))))))
}
}

let t2053 = (output.s0.c == 1)

let t2052 = (output.s0.c == 0)

let t2049 = (output.s0.y.epilogue - output.s0.y.prologue)

let t2051 = (1 - output.min.1)

let t2054 = (t2033 + (output.s0.c * output.stride.2))

for (output.s0.y, output.s0.y.prologue, t2049)
{

let output.s0.x.prologue = t2028

let output.s0.x.epilogue = t2021

let t2065 = (output.s0.y + -1)

let t2066 = (output.s0.y + 1)

let t2062 = (t2066 % 2)

let t2060 = (t2065 % 2)

let t2056 = (output.s0.y % 2)

let t2063 = (t2066 * mosaick.stride.1)

let t2061 = (t2065 * mosaick.stride.1)

let t2058 = (output.s0.y * mosaick.stride.1)

let t2057 = (t2035 - t2015)

let t2055 = (output.s0.x.prologue - output.min.0)

let t2064 = (t2054 + (output.s0.y * output.stride.1))

let t2059 = (t2051 + output.s0.y)

for (output.s0.x, output.min.0, t2055)
{
output[(t2064 + output.s0.x)] = (let t1564 = ((output.s0.x % 2) == 0) in (let t1566 = (t2056 == 1) in (let t1567 = (t2056 == 0) in (let t1568 = (t1564 && t1567) in (let t1570 = max(min(output.s0.x, (t2024 + -1)), mosaick.min.0) in (let t1573 = mosaick[((t2057 + t2058) + t1570)] in (let t1575.s = (t2031 + (t2059 * (t2016 + 2))) in (let t1576 = select(((output.s0.x % 2) == t2056), t1573, interpolated_green[((t1575.s + output.s0.x) + 1)]) in (let t1577 = max((min(output.s0.x, t2024) + -1), mosaick.min.0) in (let t1578 = max(min((output.s0.x + 1), (t2024 + -1)), mosaick.min.0) in (let t1579 = ((output.s0.x % 2) == 1) in (let t1580 = (t1579 && t1566) in (let t1581 = (t2018 + (t1575.s + output.s0.x)) in (let t1582 = (t2016 + (t1575.s + output.s0.x)) in (let t1589.s = (select((((output.s0.x + -1) % 2) == t2056), 0.000000f, (mosaick[((t2057 + t2058) + t1577)] - interpolated_green[(t1575.s + output.s0.x)])) + select((((output.s0.x + 1) % 2) == t2056), 0.000000f, (mosaick[((t2057 + t2058) + t1578)] - interpolated_green[((t1575.s + output.s0.x) + 2)]))) in (let t1590.s = (select(((output.s0.x % 2) == t2060), 0.000000f, (mosaick[((t2057 + t2061) + t1570)] - interpolated_green[(t1581 + -1)])) + select(((output.s0.x % 2) == t2062), 0.000000f, (mosaick[((t2057 + t2063) + t1570)] - interpolated_green[(t1582 + 3)]))) in (let t1591.s = (((select((((output.s0.x + -1) % 2) == t2060), 0.000000f, (mosaick[((t2057 + t2061) + t1577)] - interpolated_green[(t1581 + -2)])) + select((((output.s0.x + -1) % 2) == t2062), 0.000000f, (mosaick[((t2057 + t2063) + t1577)] - interpolated_green[(t1582 + 2)]))) + select((((output.s0.x + 1) % 2) == t2060), 0.000000f, (mosaick[((t2057 + t2061) + t1578)] - interpolated_green[t1581]))) + select((((output.s0.x + 1) % 2) == t2062), 0.000000f, (mosaick[((t2057 + t2063) + t1578)] - interpolated_green[(t1582 + 4)]))) in select(t2052, select(t1568, ((t1589.s * 0.500000f) + t1576), select(t1580, ((t1590.s * 0.500000f) + t1576), select((t1564 && t1566), ((t1591.s * 0.250000f) + t1576), t1573))), select(t2053, t1576, select(t1568, ((t1590.s * 0.500000f) + t1576), select(t1580, ((t1589.s * 0.500000f) + t1576), select((t1579 && t1567), ((t1591.s * 0.250000f) + t1576), t1573))))))))))))))))))))))
}

let t2076 = (output.s0.y + -1)

let t2077 = (output.s0.y + 1)

let t2074 = (t2077 % 2)

let t2073 = (t2076 % 2)

let t2068 = (output.s0.y % 2)

let t2072 = (t2077 * mosaick.stride.1)

let t2071 = (t2076 * mosaick.stride.1)

let t2069 = (output.s0.y * mosaick.stride.1)

let t2067 = (output.s0.x.epilogue - output.s0.x.prologue)

let t2075 = (t2054 + (output.s0.y * output.stride.1))

let t2070 = (t2051 + output.s0.y)

for (output.s0.x, output.s0.x.prologue, t2067)
{
output[(t2075 + output.s0.x)] = (let t1593 = ((output.s0.x % 2) == 0) in (let t1595 = (t2068 == 1) in