//@Description Sketch to transpose a 4X4 matrix of integers

// Solver bounds for this file (meanings per the Sketch manual; TODO confirm
// against the Sketch version in use):
//   --bnd-unroll-amnt 6 : loops and repeat blocks are unrolled at most 6 times
//   --bnd-inbits 3      : integer inputs are explored over 3-bit values
pragma options "--bnd-unroll-amnt 6 --bnd-inbits 3 ";

// Specification: returns the transpose of a 4x4 integer matrix stored as a
// flat array of 16 ints, where element (r, c) lives at index 4*r + c.
//
// The result is written into a separate array. Copying within `mx` itself
// would overwrite the lower triangle before its values could be moved to the
// upper triangle, yielding a symmetrized matrix instead of the transpose.
int[16] transpose(int[16] mx){
	int[16] res = 0;
	int x, y;
	// Visit every (x, y) position; 4 iterations per loop stays within the
	// unroll bound set by the pragma above.
	for(x = 0; x < 4; x++)
		for(y = 0; y < 4; y++)
			res[4*x+y] = mx[4*y+x];
	return res;
}

// Four-lane shuffle in the style of the SSE SHUFPS instruction.
// The 8-bit immediate is read as four consecutive 2-bit selectors:
//   bits 0-1 and 2-3 pick the elements of xmm1 placed in result lanes 0 and 1,
//   bits 4-5 and 6-7 pick the elements of xmm2 placed in result lanes 2 and 3.
generator int[4] shufps(int[4] xmm1, int[4] xmm2, bit[8] imm8){ /* automatically rewritten */
	// Decode each 2-bit field of the immediate into an element index.
	int sel0 = (int)imm8[0::2];
	int sel1 = (int)imm8[2::2];
	int sel2 = (int)imm8[4::2];
	int sel3 = (int)imm8[6::2];

	int[4] ret;
	// Low half of the result comes from the first operand...
	ret[0] = xmm1[sel0];
	ret[1] = xmm1[sel1];
	// ...and the high half from the second operand.
	ret[2] = xmm2[sel2];
	ret[3] = xmm2[sel3];

	return ret;
}

// Sketch of a transpose built only from shufps calls. It is declared to
// implement the `transpose` specification, so the synthesizer must choose a
// value for every `??` hole such that this function agrees with `transpose`.
//
// mx: the 4x4 matrix as a flat array of 16 ints.
// Returns p1, the array produced by the second shuffle phase.
int[16] sse_transpose(int[16] mx) implements transpose {
	// Two zero-initialized staging arrays: p0 is filled from mx, p1 from p0.
	int[16] p0 = 0;
	int[16] p1 = 0;

	// Phase 1: the repeat count is itself a hole. Each step writes one
	// 4-element slice of p0 with a shuffle of two 4-element slices of mx;
	// the three slice offsets and the shuffle immediate are all holes.
	repeat(??){
		// Search-space estimate for one step, presumably counting 4 candidate
		// offsets per slice and 2^8 immediates (TODO confirm intent):
		p0[??::4] = shufps(mx[??::4], mx[??::4], ??); // 4*4*4 * 2^8 = 2^14 = 16384
	}
	// Phase 2: same shape as phase 1, but reading from p0 and writing to p1.
	repeat(??){
		p1[??::4] = shufps(p0[??::4], p0[??::4], ??);
	}

	return p1;
}