~shabbyrobe/grugdct

7801bab7333b5cb11fd83cd5f12aa2fce2238620 — Blake Williams 1 year, 10 months ago 98379b0
Spaces vs tabs
1 file changed, 259 insertions(+), 259 deletions(-)

M aan_cgo.c
M aan_cgo.c => aan_cgo.c +259 -259
@@ 41,129 41,129 @@ void dct_8x8(uint8_t *luma, size_t luma_size, int luma_width, int xpos, int ypos
    static const float s6 = 0.6532814824381885;  // (cos(6*pi/16) / 2) / (1-a1)
    static const float s7 = 1.2814577238707532;  // (cos(7*pi/16) / 2) / (a5-a4+1)

	// Columns
	for (int i = 0; i < 8; i++) {
		int xposi = xpos + i;
		const float y0 = px(luma, luma_size, luma_width, xposi, ypos+0);
		const float y1 = px(luma, luma_size, luma_width, xposi, ypos+1);
		const float y2 = px(luma, luma_size, luma_width, xposi, ypos+2);
		const float y3 = px(luma, luma_size, luma_width, xposi, ypos+3);
		const float y4 = px(luma, luma_size, luma_width, xposi, ypos+4);
		const float y5 = px(luma, luma_size, luma_width, xposi, ypos+5);
		const float y6 = px(luma, luma_size, luma_width, xposi, ypos+6);
		const float y7 = px(luma, luma_size, luma_width, xposi, ypos+7);

		const float b0 = +y0 + y7;
		const float b1 = +y1 + y6;
		const float b2 = +y2 + y5;
		const float b3 = +y3 + y4;
		const float b4 = -y4 + y3;
		const float b5 = -y5 + y2;
		const float b6 = -y6 + y1;
		const float bcde7 = -y7 + y0;

		const float c0 = +b0 + b3;
		const float c1 = +b1 + b2;
		const float c2 = -b2 + b1;
		const float c3 = -b3 + b0;
		const float cd4 = -b4 - b5;
		const float cd5 = +b5 + b6;
		const float cd6 = +b6 + bcde7;

		const float d0 = +c0 + c1;
		const float defg1 = -c1 + c0;
		const float d2 = +c2 + c3;
		const float de3 = +c3;
		const float d8 = (cd4 + cd6) * a5;

		const float efg0 = d0;
		const float e2 = d2 * a1;
		const float ef4 = -cd4*a2 - d8;
		const float e5 = cd5 * a3;
		const float ef6 = cd6*a4 - d8;

		const float fg2 = e2 + de3;
		const float fg3 = de3 - e2;
		const float f5 = e5 + bcde7;
		const float f7 = bcde7 - e5;

		const float g4 = ef4 + f7;
		const float g5 = f5 + ef6;
		const float g6 = -ef6 + f5;
		const float g7 = f7 - ef4;

		mat[(0*8)+i] = efg0 * s0;
		mat[(4*8)+i] = defg1 * s4;
		mat[(2*8)+i] = fg2 * s2;
		mat[(6*8)+i] = fg3 * s6;
		mat[(5*8)+i] = g4 * s5;
		mat[(1*8)+i] = g5 * s1;
		mat[(7*8)+i] = g6 * s7;
		mat[(3*8)+i] = g7 * s3;
	}

	// Rows
    // Columns
    for (int i = 0; i < 8; i++) {
        int xposi = xpos + i;
        const float y0 = px(luma, luma_size, luma_width, xposi, ypos+0);
        const float y1 = px(luma, luma_size, luma_width, xposi, ypos+1);
        const float y2 = px(luma, luma_size, luma_width, xposi, ypos+2);
        const float y3 = px(luma, luma_size, luma_width, xposi, ypos+3);
        const float y4 = px(luma, luma_size, luma_width, xposi, ypos+4);
        const float y5 = px(luma, luma_size, luma_width, xposi, ypos+5);
        const float y6 = px(luma, luma_size, luma_width, xposi, ypos+6);
        const float y7 = px(luma, luma_size, luma_width, xposi, ypos+7);

        const float b0 = +y0 + y7;
        const float b1 = +y1 + y6;
        const float b2 = +y2 + y5;
        const float b3 = +y3 + y4;
        const float b4 = -y4 + y3;
        const float b5 = -y5 + y2;
        const float b6 = -y6 + y1;
        const float bcde7 = -y7 + y0;

        const float c0 = +b0 + b3;
        const float c1 = +b1 + b2;
        const float c2 = -b2 + b1;
        const float c3 = -b3 + b0;
        const float cd4 = -b4 - b5;
        const float cd5 = +b5 + b6;
        const float cd6 = +b6 + bcde7;

        const float d0 = +c0 + c1;
        const float defg1 = -c1 + c0;
        const float d2 = +c2 + c3;
        const float de3 = +c3;
        const float d8 = (cd4 + cd6) * a5;

        const float efg0 = d0;
        const float e2 = d2 * a1;
        const float ef4 = -cd4*a2 - d8;
        const float e5 = cd5 * a3;
        const float ef6 = cd6*a4 - d8;

        const float fg2 = e2 + de3;
        const float fg3 = de3 - e2;
        const float f5 = e5 + bcde7;
        const float f7 = bcde7 - e5;

        const float g4 = ef4 + f7;
        const float g5 = f5 + ef6;
        const float g6 = -ef6 + f5;
        const float g7 = f7 - ef4;

        mat[(0*8)+i] = efg0 * s0;
        mat[(4*8)+i] = defg1 * s4;
        mat[(2*8)+i] = fg2 * s2;
        mat[(6*8)+i] = fg3 * s6;
        mat[(5*8)+i] = g4 * s5;
        mat[(1*8)+i] = g5 * s1;
        mat[(7*8)+i] = g6 * s7;
        mat[(3*8)+i] = g7 * s3;
    }

    // Rows
    int yoff = 0;
	for (int i = 0; i < 8; i++) {
		const float x0 = mat[yoff+0];
		const float x1 = mat[yoff+1];
		const float x2 = mat[yoff+2];
		const float x3 = mat[yoff+3];
		const float x4 = mat[yoff+4];
		const float x5 = mat[yoff+5];
		const float x6 = mat[yoff+6];
		const float x7 = mat[yoff+7];

		const float b0 = +x0 + x7;
		const float b1 = +x1 + x6;
		const float b2 = +x2 + x5;
		const float b3 = +x3 + x4;
		const float b4 = -x4 + x3;
		const float b5 = -x5 + x2;
		const float b6 = -x6 + x1;
		const float bcde7 = -x7 + x0;

		const float c0 = +b0 + b3;
		const float c1 = +b1 + b2;
		const float c2 = -b2 + b1;
		const float c3 = -b3 + b0;
		const float cd4 = -b4 - b5;
		const float cd5 = +b5 + b6;
		const float cd6 = +b6 + bcde7;

		const float d0 = +c0 + c1;
		const float defg1 = -c1 + c0;
		const float d2 = +c2 + c3;
		const float de3 = +c3;
		const float d8 = (cd4 + cd6) * a5;

		const float efg0 = d0;
		const float e2 = d2 * a1;
		const float ef4 = -cd4*a2 - d8;
		const float e5 = cd5 * a3;
		const float ef6 = cd6*a4 - d8;

		const float fg2 = e2 + de3;
		const float fg3 = de3 - e2;
		const float f5 = e5 + bcde7;
		const float f7 = bcde7 - e5;

		const float g4 = ef4 + f7;
		const float g5 = f5 + ef6;
		const float g6 = -ef6 + f5;
		const float g7 = f7 - ef4;

		mat[yoff+0] = efg0 * s0;
		mat[yoff+4] = defg1 * s4;
		mat[yoff+2] = fg2 * s2;
		mat[yoff+6] = fg3 * s6;
		mat[yoff+5] = g4 * s5;
		mat[yoff+1] = g5 * s1;
		mat[yoff+7] = g6 * s7;
		mat[yoff+3] = g7 * s3;
    for (int i = 0; i < 8; i++) {
        const float x0 = mat[yoff+0];
        const float x1 = mat[yoff+1];
        const float x2 = mat[yoff+2];
        const float x3 = mat[yoff+3];
        const float x4 = mat[yoff+4];
        const float x5 = mat[yoff+5];
        const float x6 = mat[yoff+6];
        const float x7 = mat[yoff+7];

        const float b0 = +x0 + x7;
        const float b1 = +x1 + x6;
        const float b2 = +x2 + x5;
        const float b3 = +x3 + x4;
        const float b4 = -x4 + x3;
        const float b5 = -x5 + x2;
        const float b6 = -x6 + x1;
        const float bcde7 = -x7 + x0;

        const float c0 = +b0 + b3;
        const float c1 = +b1 + b2;
        const float c2 = -b2 + b1;
        const float c3 = -b3 + b0;
        const float cd4 = -b4 - b5;
        const float cd5 = +b5 + b6;
        const float cd6 = +b6 + bcde7;

        const float d0 = +c0 + c1;
        const float defg1 = -c1 + c0;
        const float d2 = +c2 + c3;
        const float de3 = +c3;
        const float d8 = (cd4 + cd6) * a5;

        const float efg0 = d0;
        const float e2 = d2 * a1;
        const float ef4 = -cd4*a2 - d8;
        const float e5 = cd5 * a3;
        const float ef6 = cd6*a4 - d8;

        const float fg2 = e2 + de3;
        const float fg3 = de3 - e2;
        const float f5 = e5 + bcde7;
        const float f7 = bcde7 - e5;

        const float g4 = ef4 + f7;
        const float g5 = f5 + ef6;
        const float g6 = -ef6 + f5;
        const float g7 = f7 - ef4;

        mat[yoff+0] = efg0 * s0;
        mat[yoff+4] = defg1 * s4;
        mat[yoff+2] = fg2 * s2;
        mat[yoff+6] = fg3 * s6;
        mat[yoff+5] = g4 * s5;
        mat[yoff+1] = g5 * s1;
        mat[yoff+7] = g6 * s7;
        mat[yoff+3] = g7 * s3;

        yoff += 8;
	}
    }
}

void idct_8x8(float *mat, int xpos, int ypos, uint8_t *luma, size_t luma_size, int luma_width) {


@@ 183,146 183,146 @@ void idct_8x8(float *mat, int xpos, int ypos, uint8_t *luma, size_t luma_size, i
    static const float s6 = 0.19134171618254492; // cos(6/16*pi) / 2.0
    static const float s7 = 0.09754516100806417; // cos(7/16*pi) / 2.0

	// Columns
	for (int i = 0; i < 8; i++) {
		float g0 = mat[8*0+i] * s0;
		float g1 = mat[4*8+i] * s4;
		float g2 = mat[2*8+i] * s2;
		float g3 = mat[6*8+i] * s6;
		float g4 = mat[5*8+i] * s5;
		float g5 = mat[1*8+i] * s1;
		float g6 = mat[7*8+i] * s7;
		float g7 = mat[3*8+i] * s3;

		float f0 = g0;
		float f1 = g1;
		float f2 = g2;
		float f3 = g3;
		float f4 = g4 - g7;
		float f5 = g5 + g6;
		float f6 = g5 - g6;
		float f7 = g4 + g7;

		float e0 = f0;
		float e1 = f1;
		float e2 = f2 - f3;
		float e3 = f2 + f3;
		float e4 = f4;
		float e5 = f5 - f7;
		float e6 = f6;
		float e7 = f5 + f7;
		float e8 = f4 + f6;

		float d0 = e0;
		float d1 = e1;
		float d2 = e2 * m1;
		float d3 = e3;
		float d4 = e4 * m2;
		float d5 = e5 * m3;
		float d6 = e6 * m4;
		float d7 = e7;
		float d8 = e8 * m5;

		float c0 = d0 + d1;
		float c1 = d0 - d1;
		float c2 = d2 - d3;
		float c3 = d3;
		float c4 = d4 + d8;
		float c5 = d5 + d7;
		float c6 = d6 - d8;
		float c7 = d7;
		float c8 = c5 - c6;

		float b0 = c0 + c3;
		float b1 = c1 + c2;
		float b2 = c1 - c2;
		float b3 = c0 - c3;
		float b4 = c4 - c8;
		float b5 = c8;
		float b6 = c6 - c7;
		float b7 = c7;

		mat[0*8+i] = b0 + b7;
		mat[1*8+i] = b1 + b6;
		mat[2*8+i] = b2 + b5;
		mat[3*8+i] = b3 + b4;
		mat[4*8+i] = b3 - b4;
		mat[5*8+i] = b2 - b5;
		mat[6*8+i] = b1 - b6;
		mat[7*8+i] = b0 - b7;
	}

	// Rows
    // Columns
    for (int i = 0; i < 8; i++) {
        float g0 = mat[8*0+i] * s0;
        float g1 = mat[4*8+i] * s4;
        float g2 = mat[2*8+i] * s2;
        float g3 = mat[6*8+i] * s6;
        float g4 = mat[5*8+i] * s5;
        float g5 = mat[1*8+i] * s1;
        float g6 = mat[7*8+i] * s7;
        float g7 = mat[3*8+i] * s3;

        float f0 = g0;
        float f1 = g1;
        float f2 = g2;
        float f3 = g3;
        float f4 = g4 - g7;
        float f5 = g5 + g6;
        float f6 = g5 - g6;
        float f7 = g4 + g7;

        float e0 = f0;
        float e1 = f1;
        float e2 = f2 - f3;
        float e3 = f2 + f3;
        float e4 = f4;
        float e5 = f5 - f7;
        float e6 = f6;
        float e7 = f5 + f7;
        float e8 = f4 + f6;

        float d0 = e0;
        float d1 = e1;
        float d2 = e2 * m1;
        float d3 = e3;
        float d4 = e4 * m2;
        float d5 = e5 * m3;
        float d6 = e6 * m4;
        float d7 = e7;
        float d8 = e8 * m5;

        float c0 = d0 + d1;
        float c1 = d0 - d1;
        float c2 = d2 - d3;
        float c3 = d3;
        float c4 = d4 + d8;
        float c5 = d5 + d7;
        float c6 = d6 - d8;
        float c7 = d7;
        float c8 = c5 - c6;

        float b0 = c0 + c3;
        float b1 = c1 + c2;
        float b2 = c1 - c2;
        float b3 = c0 - c3;
        float b4 = c4 - c8;
        float b5 = c8;
        float b6 = c6 - c7;
        float b7 = c7;

        mat[0*8+i] = b0 + b7;
        mat[1*8+i] = b1 + b6;
        mat[2*8+i] = b2 + b5;
        mat[3*8+i] = b3 + b4;
        mat[4*8+i] = b3 - b4;
        mat[5*8+i] = b2 - b5;
        mat[6*8+i] = b1 - b6;
        mat[7*8+i] = b0 - b7;
    }

    // Rows
    int yoff = 0;
	for (int i = 0; i < 8; i++) {
		float g0 = mat[yoff+0] * s0;
		float g1 = mat[yoff+4] * s4;
		float g2 = mat[yoff+2] * s2;
		float g3 = mat[yoff+6] * s6;
		float g4 = mat[yoff+5] * s5;
		float g5 = mat[yoff+1] * s1;
		float g6 = mat[yoff+7] * s7;
		float g7 = mat[yoff+3] * s3;

		float f0 = g0;
		float f1 = g1;
		float f2 = g2;
		float f3 = g3;
		float f4 = g4 - g7;
		float f5 = g5 + g6;
		float f6 = g5 - g6;
		float f7 = g4 + g7;

		float e0 = f0;
		float e1 = f1;
		float e2 = f2 - f3;
		float e3 = f2 + f3;
		float e4 = f4;
		float e5 = f5 - f7;
		float e6 = f6;
		float e7 = f5 + f7;
		float e8 = f4 + f6;

		float d0 = e0;
		float d1 = e1;
		float d2 = e2 * m1;
		float d3 = e3;
		float d4 = e4 * m2;
		float d5 = e5 * m3;
		float d6 = e6 * m4;
		float d7 = e7;
		float d8 = e8 * m5;

		float c0 = d0 + d1;
		float c1 = d0 - d1;
		float c2 = d2 - d3;
		float c3 = d3;
		float c4 = d4 + d8;
		float c5 = d5 + d7;
		float c6 = d6 - d8;
		float c7 = d7;
		float c8 = c5 - c6;

		float b0 = c0 + c3;
		float b1 = c1 + c2;
		float b2 = c1 - c2;
		float b3 = c0 - c3;
		float b4 = c4 - c8;
		float b5 = c8;
		float b6 = c6 - c7;
		float b7 = c7;

		int yposi = ypos + i;

		put(luma, luma_size, luma_width, xpos+0, yposi, b0+b7);
		put(luma, luma_size, luma_width, xpos+1, yposi, b1+b6);
		put(luma, luma_size, luma_width, xpos+2, yposi, b2+b5);
		put(luma, luma_size, luma_width, xpos+3, yposi, b3+b4);
		put(luma, luma_size, luma_width, xpos+4, yposi, b3-b4);
		put(luma, luma_size, luma_width, xpos+5, yposi, b2-b5);
		put(luma, luma_size, luma_width, xpos+6, yposi, b1-b6);
		put(luma, luma_size, luma_width, xpos+7, yposi, b0-b7);
    for (int i = 0; i < 8; i++) {
        float g0 = mat[yoff+0] * s0;
        float g1 = mat[yoff+4] * s4;
        float g2 = mat[yoff+2] * s2;
        float g3 = mat[yoff+6] * s6;
        float g4 = mat[yoff+5] * s5;
        float g5 = mat[yoff+1] * s1;
        float g6 = mat[yoff+7] * s7;
        float g7 = mat[yoff+3] * s3;

        float f0 = g0;
        float f1 = g1;
        float f2 = g2;
        float f3 = g3;
        float f4 = g4 - g7;
        float f5 = g5 + g6;
        float f6 = g5 - g6;
        float f7 = g4 + g7;

        float e0 = f0;
        float e1 = f1;
        float e2 = f2 - f3;
        float e3 = f2 + f3;
        float e4 = f4;
        float e5 = f5 - f7;
        float e6 = f6;
        float e7 = f5 + f7;
        float e8 = f4 + f6;

        float d0 = e0;
        float d1 = e1;
        float d2 = e2 * m1;
        float d3 = e3;
        float d4 = e4 * m2;
        float d5 = e5 * m3;
        float d6 = e6 * m4;
        float d7 = e7;
        float d8 = e8 * m5;

        float c0 = d0 + d1;
        float c1 = d0 - d1;
        float c2 = d2 - d3;
        float c3 = d3;
        float c4 = d4 + d8;
        float c5 = d5 + d7;
        float c6 = d6 - d8;
        float c7 = d7;
        float c8 = c5 - c6;

        float b0 = c0 + c3;
        float b1 = c1 + c2;
        float b2 = c1 - c2;
        float b3 = c0 - c3;
        float b4 = c4 - c8;
        float b5 = c8;
        float b6 = c6 - c7;
        float b7 = c7;

        int yposi = ypos + i;

        put(luma, luma_size, luma_width, xpos+0, yposi, b0+b7);
        put(luma, luma_size, luma_width, xpos+1, yposi, b1+b6);
        put(luma, luma_size, luma_width, xpos+2, yposi, b2+b5);
        put(luma, luma_size, luma_width, xpos+3, yposi, b3+b4);
        put(luma, luma_size, luma_width, xpos+4, yposi, b3-b4);
        put(luma, luma_size, luma_width, xpos+5, yposi, b2-b5);
        put(luma, luma_size, luma_width, xpos+6, yposi, b1-b6);
        put(luma, luma_size, luma_width, xpos+7, yposi, b0-b7);

        yoff += 8;
	}
    }
}