~shabbyrobe/grugdct

b9304e5f8f34a3259ebd88cff4cb14ab9fd83f05 — Blake Williams 1 year, 8 months ago 6b7d149
Save some muls
4 files changed, 56 insertions(+), 53 deletions(-)

M basic_8x8.go
M basic_std.go
M cachedcos_8x8.go
M cachedcos_std.go
M basic_8x8.go => basic_8x8.go +13 -10
@@ 5,6 5,8 @@ import (
	"math"
)

// Basic8x8 is an empty struct that carries the DCT8x8Into and IDCT8x8Into
// methods for 8x8 blocks; it holds no state of its own.
//
// NOTE: The "Basic" versions are abominably slow. It takes 22 seconds to quantize an
// 8 MP image using Basic8x8.
type Basic8x8 struct{}

// Compile-time assertion that Basic8x8 satisfies ImageTransform8x8F32.
var _ ImageTransform8x8F32 = Basic8x8{}


@@ 28,13 30,7 @@ func (bas Basic8x8) DCT8x8Into(img Image, xpos int, ypos int, mat *Matrix8x8F32)

	for v, voff := 0, 0; v < 8; v, voff = v+1, voff+8 {
		for u := 0; u < 8; u++ {
			var cu, cv, z float32 = 1, 1, 0
			if u == 0 {
				cu = oneOverSqrt2
			}
			if v == 0 {
				cv = oneOverSqrt2
			}
			var z float32

			for y := 0; y < 8; y++ {
				for x := 0; x < 8; x++ {


@@ 50,14 46,21 @@ func (bas Basic8x8) DCT8x8Into(img Image, xpos int, ypos int, mat *Matrix8x8F32)
				}
			}

			mat[voff+u] = scale * cu * cv * z
			q := scale * z
			if u == 0 {
				q *= oneOverSqrt2
			}
			if v == 0 {
				q *= oneOverSqrt2
			}
			mat[voff+u] = q
		}
	}
}

func (bas Basic8x8) IDCT8x8Into(mat *Matrix8x8F32, xpos int, ypos int, into Image) {
	const oneOverSqrt2 = 0.7071067811865475      // 1 / sqrt(2)
	const scale = 0.25                           // 1 / sqrt(8*8) / 2
	const scale = 4.0                            // sqrt(8*8) / 2
	const piDivByDoubleDim = 0.19634954084936207 // pi / (8*2)

	for y := 0; y < 8; y++ {


@@ 80,7 83,7 @@ func (bas Basic8x8) IDCT8x8Into(mat *Matrix8x8F32, xpos int, ypos int, into Imag
				}
			}

			z *= scale
			z /= scale
			if z > 255.0 {
				z = 255.0
			}

M basic_std.go => basic_std.go +15 -16
@@ 42,13 42,7 @@ func (bas Basic) DCTInto(img Image, xpos int, ypos int, mat MatrixF64) {

	for v, voff := 0, 0; v < h; v, voff = v+1, voff+bas.W {
		for u := 0; u < w; u++ {
			var cu, cv, z float64 = 1, 1, 0
			if u == 0 {
				cu = oneOverSqrt2
			}
			if v == 0 {
				cv = oneOverSqrt2
			}
			var z float64

			for y := 0; y < h; y++ {
				for x := 0; x < w; x++ {


@@ 64,7 58,14 @@ func (bas Basic) DCTInto(img Image, xpos int, ypos int, mat MatrixF64) {
				}
			}

			mat.V[voff+u] = bas.dctScale * cu * cv * z
			q := bas.dctScale * z
			if u == 0 {
				q *= oneOverSqrt2
			}
			if v == 0 {
				q *= oneOverSqrt2
			}
			mat.V[voff+u] = q
		}
	}
}


@@ 80,18 81,16 @@ func (bas Basic) IDCTInto(mat MatrixF64, xpos int, ypos int, into Image) {

			for v, voff := 0, 0; v < h; v, voff = v+1, voff+bas.W {
				for u := 0; u < w; u++ {
					var cu, cv float64 = 1, 1
					s := mat.V[voff+u]
					q := s *
						math.Cos(float64(2*x+1)*float64(u)*bas.piDivByDoubleWidth) *
						math.Cos(float64(2*y+1)*float64(v)*bas.piDivByDoubleHeight)
					if u == 0 {
						cu = oneOverSqrt2
						q *= oneOverSqrt2
					}
					if v == 0 {
						cv = oneOverSqrt2
						q *= oneOverSqrt2
					}
					s := mat.V[voff+u]
					q := cu * cv * s *
						math.Cos(float64(2*x+1)*float64(u)*bas.piDivByDoubleWidth) *
						math.Cos(float64(2*y+1)*float64(v)*bas.piDivByDoubleHeight)

					z += q
				}
			}

M cachedcos_8x8.go => cachedcos_8x8.go +14 -13
@@ 37,13 37,7 @@ func (cos CachedCosines8x8) DCT8x8Into(img Image, xpos int, ypos int, mat *Matri

	for v, voff := 0, 0; v < 8; v, voff = v+1, voff+8 {
		for u := 0; u < 8; u++ {
			var cu, cv, z float32 = 1, 1, 0
			if u == 0 {
				cu = oneOverSqrt2
			}
			if v == 0 {
				cv = oneOverSqrt2
			}
			var z float32
			for y := 0; y < 8; y++ {
				for x := 0; x < 8; x++ {
					xp, yp := xpos+x, ypos+y


@@ 54,7 48,14 @@ func (cos CachedCosines8x8) DCT8x8Into(img Image, xpos int, ypos int, mat *Matri
				}
			}

			mat[voff+u] = scale * cu * cv * z
			q := scale * z
			if u == 0 {
				q *= oneOverSqrt2
			}
			if v == 0 {
				q *= oneOverSqrt2
			}
			mat[voff+u] = q
		}
	}
}


@@ 69,15 70,15 @@ func (cos CachedCosines8x8) IDCT8x8Into(mat *Matrix8x8F32, xpos int, ypos int, i

			for v, voff := 0, 0; v < 8; v, voff = v+1, voff+8 {
				for u := 0; u < 8; u++ {
					var cu, cv float32 = 1, 1
					s := mat[voff+u]
					q := s * cosines8x8LUT32[x][u] * cosines8x8LUT32[y][v]
					if u == 0 {
						cu = oneOverSqrt2
						q *= oneOverSqrt2
					}
					if v == 0 {
						cv = oneOverSqrt2
						q *= oneOverSqrt2
					}
					s := mat[voff+u]
					z += cu * cv * s * cosines8x8LUT32[x][u] * cosines8x8LUT32[y][v]
					z += q
				}
			}


M cachedcos_std.go => cachedcos_std.go +14 -14
@@ 75,14 75,7 @@ func (cos CachedCosines) DCTInto(img Image, xpos int, ypos int, mat MatrixF64) {

	for v, voff := 0, 0; v < h; v, voff = v+1, voff+w {
		for u := 0; u < w; u++ {
			var cu, cv, z float64 = 1, 1, 0
			if u == 0 {
				cu = oneOverSqrt2
			}
			if v == 0 {
				cv = oneOverSqrt2
			}

			var z float64
			for y := 0; y < h; y++ {
				for x := 0; x < w; x++ {
					xp, yp := xpos+x, ypos+y


@@ 93,7 86,14 @@ func (cos CachedCosines) DCTInto(img Image, xpos int, ypos int, mat MatrixF64) {
				}
			}

			mat.V[voff+u] = cos.dctScale * cu * cv * z
			q := cos.dctScale * z
			if u == 0 {
				q *= oneOverSqrt2
			}
			if v == 0 {
				q *= oneOverSqrt2
			}
			mat.V[voff+u] = q
		}
	}
}


@@ 109,15 109,15 @@ func (dct CachedCosines) IDCTInto(mat MatrixF64, xpos int, ypos int, into Image)

			for v, voff := 0, 0; v < h; v, voff = v+1, voff+dct.W {
				for u := 0; u < w; u++ {
					var cu, cv float64 = 1, 1
					s := mat.V[voff+u]
					q := s * dct.wcos[u*w+x] * dct.hcos[v*h+y]
					if u == 0 {
						cu = oneOverSqrt2
						q *= oneOverSqrt2
					}
					if v == 0 {
						cv = oneOverSqrt2
						q *= oneOverSqrt2
					}
					s := mat.V[voff+u]
					z += cu * cv * s * dct.wcos[u*w+x] * dct.hcos[v*h+y]
					z += q
				}
			}