From 8bc6737dea4e46a50141fdd58267713b7110efa9 Mon Sep 17 00:00:00 2001
From: Egon Elbre
Date: Thu, 5 Jan 2023 14:22:13 +0200
Subject: [PATCH] gpu: optimize encodeQuadTo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

name             old time/op  new time/op  delta
EncodeQuadTo-32  35.4ns ± 1%  11.9ns ± 3%  -66.34%  (p=0.008 n=5+5)

Signed-off-by: Egon Elbre
---
 gpu/clip.go      | 45 +++++++++++++++++++++++++++++++++++++--------
 gpu/clip_test.go | 21 +++++++++++++++++++++
 gpu/path.go      | 18 ++++++++++--------
 3 files changed, 68 insertions(+), 16 deletions(-)
 create mode 100644 gpu/clip_test.go

diff --git a/gpu/clip.go b/gpu/clip.go
index e9dfa3f8..292a6c04 100644
--- a/gpu/clip.go
+++ b/gpu/clip.go
@@ -1,6 +1,9 @@
 package gpu
 
 import (
+	"encoding/binary"
+	"math"
+
 	"gioui.org/internal/f32"
 	"gioui.org/internal/stroke"
 )
@@ -12,16 +15,42 @@ type quadSplitter struct {
 }
 
 func encodeQuadTo(data []byte, meta uint32, from, ctrl, to f32.Point) {
-	// NW.
-	encodeVertex(data, meta, -1, 1, from, ctrl, to)
-	// NE.
-	encodeVertex(data[vertStride:], meta, 1, 1, from, ctrl, to)
-	// SW.
-	encodeVertex(data[vertStride*2:], meta, -1, -1, from, ctrl, to)
-	// SE.
-	encodeVertex(data[vertStride*3:], meta, 1, -1, from, ctrl, to)
+	// inlined code:
+	// encodeVertex(data, meta, -1, 1, from, ctrl, to)
+	// encodeVertex(data[vertStride:], meta, 1, 1, from, ctrl, to)
+	// encodeVertex(data[vertStride*2:], meta, -1, -1, from, ctrl, to)
+	// encodeVertex(data[vertStride*3:], meta, 1, -1, from, ctrl, to)
+	// this code needs to stay in sync with `vertex.encode`.
+
+	bo := binary.LittleEndian
+	data = data[:vertStride*4]
+
+	// encode the main template
+	bo.PutUint32(data[4:8], meta)
+	bo.PutUint32(data[8:12], math.Float32bits(from.X))
+	bo.PutUint32(data[12:16], math.Float32bits(from.Y))
+	bo.PutUint32(data[16:20], math.Float32bits(ctrl.X))
+	bo.PutUint32(data[20:24], math.Float32bits(ctrl.Y))
+	bo.PutUint32(data[24:28], math.Float32bits(to.X))
+	bo.PutUint32(data[28:32], math.Float32bits(to.Y))
+
+	copy(data[vertStride*1:vertStride*2], data[vertStride*0:vertStride*1])
+	copy(data[vertStride*2:vertStride*3], data[vertStride*0:vertStride*1])
+	copy(data[vertStride*3:vertStride*4], data[vertStride*0:vertStride*1])
+
+	bo.PutUint32(data[vertStride*0:vertStride*0+4], math.Float32bits(nwCorner))
+	bo.PutUint32(data[vertStride*1:vertStride*1+4], math.Float32bits(neCorner))
+	bo.PutUint32(data[vertStride*2:vertStride*2+4], math.Float32bits(swCorner))
+	bo.PutUint32(data[vertStride*3:vertStride*3+4], math.Float32bits(seCorner))
 }
 
+const (
+	nwCorner = 1*0.25 + 0*0.5
+	neCorner = 1*0.25 + 1*0.5
+	swCorner = 0*0.25 + 0*0.5
+	seCorner = 0*0.25 + 1*0.5
+)
+
 func encodeVertex(data []byte, meta uint32, cornerx, cornery int16, from, ctrl, to f32.Point) {
 	var corner float32
 	if cornerx == 1 {
diff --git a/gpu/clip_test.go b/gpu/clip_test.go
new file mode 100644
index 00000000..ef4ddbe2
--- /dev/null
+++ b/gpu/clip_test.go
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: Unlicense OR MIT
+
+package gpu
+
+import (
+	"testing"
+
+	"gioui.org/internal/f32"
+)
+
+func BenchmarkEncodeQuadTo(b *testing.B) {
+	var data [vertStride * 4]byte
+	for i := 0; i < b.N; i++ {
+		v := float32(i)
+		encodeQuadTo(data[:], 123,
+			f32.Point{X: v, Y: v},
+			f32.Point{X: v, Y: v},
+			f32.Point{X: v, Y: v},
+		)
+	}
+}
diff --git a/gpu/path.go b/gpu/path.go
index c65c81c1..79248fd6 100644
--- a/gpu/path.go
+++ b/gpu/path.go
@@ -113,16 +113,18 @@ type vertex struct {
 	ToX, ToY float32
 }
 
+// encode needs to stay in-sync with the code in clip.go encodeQuadTo.
 func (v vertex) encode(d []byte, maxy uint32) {
+	d = d[0:32]
 	bo := binary.LittleEndian
-	bo.PutUint32(d[0:], math.Float32bits(v.Corner))
-	bo.PutUint32(d[4:], maxy)
-	bo.PutUint32(d[8:], math.Float32bits(v.FromX))
-	bo.PutUint32(d[12:], math.Float32bits(v.FromY))
-	bo.PutUint32(d[16:], math.Float32bits(v.CtrlX))
-	bo.PutUint32(d[20:], math.Float32bits(v.CtrlY))
-	bo.PutUint32(d[24:], math.Float32bits(v.ToX))
-	bo.PutUint32(d[28:], math.Float32bits(v.ToY))
+	bo.PutUint32(d[0:4], math.Float32bits(v.Corner))
+	bo.PutUint32(d[4:8], maxy)
+	bo.PutUint32(d[8:12], math.Float32bits(v.FromX))
+	bo.PutUint32(d[12:16], math.Float32bits(v.FromY))
+	bo.PutUint32(d[16:20], math.Float32bits(v.CtrlX))
+	bo.PutUint32(d[20:24], math.Float32bits(v.CtrlY))
+	bo.PutUint32(d[24:28], math.Float32bits(v.ToX))
+	bo.PutUint32(d[28:32], math.Float32bits(v.ToY))
 }
 
 const (
-- 
2.38.5