~eliasnaur/gio

8bc6737dea4e46a50141fdd58267713b7110efa9 — Egon Elbre 4 months ago c81a1f9
gpu: optimize encodeQuadTo

name             old time/op  new time/op  delta
EncodeQuadTo-32  35.4ns ± 1%  11.9ns ± 3%  -66.34%  (p=0.008 n=5+5)

Signed-off-by: Egon Elbre <egonelbre@gmail.com>
3 files changed, 68 insertions(+), 16 deletions(-)

M gpu/clip.go
A gpu/clip_test.go
M gpu/path.go
M gpu/clip.go => gpu/clip.go +37 -8
@@ 1,6 1,9 @@
package gpu

import (
	"encoding/binary"
	"math"

	"gioui.org/internal/f32"
	"gioui.org/internal/stroke"
)


@@ 12,16 15,42 @@ type quadSplitter struct {
}

// encodeQuadTo writes the four corner vertices of a quad, in the order NW, NE,
// SW, SE, into the first 4*vertStride bytes of data.
//
// All four vertices carry the same meta word and the same from, ctrl and to
// points; they differ only in the corner value stored in their first word.
//
// The body is a hand-inlined version of:
//
//	encodeVertex(data, meta, -1, 1, from, ctrl, to)
//	encodeVertex(data[vertStride:], meta, 1, 1, from, ctrl, to)
//	encodeVertex(data[vertStride*2:], meta, -1, -1, from, ctrl, to)
//	encodeVertex(data[vertStride*3:], meta, 1, -1, from, ctrl, to)
//
// and needs to stay in sync with `vertex.encode`.
func encodeQuadTo(data []byte, meta uint32, from, ctrl, to f32.Point) {
	bo := binary.LittleEndian
	// Reslice up front so that a buffer that is too small fails here, before
	// anything has been written.
	data = data[:vertStride*4]

	// Encode the fields shared by all four vertices into the first vertex.
	// Its corner word (bytes 0-4) is filled in at the end.
	bo.PutUint32(data[4:8], meta)
	bo.PutUint32(data[8:12], math.Float32bits(from.X))
	bo.PutUint32(data[12:16], math.Float32bits(from.Y))
	bo.PutUint32(data[16:20], math.Float32bits(ctrl.X))
	bo.PutUint32(data[20:24], math.Float32bits(ctrl.Y))
	bo.PutUint32(data[24:28], math.Float32bits(to.X))
	bo.PutUint32(data[28:32], math.Float32bits(to.Y))

	// Replicate the first vertex into the remaining three slots.
	copy(data[vertStride*1:vertStride*2], data[vertStride*0:vertStride*1])
	copy(data[vertStride*2:vertStride*3], data[vertStride*0:vertStride*1])
	copy(data[vertStride*3:vertStride*4], data[vertStride*0:vertStride*1])

	// Stamp each vertex with its own corner value.
	bo.PutUint32(data[vertStride*0:vertStride*0+4], math.Float32bits(nwCorner))
	bo.PutUint32(data[vertStride*1:vertStride*1+4], math.Float32bits(neCorner))
	bo.PutUint32(data[vertStride*2:vertStride*2+4], math.Float32bits(swCorner))
	bo.PutUint32(data[vertStride*3:vertStride*3+4], math.Float32bits(seCorner))
}

// Corner values written by encodeQuadTo into the first word of each quad
// vertex. A northern corner contributes 0.25 and an eastern corner contributes
// 0.5; each constant is the sum of the contributions of its two directions.
const (
	swCorner = 0.0        // south-west
	seCorner = 0.5        // south-east
	nwCorner = 0.25       // north-west
	neCorner = 0.25 + 0.5 // north-east
)

func encodeVertex(data []byte, meta uint32, cornerx, cornery int16, from, ctrl, to f32.Point) {
	var corner float32
	if cornerx == 1 {

A gpu/clip_test.go => gpu/clip_test.go +21 -0
@@ 0,0 1,21 @@
// SPDX-License-Identifier: Unlicense OR MIT

package gpu

import (
	"testing"

	"gioui.org/internal/f32"
)

// BenchmarkEncodeQuadTo measures encodeQuadTo writing one quad (four
// vertices) into a buffer that is reused across iterations. Each iteration
// uses the loop index as every coordinate of the from, ctrl and to points.
func BenchmarkEncodeQuadTo(b *testing.B) {
	const meta = 123

	var buf [vertStride * 4]byte
	for n := 0; n < b.N; n++ {
		c := float32(n)
		pt := f32.Point{X: c, Y: c}
		encodeQuadTo(buf[:], meta, pt, pt, pt)
	}
}

M gpu/path.go => gpu/path.go +10 -8
@@ 113,16 113,18 @@ type vertex struct {
	ToX, ToY     float32
}

// encode serializes v into the first 32 bytes of d as eight consecutive
// little-endian 32-bit words, in this order: Corner, maxy, FromX, FromY,
// CtrlX, CtrlY, ToX, ToY. The float32 fields are stored as their IEEE 754 bit
// patterns. d must have room for at least 32 bytes.
//
// encode needs to stay in sync with the code in clip.go encodeQuadTo.
func (v vertex) encode(d []byte, maxy uint32) {
	// Reslice once so every fixed-offset write below is known to be in range.
	d = d[0:32]
	bo := binary.LittleEndian
	bo.PutUint32(d[0:4], math.Float32bits(v.Corner))
	bo.PutUint32(d[4:8], maxy)
	bo.PutUint32(d[8:12], math.Float32bits(v.FromX))
	bo.PutUint32(d[12:16], math.Float32bits(v.FromY))
	bo.PutUint32(d[16:20], math.Float32bits(v.CtrlX))
	bo.PutUint32(d[20:24], math.Float32bits(v.CtrlY))
	bo.PutUint32(d[24:28], math.Float32bits(v.ToX))
	bo.PutUint32(d[28:32], math.Float32bits(v.ToY))
}

const (