~eliasnaur/gio

fca881a3dbf47527b908476a6eb863ccd8711aba — Elias Naur 5 days ago ad47188
compute: fix lowered workgroup size

Signed-off-by: Elias Naur <mail@eliasnaur.com>
3 files changed, 49 insertions(+), 36 deletions(-)

M gpu/compute.go
M gpu/shaders.go
M gpu/shaders/setup.h
M gpu/compute.go => gpu/compute.go +40 -27
@@ 6,10 6,7 @@ import (
	"encoding/binary"
	"image"
	"image/color"
	"io/ioutil"
	"log"
	"math"
	"os"

	"gioui.org/f32"
	"gioui.org/gpu/backend"


@@ 56,12 53,13 @@ type encoder struct {
const (
	MB = 1024 * 1024

	widthInTiles  = 128
	heightInTiles = 96
	tileSize      = 16
	widthInTiles  = 64
	heightInTiles = 64
	tileWidthPx   = 32
	tileHeightPx  = 32

	width  = widthInTiles * tileSize
	height = heightInTiles * tileSize
	width  = widthInTiles * tileWidthPx
	height = heightInTiles * tileHeightPx

	ptclInitialAlloc = 1024



@@ 157,7 155,7 @@ func (g *Compute) Render() {
	}
}

var first = true
//var first = true

func (g *Compute) dumpImage() error {
	srcFBO, err := g.ctx.NewFramebuffer(g.buffers.image, 0)


@@ 180,7 178,7 @@ func (g *Compute) dumpImage() error {
	r := image.Rect(0, 0, width, height)
	g.ctx.BlitFramebuffer(g.defFBO, srcFBO, r, r)

	if first {
	/*if first {
		first = false
		compareBuffer(g.ctx, "state", g.buffers.state, stateSize)
		compareBuffer(g.ctx, "anno", g.buffers.anno, 12*MB)


@@ 188,7 186,7 @@ func (g *Compute) dumpImage() error {
		compareBuffer(g.ctx, "tile", g.buffers.tile, 12*MB)
		compareBuffer(g.ctx, "bin", g.buffers.bin, 12*MB)
		compareBuffer(g.ctx, "ptcl", g.buffers.ptcl, 12*MB)
	}
	}*/
	//g.ctx.BlitFramebuffer(dstFBO, srcFBO, r, r)
	/*if err := dstFBO.ReadPixels(r, pixels); err != nil {
		return err


@@ 207,10 205,12 @@ func (g *Compute) dumpImage() error {
}

func (g *Compute) render() {
	/*g.enc.scene = append(g.enc.scene, tiger...)
	g.enc.scene = append(g.enc.scene, tiger...)
	g.enc.npath += 183
	g.enc.npathseg += 2459*/
	g.enc.npathseg += 2459

	g.buffers.scene.Upload(g.enc.scene)
	//log.Println(g.enc.npath, g.enc.npathseg, g.enc.numElements())

	tileAllocStart := ((g.enc.npath + 31) & ^31) * pathElemSize
	g.buffers.tileAlloc.Upload(unsafe.BytesView(


@@ 230,7 230,10 @@ func (g *Compute) render() {
	const kernel4OutputUnit = 2 // from kernel4.comp
	g.ctx.BindImageTexture(kernel4OutputUnit, g.buffers.image, backend.AccessWrite, backend.TextureFormatRGBA8)

	const wgSize = 128
	const (
		wgSize = 128
		tileY  = 8
	)

	g.ctx.MemoryBarrier()
	g.ctx.BindProgram(g.programs.elements)


@@ 249,10 252,10 @@ func (g *Compute) render() {
	g.ctx.DispatchCompute((g.enc.npath+wgSize-1)/wgSize, 1, 1)
	g.ctx.MemoryBarrier()
	g.ctx.BindProgram(g.programs.coarse)
	g.ctx.DispatchCompute(width/256, height/wgSize, 1)
	g.ctx.DispatchCompute(widthInTiles/16, heightInTiles/tileY, 1)
	g.ctx.MemoryBarrier()
	g.ctx.BindProgram(g.programs.kernel4)
	g.ctx.DispatchCompute((width+tileSize-1)/tileSize, (height+tileSize-1)/tileSize, 1)
	g.ctx.DispatchCompute((width+tileWidthPx-1)/tileWidthPx, (height+tileHeightPx-1)/tileHeightPx, 1)
	g.ctx.MemoryBarrier()
}



@@ 309,29 312,33 @@ const (
	elemTransform   = 10
)

var angle float32 = math.Pi * 3 / 4
var angle float32 = 0 //math.Pi * 3 / 4

func (g *Compute) encode() {
	g.enc.reset()

	g.enc.transform(f32.Affine2D{}.Rotate(f32.Pt(width/2, height/2), angle))
	angle += .01
	//g.enc.transform(f32.Affine2D{}.Offset(f32.Pt(-500, -500)).Rotate(f32.Pt(104, 100), angle))
	g.enc.transform(f32.Affine2D{}.Rotate(f32.Pt(800, 600), angle).Offset(f32.Pt(200, 400)))
	angle += .005
	g.enc.lineWidth(45.0)
	/*g.enc.line(f32.Pt(100, 200), f32.Pt(200, 200), true)
	g.enc.line(f32.Pt(200, 200), f32.Pt(200, 50), true)
	g.enc.line(f32.Pt(200, 50), f32.Pt(100, 200), true)*/

	//g.enc.quad(f32.Pt(000, 400), f32.Pt(450, 200), f32.Pt(200, 50), true)
	//g.enc.line(f32.Pt(000, 400), f32.Pt(200, 50), true)
	g.enc.line(f32.Pt(000, 400), f32.Pt(950, 000), true)
	g.enc.line(f32.Pt(000, 400), f32.Pt(200, 50), true)
	//g.enc.line(f32.Pt(000, 546), f32.Pt(0, 548), true)

	//g.enc.quad(f32.Pt(2000, 1500), f32.Pt(450, 200), f32.Pt(200, 50), true)
	//g.enc.line(f32.Pt(400, 400), f32.Pt(200, 50), true)
	//g.enc.line(f32.Pt(20, 50), f32.Pt(800, 800), true)
	/*g.enc.line(f32.Pt(100, 100), f32.Pt(200, 150), true)*/
	//g.enc.stroke(color.RGBA{A: 0xff, R: 0xff, G: 0xff, B: 0xff})
	g.enc.stroke(color.RGBA{A: 0xff, R: 0x00, G: 0x00, B: 0xff})
	g.enc.stroke(color.RGBA{A: 0xde, R: 0xad, G: 0xbe, B: 0xef})

	/*g.enc.quad(f32.Pt(000, 400), f32.Pt(450, 200), f32.Pt(200, 50), true)
	g.enc.line(f32.Pt(100, 400), f32.Pt(950, 000), true)
	g.enc.stroke(color.RGBA{A: 0xff, R: 0x00, G: 0xff, B: 0xff})*/
	//g.enc.transform(f32.Affine2D{}.Scale(f32.Pt(width/2, height/2), f32.Pt(1, -1)))
	//g.enc.transform(f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(1, 2.1)))
	//g.enc.transform(f32.Affine2D{}.Offset(f32.Pt(-200, 500)))


@@ 429,7 436,7 @@ func (e *encoder) cmd(cmd []byte) {
	e.scene = append(e.scene, cmd...)
}

func compareBuffer(ctx backend.Device, name string, buffer backend.Buffer, size int) {
/*func compareBuffer(ctx backend.Device, name string, buffer backend.Buffer, size int) {
	buf := make([]byte, size)
	buffer.Download(buf)
	fname := name + ".dump"


@@ 439,17 446,23 @@ func compareBuffer(ctx backend.Device, name string, buffer backend.Buffer, size 
			panic(err)
		}
		diffs := 0
		for i, b := range buf {
			if compare[i] != b {
		nonzeros := 0
		for i := 0; i < len(buf); i += 4 {
			b := bo.Uint32(buf[i:])
			if b != 0 {
				nonzeros++
			}
			c := bo.Uint32(compare[i:])
			if c != b {
				diffs++
			}
		}
		if diffs != 0 {
			log.Printf("%d diffs for %s\n", diffs, name)
			log.Printf("%d diffs, %d non-zeros for %s\n", diffs, nonzeros, name)
		}
	} else {
		if err := ioutil.WriteFile(fname, buf, 0600); err != nil {
			panic(err)
		}
	}
}
}*/

M gpu/shaders.go => gpu/shaders.go +5 -5
@@ 11,7 11,7 @@ var (
	}
	shader_binning_comp = backend.ShaderSources{
		Name:      "binning.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    uint rgba_color;\n    vec4 bbox;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct BinInstanceRef\n{\n    uint offset;\n};\n\nstruct BinInstance\n{\n    uint element_ix;\n    float right_edge;\n};\n\nlayout(binding = 0, std430) buffer AnnotatedBuf\n{\n    uint annotated[];\n} _47;\n\nlayout(binding = 2, std430) buffer BinsBuf\n{\n    uint bins[];\n} _119;\n\nlayout(binding = 1, std430) buffer AllocBuf\n{\n    uint n_elements;\n    uint alloc;\n} _135;\n\nshared uint bitmaps[4][128];\nshared uint count[4][128];\nshared uint sh_chunk_start[128];\n\nuint Annotated_tag(AnnotatedRef ref)\n{\n    return _47.annotated[ref.offset >> uint(2)];\n}\n\nAnnoFill AnnoFill_read(AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _47.annotated[ix + 0u];\n    uint raw1 = _47.annotated[ix + 1u];\n    uint raw2 = _47.annotated[ix + 2u];\n    uint raw3 = _47.annotated[ix + 3u];\n    uint raw4 = _47.annotated[ix + 4u];\n    AnnoFill s;\n    s.rgba_color = raw0;\n    s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4));\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(AnnotatedRef ref)\n{\n    AnnoFillRef param = AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param);\n}\n\nvoid BinInstance_write(BinInstanceRef ref, BinInstance s)\n{\n    uint ix = ref.offset >> uint(2);\n    _119.bins[ix + 0u] = s.element_ix;\n    _119.bins[ix + 1u] = floatBitsToUint(s.right_edge);\n}\n\nvoid main()\n{\n    uint my_n_elements = _135.n_elements;\n    uint my_partition = gl_WorkGroupID.x;\n    for (uint i = 0u; i < 4u; i++)\n    {\n        bitmaps[i][gl_LocalInvocationID.x] = 0u;\n    }\n    barrier();\n    uint element_ix = (my_partition * 128u) + gl_LocalInvocationID.x;\n    AnnotatedRef ref = AnnotatedRef(element_ix * 44u);\n    uint tag 
= 0u;\n    if (element_ix < my_n_elements)\n    {\n        AnnotatedRef param = ref;\n        tag = Annotated_tag(param);\n    }\n    int x0 = 0;\n    int y0 = 0;\n    int x1 = 0;\n    int y1 = 0;\n    float my_right_edge = uintBitsToFloat(0x7f800000u);\n    switch (tag)\n    {\n        case 6u:\n        case 5u:\n        {\n            AnnotatedRef param_1 = ref;\n            AnnoFill fill = Annotated_Fill_read(param_1);\n            x0 = int(floor(fill.bbox.x * 0.00390625));\n            y0 = int(floor(fill.bbox.y * 0.0078125));\n            x1 = int(ceil(fill.bbox.z * 0.00390625));\n            y1 = int(ceil(fill.bbox.w * 0.0078125));\n            my_right_edge = fill.bbox.z;\n            break;\n        }\n    }\n    x0 = clamp(x0, 0, 16);\n    x1 = clamp(x1, x0, 16);\n    y0 = clamp(y0, 0, 8);\n    y1 = clamp(y1, y0, 8);\n    if (x0 == x1)\n    {\n        y1 = y0;\n    }\n    int x = x0;\n    int y = y0;\n    uint my_slice = gl_LocalInvocationID.x / 32u;\n    uint my_mask = uint(1 << int(gl_LocalInvocationID.x & 31u));\n    while (y < y1)\n    {\n        uint _277 = atomicOr(bitmaps[my_slice][(y * 16) + x], my_mask);\n        x++;\n        if (x == x1)\n        {\n            x = x0;\n            y++;\n        }\n    }\n    barrier();\n    uint element_count = 0u;\n    for (uint i_1 = 0u; i_1 < 4u; i_1++)\n    {\n        element_count += uint(bitCount(bitmaps[i_1][gl_LocalInvocationID.x]));\n        count[i_1][gl_LocalInvocationID.x] = element_count;\n    }\n    uint chunk_start = 0u;\n    if (element_count != 0u)\n    {\n        uint _323 = atomicAdd(_135.alloc, element_count * 8u);\n        chunk_start = _323;\n        sh_chunk_start[gl_LocalInvocationID.x] = chunk_start;\n    }\n    uint out_ix = ((my_partition * 128u) + gl_LocalInvocationID.x) * 2u;\n    _119.bins[out_ix] = element_count;\n    _119.bins[out_ix + 1u] = chunk_start;\n    barrier();\n    x = x0;\n    y = y0;\n    while (y < y1)\n    {\n        uint bin_ix = uint((y * 16) + x);\n        uint 
out_mask = bitmaps[my_slice][bin_ix];\n        if ((out_mask & my_mask) != 0u)\n        {\n            uint idx = uint(bitCount(out_mask & (my_mask - 1u)));\n            if (my_slice > 0u)\n            {\n                idx += count[my_slice - 1u][bin_ix];\n            }\n            uint out_offset = sh_chunk_start[bin_ix] + (idx * 8u);\n            BinInstanceRef param_2 = BinInstanceRef(out_offset);\n            BinInstance param_3 = BinInstance(element_ix, my_right_edge);\n            BinInstance_write(param_2, param_3);\n        }\n        x++;\n        if (x == x1)\n        {\n            x = x0;\n            y++;\n        }\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    uint rgba_color;\n    vec4 bbox;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct BinInstanceRef\n{\n    uint offset;\n};\n\nstruct BinInstance\n{\n    uint element_ix;\n    float right_edge;\n};\n\nlayout(binding = 0, std430) buffer AnnotatedBuf\n{\n    uint annotated[];\n} _47;\n\nlayout(binding = 2, std430) buffer BinsBuf\n{\n    uint bins[];\n} _119;\n\nlayout(binding = 1, std430) buffer AllocBuf\n{\n    uint n_elements;\n    uint alloc;\n} _135;\n\nshared uint bitmaps[4][128];\nshared uint count[4][128];\nshared uint sh_chunk_start[128];\n\nuint Annotated_tag(AnnotatedRef ref)\n{\n    return _47.annotated[ref.offset >> uint(2)];\n}\n\nAnnoFill AnnoFill_read(AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _47.annotated[ix + 0u];\n    uint raw1 = _47.annotated[ix + 1u];\n    uint raw2 = _47.annotated[ix + 2u];\n    uint raw3 = _47.annotated[ix + 3u];\n    uint raw4 = _47.annotated[ix + 4u];\n    AnnoFill s;\n    s.rgba_color = raw0;\n    s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4));\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(AnnotatedRef ref)\n{\n    AnnoFillRef param = AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param);\n}\n\nvoid BinInstance_write(BinInstanceRef ref, BinInstance s)\n{\n    uint ix = ref.offset >> uint(2);\n    _119.bins[ix + 0u] = s.element_ix;\n    _119.bins[ix + 1u] = floatBitsToUint(s.right_edge);\n}\n\nvoid main()\n{\n    uint my_n_elements = _135.n_elements;\n    uint my_partition = gl_WorkGroupID.x;\n    for (uint i = 0u; i < 4u; i++)\n    {\n        bitmaps[i][gl_LocalInvocationID.x] = 0u;\n    }\n    barrier();\n    uint element_ix = (my_partition * 128u) + gl_LocalInvocationID.x;\n    AnnotatedRef ref = AnnotatedRef(element_ix * 44u);\n    uint tag 
= 0u;\n    if (element_ix < my_n_elements)\n    {\n        AnnotatedRef param = ref;\n        tag = Annotated_tag(param);\n    }\n    int x0 = 0;\n    int y0 = 0;\n    int x1 = 0;\n    int y1 = 0;\n    float my_right_edge = uintBitsToFloat(0x7f800000u);\n    switch (tag)\n    {\n        case 6u:\n        case 5u:\n        {\n            AnnotatedRef param_1 = ref;\n            AnnoFill fill = Annotated_Fill_read(param_1);\n            x0 = int(floor(fill.bbox.x * 0.001953125));\n            y0 = int(floor(fill.bbox.y * 0.00390625));\n            x1 = int(ceil(fill.bbox.z * 0.001953125));\n            y1 = int(ceil(fill.bbox.w * 0.00390625));\n            my_right_edge = fill.bbox.z;\n            break;\n        }\n    }\n    x0 = clamp(x0, 0, 16);\n    x1 = clamp(x1, x0, 16);\n    y0 = clamp(y0, 0, 8);\n    y1 = clamp(y1, y0, 8);\n    if (x0 == x1)\n    {\n        y1 = y0;\n    }\n    int x = x0;\n    int y = y0;\n    uint my_slice = gl_LocalInvocationID.x / 32u;\n    uint my_mask = uint(1 << int(gl_LocalInvocationID.x & 31u));\n    while (y < y1)\n    {\n        uint _277 = atomicOr(bitmaps[my_slice][(y * 16) + x], my_mask);\n        x++;\n        if (x == x1)\n        {\n            x = x0;\n            y++;\n        }\n    }\n    barrier();\n    uint element_count = 0u;\n    for (uint i_1 = 0u; i_1 < 4u; i_1++)\n    {\n        element_count += uint(bitCount(bitmaps[i_1][gl_LocalInvocationID.x]));\n        count[i_1][gl_LocalInvocationID.x] = element_count;\n    }\n    uint chunk_start = 0u;\n    if (element_count != 0u)\n    {\n        uint _323 = atomicAdd(_135.alloc, element_count * 8u);\n        chunk_start = _323;\n        sh_chunk_start[gl_LocalInvocationID.x] = chunk_start;\n    }\n    uint out_ix = ((my_partition * 128u) + gl_LocalInvocationID.x) * 2u;\n    _119.bins[out_ix] = element_count;\n    _119.bins[out_ix + 1u] = chunk_start;\n    barrier();\n    x = x0;\n    y = y0;\n    while (y < y1)\n    {\n        uint bin_ix = uint((y * 16) + x);\n        
uint out_mask = bitmaps[my_slice][bin_ix];\n        if ((out_mask & my_mask) != 0u)\n        {\n            uint idx = uint(bitCount(out_mask & (my_mask - 1u)));\n            if (my_slice > 0u)\n            {\n                idx += count[my_slice - 1u][bin_ix];\n            }\n            uint out_offset = sh_chunk_start[bin_ix] + (idx * 8u);\n            BinInstanceRef param_2 = BinInstanceRef(out_offset);\n            BinInstance param_3 = BinInstance(element_ix, my_right_edge);\n            BinInstance_write(param_2, param_3);\n        }\n        x++;\n        if (x == x1)\n        {\n            x = x0;\n            y++;\n        }\n    }\n}\n\n",
	}
	shader_blit_frag = [...]backend.ShaderSources{
		{


@@ 189,7 189,7 @@ var (
	}
	shader_coarse_comp = backend.ShaderSources{
		Name:      "coarse.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    uint rgba_color;\n    vec4 bbox;\n};\n\nstruct AnnoStrokeRef\n{\n    uint offset;\n};\n\nstruct AnnoStroke\n{\n    uint rgba_color;\n    vec4 bbox;\n    float linewidth;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct BinInstanceRef\n{\n    uint offset;\n};\n\nstruct BinInstance\n{\n    uint element_ix;\n    float right_edge;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct Tile\n{\n    TileSegRef tile;\n    int backdrop;\n};\n\nstruct CmdStrokeRef\n{\n    uint offset;\n};\n\nstruct CmdStroke\n{\n    uint tile_ref;\n    float half_width;\n    uint rgba_color;\n};\n\nstruct CmdFillRef\n{\n    uint offset;\n};\n\nstruct CmdFill\n{\n    uint tile_ref;\n    int backdrop;\n    uint rgba_color;\n};\n\nstruct CmdSolidRef\n{\n    uint offset;\n};\n\nstruct CmdSolid\n{\n    uint rgba_color;\n};\n\nstruct CmdJumpRef\n{\n    uint offset;\n};\n\nstruct CmdJump\n{\n    uint new_ref;\n};\n\nstruct CmdRef\n{\n    uint offset;\n};\n\nlayout(binding = 0, std430) buffer AnnotatedBuf\n{\n    uint annotated[];\n} _144;\n\nlayout(binding = 1, std430) buffer BinsBuf\n{\n    uint bins[];\n} _290;\n\nlayout(binding = 2, std430) buffer TileBuf\n{\n    uint tile[];\n} _318;\n\nlayout(binding = 4, std430) buffer PtclBuf\n{\n    uint ptcl[];\n} _388;\n\nlayout(binding = 3, std430) buffer AllocBuf\n{\n    uint n_elements;\n    uint alloc;\n} _506;\n\nshared uint sh_bitmaps[4][128];\nshared uint sh_part_elements[128];\nshared uint sh_part_count[128];\nshared uint sh_elements[128];\nshared float sh_right_edge[128];\nshared uint sh_tile_stride[128];\nshared uint sh_tile_width[128];\nshared uint sh_tile_x0[128];\nshared uint sh_tile_y0[128];\nshared uint 
sh_tile_base[128];\nshared uint sh_tile_count[128];\n\nBinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)\n{\n    return BinInstanceRef(ref.offset + (index * 8u));\n}\n\nBinInstance BinInstance_read(BinInstanceRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _290.bins[ix + 0u];\n    uint raw1 = _290.bins[ix + 1u];\n    BinInstance s;\n    s.element_ix = raw0;\n    s.right_edge = uintBitsToFloat(raw1);\n    return s;\n}\n\nuint Annotated_tag(AnnotatedRef ref)\n{\n    return _144.annotated[ref.offset >> uint(2)];\n}\n\nPath Path_read(PathRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _318.tile[ix + 0u];\n    uint raw1 = _318.tile[ix + 1u];\n    uint raw2 = _318.tile[ix + 2u];\n    Path s;\n    s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n    s.tiles = TileRef(raw2);\n    return s;\n}\n\nTile Tile_read(TileRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _318.tile[ix + 0u];\n    uint raw1 = _318.tile[ix + 1u];\n    Tile s;\n    s.tile = TileSegRef(raw0);\n    s.backdrop = int(raw1);\n    return s;\n}\n\nAnnoFill AnnoFill_read(AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _144.annotated[ix + 0u];\n    uint raw1 = _144.annotated[ix + 1u];\n    uint raw2 = _144.annotated[ix + 2u];\n    uint raw3 = _144.annotated[ix + 3u];\n    uint raw4 = _144.annotated[ix + 4u];\n    AnnoFill s;\n    s.rgba_color = raw0;\n    s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4));\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(AnnotatedRef ref)\n{\n    AnnoFillRef param = AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param);\n}\n\nvoid CmdJump_write(CmdJumpRef ref, CmdJump s)\n{\n    uint ix = ref.offset >> uint(2);\n    _388.ptcl[ix + 0u] = s.new_ref;\n}\n\nvoid Cmd_Jump_write(CmdRef ref, CmdJump s)\n{\n    _388.ptcl[ref.offset >> uint(2)] = 8u;\n    CmdJumpRef param = 
CmdJumpRef(ref.offset + 4u);\n    CmdJump param_1 = s;\n    CmdJump_write(param, param_1);\n}\n\nvoid alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit)\n{\n    if (cmd_ref.offset > cmd_limit)\n    {\n        uint _509 = atomicAdd(_506.alloc, 1024u);\n        uint new_cmd = _509;\n        CmdJump jump = CmdJump(new_cmd);\n        CmdRef param = cmd_ref;\n        CmdJump param_1 = jump;\n        Cmd_Jump_write(param, param_1);\n        cmd_ref = CmdRef(new_cmd);\n        cmd_limit = (new_cmd + 1024u) - 40u;\n    }\n}\n\nvoid CmdFill_write(CmdFillRef ref, CmdFill s)\n{\n    uint ix = ref.offset >> uint(2);\n    _388.ptcl[ix + 0u] = s.tile_ref;\n    _388.ptcl[ix + 1u] = uint(s.backdrop);\n    _388.ptcl[ix + 2u] = s.rgba_color;\n}\n\nvoid Cmd_Fill_write(CmdRef ref, CmdFill s)\n{\n    _388.ptcl[ref.offset >> uint(2)] = 3u;\n    CmdFillRef param = CmdFillRef(ref.offset + 4u);\n    CmdFill param_1 = s;\n    CmdFill_write(param, param_1);\n}\n\nvoid CmdSolid_write(CmdSolidRef ref, CmdSolid s)\n{\n    uint ix = ref.offset >> uint(2);\n    _388.ptcl[ix + 0u] = s.rgba_color;\n}\n\nvoid Cmd_Solid_write(CmdRef ref, CmdSolid s)\n{\n    _388.ptcl[ref.offset >> uint(2)] = 7u;\n    CmdSolidRef param = CmdSolidRef(ref.offset + 4u);\n    CmdSolid param_1 = s;\n    CmdSolid_write(param, param_1);\n}\n\nAnnoStroke AnnoStroke_read(AnnoStrokeRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _144.annotated[ix + 0u];\n    uint raw1 = _144.annotated[ix + 1u];\n    uint raw2 = _144.annotated[ix + 2u];\n    uint raw3 = _144.annotated[ix + 3u];\n    uint raw4 = _144.annotated[ix + 4u];\n    uint raw5 = _144.annotated[ix + 5u];\n    AnnoStroke s;\n    s.rgba_color = raw0;\n    s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4));\n    s.linewidth = uintBitsToFloat(raw5);\n    return s;\n}\n\nAnnoStroke Annotated_Stroke_read(AnnotatedRef ref)\n{\n    AnnoStrokeRef param = AnnoStrokeRef(ref.offset + 4u);\n    return 
AnnoStroke_read(param);\n}\n\nvoid CmdStroke_write(CmdStrokeRef ref, CmdStroke s)\n{\n    uint ix = ref.offset >> uint(2);\n    _388.ptcl[ix + 0u] = s.tile_ref;\n    _388.ptcl[ix + 1u] = floatBitsToUint(s.half_width);\n    _388.ptcl[ix + 2u] = s.rgba_color;\n}\n\nvoid Cmd_Stroke_write(CmdRef ref, CmdStroke s)\n{\n    _388.ptcl[ref.offset >> uint(2)] = 4u;\n    CmdStrokeRef param = CmdStrokeRef(ref.offset + 4u);\n    CmdStroke param_1 = s;\n    CmdStroke_write(param, param_1);\n}\n\nvoid Cmd_End_write(CmdRef ref)\n{\n    _388.ptcl[ref.offset >> uint(2)] = 0u;\n}\n\nvoid main()\n{\n    uint bin_ix = (16u * gl_WorkGroupID.y) + gl_WorkGroupID.x;\n    uint partition_ix = 0u;\n    uint n_partitions = ((_506.n_elements + 128u) - 1u) / 128u;\n    uint th_ix = gl_LocalInvocationID.x;\n    uint bin_tile_x = 16u * gl_WorkGroupID.x;\n    uint bin_tile_y = 8u * gl_WorkGroupID.y;\n    uint tile_x = gl_LocalInvocationID.x % 16u;\n    uint tile_y = gl_LocalInvocationID.x / 16u;\n    uint this_tile_ix = (((bin_tile_y + tile_y) * 128u) + bin_tile_x) + tile_x;\n    CmdRef cmd_ref = CmdRef(this_tile_ix * 1024u);\n    uint cmd_limit = (cmd_ref.offset + 1024u) - 40u;\n    uint rd_ix = 0u;\n    uint wr_ix = 0u;\n    uint part_start_ix = 0u;\n    uint ready_ix = 0u;\n    uint _751;\n    uint element_ix;\n    AnnotatedRef ref;\n    uint tile_count;\n    uint _1014;\n    Tile tile_1;\n    CmdFill cmd_fill;\n    CmdStroke cmd_stroke;\n    while (true)\n    {\n        for (uint i = 0u; i < 4u; i++)\n        {\n            sh_bitmaps[i][th_ix] = 0u;\n        }\n        bool _806;\n        for (;;)\n        {\n            if ((ready_ix == wr_ix) && (partition_ix < n_partitions))\n            {\n                part_start_ix = ready_ix;\n                uint count = 0u;\n                bool _626 = th_ix < 128u;\n                bool _634;\n                if (_626)\n                {\n                    _634 = (partition_ix + th_ix) < n_partitions;\n                }\n                else\n    
            {\n                    _634 = _626;\n                }\n                if (_634)\n                {\n                    uint in_ix = (((partition_ix + th_ix) * 128u) + bin_ix) * 2u;\n                    count = _290.bins[in_ix];\n                    sh_part_elements[th_ix] = _290.bins[in_ix + 1u];\n                }\n                for (uint i_1 = 0u; i_1 < 7u; i_1++)\n                {\n                    if (th_ix < 128u)\n                    {\n                        sh_part_count[th_ix] = count;\n                    }\n                    barrier();\n                    if (th_ix < 128u)\n                    {\n                        if (th_ix >= uint(1 << int(i_1)))\n                        {\n                            count += sh_part_count[th_ix - uint(1 << int(i_1))];\n                        }\n                    }\n                    barrier();\n                }\n                if (th_ix < 128u)\n                {\n                    sh_part_count[th_ix] = part_start_ix + count;\n                }\n                barrier();\n                ready_ix = sh_part_count[127];\n                partition_ix += 128u;\n            }\n            uint ix = rd_ix + th_ix;\n            if ((ix >= wr_ix) && (ix < ready_ix))\n            {\n                uint part_ix = 0u;\n                for (uint i_2 = 0u; i_2 < 7u; i_2++)\n                {\n                    uint probe = part_ix + uint(64 >> int(i_2));\n                    if (ix >= sh_part_count[probe - 1u])\n                    {\n                        part_ix = probe;\n                    }\n                }\n                if (part_ix > 0u)\n                {\n                    _751 = sh_part_count[part_ix - 1u];\n                }\n                else\n                {\n                    _751 = part_start_ix;\n                }\n                ix -= _751;\n                BinInstanceRef inst_ref = BinInstanceRef(sh_part_elements[part_ix]);\n                
BinInstanceRef param = inst_ref;\n                uint param_1 = ix;\n                BinInstanceRef param_2 = BinInstance_index(param, param_1);\n                BinInstance inst = BinInstance_read(param_2);\n                sh_elements[th_ix] = inst.element_ix;\n                sh_right_edge[th_ix] = inst.right_edge;\n            }\n            barrier();\n            wr_ix = min((rd_ix + 128u), ready_ix);\n            bool _796 = (wr_ix - rd_ix) < 128u;\n            if (_796)\n            {\n                _806 = (wr_ix < ready_ix) || (partition_ix < n_partitions);\n            }\n            else\n            {\n                _806 = _796;\n            }\n            if (_806)\n            {\n                continue;\n            }\n            else\n            {\n                break;\n            }\n        }\n        uint tag = 0u;\n        float right_edge = 0.0;\n        if ((th_ix + rd_ix) < wr_ix)\n        {\n            element_ix = sh_elements[th_ix];\n            right_edge = sh_right_edge[th_ix];\n            ref = AnnotatedRef(element_ix * 44u);\n            AnnotatedRef param_3 = ref;\n            tag = Annotated_tag(param_3);\n        }\n        switch (tag)\n        {\n            case 6u:\n            case 5u:\n            {\n                uint path_ix = element_ix;\n                PathRef param_4 = PathRef(path_ix * 12u);\n                Path path = Path_read(param_4);\n                uint stride = path.bbox.z - path.bbox.x;\n                sh_tile_stride[th_ix] = stride;\n                int dx = int(path.bbox.x) - int(bin_tile_x);\n                int dy = int(path.bbox.y) - int(bin_tile_y);\n                int x0 = clamp(dx, 0, 16);\n                int y0 = clamp(dy, 0, 8);\n                int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16);\n                int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 8);\n                sh_tile_width[th_ix] = uint(x1 - x0);\n                sh_tile_x0[th_ix] = uint(x0);\n      
          sh_tile_y0[th_ix] = uint(y0);\n                tile_count = uint(x1 - x0) * uint(y1 - y0);\n                uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u);\n                sh_tile_base[th_ix] = base;\n                break;\n            }\n            default:\n            {\n                tile_count = 0u;\n                break;\n            }\n        }\n        sh_tile_count[th_ix] = tile_count;\n        for (uint i_3 = 0u; i_3 < 7u; i_3++)\n        {\n            barrier();\n            if (th_ix >= uint(1 << int(i_3)))\n            {\n                tile_count += sh_tile_count[th_ix - uint(1 << int(i_3))];\n            }\n            barrier();\n            sh_tile_count[th_ix] = tile_count;\n        }\n        barrier();\n        uint total_tile_count = sh_tile_count[127];\n        for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 128u)\n        {\n            uint el_ix = 0u;\n            for (uint i_4 = 0u; i_4 < 7u; i_4++)\n            {\n                uint probe_1 = el_ix + uint(64 >> int(i_4));\n                if (ix_1 >= sh_tile_count[probe_1 - 1u])\n                {\n                    el_ix = probe_1;\n                }\n            }\n            if (el_ix > 0u)\n            {\n                _1014 = sh_tile_count[el_ix - 1u];\n            }\n            else\n            {\n                _1014 = 0u;\n            }\n            uint seq_ix = ix_1 - _1014;\n            uint width = sh_tile_width[el_ix];\n            uint x = sh_tile_x0[el_ix] + (seq_ix % width);\n            uint y = sh_tile_y0[el_ix] + (seq_ix / width);\n            TileRef param_5 = TileRef(sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u));\n            Tile tile = Tile_read(param_5);\n            bool _1062 = tile.tile.offset != 0u;\n            bool _1069;\n            if (!_1062)\n            {\n                _1069 = tile.backdrop != 0;\n            }\n            else\n            {\n                _1069 = 
_1062;\n            }\n            if (_1069)\n            {\n                uint el_slice = el_ix / 32u;\n                uint el_mask = uint(1 << int(el_ix & 31u));\n                uint _1089 = atomicOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask);\n            }\n        }\n        barrier();\n        uint slice_ix = 0u;\n        uint bitmap = sh_bitmaps[0][th_ix];\n        while (true)\n        {\n            if (bitmap == 0u)\n            {\n                slice_ix++;\n                if (slice_ix == 4u)\n                {\n                    break;\n                }\n                bitmap = sh_bitmaps[slice_ix][th_ix];\n                if (bitmap == 0u)\n                {\n                    continue;\n                }\n            }\n            uint element_ref_ix = (slice_ix * 32u) + uint(findLSB(bitmap));\n            uint element_ix_1 = sh_elements[element_ref_ix];\n            bitmap &= (bitmap - 1u);\n            ref = AnnotatedRef(element_ix_1 * 44u);\n            AnnotatedRef param_6 = ref;\n            tag = Annotated_tag(param_6);\n            switch (tag)\n            {\n                case 6u:\n                {\n                    TileRef param_7 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                    tile_1 = Tile_read(param_7);\n                    AnnotatedRef param_8 = ref;\n                    AnnoFill fill = Annotated_Fill_read(param_8);\n                    CmdRef param_9 = cmd_ref;\n                    uint param_10 = cmd_limit;\n                    alloc_cmd(param_9, param_10);\n                    cmd_ref = param_9;\n                    cmd_limit = param_10;\n                    if (tile_1.tile.offset != 0u)\n                    {\n                        cmd_fill.tile_ref = tile_1.tile.offset;\n                        cmd_fill.backdrop = tile_1.backdrop;\n                        cmd_fill.rgba_color = fill.rgba_color;\n                        CmdRef param_11 
= cmd_ref;\n                        CmdFill param_12 = cmd_fill;\n                        Cmd_Fill_write(param_11, param_12);\n                    }\n                    else\n                    {\n                        CmdRef param_13 = cmd_ref;\n                        CmdSolid param_14 = CmdSolid(fill.rgba_color);\n                        Cmd_Solid_write(param_13, param_14);\n                    }\n                    cmd_ref.offset += 20u;\n                    break;\n                }\n                case 5u:\n                {\n                    TileRef param_15 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                    tile_1 = Tile_read(param_15);\n                    AnnotatedRef param_16 = ref;\n                    AnnoStroke stroke = Annotated_Stroke_read(param_16);\n                    cmd_stroke.tile_ref = tile_1.tile.offset;\n                    cmd_stroke.half_width = 0.5 * stroke.linewidth;\n                    cmd_stroke.rgba_color = stroke.rgba_color;\n                    CmdRef param_17 = cmd_ref;\n                    uint param_18 = cmd_limit;\n                    alloc_cmd(param_17, param_18);\n                    cmd_ref = param_17;\n                    cmd_limit = param_18;\n                    CmdRef param_19 = cmd_ref;\n                    CmdStroke param_20 = cmd_stroke;\n                    Cmd_Stroke_write(param_19, param_20);\n                    cmd_ref.offset += 20u;\n                    break;\n                }\n            }\n        }\n        barrier();\n        rd_ix += 128u;\n        if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions))\n        {\n            break;\n        }\n    }\n    CmdRef param_21 = cmd_ref;\n    Cmd_End_write(param_21);\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    uint rgba_color;\n    vec4 bbox;\n};\n\nstruct AnnoStrokeRef\n{\n    uint offset;\n};\n\nstruct AnnoStroke\n{\n    uint rgba_color;\n    vec4 bbox;\n    float linewidth;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct BinInstanceRef\n{\n    uint offset;\n};\n\nstruct BinInstance\n{\n    uint element_ix;\n    float right_edge;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct Tile\n{\n    TileSegRef tile;\n    int backdrop;\n};\n\nstruct CmdStrokeRef\n{\n    uint offset;\n};\n\nstruct CmdStroke\n{\n    uint tile_ref;\n    float half_width;\n    uint rgba_color;\n};\n\nstruct CmdFillRef\n{\n    uint offset;\n};\n\nstruct CmdFill\n{\n    uint tile_ref;\n    int backdrop;\n    uint rgba_color;\n};\n\nstruct CmdSolidRef\n{\n    uint offset;\n};\n\nstruct CmdSolid\n{\n    uint rgba_color;\n};\n\nstruct CmdJumpRef\n{\n    uint offset;\n};\n\nstruct CmdJump\n{\n    uint new_ref;\n};\n\nstruct CmdRef\n{\n    uint offset;\n};\n\nlayout(binding = 0, std430) buffer AnnotatedBuf\n{\n    uint annotated[];\n} _144;\n\nlayout(binding = 1, std430) buffer BinsBuf\n{\n    uint bins[];\n} _290;\n\nlayout(binding = 2, std430) buffer TileBuf\n{\n    uint tile[];\n} _318;\n\nlayout(binding = 4, std430) buffer PtclBuf\n{\n    uint ptcl[];\n} _388;\n\nlayout(binding = 3, std430) buffer AllocBuf\n{\n    uint n_elements;\n    uint alloc;\n} _506;\n\nshared uint sh_bitmaps[4][128];\nshared uint sh_part_elements[128];\nshared uint sh_part_count[128];\nshared uint sh_elements[128];\nshared float sh_right_edge[128];\nshared uint sh_tile_stride[128];\nshared uint sh_tile_width[128];\nshared uint sh_tile_x0[128];\nshared uint sh_tile_y0[128];\nshared uint 
sh_tile_base[128];\nshared uint sh_tile_count[128];\n\nBinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)\n{\n    return BinInstanceRef(ref.offset + (index * 8u));\n}\n\nBinInstance BinInstance_read(BinInstanceRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _290.bins[ix + 0u];\n    uint raw1 = _290.bins[ix + 1u];\n    BinInstance s;\n    s.element_ix = raw0;\n    s.right_edge = uintBitsToFloat(raw1);\n    return s;\n}\n\nuint Annotated_tag(AnnotatedRef ref)\n{\n    return _144.annotated[ref.offset >> uint(2)];\n}\n\nPath Path_read(PathRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _318.tile[ix + 0u];\n    uint raw1 = _318.tile[ix + 1u];\n    uint raw2 = _318.tile[ix + 2u];\n    Path s;\n    s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n    s.tiles = TileRef(raw2);\n    return s;\n}\n\nTile Tile_read(TileRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _318.tile[ix + 0u];\n    uint raw1 = _318.tile[ix + 1u];\n    Tile s;\n    s.tile = TileSegRef(raw0);\n    s.backdrop = int(raw1);\n    return s;\n}\n\nAnnoFill AnnoFill_read(AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _144.annotated[ix + 0u];\n    uint raw1 = _144.annotated[ix + 1u];\n    uint raw2 = _144.annotated[ix + 2u];\n    uint raw3 = _144.annotated[ix + 3u];\n    uint raw4 = _144.annotated[ix + 4u];\n    AnnoFill s;\n    s.rgba_color = raw0;\n    s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4));\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(AnnotatedRef ref)\n{\n    AnnoFillRef param = AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param);\n}\n\nvoid CmdJump_write(CmdJumpRef ref, CmdJump s)\n{\n    uint ix = ref.offset >> uint(2);\n    _388.ptcl[ix + 0u] = s.new_ref;\n}\n\nvoid Cmd_Jump_write(CmdRef ref, CmdJump s)\n{\n    _388.ptcl[ref.offset >> uint(2)] = 8u;\n    CmdJumpRef param = 
CmdJumpRef(ref.offset + 4u);\n    CmdJump param_1 = s;\n    CmdJump_write(param, param_1);\n}\n\nvoid alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit)\n{\n    if (cmd_ref.offset > cmd_limit)\n    {\n        uint _509 = atomicAdd(_506.alloc, 1024u);\n        uint new_cmd = _509;\n        CmdJump jump = CmdJump(new_cmd);\n        CmdRef param = cmd_ref;\n        CmdJump param_1 = jump;\n        Cmd_Jump_write(param, param_1);\n        cmd_ref = CmdRef(new_cmd);\n        cmd_limit = (new_cmd + 1024u) - 40u;\n    }\n}\n\nvoid CmdFill_write(CmdFillRef ref, CmdFill s)\n{\n    uint ix = ref.offset >> uint(2);\n    _388.ptcl[ix + 0u] = s.tile_ref;\n    _388.ptcl[ix + 1u] = uint(s.backdrop);\n    _388.ptcl[ix + 2u] = s.rgba_color;\n}\n\nvoid Cmd_Fill_write(CmdRef ref, CmdFill s)\n{\n    _388.ptcl[ref.offset >> uint(2)] = 3u;\n    CmdFillRef param = CmdFillRef(ref.offset + 4u);\n    CmdFill param_1 = s;\n    CmdFill_write(param, param_1);\n}\n\nvoid CmdSolid_write(CmdSolidRef ref, CmdSolid s)\n{\n    uint ix = ref.offset >> uint(2);\n    _388.ptcl[ix + 0u] = s.rgba_color;\n}\n\nvoid Cmd_Solid_write(CmdRef ref, CmdSolid s)\n{\n    _388.ptcl[ref.offset >> uint(2)] = 7u;\n    CmdSolidRef param = CmdSolidRef(ref.offset + 4u);\n    CmdSolid param_1 = s;\n    CmdSolid_write(param, param_1);\n}\n\nAnnoStroke AnnoStroke_read(AnnoStrokeRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _144.annotated[ix + 0u];\n    uint raw1 = _144.annotated[ix + 1u];\n    uint raw2 = _144.annotated[ix + 2u];\n    uint raw3 = _144.annotated[ix + 3u];\n    uint raw4 = _144.annotated[ix + 4u];\n    uint raw5 = _144.annotated[ix + 5u];\n    AnnoStroke s;\n    s.rgba_color = raw0;\n    s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4));\n    s.linewidth = uintBitsToFloat(raw5);\n    return s;\n}\n\nAnnoStroke Annotated_Stroke_read(AnnotatedRef ref)\n{\n    AnnoStrokeRef param = AnnoStrokeRef(ref.offset + 4u);\n    return 
AnnoStroke_read(param);\n}\n\nvoid CmdStroke_write(CmdStrokeRef ref, CmdStroke s)\n{\n    uint ix = ref.offset >> uint(2);\n    _388.ptcl[ix + 0u] = s.tile_ref;\n    _388.ptcl[ix + 1u] = floatBitsToUint(s.half_width);\n    _388.ptcl[ix + 2u] = s.rgba_color;\n}\n\nvoid Cmd_Stroke_write(CmdRef ref, CmdStroke s)\n{\n    _388.ptcl[ref.offset >> uint(2)] = 4u;\n    CmdStrokeRef param = CmdStrokeRef(ref.offset + 4u);\n    CmdStroke param_1 = s;\n    CmdStroke_write(param, param_1);\n}\n\nvoid Cmd_End_write(CmdRef ref)\n{\n    _388.ptcl[ref.offset >> uint(2)] = 0u;\n}\n\nvoid main()\n{\n    uint bin_ix = (16u * gl_WorkGroupID.y) + gl_WorkGroupID.x;\n    uint partition_ix = 0u;\n    uint n_partitions = ((_506.n_elements + 128u) - 1u) / 128u;\n    uint th_ix = gl_LocalInvocationID.x;\n    uint bin_tile_x = 16u * gl_WorkGroupID.x;\n    uint bin_tile_y = 8u * gl_WorkGroupID.y;\n    uint tile_x = gl_LocalInvocationID.x % 16u;\n    uint tile_y = gl_LocalInvocationID.x / 16u;\n    uint this_tile_ix = (((bin_tile_y + tile_y) * 64u) + bin_tile_x) + tile_x;\n    CmdRef cmd_ref = CmdRef(this_tile_ix * 1024u);\n    uint cmd_limit = (cmd_ref.offset + 1024u) - 40u;\n    uint rd_ix = 0u;\n    uint wr_ix = 0u;\n    uint part_start_ix = 0u;\n    uint ready_ix = 0u;\n    uint _752;\n    uint element_ix;\n    AnnotatedRef ref;\n    uint tile_count;\n    uint _1015;\n    Tile tile_1;\n    CmdFill cmd_fill;\n    CmdStroke cmd_stroke;\n    while (true)\n    {\n        for (uint i = 0u; i < 4u; i++)\n        {\n            sh_bitmaps[i][th_ix] = 0u;\n        }\n        bool _807;\n        for (;;)\n        {\n            if ((ready_ix == wr_ix) && (partition_ix < n_partitions))\n            {\n                part_start_ix = ready_ix;\n                uint count = 0u;\n                bool _627 = th_ix < 128u;\n                bool _635;\n                if (_627)\n                {\n                    _635 = (partition_ix + th_ix) < n_partitions;\n                }\n                else\n     
           {\n                    _635 = _627;\n                }\n                if (_635)\n                {\n                    uint in_ix = (((partition_ix + th_ix) * 128u) + bin_ix) * 2u;\n                    count = _290.bins[in_ix];\n                    sh_part_elements[th_ix] = _290.bins[in_ix + 1u];\n                }\n                for (uint i_1 = 0u; i_1 < 7u; i_1++)\n                {\n                    if (th_ix < 128u)\n                    {\n                        sh_part_count[th_ix] = count;\n                    }\n                    barrier();\n                    if (th_ix < 128u)\n                    {\n                        if (th_ix >= uint(1 << int(i_1)))\n                        {\n                            count += sh_part_count[th_ix - uint(1 << int(i_1))];\n                        }\n                    }\n                    barrier();\n                }\n                if (th_ix < 128u)\n                {\n                    sh_part_count[th_ix] = part_start_ix + count;\n                }\n                barrier();\n                ready_ix = sh_part_count[127];\n                partition_ix += 128u;\n            }\n            uint ix = rd_ix + th_ix;\n            if ((ix >= wr_ix) && (ix < ready_ix))\n            {\n                uint part_ix = 0u;\n                for (uint i_2 = 0u; i_2 < 7u; i_2++)\n                {\n                    uint probe = part_ix + uint(64 >> int(i_2));\n                    if (ix >= sh_part_count[probe - 1u])\n                    {\n                        part_ix = probe;\n                    }\n                }\n                if (part_ix > 0u)\n                {\n                    _752 = sh_part_count[part_ix - 1u];\n                }\n                else\n                {\n                    _752 = part_start_ix;\n                }\n                ix -= _752;\n                BinInstanceRef inst_ref = BinInstanceRef(sh_part_elements[part_ix]);\n                
BinInstanceRef param = inst_ref;\n                uint param_1 = ix;\n                BinInstanceRef param_2 = BinInstance_index(param, param_1);\n                BinInstance inst = BinInstance_read(param_2);\n                sh_elements[th_ix] = inst.element_ix;\n                sh_right_edge[th_ix] = inst.right_edge;\n            }\n            barrier();\n            wr_ix = min((rd_ix + 128u), ready_ix);\n            bool _797 = (wr_ix - rd_ix) < 128u;\n            if (_797)\n            {\n                _807 = (wr_ix < ready_ix) || (partition_ix < n_partitions);\n            }\n            else\n            {\n                _807 = _797;\n            }\n            if (_807)\n            {\n                continue;\n            }\n            else\n            {\n                break;\n            }\n        }\n        uint tag = 0u;\n        float right_edge = 0.0;\n        if ((th_ix + rd_ix) < wr_ix)\n        {\n            element_ix = sh_elements[th_ix];\n            right_edge = sh_right_edge[th_ix];\n            ref = AnnotatedRef(element_ix * 44u);\n            AnnotatedRef param_3 = ref;\n            tag = Annotated_tag(param_3);\n        }\n        switch (tag)\n        {\n            case 6u:\n            case 5u:\n            {\n                uint path_ix = element_ix;\n                PathRef param_4 = PathRef(path_ix * 12u);\n                Path path = Path_read(param_4);\n                uint stride = path.bbox.z - path.bbox.x;\n                sh_tile_stride[th_ix] = stride;\n                int dx = int(path.bbox.x) - int(bin_tile_x);\n                int dy = int(path.bbox.y) - int(bin_tile_y);\n                int x0 = clamp(dx, 0, 16);\n                int y0 = clamp(dy, 0, 8);\n                int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16);\n                int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 8);\n                sh_tile_width[th_ix] = uint(x1 - x0);\n                sh_tile_x0[th_ix] = uint(x0);\n      
          sh_tile_y0[th_ix] = uint(y0);\n                tile_count = uint(x1 - x0) * uint(y1 - y0);\n                uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u);\n                sh_tile_base[th_ix] = base;\n                break;\n            }\n            default:\n            {\n                tile_count = 0u;\n                break;\n            }\n        }\n        sh_tile_count[th_ix] = tile_count;\n        for (uint i_3 = 0u; i_3 < 7u; i_3++)\n        {\n            barrier();\n            if (th_ix >= uint(1 << int(i_3)))\n            {\n                tile_count += sh_tile_count[th_ix - uint(1 << int(i_3))];\n            }\n            barrier();\n            sh_tile_count[th_ix] = tile_count;\n        }\n        barrier();\n        uint total_tile_count = sh_tile_count[127];\n        for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 128u)\n        {\n            uint el_ix = 0u;\n            for (uint i_4 = 0u; i_4 < 7u; i_4++)\n            {\n                uint probe_1 = el_ix + uint(64 >> int(i_4));\n                if (ix_1 >= sh_tile_count[probe_1 - 1u])\n                {\n                    el_ix = probe_1;\n                }\n            }\n            if (el_ix > 0u)\n            {\n                _1015 = sh_tile_count[el_ix - 1u];\n            }\n            else\n            {\n                _1015 = 0u;\n            }\n            uint seq_ix = ix_1 - _1015;\n            uint width = sh_tile_width[el_ix];\n            uint x = sh_tile_x0[el_ix] + (seq_ix % width);\n            uint y = sh_tile_y0[el_ix] + (seq_ix / width);\n            TileRef param_5 = TileRef(sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u));\n            Tile tile = Tile_read(param_5);\n            bool _1063 = tile.tile.offset != 0u;\n            bool _1070;\n            if (!_1063)\n            {\n                _1070 = tile.backdrop != 0;\n            }\n            else\n            {\n                _1070 = 
_1063;\n            }\n            if (_1070)\n            {\n                uint el_slice = el_ix / 32u;\n                uint el_mask = uint(1 << int(el_ix & 31u));\n                uint _1090 = atomicOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask);\n            }\n        }\n        barrier();\n        uint slice_ix = 0u;\n        uint bitmap = sh_bitmaps[0][th_ix];\n        while (true)\n        {\n            if (bitmap == 0u)\n            {\n                slice_ix++;\n                if (slice_ix == 4u)\n                {\n                    break;\n                }\n                bitmap = sh_bitmaps[slice_ix][th_ix];\n                if (bitmap == 0u)\n                {\n                    continue;\n                }\n            }\n            uint element_ref_ix = (slice_ix * 32u) + uint(findLSB(bitmap));\n            uint element_ix_1 = sh_elements[element_ref_ix];\n            bitmap &= (bitmap - 1u);\n            ref = AnnotatedRef(element_ix_1 * 44u);\n            AnnotatedRef param_6 = ref;\n            tag = Annotated_tag(param_6);\n            switch (tag)\n            {\n                case 6u:\n                {\n                    TileRef param_7 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                    tile_1 = Tile_read(param_7);\n                    AnnotatedRef param_8 = ref;\n                    AnnoFill fill = Annotated_Fill_read(param_8);\n                    CmdRef param_9 = cmd_ref;\n                    uint param_10 = cmd_limit;\n                    alloc_cmd(param_9, param_10);\n                    cmd_ref = param_9;\n                    cmd_limit = param_10;\n                    if (tile_1.tile.offset != 0u)\n                    {\n                        cmd_fill.tile_ref = tile_1.tile.offset;\n                        cmd_fill.backdrop = tile_1.backdrop;\n                        cmd_fill.rgba_color = fill.rgba_color;\n                        CmdRef param_11 
= cmd_ref;\n                        CmdFill param_12 = cmd_fill;\n                        Cmd_Fill_write(param_11, param_12);\n                    }\n                    else\n                    {\n                        CmdRef param_13 = cmd_ref;\n                        CmdSolid param_14 = CmdSolid(fill.rgba_color);\n                        Cmd_Solid_write(param_13, param_14);\n                    }\n                    cmd_ref.offset += 20u;\n                    break;\n                }\n                case 5u:\n                {\n                    TileRef param_15 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                    tile_1 = Tile_read(param_15);\n                    AnnotatedRef param_16 = ref;\n                    AnnoStroke stroke = Annotated_Stroke_read(param_16);\n                    cmd_stroke.tile_ref = tile_1.tile.offset;\n                    cmd_stroke.half_width = 0.5 * stroke.linewidth;\n                    cmd_stroke.rgba_color = stroke.rgba_color;\n                    CmdRef param_17 = cmd_ref;\n                    uint param_18 = cmd_limit;\n                    alloc_cmd(param_17, param_18);\n                    cmd_ref = param_17;\n                    cmd_limit = param_18;\n                    CmdRef param_19 = cmd_ref;\n                    CmdStroke param_20 = cmd_stroke;\n                    Cmd_Stroke_write(param_19, param_20);\n                    cmd_ref.offset += 20u;\n                    break;\n                }\n            }\n        }\n        barrier();\n        rd_ix += 128u;\n        if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions))\n        {\n            break;\n        }\n    }\n    CmdRef param_21 = cmd_ref;\n    Cmd_End_write(param_21);\n}\n\n",
	}
	shader_cover_frag = [...]backend.ShaderSources{
		{


@@ 516,11 516,11 @@ var (
	}
	shader_kernel4_comp = backend.ShaderSources{
		Name:      "kernel4.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 16, local_size_y = 2, local_size_z = 1) in;\n\nstruct CmdCircleRef\n{\n    uint offset;\n};\n\nstruct CmdCircle\n{\n    vec2 center;\n    float radius;\n    uint rgba_color;\n};\n\nstruct CmdStrokeRef\n{\n    uint offset;\n};\n\nstruct CmdStroke\n{\n    uint tile_ref;\n    float half_width;\n    uint rgba_color;\n};\n\nstruct CmdFillRef\n{\n    uint offset;\n};\n\nstruct CmdFill\n{\n    uint tile_ref;\n    int backdrop;\n    uint rgba_color;\n};\n\nstruct CmdSolidRef\n{\n    uint offset;\n};\n\nstruct CmdSolid\n{\n    uint rgba_color;\n};\n\nstruct CmdJumpRef\n{\n    uint offset;\n};\n\nstruct CmdJump\n{\n    uint new_ref;\n};\n\nstruct CmdRef\n{\n    uint offset;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct TileSeg\n{\n    vec2 start;\n    vec2 end;\n    float y_edge;\n    TileSegRef next;\n};\n\nlayout(binding = 0, std430) buffer PtclBuf\n{\n    uint ptcl[];\n} _89;\n\nlayout(binding = 1, std430) buffer TileBuf\n{\n    uint tile[];\n} _284;\n\nlayout(binding = 2, rgba8) uniform writeonly highp image2D image;\n\nuint Cmd_tag(CmdRef ref)\n{\n    return _89.ptcl[ref.offset >> uint(2)];\n}\n\nCmdCircle CmdCircle_read(CmdCircleRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _89.ptcl[ix + 0u];\n    uint raw1 = _89.ptcl[ix + 1u];\n    uint raw2 = _89.ptcl[ix + 2u];\n    uint raw3 = _89.ptcl[ix + 3u];\n    CmdCircle s;\n    s.center = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.radius = uintBitsToFloat(raw2);\n    s.rgba_color = raw3;\n    return s;\n}\n\nCmdCircle Cmd_Circle_read(CmdRef ref)\n{\n    CmdCircleRef param = CmdCircleRef(ref.offset + 4u);\n    return CmdCircle_read(param);\n}\n\nCmdStroke CmdStroke_read(CmdStrokeRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _89.ptcl[ix + 0u];\n    uint raw1 = _89.ptcl[ix + 1u];\n    uint raw2 = _89.ptcl[ix + 2u];\n    CmdStroke s;\n    s.tile_ref = raw0;\n    s.half_width = uintBitsToFloat(raw1);\n    
s.rgba_color = raw2;\n    return s;\n}\n\nCmdStroke Cmd_Stroke_read(CmdRef ref)\n{\n    CmdStrokeRef param = CmdStrokeRef(ref.offset + 4u);\n    return CmdStroke_read(param);\n}\n\nTileSeg TileSeg_read(TileSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _284.tile[ix + 0u];\n    uint raw1 = _284.tile[ix + 1u];\n    uint raw2 = _284.tile[ix + 2u];\n    uint raw3 = _284.tile[ix + 3u];\n    uint raw4 = _284.tile[ix + 4u];\n    uint raw5 = _284.tile[ix + 5u];\n    TileSeg s;\n    s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.y_edge = uintBitsToFloat(raw4);\n    s.next = TileSegRef(raw5);\n    return s;\n}\n\nCmdFill CmdFill_read(CmdFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _89.ptcl[ix + 0u];\n    uint raw1 = _89.ptcl[ix + 1u];\n    uint raw2 = _89.ptcl[ix + 2u];\n    CmdFill s;\n    s.tile_ref = raw0;\n    s.backdrop = int(raw1);\n    s.rgba_color = raw2;\n    return s;\n}\n\nCmdFill Cmd_Fill_read(CmdRef ref)\n{\n    CmdFillRef param = CmdFillRef(ref.offset + 4u);\n    return CmdFill_read(param);\n}\n\nCmdSolid CmdSolid_read(CmdSolidRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _89.ptcl[ix + 0u];\n    CmdSolid s;\n    s.rgba_color = raw0;\n    return s;\n}\n\nCmdSolid Cmd_Solid_read(CmdRef ref)\n{\n    CmdSolidRef param = CmdSolidRef(ref.offset + 4u);\n    return CmdSolid_read(param);\n}\n\nCmdJump CmdJump_read(CmdJumpRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _89.ptcl[ix + 0u];\n    CmdJump s;\n    s.new_ref = raw0;\n    return s;\n}\n\nCmdJump Cmd_Jump_read(CmdRef ref)\n{\n    CmdJumpRef param = CmdJumpRef(ref.offset + 4u);\n    return CmdJump_read(param);\n}\n\nvoid main()\n{\n    uint tile_ix = (gl_WorkGroupID.y * 128u) + gl_WorkGroupID.x;\n    CmdRef cmd_ref = CmdRef(tile_ix * 1024u);\n    uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + (16u * 
gl_WorkGroupID.y));\n    vec2 xy = vec2(xy_uint);\n    vec3 rgb[8];\n    for (uint i = 0u; i < 8u; i++)\n    {\n        rgb[i] = vec3(0.5);\n    }\n    vec4 fg_rgba;\n    float df[8];\n    TileSegRef tile_seg_ref;\n    float area[8];\n    while (true)\n    {\n        CmdRef param = cmd_ref;\n        uint tag = Cmd_tag(param);\n        if (tag == 0u)\n        {\n            break;\n        }\n        switch (tag)\n        {\n            case 1u:\n            {\n                CmdRef param_1 = cmd_ref;\n                CmdCircle circle = Cmd_Circle_read(param_1);\n                fg_rgba = unpackUnorm4x8(circle.rgba_color).wzyx;\n                for (uint i_1 = 0u; i_1 < 8u; i_1++)\n                {\n                    float dy = float(i_1 * 2u);\n                    float r = length((vec2(xy.x, xy.y + dy) + vec2(0.5)) - circle.center);\n                    float alpha = clamp((0.5 + circle.radius) - r, 0.0, 1.0);\n                    rgb[i_1] = mix(rgb[i_1], fg_rgba.xyz, vec3(alpha * fg_rgba.w));\n                }\n                break;\n            }\n            case 4u:\n            {\n                CmdRef param_2 = cmd_ref;\n                CmdStroke stroke = Cmd_Stroke_read(param_2);\n                for (uint k = 0u; k < 8u; k++)\n                {\n                    df[k] = 1000000000.0;\n                }\n                tile_seg_ref = TileSegRef(stroke.tile_ref);\n                do\n                {\n                    TileSegRef param_3 = tile_seg_ref;\n                    TileSeg seg = TileSeg_read(param_3);\n                    vec2 line_vec = seg.end - seg.start;\n                    for (uint k_1 = 0u; k_1 < 8u; k_1++)\n                    {\n                        vec2 dpos = (xy + vec2(0.5)) - seg.start;\n                        dpos.y += float(k_1 * 2u);\n                        float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);\n                        df[k_1] = min(df[k_1], length((line_vec * t) - dpos));\n     
               }\n                    tile_seg_ref = seg.next;\n                } while (tile_seg_ref.offset != 0u);\n                fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;\n                for (uint k_2 = 0u; k_2 < 8u; k_2++)\n                {\n                    float alpha_1 = clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0);\n                    rgb[k_2] = mix(rgb[k_2], fg_rgba.xyz, vec3(alpha_1 * fg_rgba.w));\n                }\n                break;\n            }\n            case 3u:\n            {\n                CmdRef param_4 = cmd_ref;\n                CmdFill fill = Cmd_Fill_read(param_4);\n                for (uint k_3 = 0u; k_3 < 8u; k_3++)\n                {\n                    area[k_3] = float(fill.backdrop);\n                }\n                tile_seg_ref = TileSegRef(fill.tile_ref);\n                do\n                {\n                    TileSegRef param_5 = tile_seg_ref;\n                    TileSeg seg_1 = TileSeg_read(param_5);\n                    for (uint k_4 = 0u; k_4 < 8u; k_4++)\n                    {\n                        vec2 my_xy = vec2(xy.x, xy.y + float(k_4 * 2u));\n                        vec2 start = seg_1.start - my_xy;\n                        vec2 end = seg_1.end - my_xy;\n                        vec2 window = clamp(vec2(start.y, end.y), vec2(0.0), vec2(1.0));\n                        if (window.x != window.y)\n                        {\n                            vec2 t_1 = (window - vec2(start.y)) / vec2(end.y - start.y);\n                            vec2 xs = vec2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y));\n                            float xmin = min(min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07;\n                            float xmax = max(xs.x, xs.y);\n                            float b = min(xmax, 1.0);\n                            float c = max(b, 0.0);\n                            float d = max(xmin, 0.0);\n                            float a = ((b + (0.5 * 
((d * d) - (c * c)))) - xmin) / (xmax - xmin);\n                            area[k_4] += (a * (window.x - window.y));\n                        }\n                        area[k_4] += (sign(end.x - start.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0));\n                    }\n                    tile_seg_ref = seg_1.next;\n                } while (tile_seg_ref.offset != 0u);\n                fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;\n                for (uint k_5 = 0u; k_5 < 8u; k_5++)\n                {\n                    float alpha_2 = min(abs(area[k_5]), 1.0);\n                    rgb[k_5] = mix(rgb[k_5], fg_rgba.xyz, vec3(alpha_2 * fg_rgba.w));\n                }\n                break;\n            }\n            case 7u:\n            {\n                CmdRef param_6 = cmd_ref;\n                CmdSolid solid = Cmd_Solid_read(param_6);\n                fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx;\n                for (uint k_6 = 0u; k_6 < 8u; k_6++)\n                {\n                    rgb[k_6] = mix(rgb[k_6], fg_rgba.xyz, vec3(fg_rgba.w));\n                }\n                break;\n            }\n            case 8u:\n            {\n                CmdRef param_7 = cmd_ref;\n                cmd_ref = CmdRef(Cmd_Jump_read(param_7).new_ref);\n                continue;\n            }\n        }\n        cmd_ref.offset += 20u;\n    }\n    for (uint i_2 = 0u; i_2 < 8u; i_2++)\n    {\n        imageStore(image, ivec2(int(xy_uint.x), int(xy_uint.y + (2u * i_2))), vec4(rgb[i_2], 1.0));\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 4, local_size_z = 1) in;\n\nstruct CmdCircleRef\n{\n    uint offset;\n};\n\nstruct CmdCircle\n{\n    vec2 center;\n    float radius;\n    uint rgba_color;\n};\n\nstruct CmdStrokeRef\n{\n    uint offset;\n};\n\nstruct CmdStroke\n{\n    uint tile_ref;\n    float half_width;\n    uint rgba_color;\n};\n\nstruct CmdFillRef\n{\n    uint offset;\n};\n\nstruct CmdFill\n{\n    uint tile_ref;\n    int backdrop;\n    uint rgba_color;\n};\n\nstruct CmdSolidRef\n{\n    uint offset;\n};\n\nstruct CmdSolid\n{\n    uint rgba_color;\n};\n\nstruct CmdJumpRef\n{\n    uint offset;\n};\n\nstruct CmdJump\n{\n    uint new_ref;\n};\n\nstruct CmdRef\n{\n    uint offset;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct TileSeg\n{\n    vec2 start;\n    vec2 end;\n    float y_edge;\n    TileSegRef next;\n};\n\nlayout(binding = 0, std430) buffer PtclBuf\n{\n    uint ptcl[];\n} _89;\n\nlayout(binding = 1, std430) buffer TileBuf\n{\n    uint tile[];\n} _284;\n\nlayout(binding = 2, rgba8) uniform writeonly highp image2D image;\n\nuint Cmd_tag(CmdRef ref)\n{\n    return _89.ptcl[ref.offset >> uint(2)];\n}\n\nCmdCircle CmdCircle_read(CmdCircleRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _89.ptcl[ix + 0u];\n    uint raw1 = _89.ptcl[ix + 1u];\n    uint raw2 = _89.ptcl[ix + 2u];\n    uint raw3 = _89.ptcl[ix + 3u];\n    CmdCircle s;\n    s.center = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.radius = uintBitsToFloat(raw2);\n    s.rgba_color = raw3;\n    return s;\n}\n\nCmdCircle Cmd_Circle_read(CmdRef ref)\n{\n    CmdCircleRef param = CmdCircleRef(ref.offset + 4u);\n    return CmdCircle_read(param);\n}\n\nCmdStroke CmdStroke_read(CmdStrokeRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _89.ptcl[ix + 0u];\n    uint raw1 = _89.ptcl[ix + 1u];\n    uint raw2 = _89.ptcl[ix + 2u];\n    CmdStroke s;\n    s.tile_ref = raw0;\n    s.half_width = uintBitsToFloat(raw1);\n    
s.rgba_color = raw2;\n    return s;\n}\n\nCmdStroke Cmd_Stroke_read(CmdRef ref)\n{\n    CmdStrokeRef param = CmdStrokeRef(ref.offset + 4u);\n    return CmdStroke_read(param);\n}\n\nTileSeg TileSeg_read(TileSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _284.tile[ix + 0u];\n    uint raw1 = _284.tile[ix + 1u];\n    uint raw2 = _284.tile[ix + 2u];\n    uint raw3 = _284.tile[ix + 3u];\n    uint raw4 = _284.tile[ix + 4u];\n    uint raw5 = _284.tile[ix + 5u];\n    TileSeg s;\n    s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.y_edge = uintBitsToFloat(raw4);\n    s.next = TileSegRef(raw5);\n    return s;\n}\n\nCmdFill CmdFill_read(CmdFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _89.ptcl[ix + 0u];\n    uint raw1 = _89.ptcl[ix + 1u];\n    uint raw2 = _89.ptcl[ix + 2u];\n    CmdFill s;\n    s.tile_ref = raw0;\n    s.backdrop = int(raw1);\n    s.rgba_color = raw2;\n    return s;\n}\n\nCmdFill Cmd_Fill_read(CmdRef ref)\n{\n    CmdFillRef param = CmdFillRef(ref.offset + 4u);\n    return CmdFill_read(param);\n}\n\nCmdSolid CmdSolid_read(CmdSolidRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _89.ptcl[ix + 0u];\n    CmdSolid s;\n    s.rgba_color = raw0;\n    return s;\n}\n\nCmdSolid Cmd_Solid_read(CmdRef ref)\n{\n    CmdSolidRef param = CmdSolidRef(ref.offset + 4u);\n    return CmdSolid_read(param);\n}\n\nCmdJump CmdJump_read(CmdJumpRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _89.ptcl[ix + 0u];\n    CmdJump s;\n    s.new_ref = raw0;\n    return s;\n}\n\nCmdJump Cmd_Jump_read(CmdRef ref)\n{\n    CmdJumpRef param = CmdJumpRef(ref.offset + 4u);\n    return CmdJump_read(param);\n}\n\nvoid main()\n{\n    uint tile_ix = (gl_WorkGroupID.y * 64u) + gl_WorkGroupID.x;\n    CmdRef cmd_ref = CmdRef(tile_ix * 1024u);\n    uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + (32u * 
gl_WorkGroupID.y));\n    vec2 xy = vec2(xy_uint);\n    vec3 rgb[8];\n    for (uint i = 0u; i < 8u; i++)\n    {\n        rgb[i] = vec3(0.5);\n    }\n    vec4 fg_rgba;\n    float df[8];\n    TileSegRef tile_seg_ref;\n    float area[8];\n    while (true)\n    {\n        CmdRef param = cmd_ref;\n        uint tag = Cmd_tag(param);\n        if (tag == 0u)\n        {\n            break;\n        }\n        switch (tag)\n        {\n            case 1u:\n            {\n                CmdRef param_1 = cmd_ref;\n                CmdCircle circle = Cmd_Circle_read(param_1);\n                fg_rgba = unpackUnorm4x8(circle.rgba_color).wzyx;\n                for (uint i_1 = 0u; i_1 < 8u; i_1++)\n                {\n                    float dy = float(i_1 * 4u);\n                    float r = length((vec2(xy.x, xy.y + dy) + vec2(0.5)) - circle.center);\n                    float alpha = clamp((0.5 + circle.radius) - r, 0.0, 1.0);\n                    rgb[i_1] = mix(rgb[i_1], fg_rgba.xyz, vec3(alpha * fg_rgba.w));\n                }\n                break;\n            }\n            case 4u:\n            {\n                CmdRef param_2 = cmd_ref;\n                CmdStroke stroke = Cmd_Stroke_read(param_2);\n                for (uint k = 0u; k < 8u; k++)\n                {\n                    df[k] = 1000000000.0;\n                }\n                tile_seg_ref = TileSegRef(stroke.tile_ref);\n                do\n                {\n                    TileSegRef param_3 = tile_seg_ref;\n                    TileSeg seg = TileSeg_read(param_3);\n                    vec2 line_vec = seg.end - seg.start;\n                    for (uint k_1 = 0u; k_1 < 8u; k_1++)\n                    {\n                        vec2 dpos = (xy + vec2(0.5)) - seg.start;\n                        dpos.y += float(k_1 * 4u);\n                        float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);\n                        df[k_1] = min(df[k_1], length((line_vec * t) - dpos));\n     
               }\n                    tile_seg_ref = seg.next;\n                } while (tile_seg_ref.offset != 0u);\n                fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;\n                for (uint k_2 = 0u; k_2 < 8u; k_2++)\n                {\n                    float alpha_1 = clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0);\n                    rgb[k_2] = mix(rgb[k_2], fg_rgba.xyz, vec3(alpha_1 * fg_rgba.w));\n                }\n                break;\n            }\n            case 3u:\n            {\n                CmdRef param_4 = cmd_ref;\n                CmdFill fill = Cmd_Fill_read(param_4);\n                for (uint k_3 = 0u; k_3 < 8u; k_3++)\n                {\n                    area[k_3] = float(fill.backdrop);\n                }\n                tile_seg_ref = TileSegRef(fill.tile_ref);\n                do\n                {\n                    TileSegRef param_5 = tile_seg_ref;\n                    TileSeg seg_1 = TileSeg_read(param_5);\n                    for (uint k_4 = 0u; k_4 < 8u; k_4++)\n                    {\n                        vec2 my_xy = vec2(xy.x, xy.y + float(k_4 * 4u));\n                        vec2 start = seg_1.start - my_xy;\n                        vec2 end = seg_1.end - my_xy;\n                        vec2 window = clamp(vec2(start.y, end.y), vec2(0.0), vec2(1.0));\n                        if (window.x != window.y)\n                        {\n                            vec2 t_1 = (window - vec2(start.y)) / vec2(end.y - start.y);\n                            vec2 xs = vec2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y));\n                            float xmin = min(min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07;\n                            float xmax = max(xs.x, xs.y);\n                            float b = min(xmax, 1.0);\n                            float c = max(b, 0.0);\n                            float d = max(xmin, 0.0);\n                            float a = ((b + (0.5 * 
((d * d) - (c * c)))) - xmin) / (xmax - xmin);\n                            area[k_4] += (a * (window.x - window.y));\n                        }\n                        area[k_4] += (sign(end.x - start.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0));\n                    }\n                    tile_seg_ref = seg_1.next;\n                } while (tile_seg_ref.offset != 0u);\n                fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;\n                for (uint k_5 = 0u; k_5 < 8u; k_5++)\n                {\n                    float alpha_2 = min(abs(area[k_5]), 1.0);\n                    rgb[k_5] = mix(rgb[k_5], fg_rgba.xyz, vec3(alpha_2 * fg_rgba.w));\n                }\n                break;\n            }\n            case 7u:\n            {\n                CmdRef param_6 = cmd_ref;\n                CmdSolid solid = Cmd_Solid_read(param_6);\n                fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx;\n                for (uint k_6 = 0u; k_6 < 8u; k_6++)\n                {\n                    rgb[k_6] = mix(rgb[k_6], fg_rgba.xyz, vec3(fg_rgba.w));\n                }\n                break;\n            }\n            case 8u:\n            {\n                CmdRef param_7 = cmd_ref;\n                cmd_ref = CmdRef(Cmd_Jump_read(param_7).new_ref);\n                continue;\n            }\n        }\n        cmd_ref.offset += 20u;\n    }\n    for (uint i_2 = 0u; i_2 < 8u; i_2++)\n    {\n        imageStore(image, ivec2(int(xy_uint.x), int(xy_uint.y + (4u * i_2))), vec4(rgb[i_2], 1.0));\n    }\n}\n\n",
	}
	shader_path_coarse_comp = backend.ShaderSources{
		Name:      "path_coarse.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;\n\nstruct PathStrokeCubicRef\n{\n    uint offset;\n};\n\nstruct PathStrokeCubic\n{\n    vec2 p0;\n    vec2 p1;\n    vec2 p2;\n    vec2 p3;\n    uint path_ix;\n    vec2 stroke;\n};\n\nstruct PathSegRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct TileSeg\n{\n    vec2 start;\n    vec2 end;\n    float y_edge;\n    TileSegRef next;\n};\n\nstruct SubdivResult\n{\n    float val;\n    float a0;\n    float a2;\n};\n\nlayout(binding = 0, std430) buffer PathSegBuf\n{\n    uint pathseg[];\n} _94;\n\nlayout(binding = 2, std430) buffer TileBuf\n{\n    uint tile[];\n} _233;\n\nlayout(binding = 1, std430) buffer AllocBuf\n{\n    uint n_paths;\n    uint n_pathseg;\n    uint alloc;\n} _528;\n\nuint PathSeg_tag(PathSegRef ref)\n{\n    return _94.pathseg[ref.offset >> uint(2)];\n}\n\nPathStrokeCubic PathStrokeCubic_read(PathStrokeCubicRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _94.pathseg[ix + 0u];\n    uint raw1 = _94.pathseg[ix + 1u];\n    uint raw2 = _94.pathseg[ix + 2u];\n    uint raw3 = _94.pathseg[ix + 3u];\n    uint raw4 = _94.pathseg[ix + 4u];\n    uint raw5 = _94.pathseg[ix + 5u];\n    uint raw6 = _94.pathseg[ix + 6u];\n    uint raw7 = _94.pathseg[ix + 7u];\n    uint raw8 = _94.pathseg[ix + 8u];\n    uint raw9 = _94.pathseg[ix + 9u];\n    uint raw10 = _94.pathseg[ix + 10u];\n    PathStrokeCubic s;\n    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));\n    s.path_ix = raw8;\n    s.stroke = vec2(uintBitsToFloat(raw9), uintBitsToFloat(raw10));\n    return s;\n}\n\nPathStrokeCubic 
PathSeg_StrokeCubic_read(PathSegRef ref)\n{\n    PathStrokeCubicRef param = PathStrokeCubicRef(ref.offset + 4u);\n    return PathStrokeCubic_read(param);\n}\n\nvec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t)\n{\n    float mt = 1.0 - t;\n    return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0)) + (((p2 * (mt * 3.0)) + (p3 * t)) * t)) * t);\n}\n\nfloat approx_parabola_integral(float x)\n{\n    return x * inversesqrt(sqrt(0.3300000131130218505859375 + (0.201511204242706298828125 + ((0.25 * x) * x))));\n}\n\nSubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol)\n{\n    vec2 d01 = p1 - p0;\n    vec2 d12 = p2 - p1;\n    vec2 dd = d01 - d12;\n    float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x);\n    float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross;\n    float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross;\n    float scale = abs(_cross / (length(dd) * (x2 - x0)));\n    float param = x0;\n    float a0 = approx_parabola_integral(param);\n    float param_1 = x2;\n    float a2 = approx_parabola_integral(param_1);\n    float val = 0.0;\n    if (scale < 1000000000.0)\n    {\n        float da = abs(a2 - a0);\n        float sqrt_scale = sqrt(scale);\n        if (sign(x0) == sign(x2))\n        {\n            val = da * sqrt_scale;\n        }\n        else\n        {\n            float xmin = sqrt_tol / sqrt_scale;\n            float param_2 = xmin;\n            val = (sqrt_tol * da) / approx_parabola_integral(param_2);\n        }\n    }\n    return SubdivResult(val, a0, a2);\n}\n\nPath Path_read(PathRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _233.tile[ix + 0u];\n    uint raw1 = _233.tile[ix + 1u];\n    uint raw2 = _233.tile[ix + 2u];\n    Path s;\n    s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n    s.tiles = TileRef(raw2);\n    return s;\n}\n\nfloat approx_parabola_inv_integral(float x)\n{\n    return x * sqrt(0.61000001430511474609375 + 
(0.1520999968051910400390625 + ((0.25 * x) * x)));\n}\n\nvec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t)\n{\n    float mt = 1.0 - t;\n    return (p0 * (mt * mt)) + (((p1 * (mt * 2.0)) + (p2 * t)) * t);\n}\n\nTileRef Tile_index(TileRef ref, uint index)\n{\n    return TileRef(ref.offset + (index * 8u));\n}\n\nvoid TileSeg_write(TileSegRef ref, TileSeg s)\n{\n    uint ix = ref.offset >> uint(2);\n    _233.tile[ix + 0u] = floatBitsToUint(s.start.x);\n    _233.tile[ix + 1u] = floatBitsToUint(s.start.y);\n    _233.tile[ix + 2u] = floatBitsToUint(s.end.x);\n    _233.tile[ix + 3u] = floatBitsToUint(s.end.y);\n    _233.tile[ix + 4u] = floatBitsToUint(s.y_edge);\n    _233.tile[ix + 5u] = s.next.offset;\n}\n\nvoid main()\n{\n    uint element_ix = gl_GlobalInvocationID.x;\n    PathSegRef ref = PathSegRef(element_ix * 48u);\n    uint tag = 0u;\n    if (element_ix < _528.n_pathseg)\n    {\n        PathSegRef param = ref;\n        tag = PathSeg_tag(param);\n    }\n    switch (tag)\n    {\n        case 3u:\n        case 4u:\n        {\n            PathSegRef param_1 = ref;\n            PathStrokeCubic cubic = PathSeg_StrokeCubic_read(param_1);\n            vec2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3;\n            float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);\n            uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u);\n            float val = 0.0;\n            vec2 qp0 = cubic.p0;\n            float _step = 1.0 / float(n_quads);\n            for (uint i = 0u; i < n_quads; i++)\n            {\n                float t = float(i + 1u) * _step;\n                vec2 param_2 = cubic.p0;\n                vec2 param_3 = cubic.p1;\n                vec2 param_4 = cubic.p2;\n                vec2 param_5 = cubic.p3;\n                float param_6 = t;\n                vec2 qp2 = eval_cubic(param_2, param_3, param_4, param_5, param_6);\n                vec2 param_7 = cubic.p0;\n                vec2 
param_8 = cubic.p1;\n                vec2 param_9 = cubic.p2;\n                vec2 param_10 = cubic.p3;\n                float param_11 = t - (0.5 * _step);\n                vec2 qp1 = eval_cubic(param_7, param_8, param_9, param_10, param_11);\n                qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5);\n                vec2 param_12 = qp0;\n                vec2 param_13 = qp1;\n                vec2 param_14 = qp2;\n                float param_15 = 0.4743416607379913330078125;\n                SubdivResult params = estimate_subdiv(param_12, param_13, param_14, param_15);\n                val += params.val;\n                qp0 = qp2;\n            }\n            uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u);\n            uint path_ix = cubic.path_ix;\n            PathRef param_16 = PathRef(path_ix * 12u);\n            Path path = Path_read(param_16);\n            ivec4 bbox = ivec4(path.bbox);\n            vec2 p0 = cubic.p0;\n            qp0 = cubic.p0;\n            float v_step = val / float(n);\n            int n_out = 1;\n            float val_sum = 0.0;\n            vec2 p1;\n            float _899;\n            TileSeg tile_seg;\n            for (uint i_1 = 0u; i_1 < n_quads; i_1++)\n            {\n                float t_1 = float(i_1 + 1u) * _step;\n                vec2 param_17 = cubic.p0;\n                vec2 param_18 = cubic.p1;\n                vec2 param_19 = cubic.p2;\n                vec2 param_20 = cubic.p3;\n                float param_21 = t_1;\n                vec2 qp2_1 = eval_cubic(param_17, param_18, param_19, param_20, param_21);\n                vec2 param_22 = cubic.p0;\n                vec2 param_23 = cubic.p1;\n                vec2 param_24 = cubic.p2;\n                vec2 param_25 = cubic.p3;\n                float param_26 = t_1 - (0.5 * _step);\n                vec2 qp1_1 = eval_cubic(param_22, param_23, param_24, param_25, param_26);\n                qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5);\n             
   vec2 param_27 = qp0;\n                vec2 param_28 = qp1_1;\n                vec2 param_29 = qp2_1;\n                float param_30 = 0.4743416607379913330078125;\n                SubdivResult params_1 = estimate_subdiv(param_27, param_28, param_29, param_30);\n                float param_31 = params_1.a0;\n                float u0 = approx_parabola_inv_integral(param_31);\n                float param_32 = params_1.a2;\n                float u2 = approx_parabola_inv_integral(param_32);\n                float uscale = 1.0 / (u2 - u0);\n                float target = float(n_out) * v_step;\n                for (;;)\n                {\n                    bool _792 = uint(n_out) == n;\n                    bool _802;\n                    if (!_792)\n                    {\n                        _802 = target < (val_sum + params_1.val);\n                    }\n                    else\n                    {\n                        _802 = _792;\n                    }\n                    if (_802)\n                    {\n                        if (uint(n_out) == n)\n                        {\n                            p1 = cubic.p3;\n                        }\n                        else\n                        {\n                            float u = (target - val_sum) / params_1.val;\n                            float a = mix(params_1.a0, params_1.a2, u);\n                            float param_33 = a;\n                            float au = approx_parabola_inv_integral(param_33);\n                            float t_2 = (au - u0) * uscale;\n                            vec2 param_34 = qp0;\n                            vec2 param_35 = qp1_1;\n                            vec2 param_36 = qp2_1;\n                            float param_37 = t_2;\n                            p1 = eval_quad(param_34, param_35, param_36, param_37);\n                        }\n                        float xmin = min(p0.x, p1.x) - cubic.stroke.x;\n                        float xmax 
= max(p0.x, p1.x) + cubic.stroke.x;\n                        float ymin = min(p0.y, p1.y) - cubic.stroke.y;\n                        float ymax = max(p0.y, p1.y) + cubic.stroke.y;\n                        float dx = p1.x - p0.x;\n                        float dy = p1.y - p0.y;\n                        if (abs(dy) < 9.999999717180685365747194737196e-10)\n                        {\n                            _899 = 1000000000.0;\n                        }\n                        else\n                        {\n                            _899 = dx / dy;\n                        }\n                        float invslope = _899;\n                        float c = (cubic.stroke.x + (abs(invslope) * (8.0 + cubic.stroke.y))) * 0.0625;\n                        float b = invslope;\n                        float a_1 = (p0.x - ((p0.y - 8.0) * b)) * 0.0625;\n                        int x0 = int(floor(xmin * 0.0625));\n                        int x1 = int(ceil(xmax * 0.0625));\n                        int y0 = int(floor(ymin * 0.0625));\n                        int y1 = int(ceil(ymax * 0.0625));\n                        x0 = clamp(x0, bbox.x, bbox.z);\n                        y0 = clamp(y0, bbox.y, bbox.w);\n                        x1 = clamp(x1, bbox.x, bbox.z);\n                        y1 = clamp(y1, bbox.y, bbox.w);\n                        float xc = a_1 + (b * float(y0));\n                        int stride = bbox.z - bbox.x;\n                        int base = ((y0 - bbox.y) * stride) - bbox.x;\n                        uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));\n                        uint _1013 = atomicAdd(_528.alloc, n_tile_alloc * 24u);\n                        uint tile_offset = _1013;\n                        for (int y = y0; y < y1; y++)\n                        {\n                            float tile_y0 = float(y * 16);\n                            bool _1029 = tag == 3u;\n                            bool _1039;\n                            if (_1029)\n 
                           {\n                                _1039 = min(p0.y, p1.y) <= tile_y0;\n                            }\n                            else\n                            {\n                                _1039 = _1029;\n                            }\n                            if (_1039)\n                            {\n                                int xray = max(int(ceil(xc - (0.5 * b))), bbox.x);\n                                if (xray < bbox.z)\n                                {\n                                    int backdrop = (p1.y < p0.y) ? 1 : (-1);\n                                    TileRef param_38 = path.tiles;\n                                    uint param_39 = uint(base + xray);\n                                    TileRef tile_ref = Tile_index(param_38, param_39);\n                                    uint tile_el = tile_ref.offset >> uint(2);\n                                    uint _1085 = atomicAdd(_233.tile[tile_el + 1u], uint(backdrop));\n                                }\n                            }\n                            int xx0 = clamp(int(floor(xc - c)), x0, x1);\n                            int xx1 = clamp(int(ceil(xc + c)), x0, x1);\n                            for (int x = xx0; x < xx1; x++)\n                            {\n                                float tile_x0 = float(x * 16);\n                                TileRef param_40 = path.tiles;\n                                uint param_41 = uint(base + x);\n                                TileRef tile_ref_1 = Tile_index(param_40, param_41);\n                                uint tile_el_1 = tile_ref_1.offset >> uint(2);\n                                uint _1136 = atomicExchange(_233.tile[tile_el_1], tile_offset);\n                                uint old = _1136;\n                                tile_seg.start = p0;\n                                tile_seg.end = p1;\n                                float y_edge = 0.0;\n                         
       if (tag == 3u)\n                                {\n                                    y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);\n                                    bool _1168 = (min(p0.x, p1.x) < tile_x0) && (y_edge >= tile_y0);\n                                    bool _1176;\n                                    if (_1168)\n                                    {\n                                        _1176 = y_edge < (tile_y0 + 16.0);\n                                    }\n                                    else\n                                    {\n                                        _1176 = _1168;\n                                    }\n                                    if (_1176)\n                                    {\n                                        if (p0.x > p1.x)\n                                        {\n                                            tile_seg.end = vec2(tile_x0, y_edge);\n                                        }\n                                        else\n                                        {\n                                            tile_seg.start = vec2(tile_x0, y_edge);\n                                        }\n                                    }\n                                    else\n                                    {\n                                        y_edge = 1000000000.0;\n                                    }\n                                }\n                                tile_seg.y_edge = y_edge;\n                                tile_seg.next.offset = old;\n                                TileSegRef param_42 = TileSegRef(tile_offset);\n                                TileSeg param_43 = tile_seg;\n                                TileSeg_write(param_42, param_43);\n                                tile_offset += 24u;\n                            }\n                            xc += b;\n                            base += stride;\n                        }\n              
          n_out++;\n                        target += v_step;\n                        p0 = p1;\n                        continue;\n                    }\n                    else\n                    {\n                        break;\n                    }\n                }\n                val_sum += params_1.val;\n                qp0 = qp2_1;\n            }\n            break;\n        }\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;\n\nstruct PathStrokeCubicRef\n{\n    uint offset;\n};\n\nstruct PathStrokeCubic\n{\n    vec2 p0;\n    vec2 p1;\n    vec2 p2;\n    vec2 p3;\n    uint path_ix;\n    vec2 stroke;\n};\n\nstruct PathSegRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct TileSeg\n{\n    vec2 start;\n    vec2 end;\n    float y_edge;\n    TileSegRef next;\n};\n\nstruct SubdivResult\n{\n    float val;\n    float a0;\n    float a2;\n};\n\nlayout(binding = 0, std430) buffer PathSegBuf\n{\n    uint pathseg[];\n} _94;\n\nlayout(binding = 2, std430) buffer TileBuf\n{\n    uint tile[];\n} _233;\n\nlayout(binding = 1, std430) buffer AllocBuf\n{\n    uint n_paths;\n    uint n_pathseg;\n    uint alloc;\n} _528;\n\nuint PathSeg_tag(PathSegRef ref)\n{\n    return _94.pathseg[ref.offset >> uint(2)];\n}\n\nPathStrokeCubic PathStrokeCubic_read(PathStrokeCubicRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _94.pathseg[ix + 0u];\n    uint raw1 = _94.pathseg[ix + 1u];\n    uint raw2 = _94.pathseg[ix + 2u];\n    uint raw3 = _94.pathseg[ix + 3u];\n    uint raw4 = _94.pathseg[ix + 4u];\n    uint raw5 = _94.pathseg[ix + 5u];\n    uint raw6 = _94.pathseg[ix + 6u];\n    uint raw7 = _94.pathseg[ix + 7u];\n    uint raw8 = _94.pathseg[ix + 8u];\n    uint raw9 = _94.pathseg[ix + 9u];\n    uint raw10 = _94.pathseg[ix + 10u];\n    PathStrokeCubic s;\n    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));\n    s.path_ix = raw8;\n    s.stroke = vec2(uintBitsToFloat(raw9), uintBitsToFloat(raw10));\n    return s;\n}\n\nPathStrokeCubic 
PathSeg_StrokeCubic_read(PathSegRef ref)\n{\n    PathStrokeCubicRef param = PathStrokeCubicRef(ref.offset + 4u);\n    return PathStrokeCubic_read(param);\n}\n\nvec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t)\n{\n    float mt = 1.0 - t;\n    return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0)) + (((p2 * (mt * 3.0)) + (p3 * t)) * t)) * t);\n}\n\nfloat approx_parabola_integral(float x)\n{\n    return x * inversesqrt(sqrt(0.3300000131130218505859375 + (0.201511204242706298828125 + ((0.25 * x) * x))));\n}\n\nSubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol)\n{\n    vec2 d01 = p1 - p0;\n    vec2 d12 = p2 - p1;\n    vec2 dd = d01 - d12;\n    float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x);\n    float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross;\n    float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross;\n    float scale = abs(_cross / (length(dd) * (x2 - x0)));\n    float param = x0;\n    float a0 = approx_parabola_integral(param);\n    float param_1 = x2;\n    float a2 = approx_parabola_integral(param_1);\n    float val = 0.0;\n    if (scale < 1000000000.0)\n    {\n        float da = abs(a2 - a0);\n        float sqrt_scale = sqrt(scale);\n        if (sign(x0) == sign(x2))\n        {\n            val = da * sqrt_scale;\n        }\n        else\n        {\n            float xmin = sqrt_tol / sqrt_scale;\n            float param_2 = xmin;\n            val = (sqrt_tol * da) / approx_parabola_integral(param_2);\n        }\n    }\n    return SubdivResult(val, a0, a2);\n}\n\nPath Path_read(PathRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _233.tile[ix + 0u];\n    uint raw1 = _233.tile[ix + 1u];\n    uint raw2 = _233.tile[ix + 2u];\n    Path s;\n    s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n    s.tiles = TileRef(raw2);\n    return s;\n}\n\nfloat approx_parabola_inv_integral(float x)\n{\n    return x * sqrt(0.61000001430511474609375 + 
(0.1520999968051910400390625 + ((0.25 * x) * x)));\n}\n\nvec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t)\n{\n    float mt = 1.0 - t;\n    return (p0 * (mt * mt)) + (((p1 * (mt * 2.0)) + (p2 * t)) * t);\n}\n\nTileRef Tile_index(TileRef ref, uint index)\n{\n    return TileRef(ref.offset + (index * 8u));\n}\n\nvoid TileSeg_write(TileSegRef ref, TileSeg s)\n{\n    uint ix = ref.offset >> uint(2);\n    _233.tile[ix + 0u] = floatBitsToUint(s.start.x);\n    _233.tile[ix + 1u] = floatBitsToUint(s.start.y);\n    _233.tile[ix + 2u] = floatBitsToUint(s.end.x);\n    _233.tile[ix + 3u] = floatBitsToUint(s.end.y);\n    _233.tile[ix + 4u] = floatBitsToUint(s.y_edge);\n    _233.tile[ix + 5u] = s.next.offset;\n}\n\nvoid main()\n{\n    uint element_ix = gl_GlobalInvocationID.x;\n    PathSegRef ref = PathSegRef(element_ix * 48u);\n    uint tag = 0u;\n    if (element_ix < _528.n_pathseg)\n    {\n        PathSegRef param = ref;\n        tag = PathSeg_tag(param);\n    }\n    switch (tag)\n    {\n        case 3u:\n        case 4u:\n        {\n            PathSegRef param_1 = ref;\n            PathStrokeCubic cubic = PathSeg_StrokeCubic_read(param_1);\n            vec2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3;\n            float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);\n            uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u);\n            float val = 0.0;\n            vec2 qp0 = cubic.p0;\n            float _step = 1.0 / float(n_quads);\n            for (uint i = 0u; i < n_quads; i++)\n            {\n                float t = float(i + 1u) * _step;\n                vec2 param_2 = cubic.p0;\n                vec2 param_3 = cubic.p1;\n                vec2 param_4 = cubic.p2;\n                vec2 param_5 = cubic.p3;\n                float param_6 = t;\n                vec2 qp2 = eval_cubic(param_2, param_3, param_4, param_5, param_6);\n                vec2 param_7 = cubic.p0;\n                vec2 
param_8 = cubic.p1;\n                vec2 param_9 = cubic.p2;\n                vec2 param_10 = cubic.p3;\n                float param_11 = t - (0.5 * _step);\n                vec2 qp1 = eval_cubic(param_7, param_8, param_9, param_10, param_11);\n                qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5);\n                vec2 param_12 = qp0;\n                vec2 param_13 = qp1;\n                vec2 param_14 = qp2;\n                float param_15 = 0.4743416607379913330078125;\n                SubdivResult params = estimate_subdiv(param_12, param_13, param_14, param_15);\n                val += params.val;\n                qp0 = qp2;\n            }\n            uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u);\n            uint path_ix = cubic.path_ix;\n            PathRef param_16 = PathRef(path_ix * 12u);\n            Path path = Path_read(param_16);\n            ivec4 bbox = ivec4(path.bbox);\n            vec2 p0 = cubic.p0;\n            qp0 = cubic.p0;\n            float v_step = val / float(n);\n            int n_out = 1;\n            float val_sum = 0.0;\n            vec2 p1;\n            float _899;\n            TileSeg tile_seg;\n            for (uint i_1 = 0u; i_1 < n_quads; i_1++)\n            {\n                float t_1 = float(i_1 + 1u) * _step;\n                vec2 param_17 = cubic.p0;\n                vec2 param_18 = cubic.p1;\n                vec2 param_19 = cubic.p2;\n                vec2 param_20 = cubic.p3;\n                float param_21 = t_1;\n                vec2 qp2_1 = eval_cubic(param_17, param_18, param_19, param_20, param_21);\n                vec2 param_22 = cubic.p0;\n                vec2 param_23 = cubic.p1;\n                vec2 param_24 = cubic.p2;\n                vec2 param_25 = cubic.p3;\n                float param_26 = t_1 - (0.5 * _step);\n                vec2 qp1_1 = eval_cubic(param_22, param_23, param_24, param_25, param_26);\n                qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5);\n             
   vec2 param_27 = qp0;\n                vec2 param_28 = qp1_1;\n                vec2 param_29 = qp2_1;\n                float param_30 = 0.4743416607379913330078125;\n                SubdivResult params_1 = estimate_subdiv(param_27, param_28, param_29, param_30);\n                float param_31 = params_1.a0;\n                float u0 = approx_parabola_inv_integral(param_31);\n                float param_32 = params_1.a2;\n                float u2 = approx_parabola_inv_integral(param_32);\n                float uscale = 1.0 / (u2 - u0);\n                float target = float(n_out) * v_step;\n                for (;;)\n                {\n                    bool _792 = uint(n_out) == n;\n                    bool _802;\n                    if (!_792)\n                    {\n                        _802 = target < (val_sum + params_1.val);\n                    }\n                    else\n                    {\n                        _802 = _792;\n                    }\n                    if (_802)\n                    {\n                        if (uint(n_out) == n)\n                        {\n                            p1 = cubic.p3;\n                        }\n                        else\n                        {\n                            float u = (target - val_sum) / params_1.val;\n                            float a = mix(params_1.a0, params_1.a2, u);\n                            float param_33 = a;\n                            float au = approx_parabola_inv_integral(param_33);\n                            float t_2 = (au - u0) * uscale;\n                            vec2 param_34 = qp0;\n                            vec2 param_35 = qp1_1;\n                            vec2 param_36 = qp2_1;\n                            float param_37 = t_2;\n                            p1 = eval_quad(param_34, param_35, param_36, param_37);\n                        }\n                        float xmin = min(p0.x, p1.x) - cubic.stroke.x;\n                        float xmax 
= max(p0.x, p1.x) + cubic.stroke.x;\n                        float ymin = min(p0.y, p1.y) - cubic.stroke.y;\n                        float ymax = max(p0.y, p1.y) + cubic.stroke.y;\n                        float dx = p1.x - p0.x;\n                        float dy = p1.y - p0.y;\n                        if (abs(dy) < 9.999999717180685365747194737196e-10)\n                        {\n                            _899 = 1000000000.0;\n                        }\n                        else\n                        {\n                            _899 = dx / dy;\n                        }\n                        float invslope = _899;\n                        float c = (cubic.stroke.x + (abs(invslope) * (16.0 + cubic.stroke.y))) * 0.03125;\n                        float b = invslope;\n                        float a_1 = (p0.x - ((p0.y - 16.0) * b)) * 0.03125;\n                        int x0 = int(floor(xmin * 0.03125));\n                        int x1 = int(ceil(xmax * 0.03125));\n                        int y0 = int(floor(ymin * 0.03125));\n                        int y1 = int(ceil(ymax * 0.03125));\n                        x0 = clamp(x0, bbox.x, bbox.z);\n                        y0 = clamp(y0, bbox.y, bbox.w);\n                        x1 = clamp(x1, bbox.x, bbox.z);\n                        y1 = clamp(y1, bbox.y, bbox.w);\n                        float xc = a_1 + (b * float(y0));\n                        int stride = bbox.z - bbox.x;\n                        int base = ((y0 - bbox.y) * stride) - bbox.x;\n                        uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));\n                        uint _1013 = atomicAdd(_528.alloc, n_tile_alloc * 24u);\n                        uint tile_offset = _1013;\n                        for (int y = y0; y < y1; y++)\n                        {\n                            float tile_y0 = float(y * 32);\n                            bool _1030 = tag == 3u;\n                            bool _1040;\n                            if 
(_1030)\n                            {\n                                _1040 = min(p0.y, p1.y) <= tile_y0;\n                            }\n                            else\n                            {\n                                _1040 = _1030;\n                            }\n                            if (_1040)\n                            {\n                                int xray = max(int(ceil(xc - (0.5 * b))), bbox.x);\n                                if (xray < bbox.z)\n                                {\n                                    int backdrop = (p1.y < p0.y) ? 1 : (-1);\n                                    TileRef param_38 = path.tiles;\n                                    uint param_39 = uint(base + xray);\n                                    TileRef tile_ref = Tile_index(param_38, param_39);\n                                    uint tile_el = tile_ref.offset >> uint(2);\n                                    uint _1086 = atomicAdd(_233.tile[tile_el + 1u], uint(backdrop));\n                                }\n                            }\n                            int xx0 = clamp(int(floor(xc - c)), x0, x1);\n                            int xx1 = clamp(int(ceil(xc + c)), x0, x1);\n                            for (int x = xx0; x < xx1; x++)\n                            {\n                                float tile_x0 = float(x * 32);\n                                TileRef param_40 = path.tiles;\n                                uint param_41 = uint(base + x);\n                                TileRef tile_ref_1 = Tile_index(param_40, param_41);\n                                uint tile_el_1 = tile_ref_1.offset >> uint(2);\n                                uint _1137 = atomicExchange(_233.tile[tile_el_1], tile_offset);\n                                uint old = _1137;\n                                tile_seg.start = p0;\n                                tile_seg.end = p1;\n                                float y_edge = 0.0;\n               
                 if (tag == 3u)\n                                {\n                                    y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);\n                                    bool _1169 = (min(p0.x, p1.x) < tile_x0) && (y_edge >= tile_y0);\n                                    bool _1177;\n                                    if (_1169)\n                                    {\n                                        _1177 = y_edge < (tile_y0 + 32.0);\n                                    }\n                                    else\n                                    {\n                                        _1177 = _1169;\n                                    }\n                                    if (_1177)\n                                    {\n                                        if (p0.x > p1.x)\n                                        {\n                                            tile_seg.end = vec2(tile_x0, y_edge);\n                                        }\n                                        else\n                                        {\n                                            tile_seg.start = vec2(tile_x0, y_edge);\n                                        }\n                                    }\n                                    else\n                                    {\n                                        y_edge = 1000000000.0;\n                                    }\n                                }\n                                tile_seg.y_edge = y_edge;\n                                tile_seg.next.offset = old;\n                                TileSegRef param_42 = TileSegRef(tile_offset);\n                                TileSeg param_43 = tile_seg;\n                                TileSeg_write(param_42, param_43);\n                                tile_offset += 24u;\n                            }\n                            xc += b;\n                            base += stride;\n                        }\n    
                    n_out++;\n                        target += v_step;\n                        p0 = p1;\n                        continue;\n                    }\n                    else\n                    {\n                        break;\n                    }\n                }\n                val_sum += params_1.val;\n                qp0 = qp2_1;\n            }\n            break;\n        }\n    }\n}\n\n",
	}
	shader_stencil_frag = backend.ShaderSources{
		Name:      "stencil.frag",


@@ 690,6 690,6 @@ var (
	}
	shader_tile_alloc_comp = backend.ShaderSources{
		Name:      "tile_alloc.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    uint rgba_color;\n    vec4 bbox;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nlayout(binding = 0, std430) buffer AnnotatedBuf\n{\n    uint annotated[];\n} _49;\n\nlayout(binding = 2, std430) buffer TileBuf\n{\n    uint tile[];\n} _121;\n\nlayout(binding = 1, std430) buffer AllocBuf\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint alloc;\n} _171;\n\nshared uint sh_tile_count[128];\nshared uint sh_tile_alloc;\n\nuint Annotated_tag(AnnotatedRef ref)\n{\n    return _49.annotated[ref.offset >> uint(2)];\n}\n\nAnnoFill AnnoFill_read(AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _49.annotated[ix + 0u];\n    uint raw1 = _49.annotated[ix + 1u];\n    uint raw2 = _49.annotated[ix + 2u];\n    uint raw3 = _49.annotated[ix + 3u];\n    uint raw4 = _49.annotated[ix + 4u];\n    AnnoFill s;\n    s.rgba_color = raw0;\n    s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4));\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(AnnotatedRef ref)\n{\n    AnnoFillRef param = AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param);\n}\n\nvoid Path_write(PathRef ref, Path s)\n{\n    uint ix = ref.offset >> uint(2);\n    _121.tile[ix + 0u] = s.bbox.x | (s.bbox.y << uint(16));\n    _121.tile[ix + 1u] = s.bbox.z | (s.bbox.w << uint(16));\n    _121.tile[ix + 2u] = s.tiles.offset;\n}\n\nvoid main()\n{\n    uint th_ix = gl_LocalInvocationID.x;\n    uint element_ix = gl_GlobalInvocationID.x;\n    PathRef path_ref = PathRef(element_ix * 12u);\n    AnnotatedRef ref = AnnotatedRef(element_ix * 44u);\n    uint tag = 0u;\n    if (element_ix < _171.n_elements)\n    {\n        AnnotatedRef 
param = ref;\n        tag = Annotated_tag(param);\n    }\n    int x0 = 0;\n    int y0 = 0;\n    int x1 = 0;\n    int y1 = 0;\n    switch (tag)\n    {\n        case 6u:\n        case 5u:\n        {\n            AnnotatedRef param_1 = ref;\n            AnnoFill fill = Annotated_Fill_read(param_1);\n            x0 = int(floor(fill.bbox.x * 0.0625));\n            y0 = int(floor(fill.bbox.y * 0.0625));\n            x1 = int(ceil(fill.bbox.z * 0.0625));\n            y1 = int(ceil(fill.bbox.w * 0.0625));\n            break;\n        }\n    }\n    x0 = clamp(x0, 0, 128);\n    y0 = clamp(y0, 0, 96);\n    x1 = clamp(x1, 0, 128);\n    y1 = clamp(y1, 0, 96);\n    Path path;\n    path.bbox = uvec4(uint(x0), uint(y0), uint(x1), uint(y1));\n    uint tile_count = uint((x1 - x0) * (y1 - y0));\n    sh_tile_count[th_ix] = tile_count;\n    for (uint i = 0u; i < 7u; i++)\n    {\n        barrier();\n        if (th_ix >= uint(1 << int(i)))\n        {\n            tile_count += sh_tile_count[th_ix - uint(1 << int(i))];\n        }\n        barrier();\n        sh_tile_count[th_ix] = tile_count;\n    }\n    if (th_ix == 127u)\n    {\n        uint _297 = atomicAdd(_171.alloc, tile_count * 8u);\n        sh_tile_alloc = _297;\n    }\n    barrier();\n    uint alloc_start = sh_tile_alloc;\n    if (element_ix < _171.n_elements)\n    {\n        uint _309;\n        if (th_ix > 0u)\n        {\n            _309 = sh_tile_count[th_ix - 1u];\n        }\n        else\n        {\n            _309 = 0u;\n        }\n        uint tile_subix = _309;\n        path.tiles = TileRef(alloc_start + (8u * tile_subix));\n        PathRef param_2 = path_ref;\n        Path param_3 = path;\n        Path_write(param_2, param_3);\n    }\n    uint total_count = sh_tile_count[127] * 2u;\n    uint start_ix = alloc_start >> uint(2);\n    for (uint i_1 = th_ix; i_1 < total_count; i_1 += 128u)\n    {\n        _121.tile[start_ix + i_1] = 0u;\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    uint rgba_color;\n    vec4 bbox;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nlayout(binding = 0, std430) buffer AnnotatedBuf\n{\n    uint annotated[];\n} _49;\n\nlayout(binding = 2, std430) buffer TileBuf\n{\n    uint tile[];\n} _121;\n\nlayout(binding = 1, std430) buffer AllocBuf\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint alloc;\n} _171;\n\nshared uint sh_tile_count[128];\nshared uint sh_tile_alloc;\n\nuint Annotated_tag(AnnotatedRef ref)\n{\n    return _49.annotated[ref.offset >> uint(2)];\n}\n\nAnnoFill AnnoFill_read(AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _49.annotated[ix + 0u];\n    uint raw1 = _49.annotated[ix + 1u];\n    uint raw2 = _49.annotated[ix + 2u];\n    uint raw3 = _49.annotated[ix + 3u];\n    uint raw4 = _49.annotated[ix + 4u];\n    AnnoFill s;\n    s.rgba_color = raw0;\n    s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4));\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(AnnotatedRef ref)\n{\n    AnnoFillRef param = AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param);\n}\n\nvoid Path_write(PathRef ref, Path s)\n{\n    uint ix = ref.offset >> uint(2);\n    _121.tile[ix + 0u] = s.bbox.x | (s.bbox.y << uint(16));\n    _121.tile[ix + 1u] = s.bbox.z | (s.bbox.w << uint(16));\n    _121.tile[ix + 2u] = s.tiles.offset;\n}\n\nvoid main()\n{\n    uint th_ix = gl_LocalInvocationID.x;\n    uint element_ix = gl_GlobalInvocationID.x;\n    PathRef path_ref = PathRef(element_ix * 12u);\n    AnnotatedRef ref = AnnotatedRef(element_ix * 44u);\n    uint tag = 0u;\n    if (element_ix < _171.n_elements)\n    {\n        AnnotatedRef 
param = ref;\n        tag = Annotated_tag(param);\n    }\n    int x0 = 0;\n    int y0 = 0;\n    int x1 = 0;\n    int y1 = 0;\n    switch (tag)\n    {\n        case 6u:\n        case 5u:\n        {\n            AnnotatedRef param_1 = ref;\n            AnnoFill fill = Annotated_Fill_read(param_1);\n            x0 = int(floor(fill.bbox.x * 0.03125));\n            y0 = int(floor(fill.bbox.y * 0.03125));\n            x1 = int(ceil(fill.bbox.z * 0.03125));\n            y1 = int(ceil(fill.bbox.w * 0.03125));\n            break;\n        }\n    }\n    x0 = clamp(x0, 0, 64);\n    y0 = clamp(y0, 0, 64);\n    x1 = clamp(x1, 0, 64);\n    y1 = clamp(y1, 0, 64);\n    Path path;\n    path.bbox = uvec4(uint(x0), uint(y0), uint(x1), uint(y1));\n    uint tile_count = uint((x1 - x0) * (y1 - y0));\n    sh_tile_count[th_ix] = tile_count;\n    for (uint i = 0u; i < 7u; i++)\n    {\n        barrier();\n        if (th_ix >= uint(1 << int(i)))\n        {\n            tile_count += sh_tile_count[th_ix - uint(1 << int(i))];\n        }\n        barrier();\n        sh_tile_count[th_ix] = tile_count;\n    }\n    if (th_ix == 127u)\n    {\n        uint _296 = atomicAdd(_171.alloc, tile_count * 8u);\n        sh_tile_alloc = _296;\n    }\n    barrier();\n    uint alloc_start = sh_tile_alloc;\n    if (element_ix < _171.n_elements)\n    {\n        uint _308;\n        if (th_ix > 0u)\n        {\n            _308 = sh_tile_count[th_ix - 1u];\n        }\n        else\n        {\n            _308 = 0u;\n        }\n        uint tile_subix = _308;\n        path.tiles = TileRef(alloc_start + (8u * tile_subix));\n        PathRef param_2 = path_ref;\n        Path param_3 = path;\n        Path_write(param_2, param_3);\n    }\n    uint total_count = sh_tile_count[127] * 2u;\n    uint start_ix = alloc_start >> uint(2);\n    for (uint i_1 = th_ix; i_1 < total_count; i_1 += 128u)\n    {\n        _121.tile[start_ix + i_1] = 0u;\n    }\n}\n\n",
	}
)

M gpu/shaders/setup.h => gpu/shaders/setup.h +4 -4
@@ 10,10 10,10 @@

// TODO: compute all of these values instead of hard-coding them.

#define WIDTH_IN_TILES 128
#define HEIGHT_IN_TILES 96
#define TILE_WIDTH_PX 16
#define TILE_HEIGHT_PX 16
#define WIDTH_IN_TILES 64
#define HEIGHT_IN_TILES 64
#define TILE_WIDTH_PX 32
#define TILE_HEIGHT_PX 32

#define PTCL_INITIAL_ALLOC 1024