~eliasnaur/gio

258033d0b05a94eb0d0368274d881b3fbe27a169 — Elias Naur 8 months ago 9cb9e67
gpu: eliminate gaps by ensuring consistent transformations

This is another attempt at fixing the issue described in [0]; the
previous attempt was reverted [1].

This change fixes the issue by tracking resolved transformations and
ensuring that all segments within a path share a single transformation.

[0] https://github.com/linebender/piet-gpu/issues/62
[1] https://gioui.org/commit/2b21b48a7c5c4451deb642c164548a134bb9ad06

Signed-off-by: Elias Naur <mail@eliasnaur.com>
M gpu/compute.go => gpu/compute.go +9 -2
@@ 126,6 126,7 @@ type encoder struct {
	scene    []scene.Command
	npath    int
	npathseg int
	ntrans   int
}

type encodeState struct {


@@ 149,6 150,7 @@ type config struct {
	ptcl_alloc      memAlloc
	pathseg_alloc   memAlloc
	anno_alloc      memAlloc
	trans_alloc     memAlloc
}

// memAlloc matches Alloc in mem.h


@@ 173,9 175,10 @@ const (

	pathSize    = 12
	binSize     = 8
	pathsegSize = 48
	pathsegSize = 52
	annoSize    = 28
	stateSize   = 56
	transSize   = 24
	stateSize   = 60
	stateStride = 4 + 2*stateSize
)



@@ 748,6 751,7 @@ func (g *compute) render(tileDims image.Point) error {
		ptcl_alloc:      malloc(tileDims.X * tileDims.Y * ptclInitialAlloc),
		pathseg_alloc:   malloc(g.enc.npathseg * pathsegSize),
		anno_alloc:      malloc(g.enc.npath * annoSize),
		trans_alloc:     malloc(g.enc.ntrans * transSize),
	}

	numPartitions := (g.enc.numElements() + 127) / 128


@@ 972,6 976,7 @@ func (e *encoder) reset() {
	e.scene = e.scene[:0]
	e.npath = 0
	e.npathseg = 0
	e.ntrans = 0
}

func (e *encoder) numElements() int {


@@ 982,10 987,12 @@ func (e *encoder) append(e2 encoder) {
	e.scene = append(e.scene, e2.scene...)
	e.npath += e2.npath
	e.npathseg += e2.npathseg
	e.ntrans += e2.ntrans
}

// transform records the affine transformation m in the encoded scene.
// It appends the corresponding scene.Transform command to e.scene and
// increments e.ntrans, the running count of transform commands.
func (e *encoder) transform(m f32.Affine2D) {
	cmd := scene.Transform(m)
	e.scene = append(e.scene, cmd)
	e.ntrans++
}

func (e *encoder) lineWidth(width float32) {

M gpu/shaders.go => gpu/shaders.go +7 -7
@@ 7,11 7,11 @@ import "gioui.org/gpu/internal/driver"
var (
	shader_backdrop_comp = driver.ShaderSources{
		Name:      "backdrop.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _72;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _176;\n\nshared uint sh_row_width[128];\nshared Alloc sh_row_alloc[128];\nshared uint sh_row_count[128];\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _72.memory[offset];\n    return v;\n}\n\nuint Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nPath Path_read(Alloc a, PathRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Path s;\n    s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n    s.tiles = TileRef(raw2);\n    return s;\n}\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint 
val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _72.memory[offset] = val;\n}\n\nvoid main()\n{\n    if (_72.mem_error != 0u)\n    {\n        return;\n    }\n    uint th_ix = gl_LocalInvocationID.x;\n    uint element_ix = gl_GlobalInvocationID.x;\n    AnnotatedRef ref = AnnotatedRef(_176.conf.anno_alloc.offset + (element_ix * 28u));\n    uint row_count = 0u;\n    if (element_ix < _176.conf.n_elements)\n    {\n        Alloc param;\n        param.offset = _176.conf.anno_alloc.offset;\n        AnnotatedRef param_1 = ref;\n        uint tag = Annotated_tag(param, param_1);\n        switch (tag)\n        {\n            case 2u:\n            case 3u:\n            case 4u:\n            {\n                PathRef path_ref = PathRef(_176.conf.tile_alloc.offset + (element_ix * 12u));\n                Alloc param_2;\n                param_2.offset = _176.conf.tile_alloc.offset;\n                PathRef param_3 = path_ref;\n                Path path = Path_read(param_2, param_3);\n                sh_row_width[th_ix] = path.bbox.z - path.bbox.x;\n                row_count = path.bbox.w - path.bbox.y;\n                bool _242 = row_count == 1u;\n                bool _248;\n                if (_242)\n                {\n                    _248 = path.bbox.y > 0u;\n                }\n                else\n                {\n                    _248 = _242;\n                }\n                if (_248)\n                {\n                    row_count = 0u;\n                }\n                uint param_4 = path.tiles.offset;\n                uint param_5 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;\n                Alloc path_alloc = new_alloc(param_4, param_5);\n                sh_row_alloc[th_ix] = path_alloc;\n                break;\n            }\n        }\n    }\n    sh_row_count[th_ix] = row_count;\n    for (uint i = 0u; i < 7u; i++)\n    {\n        barrier();\n     
   if (th_ix >= uint(1 << int(i)))\n        {\n            row_count += sh_row_count[th_ix - uint(1 << int(i))];\n        }\n        barrier();\n        sh_row_count[th_ix] = row_count;\n    }\n    barrier();\n    uint total_rows = sh_row_count[127];\n    uint _370;\n    for (uint row = th_ix; row < total_rows; row += 128u)\n    {\n        uint el_ix = 0u;\n        for (uint i_1 = 0u; i_1 < 7u; i_1++)\n        {\n            uint probe = el_ix + uint(64 >> int(i_1));\n            if (row >= sh_row_count[probe - 1u])\n            {\n                el_ix = probe;\n            }\n        }\n        uint width = sh_row_width[el_ix];\n        if (width > 0u)\n        {\n            Alloc tiles_alloc = sh_row_alloc[el_ix];\n            if (el_ix > 0u)\n            {\n                _370 = sh_row_count[el_ix - 1u];\n            }\n            else\n            {\n                _370 = 0u;\n            }\n            uint seq_ix = row - _370;\n            uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);\n            Alloc param_6 = tiles_alloc;\n            uint param_7 = tile_el_ix;\n            uint sum = read_mem(param_6, param_7);\n            for (uint x = 1u; x < width; x++)\n            {\n                tile_el_ix += 2u;\n                Alloc param_8 = tiles_alloc;\n                uint param_9 = tile_el_ix;\n                sum += read_mem(param_8, param_9);\n                Alloc param_10 = tiles_alloc;\n                uint param_11 = tile_el_ix;\n                uint param_12 = sum;\n                write_mem(param_10, param_11, param_12);\n            }\n        }\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n    Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _72;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _176;\n\nshared uint sh_row_width[128];\nshared Alloc sh_row_alloc[128];\nshared uint sh_row_count[128];\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _72.memory[offset];\n    return v;\n}\n\nuint Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nPath Path_read(Alloc a, PathRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Path s;\n    s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n    s.tiles = TileRef(raw2);\n    return s;\n}\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nvoid write_mem(Alloc alloc, 
uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _72.memory[offset] = val;\n}\n\nvoid main()\n{\n    if (_72.mem_error != 0u)\n    {\n        return;\n    }\n    uint th_ix = gl_LocalInvocationID.x;\n    uint element_ix = gl_GlobalInvocationID.x;\n    AnnotatedRef ref = AnnotatedRef(_176.conf.anno_alloc.offset + (element_ix * 28u));\n    uint row_count = 0u;\n    if (element_ix < _176.conf.n_elements)\n    {\n        Alloc param;\n        param.offset = _176.conf.anno_alloc.offset;\n        AnnotatedRef param_1 = ref;\n        uint tag = Annotated_tag(param, param_1);\n        switch (tag)\n        {\n            case 2u:\n            case 3u:\n            case 4u:\n            {\n                PathRef path_ref = PathRef(_176.conf.tile_alloc.offset + (element_ix * 12u));\n                Alloc param_2;\n                param_2.offset = _176.conf.tile_alloc.offset;\n                PathRef param_3 = path_ref;\n                Path path = Path_read(param_2, param_3);\n                sh_row_width[th_ix] = path.bbox.z - path.bbox.x;\n                row_count = path.bbox.w - path.bbox.y;\n                bool _242 = row_count == 1u;\n                bool _248;\n                if (_242)\n                {\n                    _248 = path.bbox.y > 0u;\n                }\n                else\n                {\n                    _248 = _242;\n                }\n                if (_248)\n                {\n                    row_count = 0u;\n                }\n                uint param_4 = path.tiles.offset;\n                uint param_5 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;\n                Alloc path_alloc = new_alloc(param_4, param_5);\n                sh_row_alloc[th_ix] = path_alloc;\n                break;\n            }\n        }\n    }\n    sh_row_count[th_ix] = row_count;\n    for (uint i = 0u; i < 7u; i++)\n    {\n       
 barrier();\n        if (th_ix >= uint(1 << int(i)))\n        {\n            row_count += sh_row_count[th_ix - uint(1 << int(i))];\n        }\n        barrier();\n        sh_row_count[th_ix] = row_count;\n    }\n    barrier();\n    uint total_rows = sh_row_count[127];\n    uint _370;\n    for (uint row = th_ix; row < total_rows; row += 128u)\n    {\n        uint el_ix = 0u;\n        for (uint i_1 = 0u; i_1 < 7u; i_1++)\n        {\n            uint probe = el_ix + uint(64 >> int(i_1));\n            if (row >= sh_row_count[probe - 1u])\n            {\n                el_ix = probe;\n            }\n        }\n        uint width = sh_row_width[el_ix];\n        if (width > 0u)\n        {\n            Alloc tiles_alloc = sh_row_alloc[el_ix];\n            if (el_ix > 0u)\n            {\n                _370 = sh_row_count[el_ix - 1u];\n            }\n            else\n            {\n                _370 = 0u;\n            }\n            uint seq_ix = row - _370;\n            uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);\n            Alloc param_6 = tiles_alloc;\n            uint param_7 = tile_el_ix;\n            uint sum = read_mem(param_6, param_7);\n            for (uint x = 1u; x < width; x++)\n            {\n                tile_el_ix += 2u;\n                Alloc param_8 = tiles_alloc;\n                uint param_9 = tile_el_ix;\n                sum += read_mem(param_8, param_9);\n                Alloc param_10 = tiles_alloc;\n                uint param_11 = tile_el_ix;\n                uint param_12 = sum;\n                write_mem(param_10, param_11, param_12);\n            }\n        }\n    }\n}\n\n",
	}
	shader_binning_comp = driver.ShaderSources{
		Name:      "binning.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct MallocResult\n{\n    Alloc alloc;\n    bool failed;\n};\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    vec4 bbox;\n    uint rgba_color;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct BinInstanceRef\n{\n    uint offset;\n};\n\nstruct BinInstance\n{\n    uint element_ix;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _87;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _254;\n\nshared uint bitmaps[4][128];\nshared bool sh_alloc_failed;\nshared uint count[4][128];\nshared Alloc sh_chunk_alloc[128];\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _87.memory[offset];\n    return v;\n}\n\nuint Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nAnnoFill AnnoFill_read(Alloc a, AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint 
param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    AnnoFill s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.rgba_color = raw4;\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    AnnoFillRef param_1 = AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param, param_1);\n}\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nMallocResult malloc(uint size)\n{\n    MallocResult r;\n    r.failed = false;\n    uint _93 = atomicAdd(_87.mem_offset, size);\n    uint offset = _93;\n    uint param = offset;\n    uint param_1 = size;\n    r.alloc = new_alloc(param, param_1);\n    if ((offset + size) > uint(int(uint(_87.memory.length())) * 4))\n    {\n        r.failed = true;\n        uint _114 = atomicMax(_87.mem_error, 1u);\n        return r;\n    }\n    return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _87.memory[offset] = val;\n}\n\nvoid BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.element_ix;\n    write_mem(param, param_1, param_2);\n}\n\nvoid main()\n{\n    if (_87.mem_error != 0u)\n    {\n        return;\n    }\n    uint my_n_elements = _254.conf.n_elements;\n    uint my_partition = gl_WorkGroupID.x;\n    for (uint i = 0u; i < 4u; i++)\n    {\n        bitmaps[i][gl_LocalInvocationID.x] = 0u;\n    }\n    if (gl_LocalInvocationID.x == 0u)\n    {\n        sh_alloc_failed = false;\n    }\n    barrier();\n    uint element_ix = (my_partition * 128u) + gl_LocalInvocationID.x;\n    AnnotatedRef ref = AnnotatedRef(_254.conf.anno_alloc.offset + (element_ix * 28u));\n    uint tag = 0u;\n    if (element_ix < 
my_n_elements)\n    {\n        Alloc param;\n        param.offset = _254.conf.anno_alloc.offset;\n        AnnotatedRef param_1 = ref;\n        tag = Annotated_tag(param, param_1);\n    }\n    int x0 = 0;\n    int y0 = 0;\n    int x1 = 0;\n    int y1 = 0;\n    switch (tag)\n    {\n        case 2u:\n        case 3u:\n        case 1u:\n        case 4u:\n        case 5u:\n        {\n            Alloc param_2;\n            param_2.offset = _254.conf.anno_alloc.offset;\n            AnnotatedRef param_3 = ref;\n            AnnoFill fill = Annotated_Fill_read(param_2, param_3);\n            x0 = int(floor(fill.bbox.x * 0.001953125));\n            y0 = int(floor(fill.bbox.y * 0.00390625));\n            x1 = int(ceil(fill.bbox.z * 0.001953125));\n            y1 = int(ceil(fill.bbox.w * 0.00390625));\n            break;\n        }\n    }\n    uint width_in_bins = ((_254.conf.width_in_tiles + 16u) - 1u) / 16u;\n    uint height_in_bins = ((_254.conf.height_in_tiles + 8u) - 1u) / 8u;\n    x0 = clamp(x0, 0, int(width_in_bins));\n    x1 = clamp(x1, x0, int(width_in_bins));\n    y0 = clamp(y0, 0, int(height_in_bins));\n    y1 = clamp(y1, y0, int(height_in_bins));\n    if (x0 == x1)\n    {\n        y1 = y0;\n    }\n    int x = x0;\n    int y = y0;\n    uint my_slice = gl_LocalInvocationID.x / 32u;\n    uint my_mask = uint(1 << int(gl_LocalInvocationID.x & 31u));\n    while (y < y1)\n    {\n        uint _438 = atomicOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask);\n        x++;\n        if (x == x1)\n        {\n            x = x0;\n            y++;\n        }\n    }\n    barrier();\n    uint element_count = 0u;\n    for (uint i_1 = 0u; i_1 < 4u; i_1++)\n    {\n        element_count += uint(bitCount(bitmaps[i_1][gl_LocalInvocationID.x]));\n        count[i_1][gl_LocalInvocationID.x] = element_count;\n    }\n    uint param_4 = 0u;\n    uint param_5 = 0u;\n    Alloc chunk_alloc = new_alloc(param_4, param_5);\n    if (element_count != 0u)\n    {\n        uint param_6 = 
element_count * 4u;\n        MallocResult _487 = malloc(param_6);\n        MallocResult chunk = _487;\n        chunk_alloc = chunk.alloc;\n        sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;\n        if (chunk.failed)\n        {\n            sh_alloc_failed = true;\n        }\n    }\n    uint out_ix = (_254.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 128u) + gl_LocalInvocationID.x) * 2u);\n    Alloc param_7;\n    param_7.offset = _254.conf.bin_alloc.offset;\n    uint param_8 = out_ix;\n    uint param_9 = element_count;\n    write_mem(param_7, param_8, param_9);\n    Alloc param_10;\n    param_10.offset = _254.conf.bin_alloc.offset;\n    uint param_11 = out_ix + 1u;\n    uint param_12 = chunk_alloc.offset;\n    write_mem(param_10, param_11, param_12);\n    barrier();\n    if (sh_alloc_failed)\n    {\n        return;\n    }\n    x = x0;\n    y = y0;\n    while (y < y1)\n    {\n        uint bin_ix = (uint(y) * width_in_bins) + uint(x);\n        uint out_mask = bitmaps[my_slice][bin_ix];\n        if ((out_mask & my_mask) != 0u)\n        {\n            uint idx = uint(bitCount(out_mask & (my_mask - 1u)));\n            if (my_slice > 0u)\n            {\n                idx += count[my_slice - 1u][bin_ix];\n            }\n            Alloc out_alloc = sh_chunk_alloc[bin_ix];\n            uint out_offset = out_alloc.offset + (idx * 4u);\n            Alloc param_13 = out_alloc;\n            BinInstanceRef param_14 = BinInstanceRef(out_offset);\n            BinInstance param_15 = BinInstance(element_ix);\n            BinInstance_write(param_13, param_14, param_15);\n        }\n        x++;\n        if (x == x1)\n        {\n            x = x0;\n            y++;\n        }\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct MallocResult\n{\n    Alloc alloc;\n    bool failed;\n};\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    vec4 bbox;\n    uint rgba_color;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct BinInstanceRef\n{\n    uint offset;\n};\n\nstruct BinInstance\n{\n    uint element_ix;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n    Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _87;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _254;\n\nshared uint bitmaps[4][128];\nshared bool sh_alloc_failed;\nshared uint count[4][128];\nshared Alloc sh_chunk_alloc[128];\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _87.memory[offset];\n    return v;\n}\n\nuint Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nAnnoFill AnnoFill_read(Alloc a, AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc 
param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    AnnoFill s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.rgba_color = raw4;\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    AnnoFillRef param_1 = AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param, param_1);\n}\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nMallocResult malloc(uint size)\n{\n    MallocResult r;\n    r.failed = false;\n    uint _93 = atomicAdd(_87.mem_offset, size);\n    uint offset = _93;\n    uint param = offset;\n    uint param_1 = size;\n    r.alloc = new_alloc(param, param_1);\n    if ((offset + size) > uint(int(uint(_87.memory.length())) * 4))\n    {\n        r.failed = true;\n        uint _114 = atomicMax(_87.mem_error, 1u);\n        return r;\n    }\n    return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _87.memory[offset] = val;\n}\n\nvoid BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.element_ix;\n    write_mem(param, param_1, param_2);\n}\n\nvoid main()\n{\n    if (_87.mem_error != 0u)\n    {\n        return;\n    }\n    uint my_n_elements = _254.conf.n_elements;\n    uint my_partition = gl_WorkGroupID.x;\n    for (uint i = 0u; i < 4u; i++)\n    {\n        bitmaps[i][gl_LocalInvocationID.x] = 0u;\n    }\n    if (gl_LocalInvocationID.x == 0u)\n    {\n        sh_alloc_failed = false;\n    }\n    barrier();\n    uint element_ix = (my_partition * 128u) + gl_LocalInvocationID.x;\n    AnnotatedRef ref = AnnotatedRef(_254.conf.anno_alloc.offset + (element_ix * 28u));\n    uint tag = 
0u;\n    if (element_ix < my_n_elements)\n    {\n        Alloc param;\n        param.offset = _254.conf.anno_alloc.offset;\n        AnnotatedRef param_1 = ref;\n        tag = Annotated_tag(param, param_1);\n    }\n    int x0 = 0;\n    int y0 = 0;\n    int x1 = 0;\n    int y1 = 0;\n    switch (tag)\n    {\n        case 2u:\n        case 3u:\n        case 1u:\n        case 4u:\n        case 5u:\n        {\n            Alloc param_2;\n            param_2.offset = _254.conf.anno_alloc.offset;\n            AnnotatedRef param_3 = ref;\n            AnnoFill fill = Annotated_Fill_read(param_2, param_3);\n            x0 = int(floor(fill.bbox.x * 0.001953125));\n            y0 = int(floor(fill.bbox.y * 0.00390625));\n            x1 = int(ceil(fill.bbox.z * 0.001953125));\n            y1 = int(ceil(fill.bbox.w * 0.00390625));\n            break;\n        }\n    }\n    uint width_in_bins = ((_254.conf.width_in_tiles + 16u) - 1u) / 16u;\n    uint height_in_bins = ((_254.conf.height_in_tiles + 8u) - 1u) / 8u;\n    x0 = clamp(x0, 0, int(width_in_bins));\n    x1 = clamp(x1, x0, int(width_in_bins));\n    y0 = clamp(y0, 0, int(height_in_bins));\n    y1 = clamp(y1, y0, int(height_in_bins));\n    if (x0 == x1)\n    {\n        y1 = y0;\n    }\n    int x = x0;\n    int y = y0;\n    uint my_slice = gl_LocalInvocationID.x / 32u;\n    uint my_mask = uint(1 << int(gl_LocalInvocationID.x & 31u));\n    while (y < y1)\n    {\n        uint _438 = atomicOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask);\n        x++;\n        if (x == x1)\n        {\n            x = x0;\n            y++;\n        }\n    }\n    barrier();\n    uint element_count = 0u;\n    for (uint i_1 = 0u; i_1 < 4u; i_1++)\n    {\n        element_count += uint(bitCount(bitmaps[i_1][gl_LocalInvocationID.x]));\n        count[i_1][gl_LocalInvocationID.x] = element_count;\n    }\n    uint param_4 = 0u;\n    uint param_5 = 0u;\n    Alloc chunk_alloc = new_alloc(param_4, param_5);\n    if (element_count != 0u)\n    
{\n        uint param_6 = element_count * 4u;\n        MallocResult _487 = malloc(param_6);\n        MallocResult chunk = _487;\n        chunk_alloc = chunk.alloc;\n        sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;\n        if (chunk.failed)\n        {\n            sh_alloc_failed = true;\n        }\n    }\n    uint out_ix = (_254.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 128u) + gl_LocalInvocationID.x) * 2u);\n    Alloc param_7;\n    param_7.offset = _254.conf.bin_alloc.offset;\n    uint param_8 = out_ix;\n    uint param_9 = element_count;\n    write_mem(param_7, param_8, param_9);\n    Alloc param_10;\n    param_10.offset = _254.conf.bin_alloc.offset;\n    uint param_11 = out_ix + 1u;\n    uint param_12 = chunk_alloc.offset;\n    write_mem(param_10, param_11, param_12);\n    barrier();\n    if (sh_alloc_failed)\n    {\n        return;\n    }\n    x = x0;\n    y = y0;\n    while (y < y1)\n    {\n        uint bin_ix = (uint(y) * width_in_bins) + uint(x);\n        uint out_mask = bitmaps[my_slice][bin_ix];\n        if ((out_mask & my_mask) != 0u)\n        {\n            uint idx = uint(bitCount(out_mask & (my_mask - 1u)));\n            if (my_slice > 0u)\n            {\n                idx += count[my_slice - 1u][bin_ix];\n            }\n            Alloc out_alloc = sh_chunk_alloc[bin_ix];\n            uint out_offset = out_alloc.offset + (idx * 4u);\n            Alloc param_13 = out_alloc;\n            BinInstanceRef param_14 = BinInstanceRef(out_offset);\n            BinInstance param_15 = BinInstance(element_ix);\n            BinInstance_write(param_13, param_14, param_15);\n        }\n        x++;\n        if (x == x1)\n        {\n            x = x0;\n            y++;\n        }\n    }\n}\n\n",
	}
	shader_blit_frag = [...]driver.ShaderSources{
		{


@@ 66,7 66,7 @@ var (
	}
	shader_coarse_comp = driver.ShaderSources{
		Name:      "coarse.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct MallocResult\n{\n    Alloc alloc;\n    bool failed;\n};\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    vec4 bbox;\n    uint rgba_color;\n};\n\nstruct AnnoFillImageRef\n{\n    uint offset;\n};\n\nstruct AnnoFillImage\n{\n    vec4 bbox;\n    uint index;\n    ivec2 offset;\n};\n\nstruct AnnoStrokeRef\n{\n    uint offset;\n};\n\nstruct AnnoStroke\n{\n    vec4 bbox;\n    uint rgba_color;\n    float linewidth;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct BinInstanceRef\n{\n    uint offset;\n};\n\nstruct BinInstance\n{\n    uint element_ix;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct Tile\n{\n    TileSegRef tile;\n    int backdrop;\n};\n\nstruct CmdStrokeRef\n{\n    uint offset;\n};\n\nstruct CmdStroke\n{\n    uint tile_ref;\n    float half_width;\n    uint rgba_color;\n};\n\nstruct CmdFillRef\n{\n    uint offset;\n};\n\nstruct CmdFill\n{\n    uint tile_ref;\n    int backdrop;\n    uint rgba_color;\n};\n\nstruct CmdFillImageRef\n{\n    uint offset;\n};\n\nstruct CmdFillImage\n{\n    uint tile_ref;\n    int backdrop;\n    uint index;\n    ivec2 offset;\n};\n\nstruct CmdBeginClipRef\n{\n    uint offset;\n};\n\nstruct CmdBeginClip\n{\n    uint tile_ref;\n    int backdrop;\n};\n\nstruct CmdBeginSolidClipRef\n{\n    uint offset;\n};\n\nstruct CmdBeginSolidClip\n{\n    float alpha;\n};\n\nstruct CmdEndClipRef\n{\n    uint offset;\n};\n\nstruct CmdEndClip\n{\n    float alpha;\n};\n\nstruct CmdSolidRef\n{\n    uint offset;\n};\n\nstruct CmdSolid\n{\n    uint rgba_color;\n};\n\nstruct CmdSolidImageRef\n{\n    uint offset;\n};\n\nstruct CmdSolidImage\n{\n    uint index;\n    ivec2 offset;\n};\n\nstruct CmdJumpRef\n{\n    uint 
offset;\n};\n\nstruct CmdJump\n{\n    uint new_ref;\n};\n\nstruct CmdRef\n{\n    uint offset;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _307;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _1176;\n\nshared uint sh_bitmaps[4][128];\nshared Alloc sh_part_elements[128];\nshared uint sh_part_count[128];\nshared uint sh_elements[128];\nshared uint sh_tile_stride[128];\nshared uint sh_tile_width[128];\nshared uint sh_tile_x0[128];\nshared uint sh_tile_y0[128];\nshared uint sh_tile_base[128];\nshared uint sh_tile_count[128];\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n    uint param = a.offset + offset;\n    uint param_1 = size;\n    return new_alloc(param, param_1);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _307.memory[offset];\n    return v;\n}\n\nBinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)\n{\n    return BinInstanceRef(ref.offset + (index * 4u));\n}\n\nBinInstance BinInstance_read(Alloc a, BinInstanceRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    BinInstance s;\n    s.element_ix = raw0;\n    return s;\n}\n\nuint Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nPath Path_read(Alloc a, 
PathRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Path s;\n    s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n    s.tiles = TileRef(raw2);\n    return s;\n}\n\nvoid write_tile_alloc(uint el_ix, Alloc a)\n{\n}\n\nAlloc read_tile_alloc(uint el_ix)\n{\n    uint param = 0u;\n    uint param_1 = uint(int(uint(_307.memory.length())) * 4);\n    return new_alloc(param, param_1);\n}\n\nTile Tile_read(Alloc a, TileRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Tile s;\n    s.tile = TileSegRef(raw0);\n    s.backdrop = int(raw1);\n    return s;\n}\n\nAnnoFill AnnoFill_read(Alloc a, AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    AnnoFill s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.rgba_color = raw4;\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    AnnoFillRef param_1 = AnnoFillRef(ref.offset + 4u);\n    return 
AnnoFill_read(param, param_1);\n}\n\nMallocResult malloc(uint size)\n{\n    MallocResult r;\n    r.failed = false;\n    uint _313 = atomicAdd(_307.mem_offset, size);\n    uint offset = _313;\n    uint param = offset;\n    uint param_1 = size;\n    r.alloc = new_alloc(param, param_1);\n    if ((offset + size) > uint(int(uint(_307.memory.length())) * 4))\n    {\n        r.failed = true;\n        uint _334 = atomicMax(_307.mem_error, 1u);\n        return r;\n    }\n    return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _307.memory[offset] = val;\n}\n\nvoid CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.new_ref;\n    write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 9u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdJumpRef param_4 = CmdJumpRef(ref.offset + 4u);\n    CmdJump param_5 = s;\n    CmdJump_write(param_3, param_4, param_5);\n}\n\nbool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit)\n{\n    if (cmd_ref.offset < cmd_limit)\n    {\n        return true;\n    }\n    uint param = 1024u;\n    MallocResult _1136 = malloc(param);\n    MallocResult new_cmd = _1136;\n    if (new_cmd.failed)\n    {\n        return false;\n    }\n    CmdJump jump = CmdJump(new_cmd.alloc.offset);\n    Alloc param_1 = cmd_alloc;\n    CmdRef param_2 = cmd_ref;\n    CmdJump param_3 = jump;\n    Cmd_Jump_write(param_1, param_2, param_3);\n    cmd_alloc = new_cmd.alloc;\n    cmd_ref = CmdRef(cmd_alloc.offset);\n    cmd_limit = (cmd_alloc.offset + 1024u) - 40u;\n    return true;\n}\n\nvoid CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s)\n{\n    uint ix 
= ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.tile_ref;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = uint(s.backdrop);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = s.rgba_color;\n    write_mem(param_6, param_7, param_8);\n}\n\nvoid Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 1u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdFillRef param_4 = CmdFillRef(ref.offset + 4u);\n    CmdFill param_5 = s;\n    CmdFill_write(param_3, param_4, param_5);\n}\n\nvoid CmdSolid_write(Alloc a, CmdSolidRef ref, CmdSolid s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.rgba_color;\n    write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_Solid_write(Alloc a, CmdRef ref, CmdSolid s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 7u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdSolidRef param_4 = CmdSolidRef(ref.offset + 4u);\n    CmdSolid param_5 = s;\n    CmdSolid_write(param_3, param_4, param_5);\n}\n\nAnnoFillImage AnnoFillImage_read(Alloc a, AnnoFillImageRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    Alloc param_10 = a;\n    uint param_11 = ix + 5u;\n    uint 
raw5 = read_mem(param_10, param_11);\n    AnnoFillImage s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.index = raw4;\n    s.offset = ivec2(int(raw5 << uint(16)) >> 16, int(raw5) >> 16);\n    return s;\n}\n\nAnnoFillImage Annotated_FillImage_read(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    AnnoFillImageRef param_1 = AnnoFillImageRef(ref.offset + 4u);\n    return AnnoFillImage_read(param, param_1);\n}\n\nvoid CmdFillImage_write(Alloc a, CmdFillImageRef ref, CmdFillImage s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.tile_ref;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = uint(s.backdrop);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = s.index;\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));\n    write_mem(param_9, param_10, param_11);\n}\n\nvoid Cmd_FillImage_write(Alloc a, CmdRef ref, CmdFillImage s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 2u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdFillImageRef param_4 = CmdFillImageRef(ref.offset + 4u);\n    CmdFillImage param_5 = s;\n    CmdFillImage_write(param_3, param_4, param_5);\n}\n\nvoid CmdSolidImage_write(Alloc a, CmdSolidImageRef ref, CmdSolidImage s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.index;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));\n    write_mem(param_3, param_4, param_5);\n}\n\nvoid Cmd_SolidImage_write(Alloc 
a, CmdRef ref, CmdSolidImage s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 8u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdSolidImageRef param_4 = CmdSolidImageRef(ref.offset + 4u);\n    CmdSolidImage param_5 = s;\n    CmdSolidImage_write(param_3, param_4, param_5);\n}\n\nvoid CmdBeginClip_write(Alloc a, CmdBeginClipRef ref, CmdBeginClip s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.tile_ref;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = uint(s.backdrop);\n    write_mem(param_3, param_4, param_5);\n}\n\nvoid Cmd_BeginClip_write(Alloc a, CmdRef ref, CmdBeginClip s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 3u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdBeginClipRef param_4 = CmdBeginClipRef(ref.offset + 4u);\n    CmdBeginClip param_5 = s;\n    CmdBeginClip_write(param_3, param_4, param_5);\n}\n\nvoid CmdBeginSolidClip_write(Alloc a, CmdBeginSolidClipRef ref, CmdBeginSolidClip s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.alpha);\n    write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_BeginSolidClip_write(Alloc a, CmdRef ref, CmdBeginSolidClip s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 4u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdBeginSolidClipRef param_4 = CmdBeginSolidClipRef(ref.offset + 4u);\n    CmdBeginSolidClip param_5 = s;\n    CmdBeginSolidClip_write(param_3, param_4, param_5);\n}\n\nvoid CmdEndClip_write(Alloc a, CmdEndClipRef ref, CmdEndClip s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.alpha);\n    write_mem(param, 
param_1, param_2);\n}\n\nvoid Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 5u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdEndClipRef param_4 = CmdEndClipRef(ref.offset + 4u);\n    CmdEndClip param_5 = s;\n    CmdEndClip_write(param_3, param_4, param_5);\n}\n\nAnnoStroke AnnoStroke_read(Alloc a, AnnoStrokeRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    Alloc param_10 = a;\n    uint param_11 = ix + 5u;\n    uint raw5 = read_mem(param_10, param_11);\n    AnnoStroke s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.rgba_color = raw4;\n    s.linewidth = uintBitsToFloat(raw5);\n    return s;\n}\n\nAnnoStroke Annotated_Stroke_read(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    AnnoStrokeRef param_1 = AnnoStrokeRef(ref.offset + 4u);\n    return AnnoStroke_read(param, param_1);\n}\n\nvoid CmdStroke_write(Alloc a, CmdStrokeRef ref, CmdStroke s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.tile_ref;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.half_width);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = s.rgba_color;\n    write_mem(param_6, param_7, 
param_8);\n}\n\nvoid Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 6u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdStrokeRef param_4 = CmdStrokeRef(ref.offset + 4u);\n    CmdStroke param_5 = s;\n    CmdStroke_write(param_3, param_4, param_5);\n}\n\nvoid Cmd_End_write(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 0u;\n    write_mem(param, param_1, param_2);\n}\n\nvoid main()\n{\n    if (_307.mem_error != 0u)\n    {\n        return;\n    }\n    uint width_in_bins = ((_1176.conf.width_in_tiles + 16u) - 1u) / 16u;\n    uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;\n    uint partition_ix = 0u;\n    uint n_partitions = ((_1176.conf.n_elements + 128u) - 1u) / 128u;\n    uint th_ix = gl_LocalInvocationID.x;\n    uint bin_tile_x = 16u * gl_WorkGroupID.x;\n    uint bin_tile_y = 8u * gl_WorkGroupID.y;\n    uint tile_x = gl_LocalInvocationID.x % 16u;\n    uint tile_y = gl_LocalInvocationID.x / 16u;\n    uint this_tile_ix = (((bin_tile_y + tile_y) * _1176.conf.width_in_tiles) + bin_tile_x) + tile_x;\n    Alloc param;\n    param.offset = _1176.conf.ptcl_alloc.offset;\n    uint param_1 = this_tile_ix * 1024u;\n    uint param_2 = 1024u;\n    Alloc cmd_alloc = slice_mem(param, param_1, param_2);\n    CmdRef cmd_ref = CmdRef(cmd_alloc.offset);\n    uint cmd_limit = (cmd_ref.offset + 1024u) - 40u;\n    uint clip_depth = 0u;\n    uint clip_zero_depth = 0u;\n    uint clip_one_mask = 0u;\n    uint rd_ix = 0u;\n    uint wr_ix = 0u;\n    uint part_start_ix = 0u;\n    uint ready_ix = 0u;\n    Alloc param_3;\n    Alloc param_5;\n    uint _1452;\n    uint element_ix;\n    AnnotatedRef ref;\n    Alloc param_13;\n    Alloc param_15;\n    uint tile_count;\n    Alloc param_21;\n    uint _1764;\n    bool include_tile;\n    Alloc param_26;\n    Tile tile_1;\n    Alloc param_31;\n    CmdFill 
cmd_fill;\n    Alloc param_45;\n    CmdFillImage cmd_fill_img;\n    CmdSolidImage cmd_solid_img;\n    CmdBeginClip cmd_begin_clip;\n    Alloc param_77;\n    CmdStroke cmd_stroke;\n    while (true)\n    {\n        for (uint i = 0u; i < 4u; i++)\n        {\n            sh_bitmaps[i][th_ix] = 0u;\n        }\n        bool _1504;\n        for (;;)\n        {\n            if ((ready_ix == wr_ix) && (partition_ix < n_partitions))\n            {\n                part_start_ix = ready_ix;\n                uint count = 0u;\n                bool _1302 = th_ix < 128u;\n                bool _1310;\n                if (_1302)\n                {\n                    _1310 = (partition_ix + th_ix) < n_partitions;\n                }\n                else\n                {\n                    _1310 = _1302;\n                }\n                if (_1310)\n                {\n                    uint in_ix = (_1176.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 128u) + bin_ix) * 2u);\n                    param_3.offset = _1176.conf.bin_alloc.offset;\n                    uint param_4 = in_ix;\n                    count = read_mem(param_3, param_4);\n                    param_5.offset = _1176.conf.bin_alloc.offset;\n                    uint param_6 = in_ix + 1u;\n                    uint offset = read_mem(param_5, param_6);\n                    uint param_7 = offset;\n                    uint param_8 = count * 4u;\n                    sh_part_elements[th_ix] = new_alloc(param_7, param_8);\n                }\n                for (uint i_1 = 0u; i_1 < 7u; i_1++)\n                {\n                    if (th_ix < 128u)\n                    {\n                        sh_part_count[th_ix] = count;\n                    }\n                    barrier();\n                    if (th_ix < 128u)\n                    {\n                        if (th_ix >= uint(1 << int(i_1)))\n                        {\n                            count += sh_part_count[th_ix - uint(1 << 
int(i_1))];\n                        }\n                    }\n                    barrier();\n                }\n                if (th_ix < 128u)\n                {\n                    sh_part_count[th_ix] = part_start_ix + count;\n                }\n                barrier();\n                ready_ix = sh_part_count[127];\n                partition_ix += 128u;\n            }\n            uint ix = rd_ix + th_ix;\n            if ((ix >= wr_ix) && (ix < ready_ix))\n            {\n                uint part_ix = 0u;\n                for (uint i_2 = 0u; i_2 < 7u; i_2++)\n                {\n                    uint probe = part_ix + uint(64 >> int(i_2));\n                    if (ix >= sh_part_count[probe - 1u])\n                    {\n                        part_ix = probe;\n                    }\n                }\n                if (part_ix > 0u)\n                {\n                    _1452 = sh_part_count[part_ix - 1u];\n                }\n                else\n                {\n                    _1452 = part_start_ix;\n                }\n                ix -= _1452;\n                Alloc bin_alloc = sh_part_elements[part_ix];\n                BinInstanceRef inst_ref = BinInstanceRef(bin_alloc.offset);\n                BinInstanceRef param_9 = inst_ref;\n                uint param_10 = ix;\n                Alloc param_11 = bin_alloc;\n                BinInstanceRef param_12 = BinInstance_index(param_9, param_10);\n                BinInstance inst = BinInstance_read(param_11, param_12);\n                sh_elements[th_ix] = inst.element_ix;\n            }\n            barrier();\n            wr_ix = min((rd_ix + 128u), ready_ix);\n            bool _1494 = (wr_ix - rd_ix) < 128u;\n            if (_1494)\n            {\n                _1504 = (wr_ix < ready_ix) || (partition_ix < n_partitions);\n            }\n            else\n            {\n                _1504 = _1494;\n            }\n            if (_1504)\n            {\n                continue;\n     
       }\n            else\n            {\n                break;\n            }\n        }\n        uint tag = 0u;\n        if ((th_ix + rd_ix) < wr_ix)\n        {\n            element_ix = sh_elements[th_ix];\n            ref = AnnotatedRef(_1176.conf.anno_alloc.offset + (element_ix * 28u));\n            param_13.offset = _1176.conf.anno_alloc.offset;\n            AnnotatedRef param_14 = ref;\n            tag = Annotated_tag(param_13, param_14);\n        }\n        switch (tag)\n        {\n            case 2u:\n            case 3u:\n            case 1u:\n            case 4u:\n            case 5u:\n            {\n                uint path_ix = element_ix;\n                param_15.offset = _1176.conf.tile_alloc.offset;\n                PathRef param_16 = PathRef(_1176.conf.tile_alloc.offset + (path_ix * 12u));\n                Path path = Path_read(param_15, param_16);\n                uint stride = path.bbox.z - path.bbox.x;\n                sh_tile_stride[th_ix] = stride;\n                int dx = int(path.bbox.x) - int(bin_tile_x);\n                int dy = int(path.bbox.y) - int(bin_tile_y);\n                int x0 = clamp(dx, 0, 16);\n                int y0 = clamp(dy, 0, 8);\n                int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16);\n                int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 8);\n                sh_tile_width[th_ix] = uint(x1 - x0);\n                sh_tile_x0[th_ix] = uint(x0);\n                sh_tile_y0[th_ix] = uint(y0);\n                tile_count = uint(x1 - x0) * uint(y1 - y0);\n                uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u);\n                sh_tile_base[th_ix] = base;\n                uint param_17 = path.tiles.offset;\n                uint param_18 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;\n                Alloc path_alloc = new_alloc(param_17, param_18);\n                uint param_19 = th_ix;\n                Alloc param_20 = 
path_alloc;\n                write_tile_alloc(param_19, param_20);\n                break;\n            }\n            default:\n            {\n                tile_count = 0u;\n                break;\n            }\n        }\n        sh_tile_count[th_ix] = tile_count;\n        for (uint i_3 = 0u; i_3 < 7u; i_3++)\n        {\n            barrier();\n            if (th_ix >= uint(1 << int(i_3)))\n            {\n                tile_count += sh_tile_count[th_ix - uint(1 << int(i_3))];\n            }\n            barrier();\n            sh_tile_count[th_ix] = tile_count;\n        }\n        barrier();\n        uint total_tile_count = sh_tile_count[127];\n        for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 128u)\n        {\n            uint el_ix = 0u;\n            for (uint i_4 = 0u; i_4 < 7u; i_4++)\n            {\n                uint probe_1 = el_ix + uint(64 >> int(i_4));\n                if (ix_1 >= sh_tile_count[probe_1 - 1u])\n                {\n                    el_ix = probe_1;\n                }\n            }\n            AnnotatedRef ref_1 = AnnotatedRef(_1176.conf.anno_alloc.offset + (sh_elements[el_ix] * 28u));\n            param_21.offset = _1176.conf.anno_alloc.offset;\n            AnnotatedRef param_22 = ref_1;\n            uint tag_1 = Annotated_tag(param_21, param_22);\n            if (el_ix > 0u)\n            {\n                _1764 = sh_tile_count[el_ix - 1u];\n            }\n            else\n            {\n                _1764 = 0u;\n            }\n            uint seq_ix = ix_1 - _1764;\n            uint width = sh_tile_width[el_ix];\n            uint x = sh_tile_x0[el_ix] + (seq_ix % width);\n            uint y = sh_tile_y0[el_ix] + (seq_ix / width);\n            if ((tag_1 == 4u) || (tag_1 == 5u))\n            {\n                include_tile = true;\n            }\n            else\n            {\n                uint param_23 = el_ix;\n                Alloc param_24 = read_tile_alloc(param_23);\n                TileRef 
param_25 = TileRef(sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u));\n                Tile tile = Tile_read(param_24, param_25);\n                bool _1825 = tile.tile.offset != 0u;\n                bool _1832;\n                if (!_1825)\n                {\n                    _1832 = tile.backdrop != 0;\n                }\n                else\n                {\n                    _1832 = _1825;\n                }\n                include_tile = _1832;\n            }\n            if (include_tile)\n            {\n                uint el_slice = el_ix / 32u;\n                uint el_mask = uint(1 << int(el_ix & 31u));\n                uint _1853 = atomicOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask);\n            }\n        }\n        barrier();\n        uint slice_ix = 0u;\n        uint bitmap = sh_bitmaps[0][th_ix];\n        while (true)\n        {\n            if (bitmap == 0u)\n            {\n                slice_ix++;\n                if (slice_ix == 4u)\n                {\n                    break;\n                }\n                bitmap = sh_bitmaps[slice_ix][th_ix];\n                if (bitmap == 0u)\n                {\n                    continue;\n                }\n            }\n            uint element_ref_ix = (slice_ix * 32u) + uint(findLSB(bitmap));\n            uint element_ix_1 = sh_elements[element_ref_ix];\n            bitmap &= (bitmap - 1u);\n            ref = AnnotatedRef(_1176.conf.anno_alloc.offset + (element_ix_1 * 28u));\n            param_26.offset = _1176.conf.anno_alloc.offset;\n            AnnotatedRef param_27 = ref;\n            tag = Annotated_tag(param_26, param_27);\n            if (clip_zero_depth == 0u)\n            {\n                switch (tag)\n                {\n                    case 2u:\n                    {\n                        uint param_28 = element_ref_ix;\n                        Alloc param_29 = read_tile_alloc(param_28);\n                        TileRef param_30 = 
TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                        tile_1 = Tile_read(param_29, param_30);\n                        param_31.offset = _1176.conf.anno_alloc.offset;\n                        AnnotatedRef param_32 = ref;\n                        AnnoFill fill = Annotated_Fill_read(param_31, param_32);\n                        Alloc param_33 = cmd_alloc;\n                        CmdRef param_34 = cmd_ref;\n                        uint param_35 = cmd_limit;\n                        bool _1961 = alloc_cmd(param_33, param_34, param_35);\n                        cmd_alloc = param_33;\n                        cmd_ref = param_34;\n                        cmd_limit = param_35;\n                        if (!_1961)\n                        {\n                            break;\n                        }\n                        if (tile_1.tile.offset != 0u)\n                        {\n                            cmd_fill.tile_ref = tile_1.tile.offset;\n                            cmd_fill.backdrop = tile_1.backdrop;\n                            cmd_fill.rgba_color = fill.rgba_color;\n                            Alloc param_36 = cmd_alloc;\n                            CmdRef param_37 = cmd_ref;\n                            CmdFill param_38 = cmd_fill;\n                            Cmd_Fill_write(param_36, param_37, param_38);\n                        }\n                        else\n                        {\n                            Alloc param_39 = cmd_alloc;\n                            CmdRef param_40 = cmd_ref;\n                            CmdSolid param_41 = CmdSolid(fill.rgba_color);\n                            Cmd_Solid_write(param_39, param_40, param_41);\n                        }\n                        cmd_ref.offset += 20u;\n                        break;\n                    }\n                    case 3u:\n                    {\n                        uint param_42 = element_ref_ix;\n  
                      Alloc param_43 = read_tile_alloc(param_42);\n                        TileRef param_44 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                        tile_1 = Tile_read(param_43, param_44);\n                        param_45.offset = _1176.conf.anno_alloc.offset;\n                        AnnotatedRef param_46 = ref;\n                        AnnoFillImage fill_img = Annotated_FillImage_read(param_45, param_46);\n                        Alloc param_47 = cmd_alloc;\n                        CmdRef param_48 = cmd_ref;\n                        uint param_49 = cmd_limit;\n                        bool _2041 = alloc_cmd(param_47, param_48, param_49);\n                        cmd_alloc = param_47;\n                        cmd_ref = param_48;\n                        cmd_limit = param_49;\n                        if (!_2041)\n                        {\n                            break;\n                        }\n                        if (tile_1.tile.offset != 0u)\n                        {\n                            cmd_fill_img.tile_ref = tile_1.tile.offset;\n                            cmd_fill_img.backdrop = tile_1.backdrop;\n                            cmd_fill_img.index = fill_img.index;\n                            cmd_fill_img.offset = fill_img.offset;\n                            Alloc param_50 = cmd_alloc;\n                            CmdRef param_51 = cmd_ref;\n                            CmdFillImage param_52 = cmd_fill_img;\n                            Cmd_FillImage_write(param_50, param_51, param_52);\n                        }\n                        else\n                        {\n                            cmd_solid_img.index = fill_img.index;\n                            cmd_solid_img.offset = fill_img.offset;\n                            Alloc param_53 = cmd_alloc;\n                            CmdRef param_54 = cmd_ref;\n                            CmdSolidImage 
param_55 = cmd_solid_img;\n                            Cmd_SolidImage_write(param_53, param_54, param_55);\n                        }\n                        cmd_ref.offset += 20u;\n                        break;\n                    }\n                    case 4u:\n                    {\n                        uint param_56 = element_ref_ix;\n                        Alloc param_57 = read_tile_alloc(param_56);\n                        TileRef param_58 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                        tile_1 = Tile_read(param_57, param_58);\n                        bool _2115 = tile_1.tile.offset == 0u;\n                        bool _2121;\n                        if (_2115)\n                        {\n                            _2121 = tile_1.backdrop == 0;\n                        }\n                        else\n                        {\n                            _2121 = _2115;\n                        }\n                        if (_2121)\n                        {\n                            clip_zero_depth = clip_depth + 1u;\n                        }\n                        else\n                        {\n                            if ((tile_1.tile.offset == 0u) && (clip_depth < 32u))\n                            {\n                                clip_one_mask |= uint(1 << int(clip_depth));\n                            }\n                            else\n                            {\n                                Alloc param_59 = cmd_alloc;\n                                CmdRef param_60 = cmd_ref;\n                                uint param_61 = cmd_limit;\n                                bool _2147 = alloc_cmd(param_59, param_60, param_61);\n                                cmd_alloc = param_59;\n                                cmd_ref = param_60;\n                                cmd_limit = param_61;\n                                if (!_2147)\n                    
            {\n                                    break;\n                                }\n                                if (tile_1.tile.offset != 0u)\n                                {\n                                    cmd_begin_clip.tile_ref = tile_1.tile.offset;\n                                    cmd_begin_clip.backdrop = tile_1.backdrop;\n                                    Alloc param_62 = cmd_alloc;\n                                    CmdRef param_63 = cmd_ref;\n                                    CmdBeginClip param_64 = cmd_begin_clip;\n                                    Cmd_BeginClip_write(param_62, param_63, param_64);\n                                }\n                                else\n                                {\n                                    float alpha = (tile_1.backdrop == 0) ? 0.0 : 1.0;\n                                    Alloc param_65 = cmd_alloc;\n                                    CmdRef param_66 = cmd_ref;\n                                    CmdBeginSolidClip param_67 = CmdBeginSolidClip(alpha);\n                                    Cmd_BeginSolidClip_write(param_65, param_66, param_67);\n                                }\n                                cmd_ref.offset += 20u;\n                                if (clip_depth < 32u)\n                                {\n                                    clip_one_mask &= uint(~(1 << int(clip_depth)));\n                                }\n                            }\n                        }\n                        clip_depth++;\n                        break;\n                    }\n                    case 5u:\n                    {\n                        clip_depth--;\n                        bool _2210 = clip_depth >= 32u;\n                        bool _2220;\n                        if (!_2210)\n                        {\n                            _2220 = (clip_one_mask & uint(1 << int(clip_depth))) == 0u;\n                        }\n                       
 else\n                        {\n                            _2220 = _2210;\n                        }\n                        if (_2220)\n                        {\n                            Alloc param_68 = cmd_alloc;\n                            CmdRef param_69 = cmd_ref;\n                            uint param_70 = cmd_limit;\n                            bool _2229 = alloc_cmd(param_68, param_69, param_70);\n                            cmd_alloc = param_68;\n                            cmd_ref = param_69;\n                            cmd_limit = param_70;\n                            if (!_2229)\n                            {\n                                break;\n                            }\n                            Alloc param_71 = cmd_alloc;\n                            CmdRef param_72 = cmd_ref;\n                            CmdEndClip param_73 = CmdEndClip(1.0);\n                            Cmd_EndClip_write(param_71, param_72, param_73);\n                            cmd_ref.offset += 20u;\n                        }\n                        break;\n                    }\n                    case 1u:\n                    {\n                        uint param_74 = element_ref_ix;\n                        Alloc param_75 = read_tile_alloc(param_74);\n                        TileRef param_76 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                        tile_1 = Tile_read(param_75, param_76);\n                        param_77.offset = _1176.conf.anno_alloc.offset;\n                        AnnotatedRef param_78 = ref;\n                        AnnoStroke stroke = Annotated_Stroke_read(param_77, param_78);\n                        cmd_stroke.tile_ref = tile_1.tile.offset;\n                        cmd_stroke.half_width = 0.5 * stroke.linewidth;\n                        cmd_stroke.rgba_color = stroke.rgba_color;\n                        Alloc param_79 = cmd_alloc;\n                        
CmdRef param_80 = cmd_ref;\n                        uint param_81 = cmd_limit;\n                        bool _2295 = alloc_cmd(param_79, param_80, param_81);\n                        cmd_alloc = param_79;\n                        cmd_ref = param_80;\n                        cmd_limit = param_81;\n                        if (!_2295)\n                        {\n                            break;\n                        }\n                        Alloc param_82 = cmd_alloc;\n                        CmdRef param_83 = cmd_ref;\n                        CmdStroke param_84 = cmd_stroke;\n                        Cmd_Stroke_write(param_82, param_83, param_84);\n                        cmd_ref.offset += 20u;\n                        break;\n                    }\n                }\n            }\n            else\n            {\n                switch (tag)\n                {\n                    case 4u:\n                    {\n                        clip_depth++;\n                        break;\n                    }\n                    case 5u:\n                    {\n                        if (clip_depth == clip_zero_depth)\n                        {\n                            clip_zero_depth = 0u;\n                        }\n                        clip_depth--;\n                        break;\n                    }\n                }\n            }\n        }\n        barrier();\n        rd_ix += 128u;\n        if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions))\n        {\n            break;\n        }\n    }\n    bool _2350 = (bin_tile_x + tile_x) < _1176.conf.width_in_tiles;\n    bool _2359;\n    if (_2350)\n    {\n        _2359 = (bin_tile_y + tile_y) < _1176.conf.height_in_tiles;\n    }\n    else\n    {\n        _2359 = _2350;\n    }\n    if (_2359)\n    {\n        Alloc param_85 = cmd_alloc;\n        CmdRef param_86 = cmd_ref;\n        Cmd_End_write(param_85, param_86);\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct MallocResult\n{\n    Alloc alloc;\n    bool failed;\n};\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    vec4 bbox;\n    uint rgba_color;\n};\n\nstruct AnnoFillImageRef\n{\n    uint offset;\n};\n\nstruct AnnoFillImage\n{\n    vec4 bbox;\n    uint index;\n    ivec2 offset;\n};\n\nstruct AnnoStrokeRef\n{\n    uint offset;\n};\n\nstruct AnnoStroke\n{\n    vec4 bbox;\n    uint rgba_color;\n    float linewidth;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct BinInstanceRef\n{\n    uint offset;\n};\n\nstruct BinInstance\n{\n    uint element_ix;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct Tile\n{\n    TileSegRef tile;\n    int backdrop;\n};\n\nstruct CmdStrokeRef\n{\n    uint offset;\n};\n\nstruct CmdStroke\n{\n    uint tile_ref;\n    float half_width;\n    uint rgba_color;\n};\n\nstruct CmdFillRef\n{\n    uint offset;\n};\n\nstruct CmdFill\n{\n    uint tile_ref;\n    int backdrop;\n    uint rgba_color;\n};\n\nstruct CmdFillImageRef\n{\n    uint offset;\n};\n\nstruct CmdFillImage\n{\n    uint tile_ref;\n    int backdrop;\n    uint index;\n    ivec2 offset;\n};\n\nstruct CmdBeginClipRef\n{\n    uint offset;\n};\n\nstruct CmdBeginClip\n{\n    uint tile_ref;\n    int backdrop;\n};\n\nstruct CmdBeginSolidClipRef\n{\n    uint offset;\n};\n\nstruct CmdBeginSolidClip\n{\n    float alpha;\n};\n\nstruct CmdEndClipRef\n{\n    uint offset;\n};\n\nstruct CmdEndClip\n{\n    float alpha;\n};\n\nstruct CmdSolidRef\n{\n    uint offset;\n};\n\nstruct CmdSolid\n{\n    uint rgba_color;\n};\n\nstruct CmdSolidImageRef\n{\n    uint offset;\n};\n\nstruct CmdSolidImage\n{\n    uint index;\n    ivec2 offset;\n};\n\nstruct CmdJumpRef\n{\n    uint 
offset;\n};\n\nstruct CmdJump\n{\n    uint new_ref;\n};\n\nstruct CmdRef\n{\n    uint offset;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n    Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _307;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _1176;\n\nshared uint sh_bitmaps[4][128];\nshared Alloc sh_part_elements[128];\nshared uint sh_part_count[128];\nshared uint sh_elements[128];\nshared uint sh_tile_stride[128];\nshared uint sh_tile_width[128];\nshared uint sh_tile_x0[128];\nshared uint sh_tile_y0[128];\nshared uint sh_tile_base[128];\nshared uint sh_tile_count[128];\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n    uint param = a.offset + offset;\n    uint param_1 = size;\n    return new_alloc(param, param_1);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _307.memory[offset];\n    return v;\n}\n\nBinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)\n{\n    return BinInstanceRef(ref.offset + (index * 4u));\n}\n\nBinInstance BinInstance_read(Alloc a, BinInstanceRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    BinInstance s;\n    s.element_ix = raw0;\n    return s;\n}\n\nuint Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, 
param_1);\n}\n\nPath Path_read(Alloc a, PathRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Path s;\n    s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n    s.tiles = TileRef(raw2);\n    return s;\n}\n\nvoid write_tile_alloc(uint el_ix, Alloc a)\n{\n}\n\nAlloc read_tile_alloc(uint el_ix)\n{\n    uint param = 0u;\n    uint param_1 = uint(int(uint(_307.memory.length())) * 4);\n    return new_alloc(param, param_1);\n}\n\nTile Tile_read(Alloc a, TileRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Tile s;\n    s.tile = TileSegRef(raw0);\n    s.backdrop = int(raw1);\n    return s;\n}\n\nAnnoFill AnnoFill_read(Alloc a, AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    AnnoFill s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.rgba_color = raw4;\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    AnnoFillRef param_1 = 
AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param, param_1);\n}\n\nMallocResult malloc(uint size)\n{\n    MallocResult r;\n    r.failed = false;\n    uint _313 = atomicAdd(_307.mem_offset, size);\n    uint offset = _313;\n    uint param = offset;\n    uint param_1 = size;\n    r.alloc = new_alloc(param, param_1);\n    if ((offset + size) > uint(int(uint(_307.memory.length())) * 4))\n    {\n        r.failed = true;\n        uint _334 = atomicMax(_307.mem_error, 1u);\n        return r;\n    }\n    return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _307.memory[offset] = val;\n}\n\nvoid CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.new_ref;\n    write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 9u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdJumpRef param_4 = CmdJumpRef(ref.offset + 4u);\n    CmdJump param_5 = s;\n    CmdJump_write(param_3, param_4, param_5);\n}\n\nbool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit)\n{\n    if (cmd_ref.offset < cmd_limit)\n    {\n        return true;\n    }\n    uint param = 1024u;\n    MallocResult _1136 = malloc(param);\n    MallocResult new_cmd = _1136;\n    if (new_cmd.failed)\n    {\n        return false;\n    }\n    CmdJump jump = CmdJump(new_cmd.alloc.offset);\n    Alloc param_1 = cmd_alloc;\n    CmdRef param_2 = cmd_ref;\n    CmdJump param_3 = jump;\n    Cmd_Jump_write(param_1, param_2, param_3);\n    cmd_alloc = new_cmd.alloc;\n    cmd_ref = CmdRef(cmd_alloc.offset);\n    cmd_limit = (cmd_alloc.offset + 1024u) - 40u;\n    return true;\n}\n\nvoid CmdFill_write(Alloc a, 
CmdFillRef ref, CmdFill s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.tile_ref;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = uint(s.backdrop);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = s.rgba_color;\n    write_mem(param_6, param_7, param_8);\n}\n\nvoid Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 1u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdFillRef param_4 = CmdFillRef(ref.offset + 4u);\n    CmdFill param_5 = s;\n    CmdFill_write(param_3, param_4, param_5);\n}\n\nvoid CmdSolid_write(Alloc a, CmdSolidRef ref, CmdSolid s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.rgba_color;\n    write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_Solid_write(Alloc a, CmdRef ref, CmdSolid s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 7u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdSolidRef param_4 = CmdSolidRef(ref.offset + 4u);\n    CmdSolid param_5 = s;\n    CmdSolid_write(param_3, param_4, param_5);\n}\n\nAnnoFillImage AnnoFillImage_read(Alloc a, AnnoFillImageRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    Alloc param_10 = 
a;\n    uint param_11 = ix + 5u;\n    uint raw5 = read_mem(param_10, param_11);\n    AnnoFillImage s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.index = raw4;\n    s.offset = ivec2(int(raw5 << uint(16)) >> 16, int(raw5) >> 16);\n    return s;\n}\n\nAnnoFillImage Annotated_FillImage_read(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    AnnoFillImageRef param_1 = AnnoFillImageRef(ref.offset + 4u);\n    return AnnoFillImage_read(param, param_1);\n}\n\nvoid CmdFillImage_write(Alloc a, CmdFillImageRef ref, CmdFillImage s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.tile_ref;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = uint(s.backdrop);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = s.index;\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));\n    write_mem(param_9, param_10, param_11);\n}\n\nvoid Cmd_FillImage_write(Alloc a, CmdRef ref, CmdFillImage s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 2u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdFillImageRef param_4 = CmdFillImageRef(ref.offset + 4u);\n    CmdFillImage param_5 = s;\n    CmdFillImage_write(param_3, param_4, param_5);\n}\n\nvoid CmdSolidImage_write(Alloc a, CmdSolidImageRef ref, CmdSolidImage s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.index;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));\n    write_mem(param_3, param_4, 
param_5);\n}\n\nvoid Cmd_SolidImage_write(Alloc a, CmdRef ref, CmdSolidImage s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 8u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdSolidImageRef param_4 = CmdSolidImageRef(ref.offset + 4u);\n    CmdSolidImage param_5 = s;\n    CmdSolidImage_write(param_3, param_4, param_5);\n}\n\nvoid CmdBeginClip_write(Alloc a, CmdBeginClipRef ref, CmdBeginClip s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.tile_ref;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = uint(s.backdrop);\n    write_mem(param_3, param_4, param_5);\n}\n\nvoid Cmd_BeginClip_write(Alloc a, CmdRef ref, CmdBeginClip s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 3u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdBeginClipRef param_4 = CmdBeginClipRef(ref.offset + 4u);\n    CmdBeginClip param_5 = s;\n    CmdBeginClip_write(param_3, param_4, param_5);\n}\n\nvoid CmdBeginSolidClip_write(Alloc a, CmdBeginSolidClipRef ref, CmdBeginSolidClip s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.alpha);\n    write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_BeginSolidClip_write(Alloc a, CmdRef ref, CmdBeginSolidClip s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 4u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdBeginSolidClipRef param_4 = CmdBeginSolidClipRef(ref.offset + 4u);\n    CmdBeginSolidClip param_5 = s;\n    CmdBeginSolidClip_write(param_3, param_4, param_5);\n}\n\nvoid CmdEndClip_write(Alloc a, CmdEndClipRef ref, CmdEndClip s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = 
floatBitsToUint(s.alpha);\n    write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 5u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdEndClipRef param_4 = CmdEndClipRef(ref.offset + 4u);\n    CmdEndClip param_5 = s;\n    CmdEndClip_write(param_3, param_4, param_5);\n}\n\nAnnoStroke AnnoStroke_read(Alloc a, AnnoStrokeRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    Alloc param_10 = a;\n    uint param_11 = ix + 5u;\n    uint raw5 = read_mem(param_10, param_11);\n    AnnoStroke s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.rgba_color = raw4;\n    s.linewidth = uintBitsToFloat(raw5);\n    return s;\n}\n\nAnnoStroke Annotated_Stroke_read(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    AnnoStrokeRef param_1 = AnnoStrokeRef(ref.offset + 4u);\n    return AnnoStroke_read(param, param_1);\n}\n\nvoid CmdStroke_write(Alloc a, CmdStrokeRef ref, CmdStroke s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.tile_ref;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.half_width);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = s.rgba_color;\n    
write_mem(param_6, param_7, param_8);\n}\n\nvoid Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 6u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    CmdStrokeRef param_4 = CmdStrokeRef(ref.offset + 4u);\n    CmdStroke param_5 = s;\n    CmdStroke_write(param_3, param_4, param_5);\n}\n\nvoid Cmd_End_write(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 0u;\n    write_mem(param, param_1, param_2);\n}\n\nvoid main()\n{\n    if (_307.mem_error != 0u)\n    {\n        return;\n    }\n    uint width_in_bins = ((_1176.conf.width_in_tiles + 16u) - 1u) / 16u;\n    uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;\n    uint partition_ix = 0u;\n    uint n_partitions = ((_1176.conf.n_elements + 128u) - 1u) / 128u;\n    uint th_ix = gl_LocalInvocationID.x;\n    uint bin_tile_x = 16u * gl_WorkGroupID.x;\n    uint bin_tile_y = 8u * gl_WorkGroupID.y;\n    uint tile_x = gl_LocalInvocationID.x % 16u;\n    uint tile_y = gl_LocalInvocationID.x / 16u;\n    uint this_tile_ix = (((bin_tile_y + tile_y) * _1176.conf.width_in_tiles) + bin_tile_x) + tile_x;\n    Alloc param;\n    param.offset = _1176.conf.ptcl_alloc.offset;\n    uint param_1 = this_tile_ix * 1024u;\n    uint param_2 = 1024u;\n    Alloc cmd_alloc = slice_mem(param, param_1, param_2);\n    CmdRef cmd_ref = CmdRef(cmd_alloc.offset);\n    uint cmd_limit = (cmd_ref.offset + 1024u) - 40u;\n    uint clip_depth = 0u;\n    uint clip_zero_depth = 0u;\n    uint clip_one_mask = 0u;\n    uint rd_ix = 0u;\n    uint wr_ix = 0u;\n    uint part_start_ix = 0u;\n    uint ready_ix = 0u;\n    Alloc param_3;\n    Alloc param_5;\n    uint _1452;\n    uint element_ix;\n    AnnotatedRef ref;\n    Alloc param_13;\n    Alloc param_15;\n    uint tile_count;\n    Alloc param_21;\n    uint _1764;\n    bool include_tile;\n    Alloc param_26;\n    Tile tile_1;\n    
Alloc param_31;\n    CmdFill cmd_fill;\n    Alloc param_45;\n    CmdFillImage cmd_fill_img;\n    CmdSolidImage cmd_solid_img;\n    CmdBeginClip cmd_begin_clip;\n    Alloc param_77;\n    CmdStroke cmd_stroke;\n    while (true)\n    {\n        for (uint i = 0u; i < 4u; i++)\n        {\n            sh_bitmaps[i][th_ix] = 0u;\n        }\n        bool _1504;\n        for (;;)\n        {\n            if ((ready_ix == wr_ix) && (partition_ix < n_partitions))\n            {\n                part_start_ix = ready_ix;\n                uint count = 0u;\n                bool _1302 = th_ix < 128u;\n                bool _1310;\n                if (_1302)\n                {\n                    _1310 = (partition_ix + th_ix) < n_partitions;\n                }\n                else\n                {\n                    _1310 = _1302;\n                }\n                if (_1310)\n                {\n                    uint in_ix = (_1176.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 128u) + bin_ix) * 2u);\n                    param_3.offset = _1176.conf.bin_alloc.offset;\n                    uint param_4 = in_ix;\n                    count = read_mem(param_3, param_4);\n                    param_5.offset = _1176.conf.bin_alloc.offset;\n                    uint param_6 = in_ix + 1u;\n                    uint offset = read_mem(param_5, param_6);\n                    uint param_7 = offset;\n                    uint param_8 = count * 4u;\n                    sh_part_elements[th_ix] = new_alloc(param_7, param_8);\n                }\n                for (uint i_1 = 0u; i_1 < 7u; i_1++)\n                {\n                    if (th_ix < 128u)\n                    {\n                        sh_part_count[th_ix] = count;\n                    }\n                    barrier();\n                    if (th_ix < 128u)\n                    {\n                        if (th_ix >= uint(1 << int(i_1)))\n                        {\n                            count += 
sh_part_count[th_ix - uint(1 << int(i_1))];\n                        }\n                    }\n                    barrier();\n                }\n                if (th_ix < 128u)\n                {\n                    sh_part_count[th_ix] = part_start_ix + count;\n                }\n                barrier();\n                ready_ix = sh_part_count[127];\n                partition_ix += 128u;\n            }\n            uint ix = rd_ix + th_ix;\n            if ((ix >= wr_ix) && (ix < ready_ix))\n            {\n                uint part_ix = 0u;\n                for (uint i_2 = 0u; i_2 < 7u; i_2++)\n                {\n                    uint probe = part_ix + uint(64 >> int(i_2));\n                    if (ix >= sh_part_count[probe - 1u])\n                    {\n                        part_ix = probe;\n                    }\n                }\n                if (part_ix > 0u)\n                {\n                    _1452 = sh_part_count[part_ix - 1u];\n                }\n                else\n                {\n                    _1452 = part_start_ix;\n                }\n                ix -= _1452;\n                Alloc bin_alloc = sh_part_elements[part_ix];\n                BinInstanceRef inst_ref = BinInstanceRef(bin_alloc.offset);\n                BinInstanceRef param_9 = inst_ref;\n                uint param_10 = ix;\n                Alloc param_11 = bin_alloc;\n                BinInstanceRef param_12 = BinInstance_index(param_9, param_10);\n                BinInstance inst = BinInstance_read(param_11, param_12);\n                sh_elements[th_ix] = inst.element_ix;\n            }\n            barrier();\n            wr_ix = min((rd_ix + 128u), ready_ix);\n            bool _1494 = (wr_ix - rd_ix) < 128u;\n            if (_1494)\n            {\n                _1504 = (wr_ix < ready_ix) || (partition_ix < n_partitions);\n            }\n            else\n            {\n                _1504 = _1494;\n            }\n            if (_1504)\n            
{\n                continue;\n            }\n            else\n            {\n                break;\n            }\n        }\n        uint tag = 0u;\n        if ((th_ix + rd_ix) < wr_ix)\n        {\n            element_ix = sh_elements[th_ix];\n            ref = AnnotatedRef(_1176.conf.anno_alloc.offset + (element_ix * 28u));\n            param_13.offset = _1176.conf.anno_alloc.offset;\n            AnnotatedRef param_14 = ref;\n            tag = Annotated_tag(param_13, param_14);\n        }\n        switch (tag)\n        {\n            case 2u:\n            case 3u:\n            case 1u:\n            case 4u:\n            case 5u:\n            {\n                uint path_ix = element_ix;\n                param_15.offset = _1176.conf.tile_alloc.offset;\n                PathRef param_16 = PathRef(_1176.conf.tile_alloc.offset + (path_ix * 12u));\n                Path path = Path_read(param_15, param_16);\n                uint stride = path.bbox.z - path.bbox.x;\n                sh_tile_stride[th_ix] = stride;\n                int dx = int(path.bbox.x) - int(bin_tile_x);\n                int dy = int(path.bbox.y) - int(bin_tile_y);\n                int x0 = clamp(dx, 0, 16);\n                int y0 = clamp(dy, 0, 8);\n                int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16);\n                int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 8);\n                sh_tile_width[th_ix] = uint(x1 - x0);\n                sh_tile_x0[th_ix] = uint(x0);\n                sh_tile_y0[th_ix] = uint(y0);\n                tile_count = uint(x1 - x0) * uint(y1 - y0);\n                uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u);\n                sh_tile_base[th_ix] = base;\n                uint param_17 = path.tiles.offset;\n                uint param_18 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;\n                Alloc path_alloc = new_alloc(param_17, param_18);\n                uint param_19 = th_ix;\n         
       Alloc param_20 = path_alloc;\n                write_tile_alloc(param_19, param_20);\n                break;\n            }\n            default:\n            {\n                tile_count = 0u;\n                break;\n            }\n        }\n        sh_tile_count[th_ix] = tile_count;\n        for (uint i_3 = 0u; i_3 < 7u; i_3++)\n        {\n            barrier();\n            if (th_ix >= uint(1 << int(i_3)))\n            {\n                tile_count += sh_tile_count[th_ix - uint(1 << int(i_3))];\n            }\n            barrier();\n            sh_tile_count[th_ix] = tile_count;\n        }\n        barrier();\n        uint total_tile_count = sh_tile_count[127];\n        for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 128u)\n        {\n            uint el_ix = 0u;\n            for (uint i_4 = 0u; i_4 < 7u; i_4++)\n            {\n                uint probe_1 = el_ix + uint(64 >> int(i_4));\n                if (ix_1 >= sh_tile_count[probe_1 - 1u])\n                {\n                    el_ix = probe_1;\n                }\n            }\n            AnnotatedRef ref_1 = AnnotatedRef(_1176.conf.anno_alloc.offset + (sh_elements[el_ix] * 28u));\n            param_21.offset = _1176.conf.anno_alloc.offset;\n            AnnotatedRef param_22 = ref_1;\n            uint tag_1 = Annotated_tag(param_21, param_22);\n            if (el_ix > 0u)\n            {\n                _1764 = sh_tile_count[el_ix - 1u];\n            }\n            else\n            {\n                _1764 = 0u;\n            }\n            uint seq_ix = ix_1 - _1764;\n            uint width = sh_tile_width[el_ix];\n            uint x = sh_tile_x0[el_ix] + (seq_ix % width);\n            uint y = sh_tile_y0[el_ix] + (seq_ix / width);\n            if ((tag_1 == 4u) || (tag_1 == 5u))\n            {\n                include_tile = true;\n            }\n            else\n            {\n                uint param_23 = el_ix;\n                Alloc param_24 = read_tile_alloc(param_23);\n     
           TileRef param_25 = TileRef(sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u));\n                Tile tile = Tile_read(param_24, param_25);\n                bool _1825 = tile.tile.offset != 0u;\n                bool _1832;\n                if (!_1825)\n                {\n                    _1832 = tile.backdrop != 0;\n                }\n                else\n                {\n                    _1832 = _1825;\n                }\n                include_tile = _1832;\n            }\n            if (include_tile)\n            {\n                uint el_slice = el_ix / 32u;\n                uint el_mask = uint(1 << int(el_ix & 31u));\n                uint _1853 = atomicOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask);\n            }\n        }\n        barrier();\n        uint slice_ix = 0u;\n        uint bitmap = sh_bitmaps[0][th_ix];\n        while (true)\n        {\n            if (bitmap == 0u)\n            {\n                slice_ix++;\n                if (slice_ix == 4u)\n                {\n                    break;\n                }\n                bitmap = sh_bitmaps[slice_ix][th_ix];\n                if (bitmap == 0u)\n                {\n                    continue;\n                }\n            }\n            uint element_ref_ix = (slice_ix * 32u) + uint(findLSB(bitmap));\n            uint element_ix_1 = sh_elements[element_ref_ix];\n            bitmap &= (bitmap - 1u);\n            ref = AnnotatedRef(_1176.conf.anno_alloc.offset + (element_ix_1 * 28u));\n            param_26.offset = _1176.conf.anno_alloc.offset;\n            AnnotatedRef param_27 = ref;\n            tag = Annotated_tag(param_26, param_27);\n            if (clip_zero_depth == 0u)\n            {\n                switch (tag)\n                {\n                    case 2u:\n                    {\n                        uint param_28 = element_ref_ix;\n                        Alloc param_29 = read_tile_alloc(param_28);\n                        TileRef 
param_30 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                        tile_1 = Tile_read(param_29, param_30);\n                        param_31.offset = _1176.conf.anno_alloc.offset;\n                        AnnotatedRef param_32 = ref;\n                        AnnoFill fill = Annotated_Fill_read(param_31, param_32);\n                        Alloc param_33 = cmd_alloc;\n                        CmdRef param_34 = cmd_ref;\n                        uint param_35 = cmd_limit;\n                        bool _1961 = alloc_cmd(param_33, param_34, param_35);\n                        cmd_alloc = param_33;\n                        cmd_ref = param_34;\n                        cmd_limit = param_35;\n                        if (!_1961)\n                        {\n                            break;\n                        }\n                        if (tile_1.tile.offset != 0u)\n                        {\n                            cmd_fill.tile_ref = tile_1.tile.offset;\n                            cmd_fill.backdrop = tile_1.backdrop;\n                            cmd_fill.rgba_color = fill.rgba_color;\n                            Alloc param_36 = cmd_alloc;\n                            CmdRef param_37 = cmd_ref;\n                            CmdFill param_38 = cmd_fill;\n                            Cmd_Fill_write(param_36, param_37, param_38);\n                        }\n                        else\n                        {\n                            Alloc param_39 = cmd_alloc;\n                            CmdRef param_40 = cmd_ref;\n                            CmdSolid param_41 = CmdSolid(fill.rgba_color);\n                            Cmd_Solid_write(param_39, param_40, param_41);\n                        }\n                        cmd_ref.offset += 20u;\n                        break;\n                    }\n                    case 3u:\n                    {\n                        uint param_42 = 
element_ref_ix;\n                        Alloc param_43 = read_tile_alloc(param_42);\n                        TileRef param_44 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                        tile_1 = Tile_read(param_43, param_44);\n                        param_45.offset = _1176.conf.anno_alloc.offset;\n                        AnnotatedRef param_46 = ref;\n                        AnnoFillImage fill_img = Annotated_FillImage_read(param_45, param_46);\n                        Alloc param_47 = cmd_alloc;\n                        CmdRef param_48 = cmd_ref;\n                        uint param_49 = cmd_limit;\n                        bool _2041 = alloc_cmd(param_47, param_48, param_49);\n                        cmd_alloc = param_47;\n                        cmd_ref = param_48;\n                        cmd_limit = param_49;\n                        if (!_2041)\n                        {\n                            break;\n                        }\n                        if (tile_1.tile.offset != 0u)\n                        {\n                            cmd_fill_img.tile_ref = tile_1.tile.offset;\n                            cmd_fill_img.backdrop = tile_1.backdrop;\n                            cmd_fill_img.index = fill_img.index;\n                            cmd_fill_img.offset = fill_img.offset;\n                            Alloc param_50 = cmd_alloc;\n                            CmdRef param_51 = cmd_ref;\n                            CmdFillImage param_52 = cmd_fill_img;\n                            Cmd_FillImage_write(param_50, param_51, param_52);\n                        }\n                        else\n                        {\n                            cmd_solid_img.index = fill_img.index;\n                            cmd_solid_img.offset = fill_img.offset;\n                            Alloc param_53 = cmd_alloc;\n                            CmdRef param_54 = cmd_ref;\n                           
 CmdSolidImage param_55 = cmd_solid_img;\n                            Cmd_SolidImage_write(param_53, param_54, param_55);\n                        }\n                        cmd_ref.offset += 20u;\n                        break;\n                    }\n                    case 4u:\n                    {\n                        uint param_56 = element_ref_ix;\n                        Alloc param_57 = read_tile_alloc(param_56);\n                        TileRef param_58 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                        tile_1 = Tile_read(param_57, param_58);\n                        bool _2115 = tile_1.tile.offset == 0u;\n                        bool _2121;\n                        if (_2115)\n                        {\n                            _2121 = tile_1.backdrop == 0;\n                        }\n                        else\n                        {\n                            _2121 = _2115;\n                        }\n                        if (_2121)\n                        {\n                            clip_zero_depth = clip_depth + 1u;\n                        }\n                        else\n                        {\n                            if ((tile_1.tile.offset == 0u) && (clip_depth < 32u))\n                            {\n                                clip_one_mask |= uint(1 << int(clip_depth));\n                            }\n                            else\n                            {\n                                Alloc param_59 = cmd_alloc;\n                                CmdRef param_60 = cmd_ref;\n                                uint param_61 = cmd_limit;\n                                bool _2147 = alloc_cmd(param_59, param_60, param_61);\n                                cmd_alloc = param_59;\n                                cmd_ref = param_60;\n                                cmd_limit = param_61;\n                                if (!_2147)\n     
                           {\n                                    break;\n                                }\n                                if (tile_1.tile.offset != 0u)\n                                {\n                                    cmd_begin_clip.tile_ref = tile_1.tile.offset;\n                                    cmd_begin_clip.backdrop = tile_1.backdrop;\n                                    Alloc param_62 = cmd_alloc;\n                                    CmdRef param_63 = cmd_ref;\n                                    CmdBeginClip param_64 = cmd_begin_clip;\n                                    Cmd_BeginClip_write(param_62, param_63, param_64);\n                                }\n                                else\n                                {\n                                    float alpha = (tile_1.backdrop == 0) ? 0.0 : 1.0;\n                                    Alloc param_65 = cmd_alloc;\n                                    CmdRef param_66 = cmd_ref;\n                                    CmdBeginSolidClip param_67 = CmdBeginSolidClip(alpha);\n                                    Cmd_BeginSolidClip_write(param_65, param_66, param_67);\n                                }\n                                cmd_ref.offset += 20u;\n                                if (clip_depth < 32u)\n                                {\n                                    clip_one_mask &= uint(~(1 << int(clip_depth)));\n                                }\n                            }\n                        }\n                        clip_depth++;\n                        break;\n                    }\n                    case 5u:\n                    {\n                        clip_depth--;\n                        bool _2210 = clip_depth >= 32u;\n                        bool _2220;\n                        if (!_2210)\n                        {\n                            _2220 = (clip_one_mask & uint(1 << int(clip_depth))) == 0u;\n                        }\n        
                else\n                        {\n                            _2220 = _2210;\n                        }\n                        if (_2220)\n                        {\n                            Alloc param_68 = cmd_alloc;\n                            CmdRef param_69 = cmd_ref;\n                            uint param_70 = cmd_limit;\n                            bool _2229 = alloc_cmd(param_68, param_69, param_70);\n                            cmd_alloc = param_68;\n                            cmd_ref = param_69;\n                            cmd_limit = param_70;\n                            if (!_2229)\n                            {\n                                break;\n                            }\n                            Alloc param_71 = cmd_alloc;\n                            CmdRef param_72 = cmd_ref;\n                            CmdEndClip param_73 = CmdEndClip(1.0);\n                            Cmd_EndClip_write(param_71, param_72, param_73);\n                            cmd_ref.offset += 20u;\n                        }\n                        break;\n                    }\n                    case 1u:\n                    {\n                        uint param_74 = element_ref_ix;\n                        Alloc param_75 = read_tile_alloc(param_74);\n                        TileRef param_76 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n                        tile_1 = Tile_read(param_75, param_76);\n                        param_77.offset = _1176.conf.anno_alloc.offset;\n                        AnnotatedRef param_78 = ref;\n                        AnnoStroke stroke = Annotated_Stroke_read(param_77, param_78);\n                        cmd_stroke.tile_ref = tile_1.tile.offset;\n                        cmd_stroke.half_width = 0.5 * stroke.linewidth;\n                        cmd_stroke.rgba_color = stroke.rgba_color;\n                        Alloc param_79 = cmd_alloc;\n             
           CmdRef param_80 = cmd_ref;\n                        uint param_81 = cmd_limit;\n                        bool _2295 = alloc_cmd(param_79, param_80, param_81);\n                        cmd_alloc = param_79;\n                        cmd_ref = param_80;\n                        cmd_limit = param_81;\n                        if (!_2295)\n                        {\n                            break;\n                        }\n                        Alloc param_82 = cmd_alloc;\n                        CmdRef param_83 = cmd_ref;\n                        CmdStroke param_84 = cmd_stroke;\n                        Cmd_Stroke_write(param_82, param_83, param_84);\n                        cmd_ref.offset += 20u;\n                        break;\n                    }\n                }\n            }\n            else\n            {\n                switch (tag)\n                {\n                    case 4u:\n                    {\n                        clip_depth++;\n                        break;\n                    }\n                    case 5u:\n                    {\n                        if (clip_depth == clip_zero_depth)\n                        {\n                            clip_zero_depth = 0u;\n                        }\n                        clip_depth--;\n                        break;\n                    }\n                }\n            }\n        }\n        barrier();\n        rd_ix += 128u;\n        if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions))\n        {\n            break;\n        }\n    }\n    bool _2350 = (bin_tile_x + tile_x) < _1176.conf.width_in_tiles;\n    bool _2359;\n    if (_2350)\n    {\n        _2359 = (bin_tile_y + tile_y) < _1176.conf.height_in_tiles;\n    }\n    else\n    {\n        _2359 = _2350;\n    }\n    if (_2359)\n    {\n        Alloc param_85 = cmd_alloc;\n        CmdRef param_86 = cmd_ref;\n        Cmd_End_write(param_85, param_86);\n    }\n}\n\n",
	}
	shader_copy_frag = driver.ShaderSources{
		Name:      "copy.frag",


@@ 139,7 139,7 @@ var (
	}
	shader_elements_comp = driver.ShaderSources{
		Name:      "elements.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct ElementRef\n{\n    uint offset;\n};\n\nstruct LineSegRef\n{\n    uint offset;\n};\n\nstruct LineSeg\n{\n    vec2 p0;\n    vec2 p1;\n};\n\nstruct QuadSegRef\n{\n    uint offset;\n};\n\nstruct QuadSeg\n{\n    vec2 p0;\n    vec2 p1;\n    vec2 p2;\n};\n\nstruct CubicSegRef\n{\n    uint offset;\n};\n\nstruct CubicSeg\n{\n    vec2 p0;\n    vec2 p1;\n    vec2 p2;\n    vec2 p3;\n};\n\nstruct FillRef\n{\n    uint offset;\n};\n\nstruct Fill\n{\n    uint rgba_color;\n};\n\nstruct FillImageRef\n{\n    uint offset;\n};\n\nstruct FillImage\n{\n    uint index;\n    ivec2 offset;\n};\n\nstruct StrokeRef\n{\n    uint offset;\n};\n\nstruct Stroke\n{\n    uint rgba_color;\n};\n\nstruct SetLineWidthRef\n{\n    uint offset;\n};\n\nstruct SetLineWidth\n{\n    float width;\n};\n\nstruct TransformRef\n{\n    uint offset;\n};\n\nstruct Transform\n{\n    vec4 mat;\n    vec2 translate;\n};\n\nstruct ClipRef\n{\n    uint offset;\n};\n\nstruct Clip\n{\n    vec4 bbox;\n};\n\nstruct StateRef\n{\n    uint offset;\n};\n\nstruct State\n{\n    vec4 mat;\n    vec2 translate;\n    vec4 bbox;\n    float linewidth;\n    uint flags;\n    uint path_count;\n    uint pathseg_count;\n};\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    vec4 bbox;\n    uint rgba_color;\n};\n\nstruct AnnoFillImageRef\n{\n    uint offset;\n};\n\nstruct AnnoFillImage\n{\n    vec4 bbox;\n    uint index;\n    ivec2 offset;\n};\n\nstruct AnnoStrokeRef\n{\n    uint offset;\n};\n\nstruct AnnoStroke\n{\n    vec4 bbox;\n    uint rgba_color;\n    float linewidth;\n};\n\nstruct AnnoClipRef\n{\n    uint offset;\n};\n\nstruct AnnoClip\n{\n    vec4 bbox;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct PathStrokeCubicRef\n{\n    uint offset;\n};\n\nstruct PathStrokeCubic\n{\n    vec2 p0;\n    vec2 p1;\n    vec2 p2;\n    vec2 p3;\n    uint path_ix;\n    vec2 
stroke;\n};\n\nstruct PathSegRef\n{\n    uint offset;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _282;\n\nlayout(binding = 2, std430) readonly buffer SceneBuf\n{\n    uint scene[];\n} _306;\n\nlayout(binding = 3, std430) coherent buffer StateBuf\n{\n    uint part_counter;\n    uint state[];\n} _780;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _2430;\n\nshared uint sh_part_ix;\nshared vec4 sh_mat[32];\nshared vec2 sh_translate[32];\nshared vec4 sh_bbox[32];\nshared float sh_width[32];\nshared uint sh_flags[32];\nshared uint sh_path_count[32];\nshared uint sh_pathseg_count[32];\nshared State sh_prefix;\n\nuint Element_tag(ElementRef ref)\n{\n    return _306.scene[ref.offset >> uint(2)];\n}\n\nLineSeg LineSeg_read(LineSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _306.scene[ix + 0u];\n    uint raw1 = _306.scene[ix + 1u];\n    uint raw2 = _306.scene[ix + 2u];\n    uint raw3 = _306.scene[ix + 3u];\n    LineSeg s;\n    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    return s;\n}\n\nLineSeg Element_FillLine_read(ElementRef ref)\n{\n    LineSegRef param = LineSegRef(ref.offset + 4u);\n    return LineSeg_read(param);\n}\n\nQuadSeg QuadSeg_read(QuadSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _306.scene[ix + 0u];\n    uint raw1 = _306.scene[ix + 1u];\n    uint raw2 = _306.scene[ix + 2u];\n    uint raw3 = _306.scene[ix + 3u];\n    uint raw4 = _306.scene[ix + 4u];\n    uint raw5 = _306.scene[ix + 5u];\n    QuadSeg s;\n    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.p1 = 
vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    return s;\n}\n\nQuadSeg Element_FillQuad_read(ElementRef ref)\n{\n    QuadSegRef param = QuadSegRef(ref.offset + 4u);\n    return QuadSeg_read(param);\n}\n\nCubicSeg CubicSeg_read(CubicSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _306.scene[ix + 0u];\n    uint raw1 = _306.scene[ix + 1u];\n    uint raw2 = _306.scene[ix + 2u];\n    uint raw3 = _306.scene[ix + 3u];\n    uint raw4 = _306.scene[ix + 4u];\n    uint raw5 = _306.scene[ix + 5u];\n    uint raw6 = _306.scene[ix + 6u];\n    uint raw7 = _306.scene[ix + 7u];\n    CubicSeg s;\n    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));\n    return s;\n}\n\nCubicSeg Element_FillCubic_read(ElementRef ref)\n{\n    CubicSegRef param = CubicSegRef(ref.offset + 4u);\n    return CubicSeg_read(param);\n}\n\nSetLineWidth SetLineWidth_read(SetLineWidthRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _306.scene[ix + 0u];\n    SetLineWidth s;\n    s.width = uintBitsToFloat(raw0);\n    return s;\n}\n\nSetLineWidth Element_SetLineWidth_read(ElementRef ref)\n{\n    SetLineWidthRef param = SetLineWidthRef(ref.offset + 4u);\n    return SetLineWidth_read(param);\n}\n\nTransform Transform_read(TransformRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _306.scene[ix + 0u];\n    uint raw1 = _306.scene[ix + 1u];\n    uint raw2 = _306.scene[ix + 2u];\n    uint raw3 = _306.scene[ix + 3u];\n    uint raw4 = _306.scene[ix + 4u];\n    uint raw5 = _306.scene[ix + 5u];\n    Transform s;\n    s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    
return s;\n}\n\nTransform Element_Transform_read(ElementRef ref)\n{\n    TransformRef param = TransformRef(ref.offset + 4u);\n    return Transform_read(param);\n}\n\nState map_element(ElementRef ref)\n{\n    ElementRef param = ref;\n    uint tag = Element_tag(param);\n    State c;\n    c.bbox = vec4(0.0);\n    c.mat = vec4(1.0, 0.0, 0.0, 1.0);\n    c.translate = vec2(0.0);\n    c.linewidth = 1.0;\n    c.flags = 0u;\n    c.path_count = 0u;\n    c.pathseg_count = 0u;\n    switch (tag)\n    {\n        case 2u:\n        case 1u:\n        {\n            ElementRef param_1 = ref;\n            LineSeg line = Element_FillLine_read(param_1);\n            vec2 _1778 = min(line.p0, line.p1);\n            c.bbox = vec4(_1778.x, _1778.y, c.bbox.z, c.bbox.w);\n            vec2 _1786 = max(line.p0, line.p1);\n            c.bbox = vec4(c.bbox.x, c.bbox.y, _1786.x, _1786.y);\n            c.pathseg_count = 1u;\n            break;\n        }\n        case 4u:\n        case 3u:\n        {\n            ElementRef param_2 = ref;\n            QuadSeg quad = Element_FillQuad_read(param_2);\n            vec2 _1803 = min(min(quad.p0, quad.p1), quad.p2);\n            c.bbox = vec4(_1803.x, _1803.y, c.bbox.z, c.bbox.w);\n            vec2 _1814 = max(max(quad.p0, quad.p1), quad.p2);\n            c.bbox = vec4(c.bbox.x, c.bbox.y, _1814.x, _1814.y);\n            c.pathseg_count = 1u;\n            break;\n        }\n        case 6u:\n        case 5u:\n        {\n            ElementRef param_3 = ref;\n            CubicSeg cubic = Element_FillCubic_read(param_3);\n            vec2 _1834 = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3));\n            c.bbox = vec4(_1834.x, _1834.y, c.bbox.z, c.bbox.w);\n            vec2 _1848 = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));\n            c.bbox = vec4(c.bbox.x, c.bbox.y, _1848.x, _1848.y);\n            c.pathseg_count = 1u;\n            break;\n        }\n        case 8u:\n        case 13u:\n        case 7u:\n        case 11u:\n        
{\n            c.flags = 4u;\n            c.path_count = 1u;\n            break;\n        }\n        case 12u:\n        {\n            c.path_count = 1u;\n            break;\n        }\n        case 9u:\n        {\n            ElementRef param_4 = ref;\n            SetLineWidth lw = Element_SetLineWidth_read(param_4);\n            c.linewidth = lw.width;\n            c.flags = 1u;\n            break;\n        }\n        case 10u:\n        {\n            ElementRef param_5 = ref;\n            Transform t = Element_Transform_read(param_5);\n            c.mat = t.mat;\n            c.translate = t.translate;\n            break;\n        }\n    }\n    return c;\n}\n\nElementRef Element_index(ElementRef ref, uint index)\n{\n    return ElementRef(ref.offset + (index * 36u));\n}\n\nState combine_state(State a, State b)\n{\n    State c;\n    c.bbox.x = (min(a.mat.x * b.bbox.x, a.mat.x * b.bbox.z) + min(a.mat.z * b.bbox.y, a.mat.z * b.bbox.w)) + a.translate.x;\n    c.bbox.y = (min(a.mat.y * b.bbox.x, a.mat.y * b.bbox.z) + min(a.mat.w * b.bbox.y, a.mat.w * b.bbox.w)) + a.translate.y;\n    c.bbox.z = (max(a.mat.x * b.bbox.x, a.mat.x * b.bbox.z) + max(a.mat.z * b.bbox.y, a.mat.z * b.bbox.w)) + a.translate.x;\n    c.bbox.w = (max(a.mat.y * b.bbox.x, a.mat.y * b.bbox.z) + max(a.mat.w * b.bbox.y, a.mat.w * b.bbox.w)) + a.translate.y;\n    bool _1549 = (a.flags & 4u) == 0u;\n    bool _1557;\n    if (_1549)\n    {\n        _1557 = b.bbox.z <= b.bbox.x;\n    }\n    else\n    {\n        _1557 = _1549;\n    }\n    bool _1565;\n    if (_1557)\n    {\n        _1565 = b.bbox.w <= b.bbox.y;\n    }\n    else\n    {\n        _1565 = _1557;\n    }\n    if (_1565)\n    {\n        c.bbox = a.bbox;\n    }\n    else\n    {\n        bool _1575 = (a.flags & 4u) == 0u;\n        bool _1582;\n        if (_1575)\n        {\n            _1582 = (b.flags & 2u) == 0u;\n        }\n        else\n        {\n            _1582 = _1575;\n        }\n        bool _1599;\n        if (_1582)\n        {\n            
bool _1589 = a.bbox.z > a.bbox.x;\n            bool _1598;\n            if (!_1589)\n            {\n                _1598 = a.bbox.w > a.bbox.y;\n            }\n            else\n            {\n                _1598 = _1589;\n            }\n            _1599 = _1598;\n        }\n        else\n        {\n            _1599 = _1582;\n        }\n        if (_1599)\n        {\n            vec2 _1608 = min(a.bbox.xy, c.bbox.xy);\n            c.bbox = vec4(_1608.x, _1608.y, c.bbox.z, c.bbox.w);\n            vec2 _1618 = max(a.bbox.zw, c.bbox.zw);\n            c.bbox = vec4(c.bbox.x, c.bbox.y, _1618.x, _1618.y);\n        }\n    }\n    c.mat.x = (a.mat.x * b.mat.x) + (a.mat.z * b.mat.y);\n    c.mat.y = (a.mat.y * b.mat.x) + (a.mat.w * b.mat.y);\n    c.mat.z = (a.mat.x * b.mat.z) + (a.mat.z * b.mat.w);\n    c.mat.w = (a.mat.y * b.mat.z) + (a.mat.w * b.mat.w);\n    c.translate.x = ((a.mat.x * b.translate.x) + (a.mat.z * b.translate.y)) + a.translate.x;\n    c.translate.y = ((a.mat.y * b.translate.x) + (a.mat.w * b.translate.y)) + a.translate.y;\n    float _1704;\n    if ((b.flags & 1u) == 0u)\n    {\n        _1704 = a.linewidth;\n    }\n    else\n    {\n        _1704 = b.linewidth;\n    }\n    c.linewidth = _1704;\n    c.flags = (a.flags & 3u) | b.flags;\n    c.flags |= ((a.flags & 4u) >> uint(1));\n    c.path_count = a.path_count + b.path_count;\n    c.pathseg_count = a.pathseg_count + b.pathseg_count;\n    return c;\n}\n\nStateRef state_aggregate_ref(uint partition_ix)\n{\n    return StateRef(4u + (partition_ix * 116u));\n}\n\nvoid State_write(StateRef ref, State s)\n{\n    uint ix = ref.offset >> uint(2);\n    _780.state[ix + 0u] = floatBitsToUint(s.mat.x);\n    _780.state[ix + 1u] = floatBitsToUint(s.mat.y);\n    _780.state[ix + 2u] = floatBitsToUint(s.mat.z);\n    _780.state[ix + 3u] = floatBitsToUint(s.mat.w);\n    _780.state[ix + 4u] = floatBitsToUint(s.translate.x);\n    _780.state[ix + 5u] = floatBitsToUint(s.translate.y);\n    _780.state[ix + 6u] = 
floatBitsToUint(s.bbox.x);\n    _780.state[ix + 7u] = floatBitsToUint(s.bbox.y);\n    _780.state[ix + 8u] = floatBitsToUint(s.bbox.z);\n    _780.state[ix + 9u] = floatBitsToUint(s.bbox.w);\n    _780.state[ix + 10u] = floatBitsToUint(s.linewidth);\n    _780.state[ix + 11u] = s.flags;\n    _780.state[ix + 12u] = s.path_count;\n    _780.state[ix + 13u] = s.pathseg_count;\n}\n\nStateRef state_prefix_ref(uint partition_ix)\n{\n    return StateRef((4u + (partition_ix * 116u)) + 56u);\n}\n\nuint state_flag_index(uint partition_ix)\n{\n    return partition_ix * 29u;\n}\n\nState State_read(StateRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _780.state[ix + 0u];\n    uint raw1 = _780.state[ix + 1u];\n    uint raw2 = _780.state[ix + 2u];\n    uint raw3 = _780.state[ix + 3u];\n    uint raw4 = _780.state[ix + 4u];\n    uint raw5 = _780.state[ix + 5u];\n    uint raw6 = _780.state[ix + 6u];\n    uint raw7 = _780.state[ix + 7u];\n    uint raw8 = _780.state[ix + 8u];\n    uint raw9 = _780.state[ix + 9u];\n    uint raw10 = _780.state[ix + 10u];\n    uint raw11 = _780.state[ix + 11u];\n    uint raw12 = _780.state[ix + 12u];\n    uint raw13 = _780.state[ix + 13u];\n    State s;\n    s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    s.bbox = vec4(uintBitsToFloat(raw6), uintBitsToFloat(raw7), uintBitsToFloat(raw8), uintBitsToFloat(raw9));\n    s.linewidth = uintBitsToFloat(raw10);\n    s.flags = raw11;\n    s.path_count = raw12;\n    s.pathseg_count = raw13;\n    return s;\n}\n\nLineSeg Element_StrokeLine_read(ElementRef ref)\n{\n    LineSegRef param = LineSegRef(ref.offset + 4u);\n    return LineSeg_read(param);\n}\n\nvec2 get_linewidth(State st)\n{\n    return vec2(length(st.mat.xz), length(st.mat.yw)) * (0.5 * st.linewidth);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nvoid write_mem(Alloc alloc, uint 
offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _282.memory[offset] = val;\n}\n\nvoid PathStrokeCubic_write(Alloc a, PathStrokeCubicRef ref, PathStrokeCubic s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.p0.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.p0.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.p1.x);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.p1.y);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = floatBitsToUint(s.p2.x);\n    write_mem(param_12, param_13, param_14);\n    Alloc param_15 = a;\n    uint param_16 = ix + 5u;\n    uint param_17 = floatBitsToUint(s.p2.y);\n    write_mem(param_15, param_16, param_17);\n    Alloc param_18 = a;\n    uint param_19 = ix + 6u;\n    uint param_20 = floatBitsToUint(s.p3.x);\n    write_mem(param_18, param_19, param_20);\n    Alloc param_21 = a;\n    uint param_22 = ix + 7u;\n    uint param_23 = floatBitsToUint(s.p3.y);\n    write_mem(param_21, param_22, param_23);\n    Alloc param_24 = a;\n    uint param_25 = ix + 8u;\n    uint param_26 = s.path_ix;\n    write_mem(param_24, param_25, param_26);\n    Alloc param_27 = a;\n    uint param_28 = ix + 9u;\n    uint param_29 = floatBitsToUint(s.stroke.x);\n    write_mem(param_27, param_28, param_29);\n    Alloc param_30 = a;\n    uint param_31 = ix + 10u;\n    uint param_32 = floatBitsToUint(s.stroke.y);\n    write_mem(param_30, param_31, param_32);\n}\n\nQuadSeg Element_StrokeQuad_read(ElementRef ref)\n{\n    QuadSegRef param = QuadSegRef(ref.offset + 
4u);\n    return QuadSeg_read(param);\n}\n\nCubicSeg Element_StrokeCubic_read(ElementRef ref)\n{\n    CubicSegRef param = CubicSegRef(ref.offset + 4u);\n    return CubicSeg_read(param);\n}\n\nStroke Stroke_read(StrokeRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _306.scene[ix + 0u];\n    Stroke s;\n    s.rgba_color = raw0;\n    return s;\n}\n\nStroke Element_Stroke_read(ElementRef ref)\n{\n    StrokeRef param = StrokeRef(ref.offset + 4u);\n    return Stroke_read(param);\n}\n\nvoid AnnoStroke_write(Alloc a, AnnoStrokeRef ref, AnnoStroke s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.bbox.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.bbox.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.bbox.z);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.bbox.w);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = s.rgba_color;\n    write_mem(param_12, param_13, param_14);\n    Alloc param_15 = a;\n    uint param_16 = ix + 5u;\n    uint param_17 = floatBitsToUint(s.linewidth);\n    write_mem(param_15, param_16, param_17);\n}\n\nvoid Annotated_Stroke_write(Alloc a, AnnotatedRef ref, AnnoStroke s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 1u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    AnnoStrokeRef param_4 = AnnoStrokeRef(ref.offset + 4u);\n    AnnoStroke param_5 = s;\n    AnnoStroke_write(param_3, param_4, param_5);\n}\n\nFill Fill_read(FillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _306.scene[ix + 0u];\n    Fill s;\n    s.rgba_color = raw0;\n    
return s;\n}\n\nFill Element_Fill_read(ElementRef ref)\n{\n    FillRef param = FillRef(ref.offset + 4u);\n    return Fill_read(param);\n}\n\nvoid AnnoFill_write(Alloc a, AnnoFillRef ref, AnnoFill s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.bbox.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.bbox.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.bbox.z);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.bbox.w);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = s.rgba_color;\n    write_mem(param_12, param_13, param_14);\n}\n\nvoid Annotated_Fill_write(Alloc a, AnnotatedRef ref, AnnoFill s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 2u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    AnnoFillRef param_4 = AnnoFillRef(ref.offset + 4u);\n    AnnoFill param_5 = s;\n    AnnoFill_write(param_3, param_4, param_5);\n}\n\nFillImage FillImage_read(FillImageRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _306.scene[ix + 0u];\n    uint raw1 = _306.scene[ix + 1u];\n    FillImage s;\n    s.index = raw0;\n    s.offset = ivec2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);\n    return s;\n}\n\nFillImage Element_FillImage_read(ElementRef ref)\n{\n    FillImageRef param = FillImageRef(ref.offset + 4u);\n    return FillImage_read(param);\n}\n\nvoid AnnoFillImage_write(Alloc a, AnnoFillImageRef ref, AnnoFillImage s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.bbox.x);\n    write_mem(param, param_1, 
param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.bbox.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.bbox.z);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.bbox.w);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = s.index;\n    write_mem(param_12, param_13, param_14);\n    Alloc param_15 = a;\n    uint param_16 = ix + 5u;\n    uint param_17 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));\n    write_mem(param_15, param_16, param_17);\n}\n\nvoid Annotated_FillImage_write(Alloc a, AnnotatedRef ref, AnnoFillImage s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 3u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    AnnoFillImageRef param_4 = AnnoFillImageRef(ref.offset + 4u);\n    AnnoFillImage param_5 = s;\n    AnnoFillImage_write(param_3, param_4, param_5);\n}\n\nClip Clip_read(ClipRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _306.scene[ix + 0u];\n    uint raw1 = _306.scene[ix + 1u];\n    uint raw2 = _306.scene[ix + 2u];\n    uint raw3 = _306.scene[ix + 3u];\n    Clip s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    return s;\n}\n\nClip Element_BeginClip_read(ElementRef ref)\n{\n    ClipRef param = ClipRef(ref.offset + 4u);\n    return Clip_read(param);\n}\n\nvoid AnnoClip_write(Alloc a, AnnoClipRef ref, AnnoClip s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.bbox.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.bbox.y);\n    
write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.bbox.z);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.bbox.w);\n    write_mem(param_9, param_10, param_11);\n}\n\nvoid Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, AnnoClip s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 4u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    AnnoClipRef param_4 = AnnoClipRef(ref.offset + 4u);\n    AnnoClip param_5 = s;\n    AnnoClip_write(param_3, param_4, param_5);\n}\n\nClip Element_EndClip_read(ElementRef ref)\n{\n    ClipRef param = ClipRef(ref.offset + 4u);\n    return Clip_read(param);\n}\n\nvoid Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoClip s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 5u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    AnnoClipRef param_4 = AnnoClipRef(ref.offset + 4u);\n    AnnoClip param_5 = s;\n    AnnoClip_write(param_3, param_4, param_5);\n}\n\nvoid main()\n{\n    if (_282.mem_error != 0u)\n    {\n        return;\n    }\n    if (gl_LocalInvocationID.x == 0u)\n    {\n        uint _1917 = atomicAdd(_780.part_counter, 1u);\n        sh_part_ix = _1917;\n    }\n    barrier();\n    uint part_ix = sh_part_ix;\n    uint ix = (part_ix * 128u) + (gl_LocalInvocationID.x * 4u);\n    ElementRef ref = ElementRef(ix * 36u);\n    ElementRef param = ref;\n    State th_state[4];\n    th_state[0] = map_element(param);\n    for (uint i = 1u; i < 4u; i++)\n    {\n        ElementRef param_1 = ref;\n        uint param_2 = i;\n        ElementRef param_3 = Element_index(param_1, param_2);\n        State param_4 = th_state[i - 1u];\n        State param_5 = map_element(param_3);\n        th_state[i] = combine_state(param_4, param_5);\n    }\n    State agg = 
th_state[3];\n    sh_mat[gl_LocalInvocationID.x] = agg.mat;\n    sh_translate[gl_LocalInvocationID.x] = agg.translate;\n    sh_bbox[gl_LocalInvocationID.x] = agg.bbox;\n    sh_width[gl_LocalInvocationID.x] = agg.linewidth;\n    sh_flags[gl_LocalInvocationID.x] = agg.flags;\n    sh_path_count[gl_LocalInvocationID.x] = agg.path_count;\n    sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;\n    State other;\n    for (uint i_1 = 0u; i_1 < 5u; i_1++)\n    {\n        barrier();\n        if (gl_LocalInvocationID.x >= uint(1 << int(i_1)))\n        {\n            uint ix_1 = gl_LocalInvocationID.x - uint(1 << int(i_1));\n            other.mat = sh_mat[ix_1];\n            other.translate = sh_translate[ix_1];\n            other.bbox = sh_bbox[ix_1];\n            other.linewidth = sh_width[ix_1];\n            other.flags = sh_flags[ix_1];\n            other.path_count = sh_path_count[ix_1];\n            other.pathseg_count = sh_pathseg_count[ix_1];\n            State param_6 = other;\n            State param_7 = agg;\n            agg = combine_state(param_6, param_7);\n        }\n        barrier();\n        sh_mat[gl_LocalInvocationID.x] = agg.mat;\n        sh_translate[gl_LocalInvocationID.x] = agg.translate;\n        sh_bbox[gl_LocalInvocationID.x] = agg.bbox;\n        sh_width[gl_LocalInvocationID.x] = agg.linewidth;\n        sh_flags[gl_LocalInvocationID.x] = agg.flags;\n        sh_path_count[gl_LocalInvocationID.x] = agg.path_count;\n        sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;\n    }\n    State exclusive;\n    exclusive.bbox = vec4(0.0);\n    exclusive.mat = vec4(1.0, 0.0, 0.0, 1.0);\n    exclusive.translate = vec2(0.0);\n    exclusive.linewidth = 1.0;\n    exclusive.flags = 0u;\n    exclusive.path_count = 0u;\n    exclusive.pathseg_count = 0u;\n    if (gl_LocalInvocationID.x == 31u)\n    {\n        uint param_8 = part_ix;\n        StateRef param_9 = state_aggregate_ref(param_8);\n        State param_10 = agg;\n        
State_write(param_9, param_10);\n        uint flag = 1u;\n        memoryBarrierBuffer();\n        if (part_ix == 0u)\n        {\n            uint param_11 = part_ix;\n            StateRef param_12 = state_prefix_ref(param_11);\n            State param_13 = agg;\n            State_write(param_12, param_13);\n            flag = 2u;\n        }\n        uint param_14 = part_ix;\n        _780.state[state_flag_index(param_14)] = flag;\n        if (part_ix != 0u)\n        {\n            uint look_back_ix = part_ix - 1u;\n            uint their_ix = 0u;\n            State their_agg;\n            while (true)\n            {\n                uint param_15 = look_back_ix;\n                flag = _780.state[state_flag_index(param_15)];\n                if (flag == 2u)\n                {\n                    uint param_16 = look_back_ix;\n                    StateRef param_17 = state_prefix_ref(param_16);\n                    State their_prefix = State_read(param_17);\n                    State param_18 = their_prefix;\n                    State param_19 = exclusive;\n                    exclusive = combine_state(param_18, param_19);\n                    break;\n                }\n                else\n                {\n                    if (flag == 1u)\n                    {\n                        uint param_20 = look_back_ix;\n                        StateRef param_21 = state_aggregate_ref(param_20);\n                        their_agg = State_read(param_21);\n                        State param_22 = their_agg;\n                        State param_23 = exclusive;\n                        exclusive = combine_state(param_22, param_23);\n                        look_back_ix--;\n                        their_ix = 0u;\n                        continue;\n                    }\n                }\n                ElementRef ref_1 = ElementRef(((look_back_ix * 128u) + their_ix) * 36u);\n                ElementRef param_24 = ref_1;\n                State s = 
map_element(param_24);\n                if (their_ix == 0u)\n                {\n                    their_agg = s;\n                }\n                else\n                {\n                    State param_25 = their_agg;\n                    State param_26 = s;\n                    their_agg = combine_state(param_25, param_26);\n                }\n                their_ix++;\n                if (their_ix == 128u)\n                {\n                    State param_27 = their_agg;\n                    State param_28 = exclusive;\n                    exclusive = combine_state(param_27, param_28);\n                    if (look_back_ix == 0u)\n                    {\n                        break;\n                    }\n                    look_back_ix--;\n                    their_ix = 0u;\n                }\n            }\n            State param_29 = exclusive;\n            State param_30 = agg;\n            State inclusive_prefix = combine_state(param_29, param_30);\n            sh_prefix = exclusive;\n            uint param_31 = part_ix;\n            StateRef param_32 = state_prefix_ref(param_31);\n            State param_33 = inclusive_prefix;\n            State_write(param_32, param_33);\n            memoryBarrierBuffer();\n            flag = 2u;\n            uint param_34 = part_ix;\n            _780.state[state_flag_index(param_34)] = flag;\n        }\n    }\n    barrier();\n    if (part_ix != 0u)\n    {\n        exclusive = sh_prefix;\n    }\n    State row = exclusive;\n    if (gl_LocalInvocationID.x > 0u)\n    {\n        uint ix_2 = gl_LocalInvocationID.x - 1u;\n        State other_1;\n        other_1.mat = sh_mat[ix_2];\n        other_1.translate = sh_translate[ix_2];\n        other_1.bbox = sh_bbox[ix_2];\n        other_1.linewidth = sh_width[ix_2];\n        other_1.flags = sh_flags[ix_2];\n        other_1.path_count = sh_path_count[ix_2];\n        other_1.pathseg_count = sh_pathseg_count[ix_2];\n        State param_35 = row;\n        State param_36 = 
other_1;\n        row = combine_state(param_35, param_36);\n    }\n    vec2 p0;\n    vec2 p1;\n    PathStrokeCubic path_cubic;\n    PathSegRef path_out_ref;\n    uint out_tag;\n    Alloc param_44;\n    Alloc param_47;\n    Alloc param_52;\n    Alloc param_55;\n    Alloc param_60;\n    Alloc param_63;\n    AnnoStroke anno_stroke;\n    AnnotatedRef out_ref;\n    Alloc param_68;\n    AnnoFill anno_fill;\n    Alloc param_72;\n    AnnoFillImage anno_fill_img;\n    Alloc param_76;\n    Alloc param_80;\n    Alloc param_84;\n    for (uint i_2 = 0u; i_2 < 4u; i_2++)\n    {\n        State param_37 = row;\n        State param_38 = th_state[i_2];\n        State st = combine_state(param_37, param_38);\n        ElementRef param_39 = ref;\n        uint param_40 = i_2;\n        ElementRef this_ref = Element_index(param_39, param_40);\n        ElementRef param_41 = this_ref;\n        uint tag = Element_tag(param_41);\n        switch (tag)\n        {\n            case 2u:\n            case 1u:\n            {\n                ElementRef param_42 = this_ref;\n                LineSeg line = Element_StrokeLine_read(param_42);\n                p0 = ((st.mat.xy * line.p0.x) + (st.mat.zw * line.p0.y)) + st.translate;\n                p1 = ((st.mat.xy * line.p1.x) + (st.mat.zw * line.p1.y)) + st.translate;\n                path_cubic.p0 = p0;\n                path_cubic.p1 = mix(p0, p1, vec2(0.3333333432674407958984375));\n                path_cubic.p2 = mix(p1, p0, vec2(0.3333333432674407958984375));\n                path_cubic.p3 = p1;\n                path_cubic.path_ix = st.path_count;\n                if (tag == 1u)\n                {\n                    State param_43 = st;\n                    path_cubic.stroke = get_linewidth(param_43);\n                }\n                else\n                {\n                    path_cubic.stroke = vec2(0.0);\n                }\n                path_out_ref = PathSegRef(_2430.conf.pathseg_alloc.offset + ((st.pathseg_count - 1u) * 48u));\n       
         out_tag = uint((tag == 2u) ? 1 : 2);\n                param_44.offset = _2430.conf.pathseg_alloc.offset;\n                uint param_45 = path_out_ref.offset >> uint(2);\n                uint param_46 = out_tag;\n                write_mem(param_44, param_45, param_46);\n                param_47.offset = _2430.conf.pathseg_alloc.offset;\n                PathStrokeCubicRef param_48 = PathStrokeCubicRef(path_out_ref.offset + 4u);\n                PathStrokeCubic param_49 = path_cubic;\n                PathStrokeCubic_write(param_47, param_48, param_49);\n                break;\n            }\n            case 4u:\n            case 3u:\n            {\n                ElementRef param_50 = this_ref;\n                QuadSeg quad = Element_StrokeQuad_read(param_50);\n                p0 = ((st.mat.xy * quad.p0.x) + (st.mat.zw * quad.p0.y)) + st.translate;\n                p1 = ((st.mat.xy * quad.p1.x) + (st.mat.zw * quad.p1.y)) + st.translate;\n                vec2 p2 = ((st.mat.xy * quad.p2.x) + (st.mat.zw * quad.p2.y)) + st.translate;\n                path_cubic.p0 = p0;\n                path_cubic.p1 = mix(p1, p0, vec2(0.3333333432674407958984375));\n                path_cubic.p2 = mix(p1, p2, vec2(0.3333333432674407958984375));\n                path_cubic.p3 = p2;\n                path_cubic.path_ix = st.path_count;\n                if (tag == 3u)\n                {\n                    State param_51 = st;\n                    path_cubic.stroke = get_linewidth(param_51);\n                }\n                else\n                {\n                    path_cubic.stroke = vec2(0.0);\n                }\n                path_out_ref = PathSegRef(_2430.conf.pathseg_alloc.offset + ((st.pathseg_count - 1u) * 48u));\n                out_tag = uint((tag == 4u) ? 
1 : 2);\n                param_52.offset = _2430.conf.pathseg_alloc.offset;\n                uint param_53 = path_out_ref.offset >> uint(2);\n                uint param_54 = out_tag;\n                write_mem(param_52, param_53, param_54);\n                param_55.offset = _2430.conf.pathseg_alloc.offset;\n                PathStrokeCubicRef param_56 = PathStrokeCubicRef(path_out_ref.offset + 4u);\n                PathStrokeCubic param_57 = path_cubic;\n                PathStrokeCubic_write(param_55, param_56, param_57);\n                break;\n            }\n            case 6u:\n            case 5u:\n            {\n                ElementRef param_58 = this_ref;\n                CubicSeg cubic = Element_StrokeCubic_read(param_58);\n                path_cubic.p0 = ((st.mat.xy * cubic.p0.x) + (st.mat.zw * cubic.p0.y)) + st.translate;\n                path_cubic.p1 = ((st.mat.xy * cubic.p1.x) + (st.mat.zw * cubic.p1.y)) + st.translate;\n                path_cubic.p2 = ((st.mat.xy * cubic.p2.x) + (st.mat.zw * cubic.p2.y)) + st.translate;\n                path_cubic.p3 = ((st.mat.xy * cubic.p3.x) + (st.mat.zw * cubic.p3.y)) + st.translate;\n                path_cubic.path_ix = st.path_count;\n                if (tag == 5u)\n                {\n                    State param_59 = st;\n                    path_cubic.stroke = get_linewidth(param_59);\n                }\n                else\n                {\n                    path_cubic.stroke = vec2(0.0);\n                }\n                path_out_ref = PathSegRef(_2430.conf.pathseg_alloc.offset + ((st.pathseg_count - 1u) * 48u));\n                out_tag = uint((tag == 6u) ? 
1 : 2);\n                param_60.offset = _2430.conf.pathseg_alloc.offset;\n                uint param_61 = path_out_ref.offset >> uint(2);\n                uint param_62 = out_tag;\n                write_mem(param_60, param_61, param_62);\n                param_63.offset = _2430.conf.pathseg_alloc.offset;\n                PathStrokeCubicRef param_64 = PathStrokeCubicRef(path_out_ref.offset + 4u);\n                PathStrokeCubic param_65 = path_cubic;\n                PathStrokeCubic_write(param_63, param_64, param_65);\n                break;\n            }\n            case 7u:\n            {\n                ElementRef param_66 = this_ref;\n                Stroke stroke = Element_Stroke_read(param_66);\n                anno_stroke.rgba_color = stroke.rgba_color;\n                State param_67 = st;\n                vec2 lw = get_linewidth(param_67);\n                anno_stroke.bbox = st.bbox + vec4(-lw, lw);\n                anno_stroke.linewidth = st.linewidth * sqrt(abs((st.mat.x * st.mat.w) - (st.mat.y * st.mat.z)));\n                out_ref = AnnotatedRef(_2430.conf.anno_alloc.offset + ((st.path_count - 1u) * 28u));\n                param_68.offset = _2430.conf.anno_alloc.offset;\n                AnnotatedRef param_69 = out_ref;\n                AnnoStroke param_70 = anno_stroke;\n                Annotated_Stroke_write(param_68, param_69, param_70);\n                break;\n            }\n            case 8u:\n            {\n                ElementRef param_71 = this_ref;\n                Fill fill = Element_Fill_read(param_71);\n                anno_fill.rgba_color = fill.rgba_color;\n                anno_fill.bbox = st.bbox;\n                out_ref = AnnotatedRef(_2430.conf.anno_alloc.offset + ((st.path_count - 1u) * 28u));\n                param_72.offset = _2430.conf.anno_alloc.offset;\n                AnnotatedRef param_73 = out_ref;\n                AnnoFill param_74 = anno_fill;\n                Annotated_Fill_write(param_72, param_73, 
param_74);\n                break;\n            }\n            case 13u:\n            {\n                ElementRef param_75 = this_ref;\n                FillImage fill_img = Element_FillImage_read(param_75);\n                anno_fill_img.index = fill_img.index;\n                anno_fill_img.offset = fill_img.offset;\n                anno_fill_img.bbox = st.bbox;\n                out_ref = AnnotatedRef(_2430.conf.anno_alloc.offset + ((st.path_count - 1u) * 28u));\n                param_76.offset = _2430.conf.anno_alloc.offset;\n                AnnotatedRef param_77 = out_ref;\n                AnnoFillImage param_78 = anno_fill_img;\n                Annotated_FillImage_write(param_76, param_77, param_78);\n                break;\n            }\n            case 11u:\n            {\n                ElementRef param_79 = this_ref;\n                Clip begin_clip = Element_BeginClip_read(param_79);\n                AnnoClip anno_begin_clip = AnnoClip(begin_clip.bbox);\n                anno_begin_clip.bbox = begin_clip.bbox;\n                out_ref = AnnotatedRef(_2430.conf.anno_alloc.offset + ((st.path_count - 1u) * 28u));\n                param_80.offset = _2430.conf.anno_alloc.offset;\n                AnnotatedRef param_81 = out_ref;\n                AnnoClip param_82 = anno_begin_clip;\n                Annotated_BeginClip_write(param_80, param_81, param_82);\n                break;\n            }\n            case 12u:\n            {\n                ElementRef param_83 = this_ref;\n                Clip end_clip = Element_EndClip_read(param_83);\n                AnnoClip anno_end_clip = AnnoClip(end_clip.bbox);\n                out_ref = AnnotatedRef(_2430.conf.anno_alloc.offset + ((st.path_count - 1u) * 28u));\n                param_84.offset = _2430.conf.anno_alloc.offset;\n                AnnotatedRef param_85 = out_ref;\n                AnnoClip param_86 = anno_end_clip;\n                Annotated_EndClip_write(param_84, param_85, param_86);\n                
break;\n            }\n        }\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct ElementRef\n{\n    uint offset;\n};\n\nstruct LineSegRef\n{\n    uint offset;\n};\n\nstruct LineSeg\n{\n    vec2 p0;\n    vec2 p1;\n};\n\nstruct QuadSegRef\n{\n    uint offset;\n};\n\nstruct QuadSeg\n{\n    vec2 p0;\n    vec2 p1;\n    vec2 p2;\n};\n\nstruct CubicSegRef\n{\n    uint offset;\n};\n\nstruct CubicSeg\n{\n    vec2 p0;\n    vec2 p1;\n    vec2 p2;\n    vec2 p3;\n};\n\nstruct FillRef\n{\n    uint offset;\n};\n\nstruct Fill\n{\n    uint rgba_color;\n};\n\nstruct FillImageRef\n{\n    uint offset;\n};\n\nstruct FillImage\n{\n    uint index;\n    ivec2 offset;\n};\n\nstruct StrokeRef\n{\n    uint offset;\n};\n\nstruct Stroke\n{\n    uint rgba_color;\n};\n\nstruct SetLineWidthRef\n{\n    uint offset;\n};\n\nstruct SetLineWidth\n{\n    float width;\n};\n\nstruct TransformRef\n{\n    uint offset;\n};\n\nstruct Transform\n{\n    vec4 mat;\n    vec2 translate;\n};\n\nstruct ClipRef\n{\n    uint offset;\n};\n\nstruct Clip\n{\n    vec4 bbox;\n};\n\nstruct StateRef\n{\n    uint offset;\n};\n\nstruct State\n{\n    vec4 mat;\n    vec2 translate;\n    vec4 bbox;\n    float linewidth;\n    uint flags;\n    uint path_count;\n    uint pathseg_count;\n    uint trans_count;\n};\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    vec4 bbox;\n    uint rgba_color;\n};\n\nstruct AnnoFillImageRef\n{\n    uint offset;\n};\n\nstruct AnnoFillImage\n{\n    vec4 bbox;\n    uint index;\n    ivec2 offset;\n};\n\nstruct AnnoStrokeRef\n{\n    uint offset;\n};\n\nstruct AnnoStroke\n{\n    vec4 bbox;\n    uint rgba_color;\n    float linewidth;\n};\n\nstruct AnnoClipRef\n{\n    uint offset;\n};\n\nstruct AnnoClip\n{\n    vec4 bbox;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct PathStrokeCubicRef\n{\n    uint offset;\n};\n\nstruct PathStrokeCubic\n{\n    vec2 p0;\n    vec2 p1;\n    vec2 p2;\n    vec2 p3;\n    
uint path_ix;\n    uint trans_ix;\n    vec2 stroke;\n};\n\nstruct TransformSegRef\n{\n    uint offset;\n};\n\nstruct TransformSeg\n{\n    vec4 mat;\n    vec2 translate;\n};\n\nstruct PathSegRef\n{\n    uint offset;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n    Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _292;\n\nlayout(binding = 2, std430) readonly buffer SceneBuf\n{\n    uint scene[];\n} _316;\n\nlayout(binding = 3, std430) coherent buffer StateBuf\n{\n    uint part_counter;\n    uint state[];\n} _790;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _2380;\n\nshared uint sh_part_ix;\nshared State sh_state[32];\nshared State sh_prefix;\n\nuint Element_tag(ElementRef ref)\n{\n    return _316.scene[ref.offset >> uint(2)];\n}\n\nLineSeg LineSeg_read(LineSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _316.scene[ix + 0u];\n    uint raw1 = _316.scene[ix + 1u];\n    uint raw2 = _316.scene[ix + 2u];\n    uint raw3 = _316.scene[ix + 3u];\n    LineSeg s;\n    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    return s;\n}\n\nLineSeg Element_FillLine_read(ElementRef ref)\n{\n    LineSegRef param = LineSegRef(ref.offset + 4u);\n    return LineSeg_read(param);\n}\n\nQuadSeg QuadSeg_read(QuadSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _316.scene[ix + 0u];\n    uint raw1 = _316.scene[ix + 1u];\n    uint raw2 = _316.scene[ix + 2u];\n    uint raw3 = _316.scene[ix + 3u];\n    uint raw4 = _316.scene[ix + 4u];\n    uint raw5 = _316.scene[ix + 5u];\n    QuadSeg s;\n    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.p1 = 
vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    return s;\n}\n\nQuadSeg Element_FillQuad_read(ElementRef ref)\n{\n    QuadSegRef param = QuadSegRef(ref.offset + 4u);\n    return QuadSeg_read(param);\n}\n\nCubicSeg CubicSeg_read(CubicSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _316.scene[ix + 0u];\n    uint raw1 = _316.scene[ix + 1u];\n    uint raw2 = _316.scene[ix + 2u];\n    uint raw3 = _316.scene[ix + 3u];\n    uint raw4 = _316.scene[ix + 4u];\n    uint raw5 = _316.scene[ix + 5u];\n    uint raw6 = _316.scene[ix + 6u];\n    uint raw7 = _316.scene[ix + 7u];\n    CubicSeg s;\n    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));\n    return s;\n}\n\nCubicSeg Element_FillCubic_read(ElementRef ref)\n{\n    CubicSegRef param = CubicSegRef(ref.offset + 4u);\n    return CubicSeg_read(param);\n}\n\nSetLineWidth SetLineWidth_read(SetLineWidthRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _316.scene[ix + 0u];\n    SetLineWidth s;\n    s.width = uintBitsToFloat(raw0);\n    return s;\n}\n\nSetLineWidth Element_SetLineWidth_read(ElementRef ref)\n{\n    SetLineWidthRef param = SetLineWidthRef(ref.offset + 4u);\n    return SetLineWidth_read(param);\n}\n\nTransform Transform_read(TransformRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _316.scene[ix + 0u];\n    uint raw1 = _316.scene[ix + 1u];\n    uint raw2 = _316.scene[ix + 2u];\n    uint raw3 = _316.scene[ix + 3u];\n    uint raw4 = _316.scene[ix + 4u];\n    uint raw5 = _316.scene[ix + 5u];\n    Transform s;\n    s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    
return s;\n}\n\nTransform Element_Transform_read(ElementRef ref)\n{\n    TransformRef param = TransformRef(ref.offset + 4u);\n    return Transform_read(param);\n}\n\nState map_element(ElementRef ref)\n{\n    ElementRef param = ref;\n    uint tag = Element_tag(param);\n    State c;\n    c.bbox = vec4(0.0);\n    c.mat = vec4(1.0, 0.0, 0.0, 1.0);\n    c.translate = vec2(0.0);\n    c.linewidth = 1.0;\n    c.flags = 0u;\n    c.path_count = 0u;\n    c.pathseg_count = 0u;\n    c.trans_count = 0u;\n    switch (tag)\n    {\n        case 2u:\n        case 1u:\n        {\n            ElementRef param_1 = ref;\n            LineSeg line = Element_FillLine_read(param_1);\n            vec2 _1882 = min(line.p0, line.p1);\n            c.bbox = vec4(_1882.x, _1882.y, c.bbox.z, c.bbox.w);\n            vec2 _1890 = max(line.p0, line.p1);\n            c.bbox = vec4(c.bbox.x, c.bbox.y, _1890.x, _1890.y);\n            c.pathseg_count = 1u;\n            break;\n        }\n        case 4u:\n        case 3u:\n        {\n            ElementRef param_2 = ref;\n            QuadSeg quad = Element_FillQuad_read(param_2);\n            vec2 _1907 = min(min(quad.p0, quad.p1), quad.p2);\n            c.bbox = vec4(_1907.x, _1907.y, c.bbox.z, c.bbox.w);\n            vec2 _1918 = max(max(quad.p0, quad.p1), quad.p2);\n            c.bbox = vec4(c.bbox.x, c.bbox.y, _1918.x, _1918.y);\n            c.pathseg_count = 1u;\n            break;\n        }\n        case 6u:\n        case 5u:\n        {\n            ElementRef param_3 = ref;\n            CubicSeg cubic = Element_FillCubic_read(param_3);\n            vec2 _1938 = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3));\n            c.bbox = vec4(_1938.x, _1938.y, c.bbox.z, c.bbox.w);\n            vec2 _1952 = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));\n            c.bbox = vec4(c.bbox.x, c.bbox.y, _1952.x, _1952.y);\n            c.pathseg_count = 1u;\n            break;\n        }\n        case 8u:\n        case 13u:\n        case 7u:\n   
     case 11u:\n        {\n            c.flags = 4u;\n            c.path_count = 1u;\n            break;\n        }\n        case 12u:\n        {\n            c.path_count = 1u;\n            break;\n        }\n        case 9u:\n        {\n            ElementRef param_4 = ref;\n            SetLineWidth lw = Element_SetLineWidth_read(param_4);\n            c.linewidth = lw.width;\n            c.flags = 1u;\n            break;\n        }\n        case 10u:\n        {\n            ElementRef param_5 = ref;\n            Transform t = Element_Transform_read(param_5);\n            c.mat = t.mat;\n            c.translate = t.translate;\n            c.trans_count = 1u;\n            break;\n        }\n    }\n    return c;\n}\n\nElementRef Element_index(ElementRef ref, uint index)\n{\n    return ElementRef(ref.offset + (index * 36u));\n}\n\nState combine_state(State a, State b)\n{\n    State c;\n    c.bbox.x = (min(a.mat.x * b.bbox.x, a.mat.x * b.bbox.z) + min(a.mat.z * b.bbox.y, a.mat.z * b.bbox.w)) + a.translate.x;\n    c.bbox.y = (min(a.mat.y * b.bbox.x, a.mat.y * b.bbox.z) + min(a.mat.w * b.bbox.y, a.mat.w * b.bbox.w)) + a.translate.y;\n    c.bbox.z = (max(a.mat.x * b.bbox.x, a.mat.x * b.bbox.z) + max(a.mat.z * b.bbox.y, a.mat.z * b.bbox.w)) + a.translate.x;\n    c.bbox.w = (max(a.mat.y * b.bbox.x, a.mat.y * b.bbox.z) + max(a.mat.w * b.bbox.y, a.mat.w * b.bbox.w)) + a.translate.y;\n    bool _1646 = (a.flags & 4u) == 0u;\n    bool _1654;\n    if (_1646)\n    {\n        _1654 = b.bbox.z <= b.bbox.x;\n    }\n    else\n    {\n        _1654 = _1646;\n    }\n    bool _1662;\n    if (_1654)\n    {\n        _1662 = b.bbox.w <= b.bbox.y;\n    }\n    else\n    {\n        _1662 = _1654;\n    }\n    if (_1662)\n    {\n        c.bbox = a.bbox;\n    }\n    else\n    {\n        bool _1672 = (a.flags & 4u) == 0u;\n        bool _1679;\n        if (_1672)\n        {\n            _1679 = (b.flags & 2u) == 0u;\n        }\n        else\n        {\n            _1679 = _1672;\n        }\n       
 bool _1696;\n        if (_1679)\n        {\n            bool _1686 = a.bbox.z > a.bbox.x;\n            bool _1695;\n            if (!_1686)\n            {\n                _1695 = a.bbox.w > a.bbox.y;\n            }\n            else\n            {\n                _1695 = _1686;\n            }\n            _1696 = _1695;\n        }\n        else\n        {\n            _1696 = _1679;\n        }\n        if (_1696)\n        {\n            vec2 _1705 = min(a.bbox.xy, c.bbox.xy);\n            c.bbox = vec4(_1705.x, _1705.y, c.bbox.z, c.bbox.w);\n            vec2 _1715 = max(a.bbox.zw, c.bbox.zw);\n            c.bbox = vec4(c.bbox.x, c.bbox.y, _1715.x, _1715.y);\n        }\n    }\n    c.mat.x = (a.mat.x * b.mat.x) + (a.mat.z * b.mat.y);\n    c.mat.y = (a.mat.y * b.mat.x) + (a.mat.w * b.mat.y);\n    c.mat.z = (a.mat.x * b.mat.z) + (a.mat.z * b.mat.w);\n    c.mat.w = (a.mat.y * b.mat.z) + (a.mat.w * b.mat.w);\n    c.translate.x = ((a.mat.x * b.translate.x) + (a.mat.z * b.translate.y)) + a.translate.x;\n    c.translate.y = ((a.mat.y * b.translate.x) + (a.mat.w * b.translate.y)) + a.translate.y;\n    float _1801;\n    if ((b.flags & 1u) == 0u)\n    {\n        _1801 = a.linewidth;\n    }\n    else\n    {\n        _1801 = b.linewidth;\n    }\n    c.linewidth = _1801;\n    c.flags = (a.flags & 3u) | b.flags;\n    c.flags |= ((a.flags & 4u) >> uint(1));\n    c.path_count = a.path_count + b.path_count;\n    c.pathseg_count = a.pathseg_count + b.pathseg_count;\n    c.trans_count = a.trans_count + b.trans_count;\n    return c;\n}\n\nStateRef state_aggregate_ref(uint partition_ix)\n{\n    return StateRef(4u + (partition_ix * 124u));\n}\n\nvoid State_write(StateRef ref, State s)\n{\n    uint ix = ref.offset >> uint(2);\n    _790.state[ix + 0u] = floatBitsToUint(s.mat.x);\n    _790.state[ix + 1u] = floatBitsToUint(s.mat.y);\n    _790.state[ix + 2u] = floatBitsToUint(s.mat.z);\n    _790.state[ix + 3u] = floatBitsToUint(s.mat.w);\n    _790.state[ix + 4u] = 
floatBitsToUint(s.translate.x);\n    _790.state[ix + 5u] = floatBitsToUint(s.translate.y);\n    _790.state[ix + 6u] = floatBitsToUint(s.bbox.x);\n    _790.state[ix + 7u] = floatBitsToUint(s.bbox.y);\n    _790.state[ix + 8u] = floatBitsToUint(s.bbox.z);\n    _790.state[ix + 9u] = floatBitsToUint(s.bbox.w);\n    _790.state[ix + 10u] = floatBitsToUint(s.linewidth);\n    _790.state[ix + 11u] = s.flags;\n    _790.state[ix + 12u] = s.path_count;\n    _790.state[ix + 13u] = s.pathseg_count;\n    _790.state[ix + 14u] = s.trans_count;\n}\n\nStateRef state_prefix_ref(uint partition_ix)\n{\n    return StateRef((4u + (partition_ix * 124u)) + 60u);\n}\n\nuint state_flag_index(uint partition_ix)\n{\n    return partition_ix * 31u;\n}\n\nState State_read(StateRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _790.state[ix + 0u];\n    uint raw1 = _790.state[ix + 1u];\n    uint raw2 = _790.state[ix + 2u];\n    uint raw3 = _790.state[ix + 3u];\n    uint raw4 = _790.state[ix + 4u];\n    uint raw5 = _790.state[ix + 5u];\n    uint raw6 = _790.state[ix + 6u];\n    uint raw7 = _790.state[ix + 7u];\n    uint raw8 = _790.state[ix + 8u];\n    uint raw9 = _790.state[ix + 9u];\n    uint raw10 = _790.state[ix + 10u];\n    uint raw11 = _790.state[ix + 11u];\n    uint raw12 = _790.state[ix + 12u];\n    uint raw13 = _790.state[ix + 13u];\n    uint raw14 = _790.state[ix + 14u];\n    State s;\n    s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    s.bbox = vec4(uintBitsToFloat(raw6), uintBitsToFloat(raw7), uintBitsToFloat(raw8), uintBitsToFloat(raw9));\n    s.linewidth = uintBitsToFloat(raw10);\n    s.flags = raw11;\n    s.path_count = raw12;\n    s.pathseg_count = raw13;\n    s.trans_count = raw14;\n    return s;\n}\n\nLineSeg Element_StrokeLine_read(ElementRef ref)\n{\n    LineSegRef param = LineSegRef(ref.offset + 4u);\n    return 
LineSeg_read(param);\n}\n\nvec2 get_linewidth(State st)\n{\n    return vec2(length(st.mat.xz), length(st.mat.yw)) * (0.5 * st.linewidth);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _292.memory[offset] = val;\n}\n\nvoid PathStrokeCubic_write(Alloc a, PathStrokeCubicRef ref, PathStrokeCubic s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.p0.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.p0.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.p1.x);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.p1.y);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = floatBitsToUint(s.p2.x);\n    write_mem(param_12, param_13, param_14);\n    Alloc param_15 = a;\n    uint param_16 = ix + 5u;\n    uint param_17 = floatBitsToUint(s.p2.y);\n    write_mem(param_15, param_16, param_17);\n    Alloc param_18 = a;\n    uint param_19 = ix + 6u;\n    uint param_20 = floatBitsToUint(s.p3.x);\n    write_mem(param_18, param_19, param_20);\n    Alloc param_21 = a;\n    uint param_22 = ix + 7u;\n    uint param_23 = floatBitsToUint(s.p3.y);\n    write_mem(param_21, param_22, param_23);\n    Alloc param_24 = a;\n    uint param_25 = ix + 8u;\n    uint param_26 = s.path_ix;\n    write_mem(param_24, param_25, param_26);\n    Alloc param_27 = a;\n    uint param_28 = ix + 9u;\n    uint param_29 = s.trans_ix;\n    write_mem(param_27, param_28, param_29);\n    Alloc param_30 = a;\n    uint 
param_31 = ix + 10u;\n    uint param_32 = floatBitsToUint(s.stroke.x);\n    write_mem(param_30, param_31, param_32);\n    Alloc param_33 = a;\n    uint param_34 = ix + 11u;\n    uint param_35 = floatBitsToUint(s.stroke.y);\n    write_mem(param_33, param_34, param_35);\n}\n\nQuadSeg Element_StrokeQuad_read(ElementRef ref)\n{\n    QuadSegRef param = QuadSegRef(ref.offset + 4u);\n    return QuadSeg_read(param);\n}\n\nCubicSeg Element_StrokeCubic_read(ElementRef ref)\n{\n    CubicSegRef param = CubicSegRef(ref.offset + 4u);\n    return CubicSeg_read(param);\n}\n\nStroke Stroke_read(StrokeRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _316.scene[ix + 0u];\n    Stroke s;\n    s.rgba_color = raw0;\n    return s;\n}\n\nStroke Element_Stroke_read(ElementRef ref)\n{\n    StrokeRef param = StrokeRef(ref.offset + 4u);\n    return Stroke_read(param);\n}\n\nvoid AnnoStroke_write(Alloc a, AnnoStrokeRef ref, AnnoStroke s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.bbox.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.bbox.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.bbox.z);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.bbox.w);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = s.rgba_color;\n    write_mem(param_12, param_13, param_14);\n    Alloc param_15 = a;\n    uint param_16 = ix + 5u;\n    uint param_17 = floatBitsToUint(s.linewidth);\n    write_mem(param_15, param_16, param_17);\n}\n\nvoid Annotated_Stroke_write(Alloc a, AnnotatedRef ref, AnnoStroke s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint 
param_2 = 1u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    AnnoStrokeRef param_4 = AnnoStrokeRef(ref.offset + 4u);\n    AnnoStroke param_5 = s;\n    AnnoStroke_write(param_3, param_4, param_5);\n}\n\nFill Fill_read(FillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _316.scene[ix + 0u];\n    Fill s;\n    s.rgba_color = raw0;\n    return s;\n}\n\nFill Element_Fill_read(ElementRef ref)\n{\n    FillRef param = FillRef(ref.offset + 4u);\n    return Fill_read(param);\n}\n\nvoid AnnoFill_write(Alloc a, AnnoFillRef ref, AnnoFill s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.bbox.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.bbox.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.bbox.z);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.bbox.w);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = s.rgba_color;\n    write_mem(param_12, param_13, param_14);\n}\n\nvoid Annotated_Fill_write(Alloc a, AnnotatedRef ref, AnnoFill s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 2u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    AnnoFillRef param_4 = AnnoFillRef(ref.offset + 4u);\n    AnnoFill param_5 = s;\n    AnnoFill_write(param_3, param_4, param_5);\n}\n\nFillImage FillImage_read(FillImageRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _316.scene[ix + 0u];\n    uint raw1 = _316.scene[ix + 1u];\n    FillImage s;\n    s.index = raw0;\n    s.offset = ivec2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);\n    return s;\n}\n\nFillImage 
Element_FillImage_read(ElementRef ref)\n{\n    FillImageRef param = FillImageRef(ref.offset + 4u);\n    return FillImage_read(param);\n}\n\nvoid AnnoFillImage_write(Alloc a, AnnoFillImageRef ref, AnnoFillImage s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.bbox.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.bbox.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.bbox.z);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.bbox.w);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = s.index;\n    write_mem(param_12, param_13, param_14);\n    Alloc param_15 = a;\n    uint param_16 = ix + 5u;\n    uint param_17 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));\n    write_mem(param_15, param_16, param_17);\n}\n\nvoid Annotated_FillImage_write(Alloc a, AnnotatedRef ref, AnnoFillImage s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 3u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    AnnoFillImageRef param_4 = AnnoFillImageRef(ref.offset + 4u);\n    AnnoFillImage param_5 = s;\n    AnnoFillImage_write(param_3, param_4, param_5);\n}\n\nClip Clip_read(ClipRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    uint raw0 = _316.scene[ix + 0u];\n    uint raw1 = _316.scene[ix + 1u];\n    uint raw2 = _316.scene[ix + 2u];\n    uint raw3 = _316.scene[ix + 3u];\n    Clip s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    return s;\n}\n\nClip Element_BeginClip_read(ElementRef ref)\n{\n    ClipRef param = ClipRef(ref.offset + 4u);\n 
   return Clip_read(param);\n}\n\nvoid AnnoClip_write(Alloc a, AnnoClipRef ref, AnnoClip s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.bbox.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.bbox.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.bbox.z);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.bbox.w);\n    write_mem(param_9, param_10, param_11);\n}\n\nvoid Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, AnnoClip s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 4u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    AnnoClipRef param_4 = AnnoClipRef(ref.offset + 4u);\n    AnnoClip param_5 = s;\n    AnnoClip_write(param_3, param_4, param_5);\n}\n\nClip Element_EndClip_read(ElementRef ref)\n{\n    ClipRef param = ClipRef(ref.offset + 4u);\n    return Clip_read(param);\n}\n\nvoid Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoClip s)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    uint param_2 = 5u;\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    AnnoClipRef param_4 = AnnoClipRef(ref.offset + 4u);\n    AnnoClip param_5 = s;\n    AnnoClip_write(param_3, param_4, param_5);\n}\n\nvoid TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.mat.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.mat.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 
= ix + 2u;\n    uint param_8 = floatBitsToUint(s.mat.z);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.mat.w);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = floatBitsToUint(s.translate.x);\n    write_mem(param_12, param_13, param_14);\n    Alloc param_15 = a;\n    uint param_16 = ix + 5u;\n    uint param_17 = floatBitsToUint(s.translate.y);\n    write_mem(param_15, param_16, param_17);\n}\n\nvoid main()\n{\n    if (_292.mem_error != 0u)\n    {\n        return;\n    }\n    if (gl_LocalInvocationID.x == 0u)\n    {\n        uint _2022 = atomicAdd(_790.part_counter, 1u);\n        sh_part_ix = _2022;\n    }\n    barrier();\n    uint part_ix = sh_part_ix;\n    uint ix = (part_ix * 128u) + (gl_LocalInvocationID.x * 4u);\n    ElementRef ref = ElementRef(ix * 36u);\n    ElementRef param = ref;\n    State th_state[4];\n    th_state[0] = map_element(param);\n    for (uint i = 1u; i < 4u; i++)\n    {\n        ElementRef param_1 = ref;\n        uint param_2 = i;\n        ElementRef param_3 = Element_index(param_1, param_2);\n        State param_4 = th_state[i - 1u];\n        State param_5 = map_element(param_3);\n        th_state[i] = combine_state(param_4, param_5);\n    }\n    State agg = th_state[3];\n    sh_state[gl_LocalInvocationID.x] = agg;\n    for (uint i_1 = 0u; i_1 < 5u; i_1++)\n    {\n        barrier();\n        if (gl_LocalInvocationID.x >= uint(1 << int(i_1)))\n        {\n            State other = sh_state[gl_LocalInvocationID.x - uint(1 << int(i_1))];\n            State param_6 = other;\n            State param_7 = agg;\n            agg = combine_state(param_6, param_7);\n        }\n        barrier();\n        sh_state[gl_LocalInvocationID.x] = agg;\n    }\n    State exclusive;\n    exclusive.bbox = vec4(0.0);\n    exclusive.mat = vec4(1.0, 0.0, 0.0, 1.0);\n    exclusive.translate = vec2(0.0);\n    
exclusive.linewidth = 1.0;\n    exclusive.flags = 0u;\n    exclusive.path_count = 0u;\n    exclusive.pathseg_count = 0u;\n    exclusive.trans_count = 0u;\n    if (gl_LocalInvocationID.x == 31u)\n    {\n        uint param_8 = part_ix;\n        StateRef param_9 = state_aggregate_ref(param_8);\n        State param_10 = agg;\n        State_write(param_9, param_10);\n        uint flag = 1u;\n        memoryBarrierBuffer();\n        if (part_ix == 0u)\n        {\n            uint param_11 = part_ix;\n            StateRef param_12 = state_prefix_ref(param_11);\n            State param_13 = agg;\n            State_write(param_12, param_13);\n            flag = 2u;\n        }\n        uint param_14 = part_ix;\n        _790.state[state_flag_index(param_14)] = flag;\n        if (part_ix != 0u)\n        {\n            uint look_back_ix = part_ix - 1u;\n            uint their_ix = 0u;\n            State their_agg;\n            while (true)\n            {\n                uint param_15 = look_back_ix;\n                flag = _790.state[state_flag_index(param_15)];\n                if (flag == 2u)\n                {\n                    uint param_16 = look_back_ix;\n                    StateRef param_17 = state_prefix_ref(param_16);\n                    State their_prefix = State_read(param_17);\n                    State param_18 = their_prefix;\n                    State param_19 = exclusive;\n                    exclusive = combine_state(param_18, param_19);\n                    break;\n                }\n                else\n                {\n                    if (flag == 1u)\n                    {\n                        uint param_20 = look_back_ix;\n                        StateRef param_21 = state_aggregate_ref(param_20);\n                        their_agg = State_read(param_21);\n                        State param_22 = their_agg;\n                        State param_23 = exclusive;\n                        exclusive = combine_state(param_22, param_23);\n            
            look_back_ix--;\n                        their_ix = 0u;\n                        continue;\n                    }\n                }\n                ElementRef ref_1 = ElementRef(((look_back_ix * 128u) + their_ix) * 36u);\n                ElementRef param_24 = ref_1;\n                State s = map_element(param_24);\n                if (their_ix == 0u)\n                {\n                    their_agg = s;\n                }\n                else\n                {\n                    State param_25 = their_agg;\n                    State param_26 = s;\n                    their_agg = combine_state(param_25, param_26);\n                }\n                their_ix++;\n                if (their_ix == 128u)\n                {\n                    State param_27 = their_agg;\n                    State param_28 = exclusive;\n                    exclusive = combine_state(param_27, param_28);\n                    if (look_back_ix == 0u)\n                    {\n                        break;\n                    }\n                    look_back_ix--;\n                    their_ix = 0u;\n                }\n            }\n            State param_29 = exclusive;\n            State param_30 = agg;\n            State inclusive_prefix = combine_state(param_29, param_30);\n            sh_prefix = exclusive;\n            uint param_31 = part_ix;\n            StateRef param_32 = state_prefix_ref(param_31);\n            State param_33 = inclusive_prefix;\n            State_write(param_32, param_33);\n            memoryBarrierBuffer();\n            flag = 2u;\n            uint param_34 = part_ix;\n            _790.state[state_flag_index(param_34)] = flag;\n        }\n    }\n    barrier();\n    if (part_ix != 0u)\n    {\n        exclusive = sh_prefix;\n    }\n    State row = exclusive;\n    if (gl_LocalInvocationID.x > 0u)\n    {\n        State other_1 = sh_state[gl_LocalInvocationID.x - 1u];\n        State param_35 = row;\n        State param_36 = other_1;\n        row 
= combine_state(param_35, param_36);\n    }\n    PathStrokeCubic path_cubic;\n    PathSegRef path_out_ref;\n    uint out_tag;\n    Alloc param_44;\n    Alloc param_47;\n    Alloc param_52;\n    Alloc param_55;\n    Alloc param_60;\n    Alloc param_63;\n    AnnoStroke anno_stroke;\n    AnnotatedRef out_ref;\n    Alloc param_68;\n    AnnoFill anno_fill;\n    Alloc param_72;\n    AnnoFillImage anno_fill_img;\n    Alloc param_76;\n    Alloc param_80;\n    Alloc param_84;\n    Alloc param_87;\n    for (uint i_2 = 0u; i_2 < 4u; i_2++)\n    {\n        State param_37 = row;\n        State param_38 = th_state[i_2];\n        State st = combine_state(param_37, param_38);\n        ElementRef param_39 = ref;\n        uint param_40 = i_2;\n        ElementRef this_ref = Element_index(param_39, param_40);\n        ElementRef param_41 = this_ref;\n        uint tag = Element_tag(param_41);\n        switch (tag)\n        {\n            case 2u:\n            case 1u:\n            {\n                ElementRef param_42 = this_ref;\n                LineSeg line = Element_StrokeLine_read(param_42);\n                path_cubic.p0 = line.p0;\n                path_cubic.p1 = mix(line.p0, line.p1, vec2(0.3333333432674407958984375));\n                path_cubic.p2 = mix(line.p1, line.p0, vec2(0.3333333432674407958984375));\n                path_cubic.p3 = line.p1;\n                path_cubic.path_ix = st.path_count;\n                path_cubic.trans_ix = st.trans_count;\n                if (tag == 1u)\n                {\n                    State param_43 = st;\n                    path_cubic.stroke = get_linewidth(param_43);\n                }\n                else\n                {\n                    path_cubic.stroke = vec2(0.0);\n                }\n                path_out_ref = PathSegRef(_2380.conf.pathseg_alloc.offset + ((st.pathseg_count - 1u) * 52u));\n                out_tag = uint((tag == 2u) ? 
1 : 2);\n                param_44.offset = _2380.conf.pathseg_alloc.offset;\n                uint param_45 = path_out_ref.offset >> uint(2);\n                uint param_46 = out_tag;\n                write_mem(param_44, param_45, param_46);\n                param_47.offset = _2380.conf.pathseg_alloc.offset;\n                PathStrokeCubicRef param_48 = PathStrokeCubicRef(path_out_ref.offset + 4u);\n                PathStrokeCubic param_49 = path_cubic;\n                PathStrokeCubic_write(param_47, param_48, param_49);\n                break;\n            }\n            case 4u:\n            case 3u:\n            {\n                ElementRef param_50 = this_ref;\n                QuadSeg quad = Element_StrokeQuad_read(param_50);\n                path_cubic.p0 = quad.p0;\n                path_cubic.p1 = mix(quad.p1, quad.p0, vec2(0.3333333432674407958984375));\n                path_cubic.p2 = mix(quad.p1, quad.p2, vec2(0.3333333432674407958984375));\n                path_cubic.p3 = quad.p2;\n                path_cubic.path_ix = st.path_count;\n                path_cubic.trans_ix = st.trans_count;\n                if (tag == 3u)\n                {\n                    State param_51 = st;\n                    path_cubic.stroke = get_linewidth(param_51);\n                }\n                else\n                {\n                    path_cubic.stroke = vec2(0.0);\n                }\n                path_out_ref = PathSegRef(_2380.conf.pathseg_alloc.offset + ((st.pathseg_count - 1u) * 52u));\n                out_tag = uint((tag == 4u) ? 
1 : 2);\n                param_52.offset = _2380.conf.pathseg_alloc.offset;\n                uint param_53 = path_out_ref.offset >> uint(2);\n                uint param_54 = out_tag;\n                write_mem(param_52, param_53, param_54);\n                param_55.offset = _2380.conf.pathseg_alloc.offset;\n                PathStrokeCubicRef param_56 = PathStrokeCubicRef(path_out_ref.offset + 4u);\n                PathStrokeCubic param_57 = path_cubic;\n                PathStrokeCubic_write(param_55, param_56, param_57);\n                break;\n            }\n            case 6u:\n            case 5u:\n            {\n                ElementRef param_58 = this_ref;\n                CubicSeg cubic = Element_StrokeCubic_read(param_58);\n                path_cubic.p0 = cubic.p0;\n                path_cubic.p1 = cubic.p1;\n                path_cubic.p2 = cubic.p2;\n                path_cubic.p3 = cubic.p3;\n                path_cubic.path_ix = st.path_count;\n                path_cubic.trans_ix = st.trans_count;\n                if (tag == 5u)\n                {\n                    State param_59 = st;\n                    path_cubic.stroke = get_linewidth(param_59);\n                }\n                else\n                {\n                    path_cubic.stroke = vec2(0.0);\n                }\n                path_out_ref = PathSegRef(_2380.conf.pathseg_alloc.offset + ((st.pathseg_count - 1u) * 52u));\n                out_tag = uint((tag == 6u) ? 
1 : 2);\n                param_60.offset = _2380.conf.pathseg_alloc.offset;\n                uint param_61 = path_out_ref.offset >> uint(2);\n                uint param_62 = out_tag;\n                write_mem(param_60, param_61, param_62);\n                param_63.offset = _2380.conf.pathseg_alloc.offset;\n                PathStrokeCubicRef param_64 = PathStrokeCubicRef(path_out_ref.offset + 4u);\n                PathStrokeCubic param_65 = path_cubic;\n                PathStrokeCubic_write(param_63, param_64, param_65);\n                break;\n            }\n            case 7u:\n            {\n                ElementRef param_66 = this_ref;\n                Stroke stroke = Element_Stroke_read(param_66);\n                anno_stroke.rgba_color = stroke.rgba_color;\n                State param_67 = st;\n                vec2 lw = get_linewidth(param_67);\n                anno_stroke.bbox = st.bbox + vec4(-lw, lw);\n                anno_stroke.linewidth = st.linewidth * sqrt(abs((st.mat.x * st.mat.w) - (st.mat.y * st.mat.z)));\n                out_ref = AnnotatedRef(_2380.conf.anno_alloc.offset + ((st.path_count - 1u) * 28u));\n                param_68.offset = _2380.conf.anno_alloc.offset;\n                AnnotatedRef param_69 = out_ref;\n                AnnoStroke param_70 = anno_stroke;\n                Annotated_Stroke_write(param_68, param_69, param_70);\n                break;\n            }\n            case 8u:\n            {\n                ElementRef param_71 = this_ref;\n                Fill fill = Element_Fill_read(param_71);\n                anno_fill.rgba_color = fill.rgba_color;\n                anno_fill.bbox = st.bbox;\n                out_ref = AnnotatedRef(_2380.conf.anno_alloc.offset + ((st.path_count - 1u) * 28u));\n                param_72.offset = _2380.conf.anno_alloc.offset;\n                AnnotatedRef param_73 = out_ref;\n                AnnoFill param_74 = anno_fill;\n                Annotated_Fill_write(param_72, param_73, 
param_74);\n                break;\n            }\n            case 13u:\n            {\n                ElementRef param_75 = this_ref;\n                FillImage fill_img = Element_FillImage_read(param_75);\n                anno_fill_img.index = fill_img.index;\n                anno_fill_img.offset = fill_img.offset;\n                anno_fill_img.bbox = st.bbox;\n                out_ref = AnnotatedRef(_2380.conf.anno_alloc.offset + ((st.path_count - 1u) * 28u));\n                param_76.offset = _2380.conf.anno_alloc.offset;\n                AnnotatedRef param_77 = out_ref;\n                AnnoFillImage param_78 = anno_fill_img;\n                Annotated_FillImage_write(param_76, param_77, param_78);\n                break;\n            }\n            case 11u:\n            {\n                ElementRef param_79 = this_ref;\n                Clip begin_clip = Element_BeginClip_read(param_79);\n                AnnoClip anno_begin_clip = AnnoClip(begin_clip.bbox);\n                anno_begin_clip.bbox = begin_clip.bbox;\n                out_ref = AnnotatedRef(_2380.conf.anno_alloc.offset + ((st.path_count - 1u) * 28u));\n                param_80.offset = _2380.conf.anno_alloc.offset;\n                AnnotatedRef param_81 = out_ref;\n                AnnoClip param_82 = anno_begin_clip;\n                Annotated_BeginClip_write(param_80, param_81, param_82);\n                break;\n            }\n            case 12u:\n            {\n                ElementRef param_83 = this_ref;\n                Clip end_clip = Element_EndClip_read(param_83);\n                AnnoClip anno_end_clip = AnnoClip(end_clip.bbox);\n                out_ref = AnnotatedRef(_2380.conf.anno_alloc.offset + ((st.path_count - 1u) * 28u));\n                param_84.offset = _2380.conf.anno_alloc.offset;\n                AnnotatedRef param_85 = out_ref;\n                AnnoClip param_86 = anno_end_clip;\n                Annotated_EndClip_write(param_84, param_85, param_86);\n                
break;\n            }\n            case 10u:\n            {\n                TransformSeg transform = TransformSeg(st.mat, st.translate);\n                TransformSegRef trans_ref = TransformSegRef(_2380.conf.trans_alloc.offset + ((st.trans_count - 1u) * 24u));\n                param_87.offset = _2380.conf.trans_alloc.offset;\n                TransformSegRef param_88 = trans_ref;\n                TransformSeg param_89 = transform;\n                TransformSeg_write(param_87, param_88, param_89);\n                break;\n            }\n        }\n    }\n}\n\n",
	}
	shader_intersect_frag = driver.ShaderSources{
		Name:      "intersect.frag",


@@ 166,7 166,7 @@ var (
	}
	shader_kernel4_comp = driver.ShaderSources{
		Name:      "kernel4.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 4, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct MallocResult\n{\n    Alloc alloc;\n    bool failed;\n};\n\nstruct CmdStrokeRef\n{\n    uint offset;\n};\n\nstruct CmdStroke\n{\n    uint tile_ref;\n    float half_width;\n    uint rgba_color;\n};\n\nstruct CmdFillRef\n{\n    uint offset;\n};\n\nstruct CmdFill\n{\n    uint tile_ref;\n    int backdrop;\n    uint rgba_color;\n};\n\nstruct CmdFillImageRef\n{\n    uint offset;\n};\n\nstruct CmdFillImage\n{\n    uint tile_ref;\n    int backdrop;\n    uint index;\n    ivec2 offset;\n};\n\nstruct CmdBeginClipRef\n{\n    uint offset;\n};\n\nstruct CmdBeginClip\n{\n    uint tile_ref;\n    int backdrop;\n};\n\nstruct CmdBeginSolidClipRef\n{\n    uint offset;\n};\n\nstruct CmdBeginSolidClip\n{\n    float alpha;\n};\n\nstruct CmdEndClipRef\n{\n    uint offset;\n};\n\nstruct CmdEndClip\n{\n    float alpha;\n};\n\nstruct CmdSolidRef\n{\n    uint offset;\n};\n\nstruct CmdSolid\n{\n    uint rgba_color;\n};\n\nstruct CmdSolidImageRef\n{\n    uint offset;\n};\n\nstruct CmdSolidImage\n{\n    uint index;\n    ivec2 offset;\n};\n\nstruct CmdJumpRef\n{\n    uint offset;\n};\n\nstruct CmdJump\n{\n    uint new_ref;\n};\n\nstruct CmdRef\n{\n    uint offset;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct TileSeg\n{\n    vec2 origin;\n    vec2 vector;\n    float y_edge;\n    TileSegRef next;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _237;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _1138;\n\nlayout(binding = 3, rgba8) uniform readonly highp image2D images[1];\nlayout(binding = 2, rgba8) 
uniform writeonly highp image2D image;\n\nshared MallocResult sh_clip_alloc;\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n    uint param = a.offset + offset;\n    uint param_1 = size;\n    return new_alloc(param, param_1);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _237.memory[offset];\n    return v;\n}\n\nuint Cmd_tag(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nCmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    CmdStroke s;\n    s.tile_ref = raw0;\n    s.half_width = uintBitsToFloat(raw1);\n    s.rgba_color = raw2;\n    return s;\n}\n\nCmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdStrokeRef param_1 = CmdStrokeRef(ref.offset + 4u);\n    return CmdStroke_read(param, param_1);\n}\n\nTileSeg TileSeg_read(Alloc a, TileSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix + 
4u;\n    uint raw4 = read_mem(param_8, param_9);\n    Alloc param_10 = a;\n    uint param_11 = ix + 5u;\n    uint raw5 = read_mem(param_10, param_11);\n    TileSeg s;\n    s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.y_edge = uintBitsToFloat(raw4);\n    s.next = TileSegRef(raw5);\n    return s;\n}\n\nvec3 fromsRGB(vec3 srgb)\n{\n    bvec3 cutoff = greaterThanEqual(srgb, vec3(0.040449999272823333740234375));\n    vec3 below = srgb / vec3(12.9200000762939453125);\n    vec3 above = pow((srgb + vec3(0.054999999701976776123046875)) / vec3(1.05499994754791259765625), vec3(2.400000095367431640625));\n    return mix(below, above, cutoff);\n}\n\nvec4 unpacksRGB(uint srgba)\n{\n    vec4 color = unpackUnorm4x8(srgba).wzyx;\n    vec3 param = color.xyz;\n    return vec4(fromsRGB(param), color.w);\n}\n\nCmdFill CmdFill_read(Alloc a, CmdFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    CmdFill s;\n    s.tile_ref = raw0;\n    s.backdrop = int(raw1);\n    s.rgba_color = raw2;\n    return s;\n}\n\nCmdFill Cmd_Fill_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdFillRef param_1 = CmdFillRef(ref.offset + 4u);\n    return CmdFill_read(param, param_1);\n}\n\nfloat[8] computeArea(vec2 xy, int backdrop, uint tile_ref)\n{\n    float area[8];\n    for (uint k = 0u; k < 8u; k++)\n    {\n        area[k] = float(backdrop);\n    }\n    TileSegRef tile_seg_ref = TileSegRef(tile_ref);\n    do\n    {\n        uint param = tile_seg_ref.offset;\n        uint param_1 = 24u;\n        Alloc param_2 = new_alloc(param, param_1);\n        TileSegRef param_3 = tile_seg_ref;\n        TileSeg seg = 
TileSeg_read(param_2, param_3);\n        for (uint k_1 = 0u; k_1 < 8u; k_1++)\n        {\n            vec2 my_xy = vec2(xy.x, xy.y + float(k_1 * 4u));\n            vec2 start = seg.origin - my_xy;\n            vec2 end = start + seg.vector;\n            vec2 window = clamp(vec2(start.y, end.y), vec2(0.0), vec2(1.0));\n            if (!(window.x == window.y))\n            {\n                vec2 t = (window - vec2(start.y)) / vec2(seg.vector.y);\n                vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));\n                float xmin = min(min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07;\n                float xmax = max(xs.x, xs.y);\n                float b = min(xmax, 1.0);\n                float c = max(b, 0.0);\n                float d = max(xmin, 0.0);\n                float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin);\n                area[k_1] += (a * (window.x - window.y));\n            }\n            area[k_1] += (sign(seg.vector.x) * clamp((my_xy.y - seg.y_edge) + 1.0, 0.0, 1.0));\n        }\n        tile_seg_ref = seg.next;\n    } while (tile_seg_ref.offset != 0u);\n    for (uint k_2 = 0u; k_2 < 8u; k_2++)\n    {\n        area[k_2] = min(abs(area[k_2]), 1.0);\n    }\n    return area;\n}\n\nCmdFillImage CmdFillImage_read(Alloc a, CmdFillImageRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    CmdFillImage s;\n    s.tile_ref = raw0;\n    s.backdrop = int(raw1);\n    s.index = raw2;\n    s.offset = ivec2(int(raw3 << uint(16)) >> 16, int(raw3) >> 16);\n    return s;\n}\n\nCmdFillImage Cmd_FillImage_read(Alloc a, CmdRef 
ref)\n{\n    Alloc param = a;\n    CmdFillImageRef param_1 = CmdFillImageRef(ref.offset + 4u);\n    return CmdFillImage_read(param, param_1);\n}\n\nvec4[8] fillImage(uvec2 xy, CmdSolidImage cmd_img)\n{\n    vec4 rgba[8];\n    for (uint i = 0u; i < 8u; i++)\n    {\n        ivec2 uv = ivec2(int(xy.x), int(xy.y + (i * 4u))) + cmd_img.offset;\n        vec4 fg_rgba = imageLoad(images[0], uv);\n        vec3 param = fg_rgba.xyz;\n        vec3 _1111 = fromsRGB(param);\n        fg_rgba = vec4(_1111.x, _1111.y, _1111.z, fg_rgba.w);\n        rgba[i] = fg_rgba;\n    }\n    return rgba;\n}\n\nMallocResult malloc(uint size)\n{\n    MallocResult r;\n    r.failed = false;\n    uint _243 = atomicAdd(_237.mem_offset, size);\n    uint offset = _243;\n    uint param = offset;\n    uint param_1 = size;\n    r.alloc = new_alloc(param, param_1);\n    if ((offset + size) > uint(int(uint(_237.memory.length())) * 4))\n    {\n        r.failed = true;\n        uint _264 = atomicMax(_237.mem_error, 1u);\n        return r;\n    }\n    return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _237.memory[offset] = val;\n}\n\nMallocResult alloc_clip_buf(uint link)\n{\n    bool _755 = gl_LocalInvocationID.x == 0u;\n    bool _761;\n    if (_755)\n    {\n        _761 = gl_LocalInvocationID.y == 0u;\n    }\n    else\n    {\n        _761 = _755;\n    }\n    if (_761)\n    {\n        uint param = 4100u;\n        MallocResult _767 = malloc(param);\n        MallocResult m = _767;\n        if (!m.failed)\n        {\n            Alloc param_1 = m.alloc;\n            uint param_2 = (m.alloc.offset >> uint(2)) + 1024u;\n            uint param_3 = link;\n            write_mem(param_1, param_2, param_3);\n        }\n        sh_clip_alloc = m;\n    }\n    barrier();\n    return sh_clip_alloc;\n}\n\nCmdBeginClip CmdBeginClip_read(Alloc a, CmdBeginClipRef ref)\n{\n    uint ix 
= ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    CmdBeginClip s;\n    s.tile_ref = raw0;\n    s.backdrop = int(raw1);\n    return s;\n}\n\nCmdBeginClip Cmd_BeginClip_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdBeginClipRef param_1 = CmdBeginClipRef(ref.offset + 4u);\n    return CmdBeginClip_read(param, param_1);\n}\n\nvec3 tosRGB(vec3 rgb)\n{\n    bvec3 cutoff = greaterThanEqual(rgb, vec3(0.003130800090730190277099609375));\n    vec3 below = vec3(12.9200000762939453125) * rgb;\n    vec3 above = (vec3(1.05499994754791259765625) * pow(rgb, vec3(0.416660010814666748046875))) - vec3(0.054999999701976776123046875);\n    return mix(below, above, cutoff);\n}\n\nuint packsRGB(inout vec4 rgba)\n{\n    vec3 param = rgba.xyz;\n    rgba = vec4(tosRGB(param), rgba.w);\n    return packUnorm4x8(rgba.wzyx);\n}\n\nCmdBeginSolidClip CmdBeginSolidClip_read(Alloc a, CmdBeginSolidClipRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    CmdBeginSolidClip s;\n    s.alpha = uintBitsToFloat(raw0);\n    return s;\n}\n\nCmdBeginSolidClip Cmd_BeginSolidClip_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdBeginSolidClipRef param_1 = CmdBeginSolidClipRef(ref.offset + 4u);\n    return CmdBeginSolidClip_read(param, param_1);\n}\n\nCmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    CmdEndClip s;\n    s.alpha = uintBitsToFloat(raw0);\n    return s;\n}\n\nCmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdEndClipRef param_1 = CmdEndClipRef(ref.offset + 4u);\n    return CmdEndClip_read(param, param_1);\n}\n\nCmdSolid CmdSolid_read(Alloc a, 
CmdSolidRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    CmdSolid s;\n    s.rgba_color = raw0;\n    return s;\n}\n\nCmdSolid Cmd_Solid_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdSolidRef param_1 = CmdSolidRef(ref.offset + 4u);\n    return CmdSolid_read(param, param_1);\n}\n\nCmdSolidImage CmdSolidImage_read(Alloc a, CmdSolidImageRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    CmdSolidImage s;\n    s.index = raw0;\n    s.offset = ivec2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);\n    return s;\n}\n\nCmdSolidImage Cmd_SolidImage_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdSolidImageRef param_1 = CmdSolidImageRef(ref.offset + 4u);\n    return CmdSolidImage_read(param, param_1);\n}\n\nCmdJump CmdJump_read(Alloc a, CmdJumpRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    CmdJump s;\n    s.new_ref = raw0;\n    return s;\n}\n\nCmdJump Cmd_Jump_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdJumpRef param_1 = CmdJumpRef(ref.offset + 4u);\n    return CmdJump_read(param, param_1);\n}\n\nvoid main()\n{\n    if (_237.mem_error != 0u)\n    {\n        return;\n    }\n    uint tile_ix = (gl_WorkGroupID.y * _1138.conf.width_in_tiles) + gl_WorkGroupID.x;\n    Alloc param;\n    param.offset = _1138.conf.ptcl_alloc.offset;\n    uint param_1 = tile_ix * 1024u;\n    uint param_2 = 1024u;\n    Alloc cmd_alloc = slice_mem(param, param_1, param_2);\n    CmdRef cmd_ref = CmdRef(cmd_alloc.offset);\n    uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + (32u * gl_WorkGroupID.y));\n    vec2 xy = vec2(xy_uint);\n    uint blend_spill = 
0u;\n    uint blend_sp = 0u;\n    uint param_3 = 0u;\n    uint param_4 = 0u;\n    Alloc clip_tos = new_alloc(param_3, param_4);\n    vec3 rgb[8];\n    float mask[8];\n    for (uint i = 0u; i < 8u; i++)\n    {\n        rgb[i] = vec3(0.5);\n        mask[i] = 1.0;\n    }\n    float df[8];\n    vec4 fg_rgba;\n    float area[8];\n    vec4 rgba[8];\n    uint blend_slot;\n    uint blend_stack[4][8];\n    while (true)\n    {\n        Alloc param_5 = cmd_alloc;\n        CmdRef param_6 = cmd_ref;\n        uint tag = Cmd_tag(param_5, param_6);\n        if (tag == 0u)\n        {\n            break;\n        }\n        switch (tag)\n        {\n            case 6u:\n            {\n                Alloc param_7 = cmd_alloc;\n                CmdRef param_8 = cmd_ref;\n                CmdStroke stroke = Cmd_Stroke_read(param_7, param_8);\n                for (uint k = 0u; k < 8u; k++)\n                {\n                    df[k] = 1000000000.0;\n                }\n                TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);\n                do\n                {\n                    uint param_9 = tile_seg_ref.offset;\n                    uint param_10 = 24u;\n                    Alloc param_11 = new_alloc(param_9, param_10);\n                    TileSegRef param_12 = tile_seg_ref;\n                    TileSeg seg = TileSeg_read(param_11, param_12);\n                    vec2 line_vec = seg.vector;\n                    for (uint k_1 = 0u; k_1 < 8u; k_1++)\n                    {\n                        vec2 dpos = (xy + vec2(0.5)) - seg.origin;\n                        dpos.y += float(k_1 * 4u);\n                        float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);\n                        df[k_1] = min(df[k_1], length((line_vec * t) - dpos));\n                    }\n                    tile_seg_ref = seg.next;\n                } while (tile_seg_ref.offset != 0u);\n                uint param_13 = stroke.rgba_color;\n                fg_rgba = 
unpacksRGB(param_13);\n                for (uint k_2 = 0u; k_2 < 8u; k_2++)\n                {\n                    float alpha = clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0);\n                    rgb[k_2] = mix(rgb[k_2], fg_rgba.xyz, vec3((mask[k_2] * alpha) * fg_rgba.w));\n                }\n                break;\n            }\n            case 1u:\n            {\n                Alloc param_14 = cmd_alloc;\n                CmdRef param_15 = cmd_ref;\n                CmdFill fill = Cmd_Fill_read(param_14, param_15);\n                vec2 param_16 = xy;\n                int param_17 = fill.backdrop;\n                uint param_18 = fill.tile_ref;\n                area = computeArea(param_16, param_17, param_18);\n                uint param_19 = fill.rgba_color;\n                fg_rgba = unpacksRGB(param_19);\n                for (uint k_3 = 0u; k_3 < 8u; k_3++)\n                {\n                    rgb[k_3] = mix(rgb[k_3], fg_rgba.xyz, vec3((mask[k_3] * area[k_3]) * fg_rgba.w));\n                }\n                break;\n            }\n            case 2u:\n            {\n                Alloc param_20 = cmd_alloc;\n                CmdRef param_21 = cmd_ref;\n                CmdFillImage fill_img = Cmd_FillImage_read(param_20, param_21);\n                vec2 param_22 = xy;\n                int param_23 = fill_img.backdrop;\n                uint param_24 = fill_img.tile_ref;\n                area = computeArea(param_22, param_23, param_24);\n                uvec2 param_25 = xy_uint;\n                CmdSolidImage param_26 = CmdSolidImage(fill_img.index, fill_img.offset);\n                rgba = fillImage(param_25, param_26);\n                for (uint k_4 = 0u; k_4 < 8u; k_4++)\n                {\n                    rgb[k_4] = mix(rgb[k_4], rgba[k_4].xyz, vec3((mask[k_4] * area[k_4]) * rgba[k_4].w));\n                }\n                break;\n            }\n            case 3u:\n            case 4u:\n            {\n                blend_slot = 
blend_sp % 4u;\n                if (blend_sp == (blend_spill + 4u))\n                {\n                    uint param_27 = clip_tos.offset;\n                    MallocResult _1482 = alloc_clip_buf(param_27);\n                    MallocResult m = _1482;\n                    if (m.failed)\n                    {\n                        return;\n                    }\n                    clip_tos = m.alloc;\n                    uint base_ix = ((clip_tos.offset >> uint(2)) + gl_LocalInvocationID.x) + (32u * gl_LocalInvocationID.y);\n                    for (uint k_5 = 0u; k_5 < 8u; k_5++)\n                    {\n                        Alloc param_28 = clip_tos;\n                        uint param_29 = base_ix + ((k_5 * 32u) * 4u);\n                        uint param_30 = blend_stack[blend_slot][k_5];\n                        write_mem(param_28, param_29, param_30);\n                    }\n                    blend_spill++;\n                }\n                if (tag == 3u)\n                {\n                    Alloc param_31 = cmd_alloc;\n                    CmdRef param_32 = cmd_ref;\n                    CmdBeginClip begin_clip = Cmd_BeginClip_read(param_31, param_32);\n                    vec2 param_33 = xy;\n                    int param_34 = begin_clip.backdrop;\n                    uint param_35 = begin_clip.tile_ref;\n                    area = computeArea(param_33, param_34, param_35);\n                    for (uint k_6 = 0u; k_6 < 8u; k_6++)\n                    {\n                        vec4 param_36 = vec4(rgb[k_6], clamp(abs(area[k_6]), 0.0, 1.0));\n                        uint _1573 = packsRGB(param_36);\n                        blend_stack[blend_slot][k_6] = _1573;\n                    }\n                }\n                else\n                {\n                    Alloc param_37 = cmd_alloc;\n                    CmdRef param_38 = cmd_ref;\n                    CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(param_37, param_38);\n        
            float solid_alpha = begin_solid_clip.alpha;\n                    for (uint k_7 = 0u; k_7 < 8u; k_7++)\n                    {\n                        vec4 param_39 = vec4(rgb[k_7], solid_alpha);\n                        uint _1606 = packsRGB(param_39);\n                        blend_stack[blend_slot][k_7] = _1606;\n                    }\n                }\n                blend_sp++;\n                break;\n            }\n            case 5u:\n            {\n                Alloc param_40 = cmd_alloc;\n                CmdRef param_41 = cmd_ref;\n                CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41);\n                blend_slot = (blend_sp - 1u) % 4u;\n                if (blend_sp == blend_spill)\n                {\n                    uint base_ix_1 = ((clip_tos.offset >> uint(2)) + gl_LocalInvocationID.x) + (32u * gl_LocalInvocationID.y);\n                    for (uint k_8 = 0u; k_8 < 8u; k_8++)\n                    {\n                        Alloc param_42 = clip_tos;\n                        uint param_43 = base_ix_1 + ((k_8 * 32u) * 4u);\n                        blend_stack[blend_slot][k_8] = read_mem(param_42, param_43);\n                    }\n                    Alloc param_44 = clip_tos;\n                    uint param_45 = (clip_tos.offset >> uint(2)) + 1024u;\n                    clip_tos.offset = read_mem(param_44, param_45);\n                    blend_spill--;\n                }\n                blend_sp--;\n                for (uint k_9 = 0u; k_9 < 8u; k_9++)\n                {\n                    uint param_46 = blend_stack[blend_slot][k_9];\n                    vec4 rgba_1 = unpacksRGB(param_46);\n                    rgb[k_9] = mix(rgba_1.xyz, rgb[k_9], vec3(end_clip.alpha * rgba_1.w));\n                }\n                break;\n            }\n            case 7u:\n            {\n                Alloc param_47 = cmd_alloc;\n                CmdRef param_48 = cmd_ref;\n                CmdSolid solid = 
Cmd_Solid_read(param_47, param_48);\n                uint param_49 = solid.rgba_color;\n                fg_rgba = unpacksRGB(param_49);\n                for (uint k_10 = 0u; k_10 < 8u; k_10++)\n                {\n                    rgb[k_10] = mix(rgb[k_10], fg_rgba.xyz, vec3(mask[k_10] * fg_rgba.w));\n                }\n                break;\n            }\n            case 8u:\n            {\n                Alloc param_50 = cmd_alloc;\n                CmdRef param_51 = cmd_ref;\n                CmdSolidImage solid_img = Cmd_SolidImage_read(param_50, param_51);\n                uvec2 param_52 = xy_uint;\n                CmdSolidImage param_53 = solid_img;\n                rgba = fillImage(param_52, param_53);\n                for (uint k_11 = 0u; k_11 < 8u; k_11++)\n                {\n                    rgb[k_11] = mix(rgb[k_11], rgba[k_11].xyz, vec3(mask[k_11] * rgba[k_11].w));\n                }\n                break;\n            }\n            case 9u:\n            {\n                Alloc param_54 = cmd_alloc;\n                CmdRef param_55 = cmd_ref;\n                cmd_ref = CmdRef(Cmd_Jump_read(param_54, param_55).new_ref);\n                cmd_alloc.offset = cmd_ref.offset;\n                continue;\n            }\n        }\n        cmd_ref.offset += 20u;\n    }\n    for (uint i_1 = 0u; i_1 < 8u; i_1++)\n    {\n        vec3 param_56 = rgb[i_1];\n        imageStore(image, ivec2(int(xy_uint.x), int(xy_uint.y + (4u * i_1))), vec4(tosRGB(param_56), 1.0));\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 4, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct MallocResult\n{\n    Alloc alloc;\n    bool failed;\n};\n\nstruct CmdStrokeRef\n{\n    uint offset;\n};\n\nstruct CmdStroke\n{\n    uint tile_ref;\n    float half_width;\n    uint rgba_color;\n};\n\nstruct CmdFillRef\n{\n    uint offset;\n};\n\nstruct CmdFill\n{\n    uint tile_ref;\n    int backdrop;\n    uint rgba_color;\n};\n\nstruct CmdFillImageRef\n{\n    uint offset;\n};\n\nstruct CmdFillImage\n{\n    uint tile_ref;\n    int backdrop;\n    uint index;\n    ivec2 offset;\n};\n\nstruct CmdBeginClipRef\n{\n    uint offset;\n};\n\nstruct CmdBeginClip\n{\n    uint tile_ref;\n    int backdrop;\n};\n\nstruct CmdBeginSolidClipRef\n{\n    uint offset;\n};\n\nstruct CmdBeginSolidClip\n{\n    float alpha;\n};\n\nstruct CmdEndClipRef\n{\n    uint offset;\n};\n\nstruct CmdEndClip\n{\n    float alpha;\n};\n\nstruct CmdSolidRef\n{\n    uint offset;\n};\n\nstruct CmdSolid\n{\n    uint rgba_color;\n};\n\nstruct CmdSolidImageRef\n{\n    uint offset;\n};\n\nstruct CmdSolidImage\n{\n    uint index;\n    ivec2 offset;\n};\n\nstruct CmdJumpRef\n{\n    uint offset;\n};\n\nstruct CmdJump\n{\n    uint new_ref;\n};\n\nstruct CmdRef\n{\n    uint offset;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct TileSeg\n{\n    vec2 origin;\n    vec2 vector;\n    float y_edge;\n    TileSegRef next;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n    Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _237;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _1138;\n\nlayout(binding = 3, rgba8) uniform readonly highp image2D 
images[1];\nlayout(binding = 2, rgba8) uniform writeonly highp image2D image;\n\nshared MallocResult sh_clip_alloc;\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n    uint param = a.offset + offset;\n    uint param_1 = size;\n    return new_alloc(param, param_1);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _237.memory[offset];\n    return v;\n}\n\nuint Cmd_tag(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nCmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    CmdStroke s;\n    s.tile_ref = raw0;\n    s.half_width = uintBitsToFloat(raw1);\n    s.rgba_color = raw2;\n    return s;\n}\n\nCmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdStrokeRef param_1 = CmdStrokeRef(ref.offset + 4u);\n    return CmdStroke_read(param, param_1);\n}\n\nTileSeg TileSeg_read(Alloc a, TileSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    
Alloc param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    Alloc param_10 = a;\n    uint param_11 = ix + 5u;\n    uint raw5 = read_mem(param_10, param_11);\n    TileSeg s;\n    s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.y_edge = uintBitsToFloat(raw4);\n    s.next = TileSegRef(raw5);\n    return s;\n}\n\nvec3 fromsRGB(vec3 srgb)\n{\n    bvec3 cutoff = greaterThanEqual(srgb, vec3(0.040449999272823333740234375));\n    vec3 below = srgb / vec3(12.9200000762939453125);\n    vec3 above = pow((srgb + vec3(0.054999999701976776123046875)) / vec3(1.05499994754791259765625), vec3(2.400000095367431640625));\n    return mix(below, above, cutoff);\n}\n\nvec4 unpacksRGB(uint srgba)\n{\n    vec4 color = unpackUnorm4x8(srgba).wzyx;\n    vec3 param = color.xyz;\n    return vec4(fromsRGB(param), color.w);\n}\n\nCmdFill CmdFill_read(Alloc a, CmdFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    CmdFill s;\n    s.tile_ref = raw0;\n    s.backdrop = int(raw1);\n    s.rgba_color = raw2;\n    return s;\n}\n\nCmdFill Cmd_Fill_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdFillRef param_1 = CmdFillRef(ref.offset + 4u);\n    return CmdFill_read(param, param_1);\n}\n\nfloat[8] computeArea(vec2 xy, int backdrop, uint tile_ref)\n{\n    float area[8];\n    for (uint k = 0u; k < 8u; k++)\n    {\n        area[k] = float(backdrop);\n    }\n    TileSegRef tile_seg_ref = TileSegRef(tile_ref);\n    do\n    {\n        uint param = tile_seg_ref.offset;\n        uint param_1 = 24u;\n        Alloc param_2 = new_alloc(param, param_1);\n        TileSegRef param_3 = 
tile_seg_ref;\n        TileSeg seg = TileSeg_read(param_2, param_3);\n        for (uint k_1 = 0u; k_1 < 8u; k_1++)\n        {\n            vec2 my_xy = vec2(xy.x, xy.y + float(k_1 * 4u));\n            vec2 start = seg.origin - my_xy;\n            vec2 end = start + seg.vector;\n            vec2 window = clamp(vec2(start.y, end.y), vec2(0.0), vec2(1.0));\n            if (!(window.x == window.y))\n            {\n                vec2 t = (window - vec2(start.y)) / vec2(seg.vector.y);\n                vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));\n                float xmin = min(min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07;\n                float xmax = max(xs.x, xs.y);\n                float b = min(xmax, 1.0);\n                float c = max(b, 0.0);\n                float d = max(xmin, 0.0);\n                float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin);\n                area[k_1] += (a * (window.x - window.y));\n            }\n            area[k_1] += (sign(seg.vector.x) * clamp((my_xy.y - seg.y_edge) + 1.0, 0.0, 1.0));\n        }\n        tile_seg_ref = seg.next;\n    } while (tile_seg_ref.offset != 0u);\n    for (uint k_2 = 0u; k_2 < 8u; k_2++)\n    {\n        area[k_2] = min(abs(area[k_2]), 1.0);\n    }\n    return area;\n}\n\nCmdFillImage CmdFillImage_read(Alloc a, CmdFillImageRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    CmdFillImage s;\n    s.tile_ref = raw0;\n    s.backdrop = int(raw1);\n    s.index = raw2;\n    s.offset = ivec2(int(raw3 << uint(16)) >> 16, int(raw3) >> 16);\n    return s;\n}\n\nCmdFillImage 
Cmd_FillImage_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdFillImageRef param_1 = CmdFillImageRef(ref.offset + 4u);\n    return CmdFillImage_read(param, param_1);\n}\n\nvec4[8] fillImage(uvec2 xy, CmdSolidImage cmd_img)\n{\n    vec4 rgba[8];\n    for (uint i = 0u; i < 8u; i++)\n    {\n        ivec2 uv = ivec2(int(xy.x), int(xy.y + (i * 4u))) + cmd_img.offset;\n        vec4 fg_rgba = imageLoad(images[0], uv);\n        vec3 param = fg_rgba.xyz;\n        vec3 _1111 = fromsRGB(param);\n        fg_rgba = vec4(_1111.x, _1111.y, _1111.z, fg_rgba.w);\n        rgba[i] = fg_rgba;\n    }\n    return rgba;\n}\n\nMallocResult malloc(uint size)\n{\n    MallocResult r;\n    r.failed = false;\n    uint _243 = atomicAdd(_237.mem_offset, size);\n    uint offset = _243;\n    uint param = offset;\n    uint param_1 = size;\n    r.alloc = new_alloc(param, param_1);\n    if ((offset + size) > uint(int(uint(_237.memory.length())) * 4))\n    {\n        r.failed = true;\n        uint _264 = atomicMax(_237.mem_error, 1u);\n        return r;\n    }\n    return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _237.memory[offset] = val;\n}\n\nMallocResult alloc_clip_buf(uint link)\n{\n    bool _755 = gl_LocalInvocationID.x == 0u;\n    bool _761;\n    if (_755)\n    {\n        _761 = gl_LocalInvocationID.y == 0u;\n    }\n    else\n    {\n        _761 = _755;\n    }\n    if (_761)\n    {\n        uint param = 4100u;\n        MallocResult _767 = malloc(param);\n        MallocResult m = _767;\n        if (!m.failed)\n        {\n            Alloc param_1 = m.alloc;\n            uint param_2 = (m.alloc.offset >> uint(2)) + 1024u;\n            uint param_3 = link;\n            write_mem(param_1, param_2, param_3);\n        }\n        sh_clip_alloc = m;\n    }\n    barrier();\n    return sh_clip_alloc;\n}\n\nCmdBeginClip CmdBeginClip_read(Alloc a, 
CmdBeginClipRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    CmdBeginClip s;\n    s.tile_ref = raw0;\n    s.backdrop = int(raw1);\n    return s;\n}\n\nCmdBeginClip Cmd_BeginClip_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdBeginClipRef param_1 = CmdBeginClipRef(ref.offset + 4u);\n    return CmdBeginClip_read(param, param_1);\n}\n\nvec3 tosRGB(vec3 rgb)\n{\n    bvec3 cutoff = greaterThanEqual(rgb, vec3(0.003130800090730190277099609375));\n    vec3 below = vec3(12.9200000762939453125) * rgb;\n    vec3 above = (vec3(1.05499994754791259765625) * pow(rgb, vec3(0.416660010814666748046875))) - vec3(0.054999999701976776123046875);\n    return mix(below, above, cutoff);\n}\n\nuint packsRGB(inout vec4 rgba)\n{\n    vec3 param = rgba.xyz;\n    rgba = vec4(tosRGB(param), rgba.w);\n    return packUnorm4x8(rgba.wzyx);\n}\n\nCmdBeginSolidClip CmdBeginSolidClip_read(Alloc a, CmdBeginSolidClipRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    CmdBeginSolidClip s;\n    s.alpha = uintBitsToFloat(raw0);\n    return s;\n}\n\nCmdBeginSolidClip Cmd_BeginSolidClip_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdBeginSolidClipRef param_1 = CmdBeginSolidClipRef(ref.offset + 4u);\n    return CmdBeginSolidClip_read(param, param_1);\n}\n\nCmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    CmdEndClip s;\n    s.alpha = uintBitsToFloat(raw0);\n    return s;\n}\n\nCmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdEndClipRef param_1 = CmdEndClipRef(ref.offset + 4u);\n    return CmdEndClip_read(param, 
param_1);\n}\n\nCmdSolid CmdSolid_read(Alloc a, CmdSolidRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    CmdSolid s;\n    s.rgba_color = raw0;\n    return s;\n}\n\nCmdSolid Cmd_Solid_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdSolidRef param_1 = CmdSolidRef(ref.offset + 4u);\n    return CmdSolid_read(param, param_1);\n}\n\nCmdSolidImage CmdSolidImage_read(Alloc a, CmdSolidImageRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    CmdSolidImage s;\n    s.index = raw0;\n    s.offset = ivec2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);\n    return s;\n}\n\nCmdSolidImage Cmd_SolidImage_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdSolidImageRef param_1 = CmdSolidImageRef(ref.offset + 4u);\n    return CmdSolidImage_read(param, param_1);\n}\n\nCmdJump CmdJump_read(Alloc a, CmdJumpRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    CmdJump s;\n    s.new_ref = raw0;\n    return s;\n}\n\nCmdJump Cmd_Jump_read(Alloc a, CmdRef ref)\n{\n    Alloc param = a;\n    CmdJumpRef param_1 = CmdJumpRef(ref.offset + 4u);\n    return CmdJump_read(param, param_1);\n}\n\nvoid main()\n{\n    if (_237.mem_error != 0u)\n    {\n        return;\n    }\n    uint tile_ix = (gl_WorkGroupID.y * _1138.conf.width_in_tiles) + gl_WorkGroupID.x;\n    Alloc param;\n    param.offset = _1138.conf.ptcl_alloc.offset;\n    uint param_1 = tile_ix * 1024u;\n    uint param_2 = 1024u;\n    Alloc cmd_alloc = slice_mem(param, param_1, param_2);\n    CmdRef cmd_ref = CmdRef(cmd_alloc.offset);\n    uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + (32u * gl_WorkGroupID.y));\n    vec2 
xy = vec2(xy_uint);\n    uint blend_spill = 0u;\n    uint blend_sp = 0u;\n    uint param_3 = 0u;\n    uint param_4 = 0u;\n    Alloc clip_tos = new_alloc(param_3, param_4);\n    vec3 rgb[8];\n    float mask[8];\n    for (uint i = 0u; i < 8u; i++)\n    {\n        rgb[i] = vec3(0.5);\n        mask[i] = 1.0;\n    }\n    float df[8];\n    vec4 fg_rgba;\n    float area[8];\n    vec4 rgba[8];\n    uint blend_slot;\n    uint blend_stack[4][8];\n    while (true)\n    {\n        Alloc param_5 = cmd_alloc;\n        CmdRef param_6 = cmd_ref;\n        uint tag = Cmd_tag(param_5, param_6);\n        if (tag == 0u)\n        {\n            break;\n        }\n        switch (tag)\n        {\n            case 6u:\n            {\n                Alloc param_7 = cmd_alloc;\n                CmdRef param_8 = cmd_ref;\n                CmdStroke stroke = Cmd_Stroke_read(param_7, param_8);\n                for (uint k = 0u; k < 8u; k++)\n                {\n                    df[k] = 1000000000.0;\n                }\n                TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);\n                do\n                {\n                    uint param_9 = tile_seg_ref.offset;\n                    uint param_10 = 24u;\n                    Alloc param_11 = new_alloc(param_9, param_10);\n                    TileSegRef param_12 = tile_seg_ref;\n                    TileSeg seg = TileSeg_read(param_11, param_12);\n                    vec2 line_vec = seg.vector;\n                    for (uint k_1 = 0u; k_1 < 8u; k_1++)\n                    {\n                        vec2 dpos = (xy + vec2(0.5)) - seg.origin;\n                        dpos.y += float(k_1 * 4u);\n                        float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);\n                        df[k_1] = min(df[k_1], length((line_vec * t) - dpos));\n                    }\n                    tile_seg_ref = seg.next;\n                } while (tile_seg_ref.offset != 0u);\n                uint param_13 = 
stroke.rgba_color;\n                fg_rgba = unpacksRGB(param_13);\n                for (uint k_2 = 0u; k_2 < 8u; k_2++)\n                {\n                    float alpha = clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0);\n                    rgb[k_2] = mix(rgb[k_2], fg_rgba.xyz, vec3((mask[k_2] * alpha) * fg_rgba.w));\n                }\n                break;\n            }\n            case 1u:\n            {\n                Alloc param_14 = cmd_alloc;\n                CmdRef param_15 = cmd_ref;\n                CmdFill fill = Cmd_Fill_read(param_14, param_15);\n                vec2 param_16 = xy;\n                int param_17 = fill.backdrop;\n                uint param_18 = fill.tile_ref;\n                area = computeArea(param_16, param_17, param_18);\n                uint param_19 = fill.rgba_color;\n                fg_rgba = unpacksRGB(param_19);\n                for (uint k_3 = 0u; k_3 < 8u; k_3++)\n                {\n                    rgb[k_3] = mix(rgb[k_3], fg_rgba.xyz, vec3((mask[k_3] * area[k_3]) * fg_rgba.w));\n                }\n                break;\n            }\n            case 2u:\n            {\n                Alloc param_20 = cmd_alloc;\n                CmdRef param_21 = cmd_ref;\n                CmdFillImage fill_img = Cmd_FillImage_read(param_20, param_21);\n                vec2 param_22 = xy;\n                int param_23 = fill_img.backdrop;\n                uint param_24 = fill_img.tile_ref;\n                area = computeArea(param_22, param_23, param_24);\n                uvec2 param_25 = xy_uint;\n                CmdSolidImage param_26 = CmdSolidImage(fill_img.index, fill_img.offset);\n                rgba = fillImage(param_25, param_26);\n                for (uint k_4 = 0u; k_4 < 8u; k_4++)\n                {\n                    rgb[k_4] = mix(rgb[k_4], rgba[k_4].xyz, vec3((mask[k_4] * area[k_4]) * rgba[k_4].w));\n                }\n                break;\n            }\n            case 3u:\n            case 4u:\n   
         {\n                blend_slot = blend_sp % 4u;\n                if (blend_sp == (blend_spill + 4u))\n                {\n                    uint param_27 = clip_tos.offset;\n                    MallocResult _1482 = alloc_clip_buf(param_27);\n                    MallocResult m = _1482;\n                    if (m.failed)\n                    {\n                        return;\n                    }\n                    clip_tos = m.alloc;\n                    uint base_ix = ((clip_tos.offset >> uint(2)) + gl_LocalInvocationID.x) + (32u * gl_LocalInvocationID.y);\n                    for (uint k_5 = 0u; k_5 < 8u; k_5++)\n                    {\n                        Alloc param_28 = clip_tos;\n                        uint param_29 = base_ix + ((k_5 * 32u) * 4u);\n                        uint param_30 = blend_stack[blend_slot][k_5];\n                        write_mem(param_28, param_29, param_30);\n                    }\n                    blend_spill++;\n                }\n                if (tag == 3u)\n                {\n                    Alloc param_31 = cmd_alloc;\n                    CmdRef param_32 = cmd_ref;\n                    CmdBeginClip begin_clip = Cmd_BeginClip_read(param_31, param_32);\n                    vec2 param_33 = xy;\n                    int param_34 = begin_clip.backdrop;\n                    uint param_35 = begin_clip.tile_ref;\n                    area = computeArea(param_33, param_34, param_35);\n                    for (uint k_6 = 0u; k_6 < 8u; k_6++)\n                    {\n                        vec4 param_36 = vec4(rgb[k_6], clamp(abs(area[k_6]), 0.0, 1.0));\n                        uint _1573 = packsRGB(param_36);\n                        blend_stack[blend_slot][k_6] = _1573;\n                    }\n                }\n                else\n                {\n                    Alloc param_37 = cmd_alloc;\n                    CmdRef param_38 = cmd_ref;\n                    CmdBeginSolidClip begin_solid_clip = 
Cmd_BeginSolidClip_read(param_37, param_38);\n                    float solid_alpha = begin_solid_clip.alpha;\n                    for (uint k_7 = 0u; k_7 < 8u; k_7++)\n                    {\n                        vec4 param_39 = vec4(rgb[k_7], solid_alpha);\n                        uint _1606 = packsRGB(param_39);\n                        blend_stack[blend_slot][k_7] = _1606;\n                    }\n                }\n                blend_sp++;\n                break;\n            }\n            case 5u:\n            {\n                Alloc param_40 = cmd_alloc;\n                CmdRef param_41 = cmd_ref;\n                CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41);\n                blend_slot = (blend_sp - 1u) % 4u;\n                if (blend_sp == blend_spill)\n                {\n                    uint base_ix_1 = ((clip_tos.offset >> uint(2)) + gl_LocalInvocationID.x) + (32u * gl_LocalInvocationID.y);\n                    for (uint k_8 = 0u; k_8 < 8u; k_8++)\n                    {\n                        Alloc param_42 = clip_tos;\n                        uint param_43 = base_ix_1 + ((k_8 * 32u) * 4u);\n                        blend_stack[blend_slot][k_8] = read_mem(param_42, param_43);\n                    }\n                    Alloc param_44 = clip_tos;\n                    uint param_45 = (clip_tos.offset >> uint(2)) + 1024u;\n                    clip_tos.offset = read_mem(param_44, param_45);\n                    blend_spill--;\n                }\n                blend_sp--;\n                for (uint k_9 = 0u; k_9 < 8u; k_9++)\n                {\n                    uint param_46 = blend_stack[blend_slot][k_9];\n                    vec4 rgba_1 = unpacksRGB(param_46);\n                    rgb[k_9] = mix(rgba_1.xyz, rgb[k_9], vec3(end_clip.alpha * rgba_1.w));\n                }\n                break;\n            }\n            case 7u:\n            {\n                Alloc param_47 = cmd_alloc;\n                CmdRef param_48 = 
cmd_ref;\n                CmdSolid solid = Cmd_Solid_read(param_47, param_48);\n                uint param_49 = solid.rgba_color;\n                fg_rgba = unpacksRGB(param_49);\n                for (uint k_10 = 0u; k_10 < 8u; k_10++)\n                {\n                    rgb[k_10] = mix(rgb[k_10], fg_rgba.xyz, vec3(mask[k_10] * fg_rgba.w));\n                }\n                break;\n            }\n            case 8u:\n            {\n                Alloc param_50 = cmd_alloc;\n                CmdRef param_51 = cmd_ref;\n                CmdSolidImage solid_img = Cmd_SolidImage_read(param_50, param_51);\n                uvec2 param_52 = xy_uint;\n                CmdSolidImage param_53 = solid_img;\n                rgba = fillImage(param_52, param_53);\n                for (uint k_11 = 0u; k_11 < 8u; k_11++)\n                {\n                    rgb[k_11] = mix(rgb[k_11], rgba[k_11].xyz, vec3(mask[k_11] * rgba[k_11].w));\n                }\n                break;\n            }\n            case 9u:\n            {\n                Alloc param_54 = cmd_alloc;\n                CmdRef param_55 = cmd_ref;\n                cmd_ref = CmdRef(Cmd_Jump_read(param_54, param_55).new_ref);\n                cmd_alloc.offset = cmd_ref.offset;\n                continue;\n            }\n        }\n        cmd_ref.offset += 20u;\n    }\n    for (uint i_1 = 0u; i_1 < 8u; i_1++)\n    {\n        vec3 param_56 = rgb[i_1];\n        imageStore(image, ivec2(int(xy_uint.x), int(xy_uint.y + (4u * i_1))), vec4(tosRGB(param_56), 1.0));\n    }\n}\n\n",
	}
	shader_material_frag = driver.ShaderSources{
		Name:      "material.frag",


@@ 188,7 188,7 @@ var (
	}
	shader_path_coarse_comp = driver.ShaderSources{
		Name:      "path_coarse.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct MallocResult\n{\n    Alloc alloc;\n    bool failed;\n};\n\nstruct PathStrokeCubicRef\n{\n    uint offset;\n};\n\nstruct PathStrokeCubic\n{\n    vec2 p0;\n    vec2 p1;\n    vec2 p2;\n    vec2 p3;\n    uint path_ix;\n    vec2 stroke;\n};\n\nstruct PathSegRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct TileSeg\n{\n    vec2 origin;\n    vec2 vector;\n    float y_edge;\n    TileSegRef next;\n};\n\nstruct SubdivResult\n{\n    float val;\n    float a0;\n    float a2;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _135;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _685;\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _135.memory[offset];\n    return v;\n}\n\nuint PathSeg_tag(Alloc a, PathSegRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nPathStrokeCubic PathStrokeCubic_read(Alloc a, PathStrokeCubicRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    
Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    Alloc param_10 = a;\n    uint param_11 = ix + 5u;\n    uint raw5 = read_mem(param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 6u;\n    uint raw6 = read_mem(param_12, param_13);\n    Alloc param_14 = a;\n    uint param_15 = ix + 7u;\n    uint raw7 = read_mem(param_14, param_15);\n    Alloc param_16 = a;\n    uint param_17 = ix + 8u;\n    uint raw8 = read_mem(param_16, param_17);\n    Alloc param_18 = a;\n    uint param_19 = ix + 9u;\n    uint raw9 = read_mem(param_18, param_19);\n    Alloc param_20 = a;\n    uint param_21 = ix + 10u;\n    uint raw10 = read_mem(param_20, param_21);\n    PathStrokeCubic s;\n    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));\n    s.path_ix = raw8;\n    s.stroke = vec2(uintBitsToFloat(raw9), uintBitsToFloat(raw10));\n    return s;\n}\n\nPathStrokeCubic PathSeg_StrokeCubic_read(Alloc a, PathSegRef ref)\n{\n    Alloc param = a;\n    PathStrokeCubicRef param_1 = PathStrokeCubicRef(ref.offset + 4u);\n    return PathStrokeCubic_read(param, param_1);\n}\n\nvec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t)\n{\n    float mt = 1.0 - t;\n    return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0)) + (((p2 * (mt * 3.0)) + (p3 * t)) * t)) * t);\n}\n\nfloat approx_parabola_integral(float x)\n{\n    return x * inversesqrt(sqrt(0.3300000131130218505859375 + (0.201511204242706298828125 + ((0.25 * x) * x))));\n}\n\nSubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol)\n{\n    vec2 d01 = p1 - p0;\n    vec2 d12 = 
p2 - p1;\n    vec2 dd = d01 - d12;\n    float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x);\n    float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross;\n    float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross;\n    float scale = abs(_cross / (length(dd) * (x2 - x0)));\n    float param = x0;\n    float a0 = approx_parabola_integral(param);\n    float param_1 = x2;\n    float a2 = approx_parabola_integral(param_1);\n    float val = 0.0;\n    if (scale < 1000000000.0)\n    {\n        float da = abs(a2 - a0);\n        float sqrt_scale = sqrt(scale);\n        if (sign(x0) == sign(x2))\n        {\n            val = da * sqrt_scale;\n        }\n        else\n        {\n            float xmin = sqrt_tol / sqrt_scale;\n            float param_2 = xmin;\n            val = (sqrt_tol * da) / approx_parabola_integral(param_2);\n        }\n    }\n    return SubdivResult(val, a0, a2);\n}\n\nPath Path_read(Alloc a, PathRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Path s;\n    s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n    s.tiles = TileRef(raw2);\n    return s;\n}\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nfloat approx_parabola_inv_integral(float x)\n{\n    return x * sqrt(0.61000001430511474609375 + (0.1520999968051910400390625 + ((0.25 * x) * x)));\n}\n\nvec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t)\n{\n    float mt = 1.0 - t;\n    return (p0 * (mt * mt)) + (((p1 * (mt * 2.0)) + (p2 * t)) * t);\n}\n\nMallocResult malloc(uint size)\n{\n    MallocResult r;\n    r.failed = false;\n    uint _141 = atomicAdd(_135.mem_offset, size);\n    uint offset = _141;\n    uint 
param = offset;\n    uint param_1 = size;\n    r.alloc = new_alloc(param, param_1);\n    if ((offset + size) > uint(int(uint(_135.memory.length())) * 4))\n    {\n        r.failed = true;\n        uint _162 = atomicMax(_135.mem_error, 1u);\n        return r;\n    }\n    return r;\n}\n\nTileRef Tile_index(TileRef ref, uint index)\n{\n    return TileRef(ref.offset + (index * 8u));\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _135.memory[offset] = val;\n}\n\nvoid TileSeg_write(Alloc a, TileSegRef ref, TileSeg s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = floatBitsToUint(s.origin.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.origin.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.vector.x);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.vector.y);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = floatBitsToUint(s.y_edge);\n    write_mem(param_12, param_13, param_14);\n    Alloc param_15 = a;\n    uint param_16 = ix + 5u;\n    uint param_17 = s.next.offset;\n    write_mem(param_15, param_16, param_17);\n}\n\nvoid main()\n{\n    if (_135.mem_error != 0u)\n    {\n        return;\n    }\n    uint element_ix = gl_GlobalInvocationID.x;\n    PathSegRef ref = PathSegRef(_685.conf.pathseg_alloc.offset + (element_ix * 48u));\n    uint tag = 0u;\n    if (element_ix < _685.conf.n_pathseg)\n    {\n        Alloc param;\n        param.offset = _685.conf.pathseg_alloc.offset;\n        PathSegRef param_1 = ref;\n        tag = PathSeg_tag(param, 
param_1);\n    }\n    switch (tag)\n    {\n        case 1u:\n        case 2u:\n        {\n            Alloc param_2;\n            param_2.offset = _685.conf.pathseg_alloc.offset;\n            PathSegRef param_3 = ref;\n            PathStrokeCubic cubic = PathSeg_StrokeCubic_read(param_2, param_3);\n            vec2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3;\n            float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);\n            uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u);\n            float val = 0.0;\n            vec2 qp0 = cubic.p0;\n            float _step = 1.0 / float(n_quads);\n            for (uint i = 0u; i < n_quads; i++)\n            {\n                float t = float(i + 1u) * _step;\n                vec2 param_4 = cubic.p0;\n                vec2 param_5 = cubic.p1;\n                vec2 param_6 = cubic.p2;\n                vec2 param_7 = cubic.p3;\n                float param_8 = t;\n                vec2 qp2 = eval_cubic(param_4, param_5, param_6, param_7, param_8);\n                vec2 param_9 = cubic.p0;\n                vec2 param_10 = cubic.p1;\n                vec2 param_11 = cubic.p2;\n                vec2 param_12 = cubic.p3;\n                float param_13 = t - (0.5 * _step);\n                vec2 qp1 = eval_cubic(param_9, param_10, param_11, param_12, param_13);\n                qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5);\n                vec2 param_14 = qp0;\n                vec2 param_15 = qp1;\n                vec2 param_16 = qp2;\n                float param_17 = 0.4743416607379913330078125;\n                SubdivResult params = estimate_subdiv(param_14, param_15, param_16, param_17);\n                val += params.val;\n                qp0 = qp2;\n            }\n            uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u);\n            uint path_ix = cubic.path_ix;\n            Alloc param_18;\n            param_18.offset = 
_685.conf.tile_alloc.offset;\n            PathRef param_19 = PathRef(_685.conf.tile_alloc.offset + (path_ix * 12u));\n            Path path = Path_read(param_18, param_19);\n            uint param_20 = path.tiles.offset;\n            uint param_21 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;\n            Alloc path_alloc = new_alloc(param_20, param_21);\n            ivec4 bbox = ivec4(path.bbox);\n            vec2 p0 = cubic.p0;\n            qp0 = cubic.p0;\n            float v_step = val / float(n);\n            int n_out = 1;\n            float val_sum = 0.0;\n            vec2 p1;\n            float _1103;\n            TileSeg tile_seg;\n            for (uint i_1 = 0u; i_1 < n_quads; i_1++)\n            {\n                float t_1 = float(i_1 + 1u) * _step;\n                vec2 param_22 = cubic.p0;\n                vec2 param_23 = cubic.p1;\n                vec2 param_24 = cubic.p2;\n                vec2 param_25 = cubic.p3;\n                float param_26 = t_1;\n                vec2 qp2_1 = eval_cubic(param_22, param_23, param_24, param_25, param_26);\n                vec2 param_27 = cubic.p0;\n                vec2 param_28 = cubic.p1;\n                vec2 param_29 = cubic.p2;\n                vec2 param_30 = cubic.p3;\n                float param_31 = t_1 - (0.5 * _step);\n                vec2 qp1_1 = eval_cubic(param_27, param_28, param_29, param_30, param_31);\n                qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5);\n                vec2 param_32 = qp0;\n                vec2 param_33 = qp1_1;\n                vec2 param_34 = qp2_1;\n                float param_35 = 0.4743416607379913330078125;\n                SubdivResult params_1 = estimate_subdiv(param_32, param_33, param_34, param_35);\n                float param_36 = params_1.a0;\n                float u0 = approx_parabola_inv_integral(param_36);\n                float param_37 = params_1.a2;\n                float u2 = approx_parabola_inv_integral(param_37);\n          
      float uscale = 1.0 / (u2 - u0);\n                float target = float(n_out) * v_step;\n                for (;;)\n                {\n                    bool _996 = uint(n_out) == n;\n                    bool _1006;\n                    if (!_996)\n                    {\n                        _1006 = target < (val_sum + params_1.val);\n                    }\n                    else\n                    {\n                        _1006 = _996;\n                    }\n                    if (_1006)\n                    {\n                        if (uint(n_out) == n)\n                        {\n                            p1 = cubic.p3;\n                        }\n                        else\n                        {\n                            float u = (target - val_sum) / params_1.val;\n                            float a = mix(params_1.a0, params_1.a2, u);\n                            float param_38 = a;\n                            float au = approx_parabola_inv_integral(param_38);\n                            float t_2 = (au - u0) * uscale;\n                            vec2 param_39 = qp0;\n                            vec2 param_40 = qp1_1;\n                            vec2 param_41 = qp2_1;\n                            float param_42 = t_2;\n                            p1 = eval_quad(param_39, param_40, param_41, param_42);\n                        }\n                        float xmin = min(p0.x, p1.x) - cubic.stroke.x;\n                        float xmax = max(p0.x, p1.x) + cubic.stroke.x;\n                        float ymin = min(p0.y, p1.y) - cubic.stroke.y;\n                        float ymax = max(p0.y, p1.y) + cubic.stroke.y;\n                        float dx = p1.x - p0.x;\n                        float dy = p1.y - p0.y;\n                        if (abs(dy) < 9.999999717180685365747194737196e-10)\n                        {\n                            _1103 = 1000000000.0;\n                        }\n                        else\n           
             {\n                            _1103 = dx / dy;\n                        }\n                        float invslope = _1103;\n                        float c = (cubic.stroke.x + (abs(invslope) * (16.0 + cubic.stroke.y))) * 0.03125;\n                        float b = invslope;\n                        float a_1 = (p0.x - ((p0.y - 16.0) * b)) * 0.03125;\n                        int x0 = int(floor(xmin * 0.03125));\n                        int x1 = int(floor(xmax * 0.03125) + 1.0);\n                        int y0 = int(floor(ymin * 0.03125));\n                        int y1 = int(floor(ymax * 0.03125) + 1.0);\n                        x0 = clamp(x0, bbox.x, bbox.z);\n                        y0 = clamp(y0, bbox.y, bbox.w);\n                        x1 = clamp(x1, bbox.x, bbox.z);\n                        y1 = clamp(y1, bbox.y, bbox.w);\n                        float xc = a_1 + (b * float(y0));\n                        int stride = bbox.z - bbox.x;\n                        int base = ((y0 - bbox.y) * stride) - bbox.x;\n                        uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));\n                        uint param_43 = n_tile_alloc * 24u;\n                        MallocResult _1219 = malloc(param_43);\n                        MallocResult tile_alloc = _1219;\n                        if (tile_alloc.failed)\n                        {\n                            return;\n                        }\n                        uint tile_offset = tile_alloc.alloc.offset;\n                        int xray = int(floor(p0.x * 0.03125));\n                        int last_xray = int(floor(p1.x * 0.03125));\n                        if (p0.y > p1.y)\n                        {\n                            int tmp = xray;\n                            xray = last_xray;\n                            last_xray = tmp;\n                        }\n                        for (int y = y0; y < y1; y++)\n                        {\n                            float tile_y0 = 
float(y * 32);\n                            int xbackdrop = max((xray + 1), bbox.x);\n                            bool _1273 = tag == 1u;\n                            bool _1283;\n                            if (_1273)\n                            {\n                                _1283 = min(p0.y, p1.y) < tile_y0;\n                            }\n                            else\n                            {\n                                _1283 = _1273;\n                            }\n                            bool _1290;\n                            if (_1283)\n                            {\n                                _1290 = xbackdrop < bbox.z;\n                            }\n                            else\n                            {\n                                _1290 = _1283;\n                            }\n                            if (_1290)\n                            {\n                                int backdrop = (p1.y < p0.y) ? 1 : (-1);\n                                TileRef param_44 = path.tiles;\n                                uint param_45 = uint(base + xbackdrop);\n                                TileRef tile_ref = Tile_index(param_44, param_45);\n                                uint tile_el = tile_ref.offset >> uint(2);\n                                Alloc param_46 = path_alloc;\n                                uint param_47 = tile_el + 1u;\n                                if (touch_mem(param_46, param_47))\n                                {\n                                    uint _1328 = atomicAdd(_135.memory[tile_el + 1u], uint(backdrop));\n                                }\n                            }\n                            int next_xray = last_xray;\n                            if (y < (y1 - 1))\n                            {\n                                float tile_y1 = float((y + 1) * 32);\n                                float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy);\n                          
      next_xray = int(floor(x_edge * 0.03125));\n                            }\n                            int min_xray = min(xray, next_xray);\n                            int max_xray = max(xray, next_xray);\n                            int xx0 = min(int(floor(xc - c)), min_xray);\n                            int xx1 = max(int(ceil(xc + c)), (max_xray + 1));\n                            xx0 = clamp(xx0, x0, x1);\n                            xx1 = clamp(xx1, x0, x1);\n                            for (int x = xx0; x < xx1; x++)\n                            {\n                                float tile_x0 = float(x * 32);\n                                TileRef param_48 = TileRef(path.tiles.offset);\n                                uint param_49 = uint(base + x);\n                                TileRef tile_ref_1 = Tile_index(param_48, param_49);\n                                uint tile_el_1 = tile_ref_1.offset >> uint(2);\n                                uint old = 0u;\n                                Alloc param_50 = path_alloc;\n                                uint param_51 = tile_el_1;\n                                if (touch_mem(param_50, param_51))\n                                {\n                                    uint _1431 = atomicExchange(_135.memory[tile_el_1], tile_offset);\n                                    old = _1431;\n                                }\n                                tile_seg.origin = p0;\n                                tile_seg.vector = p1 - p0;\n                                float y_edge = 0.0;\n                                if (tag == 1u)\n                                {\n                                    y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);\n                                    if (min(p0.x, p1.x) < tile_x0)\n                                    {\n                                        vec2 p = vec2(tile_x0, y_edge);\n                                        if (p0.x > p1.x)\n                      
                  {\n                                            tile_seg.vector = p - p0;\n                                        }\n                                        else\n                                        {\n                                            tile_seg.origin = p;\n                                            tile_seg.vector = p1 - p;\n                                        }\n                                        if (tile_seg.vector.x == 0.0)\n                                        {\n                                            tile_seg.vector.x = sign(p1.x - p0.x) * 9.999999717180685365747194737196e-10;\n                                        }\n                                    }\n                                    if ((x <= min_xray) || (max_xray < x))\n                                    {\n                                        y_edge = 1000000000.0;\n                                    }\n                                }\n                                tile_seg.y_edge = y_edge;\n                                tile_seg.next.offset = old;\n                                Alloc param_52 = tile_alloc.alloc;\n                                TileSegRef param_53 = TileSegRef(tile_offset);\n                                TileSeg param_54 = tile_seg;\n                                TileSeg_write(param_52, param_53, param_54);\n                                tile_offset += 24u;\n                            }\n                            xc += b;\n                            base += stride;\n                            xray = next_xray;\n                        }\n                        n_out++;\n                        target += v_step;\n                        p0 = p1;\n                        continue;\n                    }\n                    else\n                    {\n                        break;\n                    }\n                }\n                val_sum += params_1.val;\n                qp0 = qp2_1;\n           
 }\n            break;\n        }\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct MallocResult\n{\n    Alloc alloc;\n    bool failed;\n};\n\nstruct PathStrokeCubicRef\n{\n    uint offset;\n};\n\nstruct PathStrokeCubic\n{\n    vec2 p0;\n    vec2 p1;\n    vec2 p2;\n    vec2 p3;\n    uint path_ix;\n    uint trans_ix;\n    vec2 stroke;\n};\n\nstruct PathSegRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct TileSegRef\n{\n    uint offset;\n};\n\nstruct TileSeg\n{\n    vec2 origin;\n    vec2 vector;\n    float y_edge;\n    TileSegRef next;\n};\n\nstruct TransformSegRef\n{\n    uint offset;\n};\n\nstruct TransformSeg\n{\n    vec4 mat;\n    vec2 translate;\n};\n\nstruct SubdivResult\n{\n    float val;\n    float a0;\n    float a2;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n    Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _144;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _773;\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _144.memory[offset];\n    return v;\n}\n\nuint PathSeg_tag(Alloc a, PathSegRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nPathStrokeCubic PathStrokeCubic_read(Alloc a, PathStrokeCubicRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint 
param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    Alloc param_10 = a;\n    uint param_11 = ix + 5u;\n    uint raw5 = read_mem(param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 6u;\n    uint raw6 = read_mem(param_12, param_13);\n    Alloc param_14 = a;\n    uint param_15 = ix + 7u;\n    uint raw7 = read_mem(param_14, param_15);\n    Alloc param_16 = a;\n    uint param_17 = ix + 8u;\n    uint raw8 = read_mem(param_16, param_17);\n    Alloc param_18 = a;\n    uint param_19 = ix + 9u;\n    uint raw9 = read_mem(param_18, param_19);\n    Alloc param_20 = a;\n    uint param_21 = ix + 10u;\n    uint raw10 = read_mem(param_20, param_21);\n    Alloc param_22 = a;\n    uint param_23 = ix + 11u;\n    uint raw11 = read_mem(param_22, param_23);\n    PathStrokeCubic s;\n    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n    s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));\n    s.path_ix = raw8;\n    s.trans_ix = raw9;\n    s.stroke = vec2(uintBitsToFloat(raw10), uintBitsToFloat(raw11));\n    return s;\n}\n\nPathStrokeCubic PathSeg_StrokeCubic_read(Alloc a, PathSegRef ref)\n{\n    Alloc param = a;\n    PathStrokeCubicRef param_1 = PathStrokeCubicRef(ref.offset + 4u);\n    return PathStrokeCubic_read(param, param_1);\n}\n\nTransformSeg TransformSeg_read(Alloc a, TransformSegRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, 
param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    Alloc param_10 = a;\n    uint param_11 = ix + 5u;\n    uint raw5 = read_mem(param_10, param_11);\n    TransformSeg s;\n    s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));\n    return s;\n}\n\nvec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t)\n{\n    float mt = 1.0 - t;\n    return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0)) + (((p2 * (mt * 3.0)) + (p3 * t)) * t)) * t);\n}\n\nfloat approx_parabola_integral(float x)\n{\n    return x * inversesqrt(sqrt(0.3300000131130218505859375 + (0.201511204242706298828125 + ((0.25 * x) * x))));\n}\n\nSubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol)\n{\n    vec2 d01 = p1 - p0;\n    vec2 d12 = p2 - p1;\n    vec2 dd = d01 - d12;\n    float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x);\n    float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross;\n    float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross;\n    float scale = abs(_cross / (length(dd) * (x2 - x0)));\n    float param = x0;\n    float a0 = approx_parabola_integral(param);\n    float param_1 = x2;\n    float a2 = approx_parabola_integral(param_1);\n    float val = 0.0;\n    if (scale < 1000000000.0)\n    {\n        float da = abs(a2 - a0);\n        float sqrt_scale = sqrt(scale);\n        if (sign(x0) == sign(x2))\n        {\n            val = da * sqrt_scale;\n        }\n        else\n        {\n            float xmin = sqrt_tol / sqrt_scale;\n            float param_2 = xmin;\n            val = 
(sqrt_tol * da) / approx_parabola_integral(param_2);\n        }\n    }\n    return SubdivResult(val, a0, a2);\n}\n\nPath Path_read(Alloc a, PathRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Path s;\n    s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n    s.tiles = TileRef(raw2);\n    return s;\n}\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nfloat approx_parabola_inv_integral(float x)\n{\n    return x * sqrt(0.61000001430511474609375 + (0.1520999968051910400390625 + ((0.25 * x) * x)));\n}\n\nvec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t)\n{\n    float mt = 1.0 - t;\n    return (p0 * (mt * mt)) + (((p1 * (mt * 2.0)) + (p2 * t)) * t);\n}\n\nMallocResult malloc(uint size)\n{\n    MallocResult r;\n    r.failed = false;\n    uint _150 = atomicAdd(_144.mem_offset, size);\n    uint offset = _150;\n    uint param = offset;\n    uint param_1 = size;\n    r.alloc = new_alloc(param, param_1);\n    if ((offset + size) > uint(int(uint(_144.memory.length())) * 4))\n    {\n        r.failed = true;\n        uint _171 = atomicMax(_144.mem_error, 1u);\n        return r;\n    }\n    return r;\n}\n\nTileRef Tile_index(TileRef ref, uint index)\n{\n    return TileRef(ref.offset + (index * 8u));\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _144.memory[offset] = val;\n}\n\nvoid TileSeg_write(Alloc a, TileSegRef ref, TileSeg s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = 
floatBitsToUint(s.origin.x);\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = floatBitsToUint(s.origin.y);\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = floatBitsToUint(s.vector.x);\n    write_mem(param_6, param_7, param_8);\n    Alloc param_9 = a;\n    uint param_10 = ix + 3u;\n    uint param_11 = floatBitsToUint(s.vector.y);\n    write_mem(param_9, param_10, param_11);\n    Alloc param_12 = a;\n    uint param_13 = ix + 4u;\n    uint param_14 = floatBitsToUint(s.y_edge);\n    write_mem(param_12, param_13, param_14);\n    Alloc param_15 = a;\n    uint param_16 = ix + 5u;\n    uint param_17 = s.next.offset;\n    write_mem(param_15, param_16, param_17);\n}\n\nvoid main()\n{\n    if (_144.mem_error != 0u)\n    {\n        return;\n    }\n    uint element_ix = gl_GlobalInvocationID.x;\n    PathSegRef ref = PathSegRef(_773.conf.pathseg_alloc.offset + (element_ix * 52u));\n    uint tag = 0u;\n    if (element_ix < _773.conf.n_pathseg)\n    {\n        Alloc param;\n        param.offset = _773.conf.pathseg_alloc.offset;\n        PathSegRef param_1 = ref;\n        tag = PathSeg_tag(param, param_1);\n    }\n    switch (tag)\n    {\n        case 1u:\n        case 2u:\n        {\n            Alloc param_2;\n            param_2.offset = _773.conf.pathseg_alloc.offset;\n            PathSegRef param_3 = ref;\n            PathStrokeCubic cubic = PathSeg_StrokeCubic_read(param_2, param_3);\n            uint trans_ix = cubic.trans_ix;\n            if (trans_ix > 0u)\n            {\n                TransformSegRef trans_ref = TransformSegRef(_773.conf.trans_alloc.offset + ((trans_ix - 1u) * 24u));\n                Alloc param_4;\n                param_4.offset = _773.conf.trans_alloc.offset;\n                TransformSegRef param_5 = trans_ref;\n                TransformSeg trans = TransformSeg_read(param_4, param_5);\n                cubic.p0 = 
((trans.mat.xy * cubic.p0.x) + (trans.mat.zw * cubic.p0.y)) + trans.translate;\n                cubic.p1 = ((trans.mat.xy * cubic.p1.x) + (trans.mat.zw * cubic.p1.y)) + trans.translate;\n                cubic.p2 = ((trans.mat.xy * cubic.p2.x) + (trans.mat.zw * cubic.p2.y)) + trans.translate;\n                cubic.p3 = ((trans.mat.xy * cubic.p3.x) + (trans.mat.zw * cubic.p3.y)) + trans.translate;\n            }\n            vec2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3;\n            float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);\n            uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u);\n            float val = 0.0;\n            vec2 qp0 = cubic.p0;\n            float _step = 1.0 / float(n_quads);\n            for (uint i = 0u; i < n_quads; i++)\n            {\n                float t = float(i + 1u) * _step;\n                vec2 param_6 = cubic.p0;\n                vec2 param_7 = cubic.p1;\n                vec2 param_8 = cubic.p2;\n                vec2 param_9 = cubic.p3;\n                float param_10 = t;\n                vec2 qp2 = eval_cubic(param_6, param_7, param_8, param_9, param_10);\n                vec2 param_11 = cubic.p0;\n                vec2 param_12 = cubic.p1;\n                vec2 param_13 = cubic.p2;\n                vec2 param_14 = cubic.p3;\n                float param_15 = t - (0.5 * _step);\n                vec2 qp1 = eval_cubic(param_11, param_12, param_13, param_14, param_15);\n                qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5);\n                vec2 param_16 = qp0;\n                vec2 param_17 = qp1;\n                vec2 param_18 = qp2;\n                float param_19 = 0.4743416607379913330078125;\n                SubdivResult params = estimate_subdiv(param_16, param_17, param_18, param_19);\n                val += params.val;\n                qp0 = qp2;\n            }\n            uint n = max(uint(ceil((val * 0.5) / 
0.4743416607379913330078125)), 1u);\n            uint path_ix = cubic.path_ix;\n            Alloc param_20;\n            param_20.offset = _773.conf.tile_alloc.offset;\n            PathRef param_21 = PathRef(_773.conf.tile_alloc.offset + (path_ix * 12u));\n            Path path = Path_read(param_20, param_21);\n            uint param_22 = path.tiles.offset;\n            uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;\n            Alloc path_alloc = new_alloc(param_22, param_23);\n            ivec4 bbox = ivec4(path.bbox);\n            vec2 p0 = cubic.p0;\n            qp0 = cubic.p0;\n            float v_step = val / float(n);\n            int n_out = 1;\n            float val_sum = 0.0;\n            vec2 p1;\n            float _1285;\n            TileSeg tile_seg;\n            for (uint i_1 = 0u; i_1 < n_quads; i_1++)\n            {\n                float t_1 = float(i_1 + 1u) * _step;\n                vec2 param_24 = cubic.p0;\n                vec2 param_25 = cubic.p1;\n                vec2 param_26 = cubic.p2;\n                vec2 param_27 = cubic.p3;\n                float param_28 = t_1;\n                vec2 qp2_1 = eval_cubic(param_24, param_25, param_26, param_27, param_28);\n                vec2 param_29 = cubic.p0;\n                vec2 param_30 = cubic.p1;\n                vec2 param_31 = cubic.p2;\n                vec2 param_32 = cubic.p3;\n                float param_33 = t_1 - (0.5 * _step);\n                vec2 qp1_1 = eval_cubic(param_29, param_30, param_31, param_32, param_33);\n                qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5);\n                vec2 param_34 = qp0;\n                vec2 param_35 = qp1_1;\n                vec2 param_36 = qp2_1;\n                float param_37 = 0.4743416607379913330078125;\n                SubdivResult params_1 = estimate_subdiv(param_34, param_35, param_36, param_37);\n                float param_38 = params_1.a0;\n                float u0 = 
approx_parabola_inv_integral(param_38);\n                float param_39 = params_1.a2;\n                float u2 = approx_parabola_inv_integral(param_39);\n                float uscale = 1.0 / (u2 - u0);\n                float target = float(n_out) * v_step;\n                for (;;)\n                {\n                    bool _1178 = uint(n_out) == n;\n                    bool _1188;\n                    if (!_1178)\n                    {\n                        _1188 = target < (val_sum + params_1.val);\n                    }\n                    else\n                    {\n                        _1188 = _1178;\n                    }\n                    if (_1188)\n                    {\n                        if (uint(n_out) == n)\n                        {\n                            p1 = cubic.p3;\n                        }\n                        else\n                        {\n                            float u = (target - val_sum) / params_1.val;\n                            float a = mix(params_1.a0, params_1.a2, u);\n                            float param_40 = a;\n                            float au = approx_parabola_inv_integral(param_40);\n                            float t_2 = (au - u0) * uscale;\n                            vec2 param_41 = qp0;\n                            vec2 param_42 = qp1_1;\n                            vec2 param_43 = qp2_1;\n                            float param_44 = t_2;\n                            p1 = eval_quad(param_41, param_42, param_43, param_44);\n                        }\n                        float xmin = min(p0.x, p1.x) - cubic.stroke.x;\n                        float xmax = max(p0.x, p1.x) + cubic.stroke.x;\n                        float ymin = min(p0.y, p1.y) - cubic.stroke.y;\n                        float ymax = max(p0.y, p1.y) + cubic.stroke.y;\n                        float dx = p1.x - p0.x;\n                        float dy = p1.y - p0.y;\n                        if (abs(dy) < 
9.999999717180685365747194737196e-10)\n                        {\n                            _1285 = 1000000000.0;\n                        }\n                        else\n                        {\n                            _1285 = dx / dy;\n                        }\n                        float invslope = _1285;\n                        float c = (cubic.stroke.x + (abs(invslope) * (16.0 + cubic.stroke.y))) * 0.03125;\n                        float b = invslope;\n                        float a_1 = (p0.x - ((p0.y - 16.0) * b)) * 0.03125;\n                        int x0 = int(floor(xmin * 0.03125));\n                        int x1 = int(floor(xmax * 0.03125) + 1.0);\n                        int y0 = int(floor(ymin * 0.03125));\n                        int y1 = int(floor(ymax * 0.03125) + 1.0);\n                        x0 = clamp(x0, bbox.x, bbox.z);\n                        y0 = clamp(y0, bbox.y, bbox.w);\n                        x1 = clamp(x1, bbox.x, bbox.z);\n                        y1 = clamp(y1, bbox.y, bbox.w);\n                        float xc = a_1 + (b * float(y0));\n                        int stride = bbox.z - bbox.x;\n                        int base = ((y0 - bbox.y) * stride) - bbox.x;\n                        uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));\n                        uint param_45 = n_tile_alloc * 24u;\n                        MallocResult _1400 = malloc(param_45);\n                        MallocResult tile_alloc = _1400;\n                        if (tile_alloc.failed)\n                        {\n                            return;\n                        }\n                        uint tile_offset = tile_alloc.alloc.offset;\n                        int xray = int(floor(p0.x * 0.03125));\n                        int last_xray = int(floor(p1.x * 0.03125));\n                        if (p0.y > p1.y)\n                        {\n                            int tmp = xray;\n                            xray = last_xray;\n                
            last_xray = tmp;\n                        }\n                        for (int y = y0; y < y1; y++)\n                        {\n                            float tile_y0 = float(y * 32);\n                            int xbackdrop = max((xray + 1), bbox.x);\n                            bool _1454 = tag == 1u;\n                            bool _1464;\n                            if (_1454)\n                            {\n                                _1464 = min(p0.y, p1.y) < tile_y0;\n                            }\n                            else\n                            {\n                                _1464 = _1454;\n                            }\n                            bool _1471;\n                            if (_1464)\n                            {\n                                _1471 = xbackdrop < bbox.z;\n                            }\n                            else\n                            {\n                                _1471 = _1464;\n                            }\n                            if (_1471)\n                            {\n                                int backdrop = (p1.y < p0.y) ? 
1 : (-1);\n                                TileRef param_46 = path.tiles;\n                                uint param_47 = uint(base + xbackdrop);\n                                TileRef tile_ref = Tile_index(param_46, param_47);\n                                uint tile_el = tile_ref.offset >> uint(2);\n                                Alloc param_48 = path_alloc;\n                                uint param_49 = tile_el + 1u;\n                                if (touch_mem(param_48, param_49))\n                                {\n                                    uint _1509 = atomicAdd(_144.memory[tile_el + 1u], uint(backdrop));\n                                }\n                            }\n                            int next_xray = last_xray;\n                            if (y < (y1 - 1))\n                            {\n                                float tile_y1 = float((y + 1) * 32);\n                                float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy);\n                                next_xray = int(floor(x_edge * 0.03125));\n                            }\n                            int min_xray = min(xray, next_xray);\n                            int max_xray = max(xray, next_xray);\n                            int xx0 = min(int(floor(xc - c)), min_xray);\n                            int xx1 = max(int(ceil(xc + c)), (max_xray + 1));\n                            xx0 = clamp(xx0, x0, x1);\n                            xx1 = clamp(xx1, x0, x1);\n                            for (int x = xx0; x < xx1; x++)\n                            {\n                                float tile_x0 = float(x * 32);\n                                TileRef param_50 = TileRef(path.tiles.offset);\n                                uint param_51 = uint(base + x);\n                                TileRef tile_ref_1 = Tile_index(param_50, param_51);\n                                uint tile_el_1 = tile_ref_1.offset >> uint(2);\n                                
uint old = 0u;\n                                Alloc param_52 = path_alloc;\n                                uint param_53 = tile_el_1;\n                                if (touch_mem(param_52, param_53))\n                                {\n                                    uint _1612 = atomicExchange(_144.memory[tile_el_1], tile_offset);\n                                    old = _1612;\n                                }\n                                tile_seg.origin = p0;\n                                tile_seg.vector = p1 - p0;\n                                float y_edge = 0.0;\n                                if (tag == 1u)\n                                {\n                                    y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);\n                                    if (min(p0.x, p1.x) < tile_x0)\n                                    {\n                                        vec2 p = vec2(tile_x0, y_edge);\n                                        if (p0.x > p1.x)\n                                        {\n                                            tile_seg.vector = p - p0;\n                                        }\n                                        else\n                                        {\n                                            tile_seg.origin = p;\n                                            tile_seg.vector = p1 - p;\n                                        }\n                                        if (tile_seg.vector.x == 0.0)\n                                        {\n                                            tile_seg.vector.x = sign(p1.x - p0.x) * 9.999999717180685365747194737196e-10;\n                                        }\n                                    }\n                                    if ((x <= min_xray) || (max_xray < x))\n                                    {\n                                        y_edge = 1000000000.0;\n                                    }\n                                
}\n                                tile_seg.y_edge = y_edge;\n                                tile_seg.next.offset = old;\n                                Alloc param_54 = tile_alloc.alloc;\n                                TileSegRef param_55 = TileSegRef(tile_offset);\n                                TileSeg param_56 = tile_seg;\n                                TileSeg_write(param_54, param_55, param_56);\n                                tile_offset += 24u;\n                            }\n                            xc += b;\n                            base += stride;\n                            xray = next_xray;\n                        }\n                        n_out++;\n                        target += v_step;\n                        p0 = p1;\n                        continue;\n                    }\n                    else\n                    {\n                        break;\n                    }\n                }\n                val_sum += params_1.val;\n                qp0 = qp2_1;\n            }\n            break;\n        }\n    }\n}\n\n",
	}
	shader_stencil_frag = driver.ShaderSources{
		Name:      "stencil.frag",


@@ 214,6 214,6 @@ var (
	}
	shader_tile_alloc_comp = driver.ShaderSources{
		Name:      "tile_alloc.comp",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct MallocResult\n{\n    Alloc alloc;\n    bool failed;\n};\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    vec4 bbox;\n    uint rgba_color;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _95;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _310;\n\nshared uint sh_tile_count[128];\nshared MallocResult sh_tile_alloc;\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _95.memory[offset];\n    return v;\n}\n\nuint Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nAnnoFill AnnoFill_read(Alloc a, AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = a;\n    uint param_9 = ix 
+ 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    AnnoFill s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.rgba_color = raw4;\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    AnnoFillRef param_1 = AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param, param_1);\n}\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nMallocResult malloc(uint size)\n{\n    MallocResult r;\n    r.failed = false;\n    uint _101 = atomicAdd(_95.mem_offset, size);\n    uint offset = _101;\n    uint param = offset;\n    uint param_1 = size;\n    r.alloc = new_alloc(param, param_1);\n    if ((offset + size) > uint(int(uint(_95.memory.length())) * 4))\n    {\n        r.failed = true;\n        uint _122 = atomicMax(_95.mem_error, 1u);\n        return r;\n    }\n    return r;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n    uint param = a.offset + offset;\n    uint param_1 = size;\n    return new_alloc(param, param_1);\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _95.memory[offset] = val;\n}\n\nvoid Path_write(Alloc a, PathRef ref, Path s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.bbox.x | (s.bbox.y << uint(16));\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = s.bbox.z | (s.bbox.w << uint(16));\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = s.tiles.offset;\n    write_mem(param_6, param_7, param_8);\n}\n\nvoid main()\n{\n    if (_95.mem_error != 0u)\n    {\n        return;\n    }\n    uint th_ix = gl_LocalInvocationID.x;\n    uint element_ix = 
gl_GlobalInvocationID.x;\n    PathRef path_ref = PathRef(_310.conf.tile_alloc.offset + (element_ix * 12u));\n    AnnotatedRef ref = AnnotatedRef(_310.conf.anno_alloc.offset + (element_ix * 28u));\n    uint tag = 0u;\n    if (element_ix < _310.conf.n_elements)\n    {\n        Alloc param;\n        param.offset = _310.conf.anno_alloc.offset;\n        AnnotatedRef param_1 = ref;\n        tag = Annotated_tag(param, param_1);\n    }\n    int x0 = 0;\n    int y0 = 0;\n    int x1 = 0;\n    int y1 = 0;\n    switch (tag)\n    {\n        case 2u:\n        case 3u:\n        case 1u:\n        case 4u:\n        case 5u:\n        {\n            Alloc param_2;\n            param_2.offset = _310.conf.anno_alloc.offset;\n            AnnotatedRef param_3 = ref;\n            AnnoFill fill = Annotated_Fill_read(param_2, param_3);\n            x0 = int(floor(fill.bbox.x * 0.03125));\n            y0 = int(floor(fill.bbox.y * 0.03125));\n            x1 = int(ceil(fill.bbox.z * 0.03125));\n            y1 = int(ceil(fill.bbox.w * 0.03125));\n            break;\n        }\n    }\n    x0 = clamp(x0, 0, int(_310.conf.width_in_tiles));\n    y0 = clamp(y0, 0, int(_310.conf.height_in_tiles));\n    x1 = clamp(x1, 0, int(_310.conf.width_in_tiles));\n    y1 = clamp(y1, 0, int(_310.conf.height_in_tiles));\n    Path path;\n    path.bbox = uvec4(uint(x0), uint(y0), uint(x1), uint(y1));\n    uint tile_count = uint((x1 - x0) * (y1 - y0));\n    if (tag == 5u)\n    {\n        tile_count = 0u;\n    }\n    sh_tile_count[th_ix] = tile_count;\n    uint total_tile_count = tile_count;\n    for (uint i = 0u; i < 7u; i++)\n    {\n        barrier();\n        if (th_ix >= uint(1 << int(i)))\n        {\n            total_tile_count += sh_tile_count[th_ix - uint(1 << int(i))];\n        }\n        barrier();\n        sh_tile_count[th_ix] = total_tile_count;\n    }\n    if (th_ix == 127u)\n    {\n        uint param_4 = total_tile_count * 8u;\n        MallocResult _483 = malloc(param_4);\n        sh_tile_alloc = _483;\n 
   }\n    barrier();\n    MallocResult alloc_start = sh_tile_alloc;\n    if (alloc_start.failed)\n    {\n        return;\n    }\n    if (element_ix < _310.conf.n_elements)\n    {\n        uint _500;\n        if (th_ix > 0u)\n        {\n            _500 = sh_tile_count[th_ix - 1u];\n        }\n        else\n        {\n            _500 = 0u;\n        }\n        uint tile_subix = _500;\n        Alloc param_5 = alloc_start.alloc;\n        uint param_6 = 8u * tile_subix;\n        uint param_7 = 8u * tile_count;\n        Alloc tiles_alloc = slice_mem(param_5, param_6, param_7);\n        path.tiles = TileRef(tiles_alloc.offset);\n        Alloc param_8;\n        param_8.offset = _310.conf.tile_alloc.offset;\n        PathRef param_9 = path_ref;\n        Path param_10 = path;\n        Path_write(param_8, param_9, param_10);\n    }\n    uint total_count = sh_tile_count[127] * 2u;\n    uint start_ix = alloc_start.alloc.offset >> uint(2);\n    for (uint i_1 = th_ix; i_1 < total_count; i_1 += 128u)\n    {\n        Alloc param_11 = alloc_start.alloc;\n        uint param_12 = start_ix + i_1;\n        uint param_13 = 0u;\n        write_mem(param_11, param_12, param_13);\n    }\n}\n\n",
		GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n    uint offset;\n};\n\nstruct MallocResult\n{\n    Alloc alloc;\n    bool failed;\n};\n\nstruct AnnoFillRef\n{\n    uint offset;\n};\n\nstruct AnnoFill\n{\n    vec4 bbox;\n    uint rgba_color;\n};\n\nstruct AnnotatedRef\n{\n    uint offset;\n};\n\nstruct PathRef\n{\n    uint offset;\n};\n\nstruct TileRef\n{\n    uint offset;\n};\n\nstruct Path\n{\n    uvec4 bbox;\n    TileRef tiles;\n};\n\nstruct Config\n{\n    uint n_elements;\n    uint n_pathseg;\n    uint width_in_tiles;\n    uint height_in_tiles;\n    Alloc tile_alloc;\n    Alloc bin_alloc;\n    Alloc ptcl_alloc;\n    Alloc pathseg_alloc;\n    Alloc anno_alloc;\n    Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n    uint mem_offset;\n    uint mem_error;\n    uint memory[];\n} _95;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n    Config conf;\n} _310;\n\nshared uint sh_tile_count[128];\nshared MallocResult sh_tile_alloc;\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n    return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return 0u;\n    }\n    uint v = _95.memory[offset];\n    return v;\n}\n\nuint Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    uint param_1 = ref.offset >> uint(2);\n    return read_mem(param, param_1);\n}\n\nAnnoFill AnnoFill_read(Alloc a, AnnoFillRef ref)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint raw0 = read_mem(param, param_1);\n    Alloc param_2 = a;\n    uint param_3 = ix + 1u;\n    uint raw1 = read_mem(param_2, param_3);\n    Alloc param_4 = a;\n    uint param_5 = ix + 2u;\n    uint raw2 = read_mem(param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 3u;\n    uint raw3 = read_mem(param_6, param_7);\n    Alloc param_8 = 
a;\n    uint param_9 = ix + 4u;\n    uint raw4 = read_mem(param_8, param_9);\n    AnnoFill s;\n    s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n    s.rgba_color = raw4;\n    return s;\n}\n\nAnnoFill Annotated_Fill_read(Alloc a, AnnotatedRef ref)\n{\n    Alloc param = a;\n    AnnoFillRef param_1 = AnnoFillRef(ref.offset + 4u);\n    return AnnoFill_read(param, param_1);\n}\n\nAlloc new_alloc(uint offset, uint size)\n{\n    Alloc a;\n    a.offset = offset;\n    return a;\n}\n\nMallocResult malloc(uint size)\n{\n    MallocResult r;\n    r.failed = false;\n    uint _101 = atomicAdd(_95.mem_offset, size);\n    uint offset = _101;\n    uint param = offset;\n    uint param_1 = size;\n    r.alloc = new_alloc(param, param_1);\n    if ((offset + size) > uint(int(uint(_95.memory.length())) * 4))\n    {\n        r.failed = true;\n        uint _122 = atomicMax(_95.mem_error, 1u);\n        return r;\n    }\n    return r;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n    uint param = a.offset + offset;\n    uint param_1 = size;\n    return new_alloc(param, param_1);\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n    Alloc param = alloc;\n    uint param_1 = offset;\n    if (!touch_mem(param, param_1))\n    {\n        return;\n    }\n    _95.memory[offset] = val;\n}\n\nvoid Path_write(Alloc a, PathRef ref, Path s)\n{\n    uint ix = ref.offset >> uint(2);\n    Alloc param = a;\n    uint param_1 = ix + 0u;\n    uint param_2 = s.bbox.x | (s.bbox.y << uint(16));\n    write_mem(param, param_1, param_2);\n    Alloc param_3 = a;\n    uint param_4 = ix + 1u;\n    uint param_5 = s.bbox.z | (s.bbox.w << uint(16));\n    write_mem(param_3, param_4, param_5);\n    Alloc param_6 = a;\n    uint param_7 = ix + 2u;\n    uint param_8 = s.tiles.offset;\n    write_mem(param_6, param_7, param_8);\n}\n\nvoid main()\n{\n    if (_95.mem_error != 0u)\n    {\n        return;\n    }\n    uint th_ix = 
gl_LocalInvocationID.x;\n    uint element_ix = gl_GlobalInvocationID.x;\n    PathRef path_ref = PathRef(_310.conf.tile_alloc.offset + (element_ix * 12u));\n    AnnotatedRef ref = AnnotatedRef(_310.conf.anno_alloc.offset + (element_ix * 28u));\n    uint tag = 0u;\n    if (element_ix < _310.conf.n_elements)\n    {\n        Alloc param;\n        param.offset = _310.conf.anno_alloc.offset;\n        AnnotatedRef param_1 = ref;\n        tag = Annotated_tag(param, param_1);\n    }\n    int x0 = 0;\n    int y0 = 0;\n    int x1 = 0;\n    int y1 = 0;\n    switch (tag)\n    {\n        case 2u:\n        case 3u:\n        case 1u:\n        case 4u:\n        case 5u:\n        {\n            Alloc param_2;\n            param_2.offset = _310.conf.anno_alloc.offset;\n            AnnotatedRef param_3 = ref;\n            AnnoFill fill = Annotated_Fill_read(param_2, param_3);\n            x0 = int(floor(fill.bbox.x * 0.03125));\n            y0 = int(floor(fill.bbox.y * 0.03125));\n            x1 = int(ceil(fill.bbox.z * 0.03125));\n            y1 = int(ceil(fill.bbox.w * 0.03125));\n            break;\n        }\n    }\n    x0 = clamp(x0, 0, int(_310.conf.width_in_tiles));\n    y0 = clamp(y0, 0, int(_310.conf.height_in_tiles));\n    x1 = clamp(x1, 0, int(_310.conf.width_in_tiles));\n    y1 = clamp(y1, 0, int(_310.conf.height_in_tiles));\n    Path path;\n    path.bbox = uvec4(uint(x0), uint(y0), uint(x1), uint(y1));\n    uint tile_count = uint((x1 - x0) * (y1 - y0));\n    if (tag == 5u)\n    {\n        tile_count = 0u;\n    }\n    sh_tile_count[th_ix] = tile_count;\n    uint total_tile_count = tile_count;\n    for (uint i = 0u; i < 7u; i++)\n    {\n        barrier();\n        if (th_ix >= uint(1 << int(i)))\n        {\n            total_tile_count += sh_tile_count[th_ix - uint(1 << int(i))];\n        }\n        barrier();\n        sh_tile_count[th_ix] = total_tile_count;\n    }\n    if (th_ix == 127u)\n    {\n        uint param_4 = total_tile_count * 8u;\n        MallocResult _483 = 
malloc(param_4);\n        sh_tile_alloc = _483;\n    }\n    barrier();\n    MallocResult alloc_start = sh_tile_alloc;\n    if (alloc_start.failed)\n    {\n        return;\n    }\n    if (element_ix < _310.conf.n_elements)\n    {\n        uint _500;\n        if (th_ix > 0u)\n        {\n            _500 = sh_tile_count[th_ix - 1u];\n        }\n        else\n        {\n            _500 = 0u;\n        }\n        uint tile_subix = _500;\n        Alloc param_5 = alloc_start.alloc;\n        uint param_6 = 8u * tile_subix;\n        uint param_7 = 8u * tile_count;\n        Alloc tiles_alloc = slice_mem(param_5, param_6, param_7);\n        path.tiles = TileRef(tiles_alloc.offset);\n        Alloc param_8;\n        param_8.offset = _310.conf.tile_alloc.offset;\n        PathRef param_9 = path_ref;\n        Path param_10 = path;\n        Path_write(param_8, param_9, param_10);\n    }\n    uint total_count = sh_tile_count[127] * 2u;\n    uint start_ix = alloc_start.alloc.offset >> uint(2);\n    for (uint i_1 = th_ix; i_1 < total_count; i_1 += 128u)\n    {\n        Alloc param_11 = alloc_start.alloc;\n        uint param_12 = start_ix + i_1;\n        uint param_13 = 0u;\n        write_mem(param_11, param_12, param_13);\n    }\n}\n\n",
	}
)

M gpu/shaders/elements.comp => gpu/shaders/elements.comp +30 -61
@@ 39,6 39,7 @@ layout(set = 0, binding = 3) volatile buffer StateBuf {
#include "state.h"
#include "annotated.h"
#include "pathseg.h"
#include "tile.h"

#define StateBuf_stride (4 + 2 * State_size)



@@ 91,6 92,7 @@ State combine_state(State a, State b) {
    c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
    c.path_count = a.path_count + b.path_count;
    c.pathseg_count = a.pathseg_count + b.pathseg_count;
    c.trans_count = a.trans_count + b.trans_count;
    return c;
}



@@ 106,6 108,7 @@ State map_element(ElementRef ref) {
    c.flags = 0;
    c.path_count = 0;
    c.pathseg_count = 0;
    c.trans_count = 0;
    switch (tag) {
    case Element_FillLine:
    case Element_StrokeLine:


@@ 147,6 150,7 @@ State map_element(ElementRef ref) {
        Transform t = Element_Transform_read(ref);
        c.mat = t.mat;
        c.translate = t.translate;
        c.trans_count = 1;
        break;
    }
    return c;


@@ 158,16 162,7 @@ vec2 get_linewidth(State st) {
    return 0.5 * st.linewidth * vec2(length(st.mat.xz), length(st.mat.yw));
}

// We should be able to use an array of structs but the NV shader compiler
// doesn't seem to like it :/
//shared State sh_state[WG_SIZE];
shared vec4 sh_mat[WG_SIZE];
shared vec2 sh_translate[WG_SIZE];
shared vec4 sh_bbox[WG_SIZE];
shared float sh_width[WG_SIZE];
shared uint sh_flags[WG_SIZE];
shared uint sh_path_count[WG_SIZE];
shared uint sh_pathseg_count[WG_SIZE];
shared State sh_state[WG_SIZE];

shared uint sh_part_ix;
shared State sh_prefix;


@@ 196,35 191,15 @@ void main() {
        th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i)));
    }
    State agg = th_state[N_ROWS - 1];
    sh_mat[gl_LocalInvocationID.x] = agg.mat;
    sh_translate[gl_LocalInvocationID.x] = agg.translate;
    sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
    sh_width[gl_LocalInvocationID.x] = agg.linewidth;
    sh_flags[gl_LocalInvocationID.x] = agg.flags;
    sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
    sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
    sh_state[gl_LocalInvocationID.x] = agg;
    for (uint i = 0; i < LG_WG_SIZE; i++) {
        barrier();
        if (gl_LocalInvocationID.x >= (1 << i)) {
            State other;
            uint ix = gl_LocalInvocationID.x - (1 << i);
            other.mat = sh_mat[ix];
            other.translate = sh_translate[ix];
            other.bbox = sh_bbox[ix];
            other.linewidth = sh_width[ix];
            other.flags = sh_flags[ix];
            other.path_count = sh_path_count[ix];
            other.pathseg_count = sh_pathseg_count[ix];
            State other = sh_state[gl_LocalInvocationID.x - (1 << i)];
            agg = combine_state(other, agg);
        }
        barrier();
        sh_mat[gl_LocalInvocationID.x] = agg.mat;
        sh_translate[gl_LocalInvocationID.x] = agg.translate;
        sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
        sh_width[gl_LocalInvocationID.x] = agg.linewidth;
        sh_flags[gl_LocalInvocationID.x] = agg.flags;
        sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
        sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
        sh_state[gl_LocalInvocationID.x] = agg;
    }

    State exclusive;


@@ 235,6 210,7 @@ void main() {
    exclusive.flags = 0;
    exclusive.path_count = 0;
    exclusive.pathseg_count = 0;
    exclusive.trans_count = 0;

    // Publish aggregate for this partition
    if (gl_LocalInvocationID.x == WG_SIZE - 1) {


@@ 305,15 281,7 @@ void main() {

    State row = exclusive;
    if (gl_LocalInvocationID.x > 0) {
        uint ix = gl_LocalInvocationID.x - 1;
        State other;
        other.mat = sh_mat[ix];
        other.translate = sh_translate[ix];
        other.bbox = sh_bbox[ix];
        other.linewidth = sh_width[ix];
        other.flags = sh_flags[ix];
        other.path_count = sh_path_count[ix];
        other.pathseg_count = sh_pathseg_count[ix];
        State other = sh_state[gl_LocalInvocationID.x - 1];
        row = combine_state(row, other);
    }
    for (uint i = 0; i < N_ROWS; i++) {


@@ 328,14 296,13 @@ void main() {
        case Element_FillLine:
        case Element_StrokeLine:
            LineSeg line = Element_StrokeLine_read(this_ref);
            vec2 p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate;
            vec2 p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate;
            PathStrokeCubic path_cubic;
            path_cubic.p0 = p0;
            path_cubic.p1 = mix(p0, p1, 1.0 / 3.0);
            path_cubic.p2 = mix(p1, p0, 1.0 / 3.0);
            path_cubic.p3 = p1;
            path_cubic.p0 = line.p0;
            path_cubic.p1 = mix(line.p0, line.p1, 1.0 / 3.0);
            path_cubic.p2 = mix(line.p1, line.p0, 1.0 / 3.0);
            path_cubic.p3 = line.p1;
            path_cubic.path_ix = st.path_count;
            path_cubic.trans_ix = st.trans_count;
            if (tag == Element_StrokeLine) {
                path_cubic.stroke = get_linewidth(st);
            } else {


@@ 351,15 318,12 @@ void main() {
        case Element_FillQuad:
        case Element_StrokeQuad:
            QuadSeg quad = Element_StrokeQuad_read(this_ref);
            p0 = st.mat.xy * quad.p0.x + st.mat.zw * quad.p0.y + st.translate;
            p1 = st.mat.xy * quad.p1.x + st.mat.zw * quad.p1.y + st.translate;
            vec2 p2 = st.mat.xy * quad.p2.x + st.mat.zw * quad.p2.y + st.translate;
            path_cubic;
            path_cubic.p0 = p0;
            path_cubic.p1 = mix(p1, p0, 1.0 / 3.0);
            path_cubic.p2 = mix(p1, p2, 1.0 / 3.0);
            path_cubic.p3 = p2;
            path_cubic.p0 = quad.p0;
            path_cubic.p1 = mix(quad.p1, quad.p0, 1.0 / 3.0);
            path_cubic.p2 = mix(quad.p1, quad.p2, 1.0 / 3.0);
            path_cubic.p3 = quad.p2;
            path_cubic.path_ix = st.path_count;
            path_cubic.trans_ix = st.trans_count;
            if (tag == Element_StrokeQuad) {
                path_cubic.stroke = get_linewidth(st);
            } else {


@@ 375,12 339,12 @@ void main() {
        case Element_FillCubic:
        case Element_StrokeCubic:
            CubicSeg cubic = Element_StrokeCubic_read(this_ref);
            path_cubic;
            path_cubic.p0 = st.mat.xy * cubic.p0.x + st.mat.zw * cubic.p0.y + st.translate;
            path_cubic.p1 = st.mat.xy * cubic.p1.x + st.mat.zw * cubic.p1.y + st.translate;
            path_cubic.p2 = st.mat.xy * cubic.p2.x + st.mat.zw * cubic.p2.y + st.translate;
            path_cubic.p3 = st.mat.xy * cubic.p3.x + st.mat.zw * cubic.p3.y + st.translate;
            path_cubic.p0 = cubic.p0;
            path_cubic.p1 = cubic.p1;
            path_cubic.p2 = cubic.p2;
            path_cubic.p3 = cubic.p3;
            path_cubic.path_ix = st.path_count;
            path_cubic.trans_ix = st.trans_count;
            if (tag == Element_StrokeCubic) {
                path_cubic.stroke = get_linewidth(st);
            } else {


@@ 435,6 399,11 @@ void main() {
            out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
            Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
            break;
        case Element_Transform:
            TransformSeg transform = TransformSeg(st.mat, st.translate);
            TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (st.trans_count - 1) * TransformSeg_size);
            TransformSeg_write(conf.trans_alloc, trans_ref, transform);
            break;
        }
    }
}

M gpu/shaders/path_coarse.comp => gpu/shaders/path_coarse.comp +11 -0
@@ 102,6 102,17 @@ void main() {
    case PathSeg_FillCubic:
    case PathSeg_StrokeCubic:
        PathStrokeCubic cubic = PathSeg_StrokeCubic_read(conf.pathseg_alloc, ref);

        uint trans_ix = cubic.trans_ix;
        if (trans_ix > 0) {
            TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size);
            TransformSeg trans = TransformSeg_read(conf.trans_alloc, trans_ref);
            cubic.p0 = trans.mat.xy * cubic.p0.x + trans.mat.zw * cubic.p0.y + trans.translate;
            cubic.p1 = trans.mat.xy * cubic.p1.x + trans.mat.zw * cubic.p1.y + trans.translate;
            cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate;
            cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate;
        }

        vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
        float err = err_v.x * err_v.x + err_v.y * err_v.y;
        // The number of quadratics.

M gpu/shaders/pathseg.h => gpu/shaders/pathseg.h +14 -6
@@ 20,9 20,10 @@ struct PathFillCubic {
    vec2 p2;
    vec2 p3;
    uint path_ix;
    uint trans_ix;
};

#define PathFillCubic_size 36
#define PathFillCubic_size 40

// Returns ref advanced by index elements, i.e. by index * PathFillCubic_size.
PathFillCubicRef PathFillCubic_index(PathFillCubicRef ref, uint index) {
    uint stride = index * PathFillCubic_size;
    return PathFillCubicRef(ref.offset + stride);
}

@@ 34,10 35,11 @@ struct PathStrokeCubic {
    vec2 p2;
    vec2 p3;
    uint path_ix;
    uint trans_ix;
    vec2 stroke;
};

#define PathStrokeCubic_size 44
#define PathStrokeCubic_size 48

// Returns ref advanced by index elements, i.e. by index * PathStrokeCubic_size.
PathStrokeCubicRef PathStrokeCubic_index(PathStrokeCubicRef ref, uint index) {
    uint stride = index * PathStrokeCubic_size;
    return PathStrokeCubicRef(ref.offset + stride);
}

@@ 46,7 48,7 @@ PathStrokeCubicRef PathStrokeCubic_index(PathStrokeCubicRef ref, uint index) {
#define PathSeg_Nop 0
#define PathSeg_FillCubic 1
#define PathSeg_StrokeCubic 2
#define PathSeg_size 48
#define PathSeg_size 52

// Returns ref advanced by index elements, i.e. by index * PathSeg_size.
PathSegRef PathSeg_index(PathSegRef ref, uint index) {
    uint stride = index * PathSeg_size;
    return PathSegRef(ref.offset + stride);
}

@@ 63,12 65,14 @@ PathFillCubic PathFillCubic_read(Alloc a, PathFillCubicRef ref) {
    uint raw6 = read_mem(a, ix + 6);
    uint raw7 = read_mem(a, ix + 7);
    uint raw8 = read_mem(a, ix + 8);
    uint raw9 = read_mem(a, ix + 9);
    PathFillCubic s;
    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
    s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
    s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
    s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
    s.path_ix = raw8;
    s.trans_ix = raw9;
    return s;
}



@@ 83,6 87,7 @@ void PathFillCubic_write(Alloc a, PathFillCubicRef ref, PathFillCubic s) {
    write_mem(a, ix + 6, floatBitsToUint(s.p3.x));
    write_mem(a, ix + 7, floatBitsToUint(s.p3.y));
    write_mem(a, ix + 8, s.path_ix);
    write_mem(a, ix + 9, s.trans_ix);
}

PathStrokeCubic PathStrokeCubic_read(Alloc a, PathStrokeCubicRef ref) {


@@ 98,13 103,15 @@ PathStrokeCubic PathStrokeCubic_read(Alloc a, PathStrokeCubicRef ref) {
    uint raw8 = read_mem(a, ix + 8);
    uint raw9 = read_mem(a, ix + 9);
    uint raw10 = read_mem(a, ix + 10);
    uint raw11 = read_mem(a, ix + 11);
    PathStrokeCubic s;
    s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
    s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
    s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
    s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
    s.path_ix = raw8;
    s.stroke = vec2(uintBitsToFloat(raw9), uintBitsToFloat(raw10));
    s.trans_ix = raw9;
    s.stroke = vec2(uintBitsToFloat(raw10), uintBitsToFloat(raw11));
    return s;
}



@@ 119,8 126,9 @@ void PathStrokeCubic_write(Alloc a, PathStrokeCubicRef ref, PathStrokeCubic s) {
    write_mem(a, ix + 6, floatBitsToUint(s.p3.x));
    write_mem(a, ix + 7, floatBitsToUint(s.p3.y));
    write_mem(a, ix + 8, s.path_ix);
    write_mem(a, ix + 9, floatBitsToUint(s.stroke.x));
    write_mem(a, ix + 10, floatBitsToUint(s.stroke.y));
    write_mem(a, ix + 9, s.trans_ix);
    write_mem(a, ix + 10, floatBitsToUint(s.stroke.x));
    write_mem(a, ix + 11, floatBitsToUint(s.stroke.y));
}

uint PathSeg_tag(Alloc a, PathSegRef ref) {

M gpu/shaders/setup.h => gpu/shaders/setup.h +1 -0
@@ 35,4 35,5 @@ struct Config {
    Alloc ptcl_alloc;
    Alloc pathseg_alloc;
    Alloc anno_alloc;
    Alloc trans_alloc;
};

M gpu/shaders/state.h => gpu/shaders/state.h +5 -1
@@ 14,9 14,10 @@ struct State {
    uint flags;
    uint path_count;
    uint pathseg_count;
    uint trans_count;
};

#define State_size 56
#define State_size 60

// Returns ref advanced by index elements, i.e. by index * State_size.
StateRef State_index(StateRef ref, uint index) {
    uint stride = index * State_size;
    return StateRef(ref.offset + stride);
}

@@ 38,6 39,7 @@ State State_read(StateRef ref) {
    uint raw11 = state[ix + 11];
    uint raw12 = state[ix + 12];
    uint raw13 = state[ix + 13];
    uint raw14 = state[ix + 14];
    State s;
    s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
    s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));


@@ 46,6 48,7 @@ State State_read(StateRef ref) {
    s.flags = raw11;
    s.path_count = raw12;
    s.pathseg_count = raw13;
    s.trans_count = raw14;
    return s;
}



@@ 65,5 68,6 @@ void State_write(StateRef ref, State s) {
    state[ix + 11] = s.flags;
    state[ix + 12] = s.path_count;
    state[ix + 13] = s.pathseg_count;
    state[ix + 14] = s.trans_count;
}


M gpu/shaders/tile.h => gpu/shaders/tile.h +39 -0
@@ 14,6 14,10 @@ struct TileSegRef {
    uint offset;
};

// Typed reference to a TransformSeg record. The read/write helpers shift
// offset right by 2 before passing it to read_mem/write_mem.
struct TransformSegRef {
    // Offset of the record; advanced in steps of TransformSeg_size.
    uint offset;
};

struct Path {
    uvec4 bbox;
    TileRef tiles;


@@ 49,6 53,17 @@ TileSegRef TileSeg_index(TileSegRef ref, uint index) {
    return TileSegRef(ref.offset + index * TileSeg_size);
}

// An affine transformation: a 2x2 linear part plus a translation.
// A point p is mapped as mat.xy * p.x + mat.zw * p.y + translate.
struct TransformSeg {
    // Linear part; xy scales the x coordinate, zw scales the y coordinate.
    vec4 mat;
    // Translation added after the linear part.
    vec2 translate;
};

#define TransformSeg_size 24

// Returns ref advanced by index elements, i.e. by index * TransformSeg_size.
TransformSegRef TransformSeg_index(TransformSegRef ref, uint index) {
    uint stride = index * TransformSeg_size;
    return TransformSegRef(ref.offset + stride);
}

Path Path_read(Alloc a, PathRef ref) {
    uint ix = ref.offset >> 2;
    uint raw0 = read_mem(a, ix + 0);


@@ 109,3 124,27 @@ void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s) {
    write_mem(a, ix + 5, s.next.offset);
}

// Loads the TransformSeg referenced by ref through read_mem on allocation a.
// The record occupies six consecutive uint words: four holding the float
// bit patterns of mat, followed by two holding those of translate.
TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref) {
    // Index of the record's first word, as expected by read_mem.
    uint base = ref.offset >> 2;
    // Arguments are evaluated left to right, so words are fetched in
    // ascending order.
    uvec4 mat_bits = uvec4(read_mem(a, base + 0), read_mem(a, base + 1), read_mem(a, base + 2), read_mem(a, base + 3));
    uvec2 translate_bits = uvec2(read_mem(a, base + 4), read_mem(a, base + 5));
    TransformSeg seg;
    seg.mat = uintBitsToFloat(mat_bits);
    seg.translate = uintBitsToFloat(translate_bits);
    return seg;
}

// Stores s at ref through write_mem on allocation a, using the same
// six-word layout that TransformSeg_read expects: the four components of
// mat first, then the two components of translate, as float bit patterns.
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) {
    // Index of the record's first word, as expected by write_mem.
    uint base = ref.offset >> 2;
    uvec4 mat_bits = floatBitsToUint(s.mat);
    uvec2 translate_bits = floatBitsToUint(s.translate);
    write_mem(a, base + 0, mat_bits.x);
    write_mem(a, base + 1, mat_bits.y);
    write_mem(a, base + 2, mat_bits.z);
    write_mem(a, base + 3, mat_bits.w);
    write_mem(a, base + 4, translate_bits.x);
    write_mem(a, base + 5, translate_bits.y);
}