~quf/include_assets

4b7d489eed5f63c4343a30392da7f97235388c0d — Lukas Himbert 6 months ago
initial
A  => .gitignore +1 -0
@@ 1,1 @@
target
\ No newline at end of file

A  => Cargo.lock +359 -0
@@ 1,359 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "anyhow"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"

[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

[[package]]
name = "blake2"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe"
dependencies = [
 "digest",
]

[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
 "generic-array",
]

[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
dependencies = [
 "jobserver",
]

[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
 "generic-array",
 "typenum",
]

[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
 "block-buffer",
 "crypto-common",
 "subtle",
]

[[package]]
name = "enums"
version = "0.1.0"
dependencies = [
 "hexhex",
 "include_assets",
]

[[package]]
name = "fallible-iterator"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"

[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
 "typenum",
 "version_check",
]

[[package]]
name = "hexhex"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb1ebf6a5f2918db41d6ac1a1bf3899e4fc4fc9e00a722d5d2542d029d0dc91"
dependencies = [
 "hexhex_impl",
 "hexhex_macros",
]

[[package]]
name = "hexhex_impl"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "130a1dcc2b7ea73d9a4818f208db6f022ff4402be5fe7a3db5d8178c62cf5f2e"
dependencies = [
 "fallible-iterator",
]

[[package]]
name = "hexhex_macros"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4be14289347b0bcff5e279a8fd723e9f3f4a313f75220a297c1bab66dd7a30b"
dependencies = [
 "hexhex_impl",
]

[[package]]
name = "include_assets"
version = "1.0.0"
dependencies = [
 "include_assets_decode",
 "include_assets_encode",
]

[[package]]
name = "include_assets_decode"
version = "0.1.0"
dependencies = [
 "blake2",
 "hexhex",
 "lz4_flex",
 "smartstring",
 "yazi",
 "zstd",
]

[[package]]
name = "include_assets_encode"
version = "0.1.0"
dependencies = [
 "anyhow",
 "include_assets_decode",
 "proc-macro2",
 "quote",
 "smartstring",
 "syn",
 "thiserror",
 "walkdir",
]

[[package]]
name = "jobserver"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2"
dependencies = [
 "libc",
]

[[package]]
name = "libc"
version = "0.2.144"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"

[[package]]
name = "lz4_flex"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83"

[[package]]
name = "named"
version = "0.1.0"
dependencies = [
 "include_assets",
]

[[package]]
name = "pkg-config"
version = "0.3.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"

[[package]]
name = "proc-macro2"
version = "1.0.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "quote"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
 "winapi-util",
]

[[package]]
name = "smartstring"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29"
dependencies = [
 "autocfg",
 "static_assertions",
 "version_check",
]

[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"

[[package]]
name = "subtle"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"

[[package]]
name = "syn"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "thiserror"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
dependencies = [
 "thiserror-impl",
]

[[package]]
name = "thiserror-impl"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "typenum"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"

[[package]]
name = "unicode-ident"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"

[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"

[[package]]
name = "walkdir"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
dependencies = [
 "same-file",
 "winapi-util",
]

[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
 "winapi-i686-pc-windows-gnu",
 "winapi-x86_64-pc-windows-gnu",
]

[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"

[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
 "winapi",
]

[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

[[package]]
name = "yazi"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c94451ac9513335b5e23d7a8a2b61a7102398b8cca5160829d313e84c9d98be1"

[[package]]
name = "zstd"
version = "0.12.3+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806"
dependencies = [
 "zstd-safe",
]

[[package]]
name = "zstd-safe"
version = "6.0.5+zstd.1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b"
dependencies = [
 "libc",
 "zstd-sys",
]

[[package]]
name = "zstd-sys"
version = "2.0.8+zstd.1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c"
dependencies = [
 "cc",
 "libc",
 "pkg-config",
]

A  => Cargo.toml +8 -0
@@ 1,8 @@
[workspace]
members = ["include_assets", "include_assets_encode", "include_assets_decode", "examples/named", "examples/enums"]

[profile.release]
opt-level = "s"
debug = false
strip = true
lto = true

A  => README.md +119 -0
@@ 1,119 @@
# `include_assets` in your executable

`include_assets` provides convenient ways to include assets (arbitrary files) in a Rust binary.
Assets are compressed and can either be looked up by file name or by variants of an enumeration.


## assets by name

This is probably the most straightforward approach.
Include all files in a directory with the `include_dir!()` macro, load (decompress) the assets at runtime using `NamedArchive::load`.
Once they are loaded, use the `NamedArchive` more or less as you would a `HashMap<&str, &[u8]>`.

For examples, see the [docs](https://docs.rs/crate/include_assets/latest/include_assets/macro.include_dir.html) and [`examples/named/src/main.rs`](examples/named/src/main.rs).


## assets by enum variant

This approach might be a little unusual.
Declare an enum with one unit variant per asset, and derive the trait `EnumAsset` using the derive macro that comes with this crate.
Load the uncompressed assets using `EnumArchive::<MyEnum>::load()` (replacing `MyEnum` with whatever name you chose for your enum).
Then look up the asset data via indexing (`&archive[MyAsset::SomeVariant]`) - this is infallible!

This approach has two distinct advantages:

- You cannot accidentally use any asset that's not included in the executable: If you try, that's a compile-time error.
- If you include an asset in the binary but never use it (i.e. never construct the corresponding enum variant) that causes a compile-time warning.

A disadvantage is that you cannot iterate over assets.
Additionally, their names are erased at runtime.

Despite the lack of iteration, asset data can be mapped using `AssetEnum::map`.
You can then look up assets by enum variant in the resulting `EnumMap`.
This may be useful for homogeneous assets; you could, for example, parse templates, decode sound/image files, &c.
Note that you can have multiple `EnumAsset`s in the same program; mapping will be more useful if you have different enums for different types of asset.

For examples, see the [docs](https://docs.rs/crate/include_assets/latest/include_assets/struct.EnumArchive.html) and [`examples/enums/src/main.rs`](examples/enums/src/main.rs).

## Build script

If you want to rebuild the executable whenever one of the assets changes, you should use a [`build.rs`](https://doc.rust-lang.org/cargo/reference/build-scripts.html) like this:

```
fn main() {
    println!("cargo:rerun-if-changed=path/to/assets");
    println!("cargo:rerun-if-changed=more/assets");
}
```


## Licence

This crate is (probably going to be but NOT CURRENTLY) licensed under the LGPL v3.


## Compression

Currently supported: zstd, lz4, deflate, no compression.


## Checksums

At compile time, a checksum is computed for each asset.
These checksums are included in the binary.
When loading/decompressing assets, the checksum of decompressed assets is compared against the compile-time checksum as a measure against data corruption and (more importantly) bugs.

Currently, blake2b is used for this, but this may change in the future.


## Limitations

At runtime, main memory needs to be big enough to hold all assets at the same time in compressed and uncompressed form.
At compile time, main memory needs to be big enough to hold all assets at the same time in compressed form and twice in uncompressed form.

The total size of each asset archive cannot exceed `u32::MAX` (4 GiB).
Each asset archive can contain at most `u32::MAX` (roughly 4e9) distinct assets.
If your use case exceeds these limits, reconsider if this is really the right approach.

`usize` is required to be at least 32 bits wide.


## Related work

Rust `core` includes the `include_bytes!` macro which allows including a single file (uncompressed).

There are several crates which allow including compressed files, and even directories.

As far as I know, this crate is the only one which compresses included files as a whole rather than separately.
This approach has a significant disadvantage: To decompress a single file, all files have to be decompressed.
However, it leads to better compression because the compression algorithm can take advantage of similarities between files in addition to similarities within each file.


## Future work

- Compile times suck.
  I'm not sure how the `include_bytes!` macro works, but it _probably_ doesn't just dump a huge bytestring into the AST.
  I'd love to write the data to `OUT_DIR` and then include that blob with `include_bytes!`, but that's [not available in proc macros](https://github.com/rust-lang/cargo/issues/9084).
  As a workaround it may be (more?) useful to generate code and a compressed blob from `build.rs`, then `include!` it from the main code.
- If the assets are big, decompression can be rather slow.
  It may be worth investigating zstandard (and lz4) dictionary compression.
  At compile time, a dictionary can be created by analyzing each asset.
  Then, each asset can be compressed independently using this dictionary.
  Hopefully, this wouldn't result in significantly bigger files than compressing them together.
  The advantage of this approach is that runtime compression could be done in parallel using multiple threads.
  Alternatively, each file could be decompressed as needed, but this is not currently a goal of this crate.
- It may be useful to deduplicate assets based on contents before compression.
  Compression can obviously reduce size but only if the redundant files are not too far apart.
  This can be implemented with a layer of indirection: asset name maps to a blob id, blob id maps to a data range.
  Currently deduplication is best achieved in application code: `archive.get(override_asset).unwrap_or_else(|| archive[fallback_asset])`
- It may be useful to provide options to use other checksum algorithms.
  Possible options: CRC for smaller hashes, SHA256 for (possibly) faster hashing (special CPU instructions!), `[u8; 0]` to effectively disable checking.
  The choice is probably best handled through feature flags.
- Error handling of the macros could use some work, but this is blocked on stable Rust allowing proc macro diagnostics.


## Contributing

Bug reports are very welcome.
Feature requests are also welcome, but no promises.
I do not plan to accept patches at this time.

A  => examples/enums/Cargo.toml +8 -0
@@ 1,8 @@
[package]
name = "enums"
version = "0.1.0"
edition = "2021"

[dependencies]
include_assets = { path = "../../include_assets" }
hexhex = "1"

A  => examples/enums/build.rs +3 -0
@@ 1,3 @@
/// Build script: ask Cargo to re-run the build whenever anything in this
/// directory changes (this example uses its own folder as the asset directory).
fn main() {
    let asset_dir = ".";
    println!("cargo:rerun-if-changed={}", asset_dir);
}

A  => examples/enums/src/main.rs +24 -0
@@ 1,24 @@
use include_assets::{AssetEnum, EnumArchive};

/// Assets embedded in this example binary: one unit variant per file,
/// compressed with zstd at level 5. Asset paths are resolved relative to
/// `base_path`, which here is this example's own directory (".").
#[derive(AssetEnum)]
#[archive(base_path = ".", compression = "zstd", level = 5)]
enum Asset {
    /// This example's own build script.
    #[asset(path = "build.rs")]
    BuildScript,
    /// This example's own source code.
    #[asset(path = "src/main.rs")]
    Main,
    /// Deliberately never constructed, to demonstrate the unused-asset warning.
    #[asset(path = "Cargo.toml")]
    Cargo, // There should be a warning: "variant `Cargo` is never constructed"
}

/// Decompress the embedded assets and demonstrate lookup by enum variant.
fn main() {
    // Load (decompress) all assets declared by `Asset` at once.
    let archive: EnumArchive<Asset> = EnumArchive::load();

    // Raw asset bytes are looked up infallibly by indexing with a variant.
    let main_size = archive[Asset::Main].len();
    println!("main is {} bytes large", main_size);

    // map all files to String for convenience; lookups on the resulting
    // EnumMap work exactly the same way
    let string_archive = archive.map(|data| std::str::from_utf8(data).unwrap().to_owned());

    // Bug fix: the message previously read "builds.rs", but the included
    // asset is "build.rs".
    print!("build.rs for this example:\n{}", string_archive[Asset::BuildScript]);
}

A  => examples/named/Cargo.toml +7 -0
@@ 1,7 @@
[package]
name = "named"
version = "0.1.0"
edition = "2021"

[dependencies]
include_assets = { path = "../../include_assets", features = ["zstd", "deflate", "lz4"] }

A  => examples/named/build.rs +3 -0
@@ 1,3 @@
/// Build script for the `named` example.
///
/// The example includes its own directory as the asset directory, so tell
/// Cargo to rebuild whenever anything in it changes.
fn main() {
    println!("cargo:rerun-if-changed={}", ".");
}

A  => examples/named/src/main.rs +22 -0
@@ 1,22 @@
use include_assets::{include_dir, NamedArchive};

/// Decompress the embedded directory and demonstrate lookup by path name.
fn main() {
    // The path passed to include_dir! is resolved relative to the workspace directory.
    let archive = NamedArchive::load(include_dir!("."));

    // alternative examples:
    //let archive = NamedArchive::load(include_dir!("examples/named/", compression = "uncompressed"));
    //let archive = NamedArchive::load(include_dir!("examples/named/", compression = "lz4", links = "follow"));
    //let archive = NamedArchive::load(include_dir!("examples/named/", compression = "zstd", level = 5));
    //let archive = NamedArchive::load(include_dir!("examples/named/", compression = "deflate", level = 9, links = "forbid"));

    // Enumerate every included asset together with its uncompressed size.
    println!("the following {} assets included in this executable:", archive.number_of_assets());
    for (path, bytes) in archive.assets() {
        println!("{}: {} bytes", path, bytes.len());
    }
    println!();

    // Individual assets are addressed by relative path, here via indexing.
    println!("Source code of this executable:");
    let source = std::str::from_utf8(&archive["src/main.rs"]).unwrap();
    print!("{}", source);
}

A  => include_assets/Cargo.toml +19 -0
@@ 1,19 @@
[package]
name = "include_assets"
version = "1.0.0"
edition = "2021"
license = "LGPL-3.0-only"
description = "include compressed assets in a Rust executable"

[features]
default = ["all"]

all = ["deflate", "lz4", "zstd"]

deflate = ["include_assets_encode/deflate", "include_assets_decode/deflate"]
lz4 = ["include_assets_encode/lz4", "include_assets_decode/lz4"]
zstd = ["include_assets_encode/zstd", "include_assets_decode/zstd"]

[dependencies]
include_assets_decode = { path = "../include_assets_decode", version = "0.1.0" }
include_assets_encode = { path = "../include_assets_encode", version = "0.1.0" }

A  => include_assets/README.md +1 -0
@@ 1,1 @@
../README.md
\ No newline at end of file

A  => include_assets/assets/.gitkeep +1 -0
@@ 1,1 @@
this is used for doctests
\ No newline at end of file

A  => include_assets/assets/hello.txt +1 -0
@@ 1,1 @@
Hello, world!
\ No newline at end of file

A  => include_assets/assets/unused.txt +0 -0
A  => include_assets/other_assets/.gitkeep +1 -0
@@ 1,1 @@
this is used for doctests
\ No newline at end of file

A  => include_assets/src/lib.rs +319 -0
@@ 1,319 @@
/*! # `include_assets` in your executable

This crate provides convenient ways to include assets (arbitrary files) in a Rust executable.
It's like [`std::include_bytes!`] but works for multiple files.

Files are collected into archives, which are compressed at compile time and can be decompressed at runtime.
Archives are ["solid"](https://en.wikipedia.org/wiki/Solid_compression): Instead of compressing each asset independently, assets are first concatenated, then compressed as a whole.
As far as I'm aware, this crate is the only one which does this!
Solid compression leads to smaller sizes since the compression algorithm can take advantage of redundancy between files.
However, all assets must be decompressed at once - if your assets cannot completely fit into main memory at the same time, or startup time is an issue, don't use this crate!

Potential use cases are:
- games shipping with fonts, sprites/textures, sounds, &c.,
- webservers serving static content (HTML templates, pictures, &c.),
- installers, or
- self-extracting archives.

# Include an asset directory and look up data by path name

Arguably the more straightforward approach.
Include an asset directory using the [`include_dir!`] macro.
Load (decompress) it at runtime using [`NamedArchive::load`].

Once loaded, use [`NamedArchive::get`] or `&archive["asset name"]` to look up asset data by name, or iterate through all assets with [`NamedArchive::assets`].

```
use include_assets::{NamedArchive, include_dir};

let archive = NamedArchive::load(include_dir!("assets"));
let hello_asset = archive.get("hello.txt").unwrap(); // Panics at runtime if the asset isn't present!
assert_eq!(hello_asset, b"Hello, world!");
println!("{} assets were included", archive.number_of_assets());
```

For more examples, see [`include_dir!`].


# Include assets and look up data by enum variant

A perhaps less intuitive approach with advantages and disadvantages to the previous one.
Declare an enumeration with one variant for each asset, and `#[derive(AssetEnum)]` with annotations for the (compile-time) path of the asset.
At runtime, load (decompress) the asset archive with the derived `load` method for your enum.
Look up asset data by enum variant using indexing.
Transform raw asset data using [`EnumArchive::map`] and [`EnumArchive::try_map`].

```
use include_assets::EnumArchive;

#[derive(include_assets::AssetEnum)]
#[archive(base_path = "assets")]
enum Asset {
    #[asset(path = "hello.txt")]
    Hello,
    #[asset(path = "unused.txt")]
    Unused, // Unused asset causes a compile-time warning!
}

let archive = EnumArchive::<Asset>::load();
let hello_asset = &archive[Asset::Hello]; // Presence of asset is ensured at compile time!
assert_eq!(hello_asset, b"Hello, world!");

let strings = archive.map(|data| std::str::from_utf8(data).unwrap().to_owned());
assert_eq!(&strings[Asset::Hello], "Hello, world!");
```

As indicated by the code comments, this method has the advantage that use of assets is checked at compile time.
Assets that are not present cannot be used, and unused assets cause compile-time warnings.

On the other hand, assets have to be declared manually, and cannot be iterated over.
(A `map` function is provided though.)

For more examples, see [`EnumArchive`].

# Build script

It is probably a good idea to tell Cargo to rebuild the executable whenever an asset changes.
This can be achieved with a `build.rs` such as:

```
fn main() {
    println!("cargo:rerun-if-changed=path/to/assets/");
    println!("cargo:rerun-if-changed=more/assets");
}
```

# Options

The macros that include assets have a few optional arguments.
These options must always be specified in the form of a `identifier = literal` assignment, where `identifier` is one of the following values:

- `compression`:
   Specifies the compression algorithm to be used.
   The default choice depends on crate features and is not bound by semver.
   It attempts to strike a balance between compression speed, decompression speed, and size reduction.
   The following values are potentially allowed:
   - `"zstd"` (requires feature `zstd`),
   - `"lz4"` (requires feature `lz4`),
   - `"deflate"` (requires feature `deflate`), and
   - `"uncompressed"`. This option should generally not be used except for assets which are already compressed (e.g. JPEG/PNG/FLAC).
- `level`:
  Compression level parameter.
  Meaning and allowed values depend on the chosen compression algorithm.
  Default values are unspecified and not bound by semver.
  - for `compression = "zstd"`:
    Smaller values are generally faster with worse compression quality.
    "Normal" compression levels are `1..=19`, "high" compression levels are `20..=22`, negative values signify "fast" compression levels.
  - for `compression = "lz4"`:
    This argument is not allowed.
  - for `compression = "deflate"`:
    Levels are in `1..=10`. Smaller values are generally faster with marginally worse compression quality.
  - for `compression = "uncompressed"`:
    This argument is not allowed.
- `links`:
  Specifies behaviour when a symbolic link is encountered.
  This option is only available for the [`include_dir!`] macro.
  Valid values are:
  - `links = "forbid"`:
    A compilation error is generated when a symbolic link is encountered.
    This is the default behaviour.
  - `links = "ignore"`:
    Symbolic links are ignored.
    If the link points (directly or indirectly) to a file, this file is not included via the link.
    If the link points to a directory, files in the directory are not included via the link.
  - `links = "follow"`:
    Symbolic links are treated as if they were the target directory or file.

# Limitations

At runtime, main memory needs to be big enough to hold all assets at the same time in compressed and uncompressed form.
At compile time, main memory needs to be big enough to hold all assets at the same time in compressed form and twice in uncompressed form.
(It would be possible to optimize compile time memory use, but if you can only barely compile it, users probably can't run it.)

The total size of each asset archive cannot exceed `u32::MAX` (4 GiB).
Each asset archive can contain at most `u32::MAX` (roughly 4e9) distinct assets.
If your use case exceeds these limits, reconsider if this is really the right approach.

`usize` is required to be at least 32 bits wide.
*/

pub use include_assets_decode::named::NamedArchive;

/// Include all files in a directory in compressed form.
/// At runtime, the files can be decompressed and their contents looked up by relative path name.
///
/// # Usage
///
/// The first argument must be a string literal specifying the path of the directory to be included.
/// This can be an absolute path or a path relative to the [`CARGO_MANIFEST_DIR`](https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-crates).
/// Absolute paths work but should generally be avoided, since they tie the build to a particular machine.
///
/// In addition, any of the options described in the [`crate`] level documentation may be used to specify compression options.
///
/// # Examples
///
/// Include the directory "assets":
///
/// ```
/// use include_assets::{NamedArchive, include_dir};
/// let archive = NamedArchive::load(include_dir!("assets"));
/// println!("{} assets were included", archive.number_of_assets());
/// ```
///
/// Include the directory "assets".
/// Assets will be compressed using zstd at level 5.
/// Symbolic links will be treated as if they were the file/directory pointed to:
///
/// ```
/// use include_assets::{NamedArchive, include_dir};
/// let archive = NamedArchive::load(include_dir!("assets", compression = "zstd", level = 5, links = "follow"));
/// println!("{} assets were included", archive.number_of_assets());
/// ```
///
/// Include the two directories "assets" (compressed with zstd level 22) and "other_assets" (lz4 compressed):
///
/// ```
/// use include_assets::{NamedArchive, include_dir};
/// let archive1 = NamedArchive::load(include_dir!("assets", compression = "zstd", level = 22));
/// let archive2 = NamedArchive::load(include_dir!("other_assets", compression = "lz4"));
/// println!("{} assets were included", archive1.number_of_assets() + archive2.number_of_assets());
/// ```
///
/// # Limitations
///
/// - The directory may only contain files, directories, or symbolic links which point (directly or indirectly) to a file or directory.
///   Special files are not allowed.
/// - Paths must be UTF-8
/// - Paths must not contain null bytes (U+0000)
pub use include_assets_encode::include_dir;

/// Derive the AssetEnum trait.
///
/// The trait should _never_ be implemented or used manually, _only_ with this derive macro.
/// Details (methods, associated types/constants) for the trait are not bound by semver!
///
/// This macro only works for enums.
/// Every enum variant must be unit (i.e. have no fields), and must not have an explicit discriminator.
/// There needs to be an outer attribute `#[archive(base_path = "path")]` on the enum specifying the base path of all assets.
/// This can be an absolute path or a path relative to the [`CARGO_MANIFEST_DIR`](https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-crates).
/// Every variant needs to have an attribute `#[asset(path = "relative path")]` specifying the (compile time) path of the asset relative to the base path.
///
/// Additionally, options described in the [`crate`] level documentation may be added to the outer enum attribute to specify compression options.
///
/// # Examples
///
/// Basic use.
/// Include `"assets/hello.txt"` and `"assets/unused.txt"` allowing lookup by `Asset::Hello` and `Asset::Unused`, respectively.
/// If the variant `Unused` is never used (as in this example), this will cause a compile-time warning.
///
/// ```
/// use include_assets::EnumArchive;
///
/// #[derive(include_assets::AssetEnum)]
/// #[archive(base_path = "assets")]
/// enum Asset {
///     #[asset(path = "hello.txt")]
///     Hello,
///     #[asset(path = "unused.txt")]
///     Unused,
/// }
///
/// let archive = EnumArchive::<Asset>::load();
/// assert_eq!(&archive[Asset::Hello], b"Hello, world!");
/// ```
///
/// For more information on how to use the decompressed assets, see [`EnumArchive`].
///
/// A more specific `derive` with the same assets.
/// Assets will be compressed using zstd at level 5.
/// The enum representation is explicitly chosen as `u8`:
///
/// ```
/// #[derive(include_assets::AssetEnum)]
/// #[archive(base_path = "assets", compression = "zstd", level = 5)]
/// #[repr(u8)]
/// enum Asset {
///     #[asset(path = "hello.txt")]
///     Hello,
///     #[asset(path = "unused.txt")]
///     Unused,
/// }
/// ```
///
/// Assets may not have fields or explicit discriminators:
///
/// ```compile_fail
/// #[derive(include_assets::AssetEnum)]
/// #[archive(base_path = "assets")]
/// enum Asset {
///     #[asset(path = "hello.txt")]
///     Hello(String), // field is not allowed
///     #[asset(path = "hello.txt")]
///     Hello2 { who: String }, // struct-like variant is not allowed
///     #[asset(path = "unused.txt")]
///     Unused = 42, // explicit discriminator is not allowed
/// }
/// ```
pub use include_assets_encode::AssetEnum;

#[doc(hidden)]
pub use include_assets_decode::enums::AssetEnum;

/// Archive holding uncompressed data for an [`AssetEnum`](derive@`AssetEnum`).
///
/// An `AssetEnum` is an `enum` type with unit variants.
/// Each variant corresponds to an asset.
/// An `EnumArchive` for a given `AssetEnum` allows looking up the asset data via indexing.
///
/// Iteration over all assets is not possible, but mapping the data is.
///
/// # Examples
///
/// Include assets and look up data by name:
///
/// ```
/// use include_assets::EnumArchive;
///
/// #[derive(include_assets::AssetEnum)]
/// #[archive(base_path = "assets")]
/// enum Asset {
///     #[asset(path = "hello.txt")]
///     Hello,
///     #[asset(path = "unused.txt")]
///     _Unused,
/// }
///
/// let archive = EnumArchive::<Asset>::load();
/// assert_eq!(&archive[Asset::Hello], b"Hello, world!");
/// ```
///
/// Include data and apply some transformation (here we convert the `&[u8]` data to `String` since all assets are plain text).
/// The transformed data can be looked up by enum variant:
///
/// ```
/// use include_assets::EnumArchive;
///
/// #[derive(include_assets::AssetEnum)]
/// #[archive(base_path = "assets")]
/// enum Asset {
///     #[asset(path = "hello.txt")]
///     Hello,
///     #[asset(path = "unused.txt")]
///     _Unused,
/// }
///
/// let archive = EnumArchive::<Asset>::load().map(|data| std::str::from_utf8(data).unwrap().to_owned());
/// assert_eq!(archive[Asset::Hello].as_str(), "Hello, world!");
/// ```
pub use include_assets_decode::enums::EnumArchive;

/// Map from the variants of an [`AssetEnum`](derive@`AssetEnum`) to values of some type `T`.
///
/// Created by [`EnumArchive::map`] or [`EnumArchive::try_map`]; values are looked up by
/// indexing with an enum variant.
pub use include_assets_decode::enums::EnumMap;

// Compile-time representation of a named archive, embedded by the proc macro.
// Hidden: users load it via `NamedArchive::load` rather than touching it directly.
#[doc(hidden)]
pub use include_assets_decode::named::CompressedNamedArchive;

// Re-exports referenced by macro-generated code (the proc macro emits paths like
// `::include_assets::do_not_use_this_directly::codec::Zstd`). Hidden because, as the
// module name says, these are not part of the supported public API.
#[doc(hidden)]
pub mod do_not_use_this_directly {
    pub use include_assets_decode::checksum::Checksum;
    pub use include_assets_decode::codec;
}

A  => include_assets_decode/Cargo.toml +22 -0
@@ 1,22 @@
[package]
name = "include_assets_decode"
version = "0.1.0"
edition = "2021"
description = "use include_assets instead of this"
license = "LGPL-3.0-only"

[features]
default = []

deflate = ["dep:yazi"]
lz4 = ["dep:lz4_flex"]
zstd = ["dep:zstd"]

[dependencies]
blake2 = "0.10.6"
hexhex = "1.0.0"
smartstring = "1.0.1"

lz4_flex = { version = "0.10.0", optional = true, default-features = false, features = ["std", "safe-encode", "safe-decode"] }
yazi = { version = "0.1.6", optional = true }
zstd = { version = "0.12.3", optional = true, default-features = false }

A  => include_assets_decode/src/checksum.rs +41 -0
@@ 1,41 @@
use blake2::Digest as _;

/// A 64-byte BLAKE2b-512 digest used to verify decompressed asset data.
pub type Checksum = [u8; 64];

/// Compute the BLAKE2b-512 checksum of `data`.
pub fn compute_checksum(data: &[u8]) -> Checksum {
    let digest = blake2::Blake2b512::digest(data);
    digest.try_into().expect("blake2b output is 64 byte long")
}

/// Error returned by [`check`] when a computed checksum differs from the expected one.
pub struct Mismatch {
    // the checksum the caller expected
    expected: Checksum,
    // the checksum actually computed over the data
    actual: Checksum,
}

impl core::fmt::Display for Mismatch {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(
            f,
            "Checksum mismatch: expected {}, got {}",
            hexhex::Hex::new(self.expected),
            hexhex::Hex::new(self.actual)
        )
    }
}

// Debug delegates to Display so that `unwrap`/`expect` failures print both hex digests.
impl core::fmt::Debug for Mismatch {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        <Self as core::fmt::Display>::fmt(self, f)
    }
}

impl std::error::Error for Mismatch {}

/// Verify that `data` hashes to `expected`.
///
/// Returns `Ok(())` on a match, otherwise a [`Mismatch`] carrying both digests.
// The error variant holds two 64-byte digests, so the Result is large; this is deliberate.
#[allow(clippy::result_large_err)]
pub fn check(data: &[u8], expected: &Checksum) -> Result<(), Mismatch> {
    let actual = compute_checksum(data);
    if &actual == expected {
        Ok(())
    } else {
        Err(Mismatch { expected: *expected, actual })
    }
}

A  => include_assets_decode/src/codec.rs +208 -0
@@ 1,208 @@
/// Compression codec for the `include_assets` crate
///
/// Implementors provide symmetric `compress`/`decompress` over byte slices.
/// The uncompressed length is always known to the caller, so decompression
/// writes into a caller-sized buffer rather than growing one.
pub trait Codec {
    /// Errors that might occur during compression
    type CompressionError: std::error::Error + Send + Sync + 'static; // Send + Sync + 'static is for use with the anyhow crate.
    /// Errors that might occur during decompression
    type DecompressionError: std::error::Error + Send + Sync + 'static;

    /// Compress data to a newly allocated vector.
    fn compress(&self, data: &[u8]) -> Result<std::vec::Vec<u8>, Self::CompressionError>;

    /// Decompress data in `src` to `dst`.
    ///
    /// Fails if the length of `dst` doesn't exactly match the length of the uncompressed data.
    ///
    /// If decompression fails for any reason, the contents of `dst` are unspecified.
    fn decompress_checked(&self, src: &[u8], dst: &mut [u8]) -> Result<(), Self::DecompressionError>;

    /// Like [`Codec::decompress_checked`], but panics on error.
    fn decompress(&self, src: &[u8], dst: &mut [u8]) {
        self.decompress_checked(src, dst).expect("decompression should succeed")
    }

    /// Decompresses data into a new vector with the given length.
    /// Panics on error.
    // Convenience over `decompress`: allocates a zeroed buffer of exactly `len` bytes.
    fn decompress_with_length(&self, src: &[u8], len: usize) -> std::vec::Vec<u8> {
        let mut dst = vec![0u8; len];
        self.decompress(src, &mut dst);
        dst
    }
}

/// No compression whatsoever
#[derive(Debug, Clone, Copy)]
pub struct Uncompressed {}

/// Error for [`Uncompressed`] decompression: `src` and `dst` lengths differ.
#[derive(Debug, Clone, Copy)]
pub struct UncompressedSizeMismatch {
    // length of the destination buffer (what the caller expected)
    expected: usize,
    // length actually available/produced
    actual: usize,
}

impl core::fmt::Display for UncompressedSizeMismatch {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(f, "unexpected uncompressed size: expected {}, got {}", self.expected, self.actual)
    }
}

impl std::error::Error for UncompressedSizeMismatch {}

impl Codec for Uncompressed {
    // Copying bytes cannot fail.
    type CompressionError = std::convert::Infallible;
    type DecompressionError = UncompressedSizeMismatch;

    fn compress(&self, data: &[u8]) -> Result<std::vec::Vec<u8>, Self::CompressionError> {
        Ok(std::vec::Vec::from(data))
    }

    /// Copy data from `src` to `dst`
    ///
    /// This can only fail if the lengths of `src` and `dst` do not match.
    fn decompress_checked(&self, src: &[u8], dst: &mut [u8]) -> Result<(), Self::DecompressionError> {
        if src.len() != dst.len() {
            return Err(UncompressedSizeMismatch {
                expected: dst.len(),
                actual: src.len(),
            });
        }
        dst.copy_from_slice(src);
        Ok(())
    }
}

#[cfg(feature = "lz4")]
/// lz4 block compression
///
/// Uses `lz4_flex` block (not frame) format; no compression levels are exposed.
#[derive(Debug, Clone, Copy)]
pub struct Lz4 {}

#[cfg(feature = "lz4")]
impl Codec for Lz4 {
    type CompressionError = std::convert::Infallible;
    type DecompressionError = lz4_flex::block::DecompressError;

    fn compress(&self, data: &[u8]) -> Result<std::vec::Vec<u8>, Self::CompressionError> {
        Ok(lz4_flex::block::compress(data))
    }

    fn decompress_checked(&self, src: &[u8], dst: &mut [u8]) -> Result<(), Self::DecompressionError> {
        // `decompress_into` reports how many bytes it produced; anything other
        // than filling `dst` exactly is treated as an error.
        let produced = lz4_flex::block::decompress_into(src, dst)?;
        if produced == dst.len() {
            Ok(())
        } else {
            Err(lz4_flex::block::DecompressError::UncompressedSizeDiffers {
                expected: dst.len(),
                actual: produced,
            })
        }
    }
}

#[cfg(feature = "zstd")]
/// zstd compression
#[derive(Debug, Clone, Copy)]
pub struct Zstd {
    /// Zstd compression level.
    ///
    /// Higher is better compression with slower speed.
    /// Level 5 is recommended.
    pub level: i32,
}

#[cfg(feature = "zstd")]
impl Codec for Zstd {
    // The zstd bulk API reports failures as io::Error on both paths.
    type CompressionError = std::io::Error;
    type DecompressionError = std::io::Error;

    fn compress(&self, data: &[u8]) -> Result<std::vec::Vec<u8>, Self::CompressionError> {
        zstd::bulk::compress(data, self.level)
    }

    fn decompress_checked(&self, src: &[u8], dst: &mut [u8]) -> Result<(), Self::DecompressionError> {
        let produced = zstd::bulk::decompress_to_buffer(src, dst)?;
        if produced == dst.len() {
            return Ok(());
        }
        // Wrap the size mismatch in an io::Error to fit the associated error type.
        let mismatch = UncompressedSizeMismatch {
            expected: dst.len(),
            actual: produced,
        };
        Err(std::io::Error::new(std::io::ErrorKind::Other, mismatch))
    }
}

#[cfg(feature = "deflate")]
/// raw DEFLATE compression (no wrapper format)
#[derive(Debug, Clone, Copy)]
pub struct Deflate {
    /// Compression level
    ///
    /// Higher is better compression with slower speed.
    /// 0 is uncompressed, 10 is the maximum.
    pub level: u8,
}

#[cfg(feature = "deflate")]
/// yazi::Error doesn't implement std::error::Error, so we wrap it and implement it ourselves
pub struct YaziError(yazi::Error);

#[cfg(feature = "deflate")]
// Debug defers to the wrapped error's Debug output.
impl core::fmt::Debug for YaziError {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        <yazi::Error as core::fmt::Debug>::fmt(&self.0, f)
    }
}

#[cfg(feature = "deflate")]
// Display spells out each yazi error variant in human-readable form.
impl core::fmt::Display for YaziError {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        match &self.0 {
            yazi::Error::Underflow => write!(f, "yazi error: not enough input was provided"),
            yazi::Error::InvalidBitstream => write!(f, "yazi error: invalid bitstream"),
            yazi::Error::Overflow => write!(f, "yazi error: output buffer was too small"),
            yazi::Error::Finished => write!(f, "yazi error: attempted to write into a finished stream"),
            yazi::Error::Io(err) => write!(f, "yazi io error: {}", err),
        }
    }
}

#[cfg(feature = "deflate")]
impl std::error::Error for YaziError {}

#[cfg(feature = "deflate")]
impl Codec for Deflate {
    type CompressionError = YaziError;
    type DecompressionError = YaziError;

    fn compress(&self, data: &[u8]) -> Result<std::vec::Vec<u8>, Self::CompressionError> {
        yazi::compress(data, yazi::Format::Raw, yazi::CompressionLevel::Specific(self.level)).map_err(YaziError)
    }

    fn decompress_checked(&self, src: &[u8], dst: &mut [u8]) -> Result<(), Self::DecompressionError> {
        let mut decoder = yazi::Decoder::new();
        let mut stream = decoder.stream_into_buf(dst);
        // Write compressed bytes into the decoder stream.
        // This will finish successfully once all bytes are decoded.
        // It will finish with an error if the destination buffer is too short.
        let compressed_written = std::io::copy(&mut std::io::Cursor::new(src), &mut stream)
            .map_err(yazi::Error::Io)
            .map_err(YaziError)?;
        // `io::copy` returns the number of bytes read from `src` (i.e. *compressed*
        // input consumed), not the number of bytes produced — the message reflects that.
        assert_eq!(
            usize::try_from(compressed_written),
            Ok(src.len()),
            "number of compressed bytes consumed should equal compressed size"
        );
        // Flush remaining uncompressed output
        let (uncompressed_size, checksum) = stream.finish().map_err(YaziError)?;
        assert!(checksum.is_none(), "checksum should not be present for raw stream");
        // Check if the output buffer has been fully overwritten
        if usize::try_from(uncompressed_size) != Ok(dst.len()) {
            Err(YaziError(yazi::Error::Underflow))
        } else {
            Ok(())
        }
    }
}

A  => include_assets_decode/src/common.rs +43 -0
@@ 1,43 @@
use crate::codec::Codec;

// Compile-time guard: refuse to build on targets where `usize` is narrower than
// 32 bits, because u32 values are converted to usize without checks below.
const _I_DONT_CARE_ABOUT_16_BIT_TARGETS: () = {
    if std::mem::size_of::<usize>() < 4 {
        panic!("yeah, no, this ain't gonna fly")
    }
};

/// Widen a `u32` to `usize`.
///
/// Lossless on every supported target thanks to the compile-time size guard above.
#[inline(always)]
pub fn u32_to_usize(value: u32) -> usize {
    value as usize
}

/// Convert a `Range<u32>` (or anything borrowable as one) into a `Range<usize>`.
#[inline(always)]
pub fn u32_to_usize_range<R: core::borrow::Borrow<core::ops::Range<u32>>>(range: R) -> core::ops::Range<usize> {
    let r: &core::ops::Range<u32> = range.borrow();
    (r.start as usize)..(r.end as usize)
}

/// Decompress a list of per-entry lengths and turn it into cumulative byte ranges.
///
/// The compressed payload holds one little-endian `u32` length per entry; the
/// returned ranges are contiguous, starting at 0, each one beginning where the
/// previous one ended.
pub fn decompress_ranges<C: Codec>(codec: &C, compressed_lengths: &[u8], number_of_entries: usize) -> std::vec::Vec<core::ops::Range<u32>> {
    let decompressed_len = number_of_entries
        .checked_mul(4)
        .expect("multiplication should not overflow at runtime because it would have overflowed at compile time already");
    let length_bytes = codec.decompress_with_length(compressed_lengths, decompressed_len);
    let mut ranges = std::vec::Vec::<std::ops::Range<u32>>::with_capacity(number_of_entries);
    // Running cursor: start of the next range is the end of the previous one.
    let mut cursor: u32 = 0;
    for chunk in length_bytes.chunks(4) {
        let len = u32::from_le_bytes(chunk.try_into().expect("length is divisible by 4"));
        let end = cursor
            .checked_add(len)
            .expect("overflow should have been caught during construction at compile time");
        ranges.push(cursor..end);
        cursor = end;
    }
    ranges
}

/// Decompress the null-delimited name blob into individual asset names.
///
/// Names are expected to be UTF-8 and separated (not terminated) by single
/// null bytes (U+0000).
pub fn decompress_names<C: Codec>(
    codec: &C,
    compressed_names_with_null_delimiters: &[u8],
    decompressed_len: u32,
) -> std::vec::Vec<smartstring::SmartString<smartstring::LazyCompact>> {
    let raw = codec.decompress_with_length(compressed_names_with_null_delimiters, u32_to_usize(decompressed_len));
    raw.split(|byte| *byte == 0)
        .map(|bytes| std::str::from_utf8(bytes).expect("names should be UTF-8").into())
        .collect()
}

A  => include_assets_decode/src/enums.rs +129 -0
@@ 1,129 @@
use crate::checksum::{check, Checksum};
use crate::codec::Codec;
use crate::common::u32_to_usize;

/// Trait for assets that can be looked up by enum.
///
/// This should _never_ be implemented manually, only derived.
pub trait AssetEnum: Sized {
    /// Compressed asset data
    const DATA: &'static [u8];

    /// Position of the end of the asset data for each enum variant within the uncompressed combined data.
    const DATA_END_OFFSETS: &'static [u32];

    /// Checksums for all assets
    const CHECKSUMS: &'static [Checksum];

    /// Type of compression codec
    type C: Codec;

    /// Compression codec with which to decompress the asset data
    const CODEC: Self::C;

    /// This method should map an enum variant to its discriminator (via `as` casting).
    ///
    /// The reason this exists is that the `Index` implementation for [`EnumArchive`] cannot perform this cast (because it doesn't know that implementers are enums)
    fn index(self) -> usize;

    /// Load (decompress) compressed data for this enum.
    fn load() -> EnumArchive<Self> {
        // Total uncompressed size is the last end offset (0 if there are no assets).
        let mut data = vec![0u8; u32_to_usize(Self::DATA_END_OFFSETS.last().copied().unwrap_or(0))];
        Self::CODEC.decompress(Self::DATA, &mut data);
        let result = EnumArchive {
            data,
            _spooky: core::marker::PhantomData,
        };
        // Verify every asset against its recorded checksum; panics on corruption.
        for i in 0..Self::CHECKSUMS.len() {
            check(result.lookup(i), &Self::CHECKSUMS[i]).expect("checksum should match");
        }

        result
    }
}

// Archive holding uncompressed data for an AssetEnum.
// User-facing documentation is in the include_assets crate.
pub struct EnumArchive<E> {
    // concatenation of all assets' uncompressed bytes; asset boundaries come
    // from E::DATA_END_OFFSETS
    data: std::vec::Vec<u8>,
    // ties the archive to its AssetEnum type without storing a value of it
    _spooky: core::marker::PhantomData<E>,
}

impl<E: AssetEnum> EnumArchive<E> {
    /// Decompress the archive for `E`; equivalent to [`AssetEnum::load`].
    pub fn load() -> Self {
        E::load()
    }

    /// Slice of the uncompressed data belonging to asset number `i`.
    fn lookup(&self, i: usize) -> &[u8] {
        // Asset i spans from the previous asset's end offset (0 for the first
        // asset) up to its own end offset.
        let start = match i.checked_sub(1) {
            Some(previous) => u32_to_usize(E::DATA_END_OFFSETS[previous]),
            None => 0,
        };
        let end = u32_to_usize(E::DATA_END_OFFSETS[i]);
        &self.data[start..end]
    }

    /// Apply the mapping function to the asset data.
    pub fn map<T, F: Fn(&[u8]) -> T>(&self, f: F) -> EnumMap<E, T> {
        let mut mapped = std::vec::Vec::with_capacity(E::CHECKSUMS.len());
        for i in 0..E::CHECKSUMS.len() {
            mapped.push(f(self.lookup(i)));
        }
        EnumMap {
            data: mapped,
            _spooky: core::marker::PhantomData,
        }
    }

    /// Apply a fallible mapping function to asset data and return an enum map if each invocation succeeds, or an `Err` otherwise.
    pub fn try_map<T, Err, F: Fn(&[u8]) -> Result<T, Err>>(&self, f: F) -> Result<EnumMap<E, T>, Err> {
        let mut mapped = std::vec::Vec::with_capacity(E::CHECKSUMS.len());
        for i in 0..E::CHECKSUMS.len() {
            mapped.push(f(self.lookup(i))?);
        }
        Ok(EnumMap {
            data: mapped,
            _spooky: core::marker::PhantomData,
        })
    }
}

impl<E: AssetEnum> core::ops::Index<E> for EnumArchive<E> {
    type Output = [u8];

    /// Look up the asset data corresponding to the enum variant
    // Takes the variant by value; AssetEnum::index maps it to its discriminator.
    fn index(&self, e: E) -> &[u8] {
        self.lookup(e.index())
    }
}

/// A structure which holds a value of some type `T` for each variant of an [`AssetEnum`]
pub struct EnumMap<E: AssetEnum, T> {
    // one value per enum variant, indexed by the variant's discriminator
    data: std::vec::Vec<T>,
    // ties the map to its AssetEnum type without storing a value of it
    _spooky: core::marker::PhantomData<E>,
}

impl<E: AssetEnum, T> EnumMap<E, T> {
    /// Apply the mapping function to the asset data.
    pub fn map<U, F: Fn(&T) -> U>(&self, f: F) -> EnumMap<E, U> {
        let mut mapped = std::vec::Vec::with_capacity(E::CHECKSUMS.len());
        for i in 0..E::CHECKSUMS.len() {
            mapped.push(f(&self.data[i]));
        }
        EnumMap {
            data: mapped,
            _spooky: core::marker::PhantomData,
        }
    }

    /// Apply a fallible mapping function to asset data and return an enum map if each invocation succeeds, or an `Err` otherwise.
    pub fn try_map<U, Err, F: Fn(&T) -> Result<U, Err>>(&self, f: F) -> Result<EnumMap<E, U>, Err> {
        let mut mapped = std::vec::Vec::with_capacity(E::CHECKSUMS.len());
        for i in 0..E::CHECKSUMS.len() {
            mapped.push(f(&self.data[i])?);
        }
        Ok(EnumMap {
            data: mapped,
            _spooky: core::marker::PhantomData,
        })
    }
}

impl<E: AssetEnum, T> core::ops::Index<E> for EnumMap<E, T> {
    type Output = T;

    /// Look up the value for the given enum variant
    fn index(&self, e: E) -> &T {
        &self.data[e.index()]
    }
}

impl<E: AssetEnum, T> core::ops::IndexMut<E> for EnumMap<E, T> {
    /// Provide an exclusive reference to the value for the given enum variant
    fn index_mut(&mut self, e: E) -> &mut T {
        &mut self.data[e.index()]
    }
}

A  => include_assets_decode/src/lib.rs +8 -0
@@ 1,8 @@
//! Runtime (decoding) half of the `include_assets` crates.
//!
//! This crate is an implementation detail; depend on `include_assets` instead
//! (see the package description).
#![forbid(unsafe_code)]

// integer/range conversion helpers and shared index decompression
pub mod common;

pub mod checksum;
pub mod codec;
pub mod enums;
pub mod named;

A  => include_assets_decode/src/named.rs +132 -0
@@ 1,132 @@
use crate::checksum;
// Named asset archives provide maps from path name to asset content.
// This module contains functionality specific to this kind of asset archive.
use crate::codec::Codec;

use crate::common::{decompress_names, decompress_ranges, u32_to_usize, u32_to_usize_range};

/// Compressed named archive
///
/// Contains the compressed asset data and all information required to uncompress it.
///
/// Users should only create these archives via the `include_dir!` macro and only read or access them via [`NamedArchive::load`].
#[derive(Clone, Copy)]
pub struct CompressedNamedArchive<C: Codec> {
    /// Compression codec with which the data was compressed
    pub codec: C,

    /// Raw compressed data
    pub data: &'static [u8],

    /// Size of the data after decompression.
    /// Limited to at most 4 GiB.
    pub uncompressed_data_size: u32,

    /// Names of the assets in some order, separated by null bytes (U+0000)
    ///
    /// The order needs to match the order of blobs in the uncompressed archive data.
    /// The final name is _not_ null-terminated.
    ///
    /// Names are currently sorted such that all files in a directory are sorted.
    /// This is for two reasons:
    /// - It likely leads to better compression if all names with the same (path) prefix are close together, and
    /// - It makes reproducible builds easier since we don't rely on file system iteration order.
    pub compressed_names: &'static [u8],

    /// Total length of the uncompressed names (including separating null bytes)
    pub uncompressed_names_size: u32,

    /// List of asset checksums in the same order as [`CompressedNamedArchive::compressed_names`].
    pub checksums: &'static [checksum::Checksum],

    /// Per-asset uncompressed data sizes, stored compressed.
    ///
    /// Once uncompressed, these will be `u32`s (little endian) in the same order as [`CompressedNamedArchive::compressed_names`].
    /// The sum of all sizes equals [`CompressedNamedArchive::uncompressed_data_size`].
    pub compressed_sizes: &'static [u8],
}

/// Unpacked archive of named assets
///
/// Can be used to look up assets by name (i.e. path).
pub struct NamedArchive {
    // concatenated uncompressed asset contents
    data: std::vec::Vec<u8>,
    // maps each asset name to its byte range within `data`
    ranges: std::collections::HashMap<smartstring::SmartString<smartstring::LazyCompact>, std::ops::Range<u32>>,
}

impl NamedArchive {
    /// Load (decompress) compressed asset archive at runtime
    ///
    /// # Panics
    ///
    /// Panics if loading fails.
    /// This is only possible in the case of internal bugs, assuming that the compressed asset were created with the `include_dir!` macro.
    pub fn load<C: Codec>(compressed: CompressedNamedArchive<C>) -> Self {
        let CompressedNamedArchive {
            codec,
            data: compressed_data,
            uncompressed_data_size,
            compressed_names,
            uncompressed_names_size,
            checksums,
            compressed_sizes,
        } = compressed;

        // decompress data
        let data = codec.decompress_with_length(compressed_data, u32_to_usize(uncompressed_data_size));

        // decompress names and data ranges
        let names = decompress_names(&codec, compressed_names, uncompressed_names_size);
        let ranges = decompress_ranges(&codec, compressed_sizes, checksums.len());
        assert_eq!(names.len(), ranges.len(), "number of asset names should equal number of asset data ranges");

        // Data ranges were constructed in decompress_ranges.
        // We know that they are all non-overlapping, increasing, and don't leave any space.
        // We know the first range starts at 0.
        // The final range should end where the data ends.
        assert_eq!(ranges.last().map(|range| range.end).unwrap_or(0), uncompressed_data_size);

        // Names and ranges are in matching order, so zipping pairs them correctly.
        let ranges: std::collections::HashMap<_, _> = names.into_iter().zip(ranges.into_iter()).collect();

        Self { data, ranges }
    }

    /// Get the content of the asset with the given `name`.
    ///
    /// Returns `None` if the archive does not contain an asset with this `name`.
    pub fn get<'a>(&'a self, name: &str) -> Option<&'a [u8]> {
        self.ranges.get(name).map(|range| &self.data[u32_to_usize_range(range)])
    }

    /// Returns the number of assets included in the archive.
    pub fn number_of_assets(&self) -> usize {
        self.ranges.len()
    }

    /// Returns an iterator of all asset names and contents in unspecified order.
    pub fn assets(&self) -> impl Iterator<Item = (&str, &[u8])> + ExactSizeIterator + '_ {
        self.ranges.iter().map(|(name, range)| (name.as_ref(), &self.data[u32_to_usize_range(range)]))
    }

    /// Returns true if an asset with the given `name` is included in the archive.
    pub fn contains(&self, name: &str) -> bool {
        self.get(name).is_some()
    }

    /// Returns an iterator of all asset names in unspecified order.
    pub fn names(&self) -> impl Iterator<Item = &str> + ExactSizeIterator + '_ {
        self.ranges.keys().map(|s| s.as_ref())
    }
}

impl<S: AsRef<str>> core::ops::Index<S> for NamedArchive {
    type Output = [u8];

    /// Return the contents of the asset with the given name.
    /// Panics if the asset is not present.
    fn index(&self, s: S) -> &[u8] {
        let name = s.as_ref();
        self.get(name).unwrap_or_else(|| panic!("asset '{}' not found", name))
    }
}

A  => include_assets_encode/Cargo.toml +26 -0
@@ 1,26 @@
[package]
name = "include_assets_encode"
version = "0.1.0"
edition = "2021"
description = "use include_assets instead of this"
license = "LGPL-3.0-only"

[lib]
proc-macro = true

[features]
default = []

deflate = ["include_assets_decode/deflate"]
lz4 = ["include_assets_decode/lz4"]
zstd = ["include_assets_decode/zstd"]

[dependencies]
anyhow = "1.0.71"
include_assets_decode = { path = "../include_assets_decode", version = "0.1.0" }
proc-macro2 = "1.0.59"
quote = "1.0.28"
smartstring = "1.0.1"
syn = { version = "2.0.17", features = ["full"] }
thiserror = "1.0.40"
walkdir = "2.3.3"

A  => include_assets_encode/src/common.rs +168 -0
@@ 1,168 @@
use anyhow::Context as _;

use include_assets_decode::checksum::compute_checksum;
use include_assets_decode::codec::Codec;

/// Serialize per-asset sizes as little-endian `u32`s and compress the result.
///
/// Fails if any single asset is larger than `u32::MAX` bytes, if the serialized
/// size table itself exceeds 4 GiB, or if the codec reports an error.
pub fn compress_sizes<C: Codec + ?Sized, S: AsRef<str>, I: Iterator<Item = (S, usize)>>(codec: &C, sizes: I) -> anyhow::Result<std::vec::Vec<u8>> {
    let mut encoded = std::vec::Vec::new();
    for (name, size) in sizes {
        let size_u32: u32 = size
            .try_into()
            .with_context(|| format!("asset {} is too big ({} bytes)", name.as_ref(), size))?;
        encoded.extend_from_slice(&size_u32.to_le_bytes());
    }
    // ensure that the uncompressed lengths aren't longer than 4 GiB (i.e. the length fits in a u32)
    if u32::try_from(encoded.len()).is_err() {
        return Err(anyhow::Error::msg(format!(
            "too many assets: size of uncompressed asset sizes is too big ({} bytes)",
            encoded.len()
        )));
    }
    codec.compress(encoded.as_slice()).context("couldn't compress asset data sizes")
}

pub fn compress_names<C: Codec + ?Sized, S: AsRef<str>, I: Iterator<Item = S>>(codec: &C, mut names: I) -> anyhow::Result<(std::vec::Vec<u8>, u32)> {
    let mut uncompressed_names = vec![];
    if let Some(first) = names.next() {
        assert!(!first.as_ref().as_bytes().contains(&0));
        uncompressed_names.extend_from_slice(first.as_ref().as_bytes());
        for name in names {
            assert!(!name.as_ref().as_bytes().contains(&0));
            uncompressed_names.extend_from_slice(&[0u8]);
            uncompressed_names.extend_from_slice(name.as_ref().as_bytes());
        }
    };
    let uncompressed_size = u32::try_from(uncompressed_names.len())
        .map_err(|_| anyhow::Error::msg(format!("uncompressed names are too long ({} bytes)", uncompressed_names.len())))?;
    let compressed_names = codec.compress(uncompressed_names.as_slice()).context("couldn't compress asset names")?;
    Ok((compressed_names, uncompressed_size))
}

/// Wrapper for `anyhow::Error`, required because `anyhow::Error` doesn't `impl std::error::Error`.
#[derive(thiserror::Error, Debug)]
#[error(transparent)]
pub struct MyError(anyhow::Error);

/// Adapter that erases a concrete codec's error types to [`MyError`],
/// so codecs can be boxed as uniform trait objects (see `parse_codec`).
pub struct DynCodec<C> {
    // the wrapped concrete codec
    codec: C,
}

impl<C> DynCodec<C> {
    /// Wrap a concrete codec.
    pub fn new(codec: C) -> Self {
        Self { codec }
    }
}

impl<C: Codec> Codec for DynCodec<C> {
    type CompressionError = MyError;
    type DecompressionError = MyError;

    fn compress(&self, data: &[u8]) -> Result<std::vec::Vec<u8>, MyError> {
        self.codec.compress(data).map_err(anyhow::Error::msg).map_err(MyError)
    }

    fn decompress_checked(&self, src: &[u8], dst: &mut [u8]) -> Result<(), MyError> {
        self.codec.decompress_checked(src, dst).map_err(anyhow::Error::msg).map_err(MyError)
    }
}

/// Resolve the `compression`/`level` attribute literals into a usable codec.
///
/// Returns the boxed codec for compile-time compression, plus two token streams:
/// an expression constructing the codec and its type path, both spelled via
/// `::include_assets::do_not_use_this_directly` for use in generated code.
///
/// Panics on unknown codec names, invalid levels, or levels given to codecs
/// that do not support them (proc-macro context, so panicking is the error path).
pub fn parse_codec(
    compression: Option<syn::Lit>,
    level: Option<syn::Lit>,
) -> (
    Box<dyn Codec<CompressionError = MyError, DecompressionError = MyError>>,
    proc_macro2::TokenStream,
    proc_macro2::TokenStream,
) {
    // No explicit choice: pick the first compiled-in codec, preferring zstd,
    // then lz4, then deflate, falling back to uncompressed.
    let compression_string = if let Some(lit) = compression {
        if let syn::Lit::Str(s) = lit {
            s.value()
        } else {
            panic!("invalid compression option (expected a string literal)");
        }
    } else {
        let available = [
            #[cfg(feature = "zstd")]
            "zstd",
            #[cfg(feature = "lz4")]
            "lz4",
            #[cfg(feature = "deflate")]
            "deflate",
            "uncompressed",
        ];
        available[0].to_owned()
    };

    match &compression_string[..] {
        "uncompressed" => {
            if level.is_some() {
                panic!("compression 'uncompressed' does not have levels");
            } else {
                let codec = DynCodec::new(include_assets_decode::codec::Uncompressed {});
                let expr = quote::quote! { ::include_assets::do_not_use_this_directly::codec::Uncompressed{} };
                let type_expr = quote::quote! { ::include_assets::do_not_use_this_directly::codec::Uncompressed };
                let boxed_codec: Box<dyn Codec<CompressionError = MyError, DecompressionError = MyError>> = Box::new(codec);
                (boxed_codec, expr, type_expr)
            }
        }
        #[cfg(feature = "lz4")]
        "lz4" => {
            if level.is_some() {
                panic!("compression 'lz4' does not (currently) support levels");
            } else {
                let codec = DynCodec::new(include_assets_decode::codec::Lz4 {});
                let expr = quote::quote! {::include_assets::do_not_use_this_directly::codec::Lz4{} };
                let type_expr = quote::quote! { ::include_assets::do_not_use_this_directly::codec::Lz4 };
                let boxed_codec: Box<dyn Codec<CompressionError = MyError, DecompressionError = MyError>> = Box::new(codec);
                (boxed_codec, expr, type_expr)
            }
        }
        #[cfg(feature = "deflate")]
        "deflate" => {
            // default deflate level is 2 when none is given
            let level: u8 = match level {
                None => 2,
                Some(syn::Lit::Int(int)) => {
                    if let Ok(n) = int.base10_parse() {
                        n
                    } else {
                        panic!("Invalid compression level {}", int);
                    }
                }
                _ => panic!("Invalid compression level"),
            };
            let codec = DynCodec::new(include_assets_decode::codec::Deflate { level });
            let expr = quote::quote! {::include_assets::do_not_use_this_directly::codec::Deflate{ level: #level } };
            let type_expr = quote::quote! { ::include_assets::do_not_use_this_directly::codec::Deflate };
            let boxed_codec: Box<dyn Codec<CompressionError = MyError, DecompressionError = MyError>> = Box::new(codec);
            (boxed_codec, expr, type_expr)
        }
        #[cfg(feature = "zstd")]
        "zstd" => {
            // default zstd level is 5 when none is given
            let level: i32 = match level {
                None => 5,
                Some(syn::Lit::Int(int)) => {
                    if let Ok(n) = int.base10_parse() {
                        n
                    } else {
                        panic!("Invalid compression level {}", int);
                    }
                }
                _ => panic!("Invalid compression level"),
            };
            let codec = DynCodec::new(include_assets_decode::codec::Zstd { level });
            // NOTE(review): only this branch uses quote_spanned with mixed_site while the
            // others use plain quote! — looks inconsistent; confirm whether intentional.
            let expr = quote::quote_spanned! {proc_macro2::Span::mixed_site()=> ::include_assets::do_not_use_this_directly::codec::Zstd{ level: #level } };
            let type_expr = quote::quote! { ::include_assets::do_not_use_this_directly::codec::Zstd };
            let boxed_codec: Box<dyn Codec<CompressionError = MyError, DecompressionError = MyError>> = Box::new(codec);
            (boxed_codec, expr, type_expr)
        }
        s => panic!("invalid/unsupported compression '{s}'"),
    }
}

/// Build the token stream for a `&[Checksum]` literal, one checksum per asset.
pub fn checksums_tokens<T: AsRef<[u8]>, I: Iterator<Item = T>>(asset_data: I) -> proc_macro2::TokenStream {
    let entries: std::vec::Vec<proc_macro2::TokenStream> = asset_data
        .map(|data| {
            let checksum = compute_checksum(data.as_ref());
            quote::quote! { [#(#checksum),*] }
        })
        .collect();
    quote::quote! {&[#(#entries),*]}
}

A  => include_assets_encode/src/enums.rs +129 -0
@@ 1,129 @@
use include_assets_decode::codec::Codec;

/// Options extracted from the attributes of an enum deriving `AssetEnum`.
pub struct AssetEnumOptions {
    /// Name of the enum the derive macro is applied to.
    pub enum_name: syn::Ident,
    /// Directory all variant paths are resolved against (`base_path = "..."`, mandatory).
    pub base_path: syn::LitStr,
    /// Raw `compression = ...` literal, if given; interpreted later by the codec parser.
    pub compression_lit: Option<syn::Lit>,
    /// Raw `level = ...` literal, if given; interpreted later by the codec parser.
    pub level_lit: Option<syn::Lit>,
    /// One `path = "..."` literal per variant, in declaration order.
    pub variant_paths: std::vec::Vec<syn::LitStr>,
}

/// Validate the annotated enum and extract the archive/asset options.
///
/// Checks performed:
/// * the outer `#[archive(...)]` attribute is a `key = literal` list drawn from
///   `base_path`, `compression`, `level`; `base_path` is mandatory,
/// * every variant is a unit variant without an explicit discriminant
///   (discriminants must be exactly 0..N because they are used as indices),
/// * every variant carries exactly one `#[asset(path = "...")]` attribute.
///
/// Panics with a user-facing message on any violation; in the proc-macro
/// context a panic surfaces as a compile error.
pub fn check_enum_and_return_options(e: syn::ItemEnum) -> AssetEnumOptions {
    // check outer attributes of the enum
    let mut opts = std::collections::HashMap::new();
    for attr in e.attrs.iter() {
        match &attr.meta {
            syn::Meta::Path(path) => {
                if path.is_ident("archive") || path.is_ident("asset") {
                    panic!("path style attribute is not supported");
                }
                // otherwise ignore
            }
            syn::Meta::List(list) => {
                if list.path.is_ident("archive") {
                    let kv_opts: crate::parse::KVList = syn::parse2(list.tokens.clone()).expect("couldn't parse arguments of 'archive' attribute");
                    for (k, v) in crate::parse::kv_args_to_hashmap(kv_opts.kvs.into_iter(), ["base_path", "compression", "level"].into_iter().collect()) {
                        // kv_args_to_hashmap rejects duplicates within one attribute;
                        // this rejects repeats across multiple #[archive(...)] attributes,
                        // which previously silently overwrote the earlier value.
                        if opts.insert(k, v).is_some() {
                            panic!("Duplicate option {k}");
                        }
                    }
                } else if list.path.is_ident("asset") {
                    panic!("invalid attribute 'asset' for AssetEnum");
                } else {
                    // ignore
                }
            }
            syn::Meta::NameValue(namevalue) => {
                let path = &namevalue.path;
                match path.get_ident() {
                    Some(s) if s == "archive" || s == "asset" => {
                        panic!("{s} = value style attribute is not supported");
                    }
                    _ => {} // ignore
                }
            }
        }
    }

    let base_path = match opts.remove("base_path") {
        None => panic!("attribute base_path is missing"),
        Some(lit) => match lit {
            syn::Lit::Str(s) => s,
            _ => panic!("unexpected value for attribute base_path, expected a string literal"),
        },
    };

    // collect relative path of all variants.
    // while we're at it, ensure that all variants are unit and (most importantly) have no explicit discriminator.
    // we need enums to have discriminators 0..N!
    let mut variant_paths = vec![];
    for var in e.variants {
        let name = var.ident.to_string();
        if !matches!(var.fields, syn::Fields::Unit) {
            panic!("{name} is not a unit variant");
        }
        if var.discriminant.is_some() {
            panic!("variant {name} has an explicit discriminant, which is not allowed");
        }
        match &var.attrs[..] {
            [] => panic!("variant {name} is missing attribute"),
            [attr] => match &attr.meta {
                syn::Meta::Path(_) => panic!("invalid attribute for variant {name}"),
                syn::Meta::NameValue(_) => panic!("invalid attribute for variant {name}"),
                syn::Meta::List(list) => {
                    if !list.path.is_ident("asset") {
                        panic!("invalid attribute for variant {name}, expected 'asset'");
                    }
                    let kv_opts: crate::parse::KVList = syn::parse2(list.tokens.clone()).expect("couldn't parse arguments of 'asset' attribute");
                    let mut opts = crate::parse::kv_args_to_hashmap(kv_opts.kvs.into_iter(), ["path"].into_iter().collect());
                    match opts.remove("path") {
                        None => panic!("variant {name} is missing attribute 'path'"),
                        Some(syn::Lit::Str(s)) => {
                            variant_paths.push(s);
                        }
                        Some(_) => panic!("invalid attribute for variant {name}"),
                    }
                }
            },
            _ => panic!("variant {name} has more than one attribute"),
        }
    }

    AssetEnumOptions {
        enum_name: e.ident,
        base_path,
        compression_lit: opts.remove("compression"),
        level_lit: opts.remove("level"),
        variant_paths,
    }
}

/// Read the asset file for every variant, resolving each path relative to `base_path`.
///
/// Returns one byte vector per variant, in the same order as `variant_paths`.
/// Panics (a compile error in the proc-macro context) if a file can't be read.
pub fn get_files(base_path: syn::LitStr, variant_paths: std::vec::Vec<syn::LitStr>) -> std::vec::Vec<std::vec::Vec<u8>> {
    let base = std::path::PathBuf::from(base_path.value());
    variant_paths
        .into_iter()
        .map(|var_path| {
            let full_path = base.join(var_path.value());
            std::fs::read(&full_path).unwrap_or_else(|err| panic!("Couldn't read file {}: {}", full_path.display(), err))
        })
        .collect()
}

/// A compressed archive for an `AssetEnum`: all asset blobs concatenated (in
/// variant order) and compressed as one stream, plus the per-asset boundaries.
pub struct EnumArchive {
    /// Codec-compressed concatenation of all asset data.
    pub compressed_data: std::vec::Vec<u8>,
    /// End offset of each asset within the *uncompressed* concatenation
    /// (asset i spans from the previous offset — or 0 — up to offset i).
    pub data_end_offsets: std::vec::Vec<u32>,
}

/// Concatenate all blobs, record each blob's end offset, and compress the
/// concatenation with `codec`.
///
/// Offsets refer to the *uncompressed* concatenation; the i-th asset occupies
/// `offsets[i-1]..offsets[i]` (with an implicit start of 0 for the first).
///
/// # Panics
///
/// Panics if the total uncompressed size does not fit in a `u32`, or if
/// compression fails (both surface as compile errors in the proc-macro context).
pub fn prepare_asset_archive<C: Codec + ?Sized>(codec: &C, data: std::vec::Vec<std::vec::Vec<u8>>) -> EnumArchive {
    // Pre-size both buffers; total length is known up front.
    let total_len: usize = data.iter().map(|blob| blob.len()).sum();
    let mut uncompressed_data = std::vec::Vec::with_capacity(total_len);
    let mut data_end_offsets = std::vec::Vec::with_capacity(data.len());
    for blob in data {
        uncompressed_data.extend_from_slice(blob.as_slice());
        // Offsets are emitted as u32 in the generated code; report the size
        // instead of panicking with a bare unwrap when the archive exceeds u32::MAX.
        let end = u32::try_from(uncompressed_data.len())
            .unwrap_or_else(|_| panic!("too much data ({} bytes), offsets must fit in u32", uncompressed_data.len()));
        data_end_offsets.push(end);
    }
    let compressed_data = codec.compress(uncompressed_data.as_slice()).expect("compression should succeed");
    EnumArchive {
        compressed_data,
        data_end_offsets,
    }
}

A  => include_assets_encode/src/lib.rs +96 -0
@@ 1,96 @@
pub(crate) mod common;
pub(crate) mod enums;
pub(crate) mod named;
pub(crate) mod parse;

use include_assets_decode::codec::Codec;
use std::borrow::Borrow as _;

/// Implementation of the function-like `include_dir!` macro.
///
/// Syntax: a directory path string literal, optionally followed by
/// `compression = ...`, `level = ...`, `links = ...` key/value options.
/// Expands to a `::include_assets::CompressedNamedArchive` struct expression
/// whose byte fields are compressed at macro-expansion time.
#[proc_macro]
pub fn include_dir(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
    // Resolve user-supplied relative paths against the crate being compiled,
    // not against rustc's working directory.
    // NOTE(review): set_current_dir mutates process-global state; assumes
    // expansions within one compiler process don't race — TODO confirm.
    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
    std::env::set_current_dir(manifest_dir).unwrap();

    let args = syn::parse_macro_input!(tokens as parse::IncludeDirArgs);
    let opts = parse::kv_args_to_hashmap(args.opts.into_iter(), ["compression", "level", "links"].into_iter().collect());

    //println!("current directory: {}", std::env::current_dir().unwrap().display());
    //println!("path: {}", args.path.value());

    // parse_codec yields both a runtime codec (used to compress right now) and
    // the token stream naming that codec in the generated code.
    let (codec, codec_tokens, _codec_type_tokens) = common::parse_codec(opts.get("compression").cloned(), opts.get("level").cloned());
    let symlink_rules = named::parse_symlink_rules(opts.get("links").cloned());

    // Walk the directory, then compress names, sizes, and data.
    let named::NamedArchive {
        compressed_data,
        uncompressed_data_size,
        compressed_names,
        uncompressed_names_size,
        compressed_sizes,
        checksums,
    } = named::prepare_named_archive(
        codec.borrow() as &dyn Codec<CompressionError = common::MyError, DecompressionError = common::MyError>,
        named::read_dir(args.path.value(), symlink_rules).unwrap(),
    )
    .unwrap();

    // Embed the compressed byte buffers as byte-string literals.
    let data_token = syn::LitByteStr::new(&compressed_data, proc_macro2::Span::call_site());
    let names_token = syn::LitByteStr::new(&compressed_names, proc_macro2::Span::call_site());
    let checksums_token = common::checksums_tokens(checksums.into_iter());
    let sizes_token = syn::LitByteStr::new(&compressed_sizes, proc_macro2::Span::call_site());

    quote::quote! {
        ::include_assets::CompressedNamedArchive {
            codec: #codec_tokens,
            data: #data_token,
            uncompressed_data_size: #uncompressed_data_size,
            compressed_names: #names_token,
            uncompressed_names_size: #uncompressed_names_size,
            checksums: #checksums_token,
            compressed_sizes: #sizes_token
        }
    }
    .into()
}

/// Implementation of `#[derive(AssetEnum)]`.
///
/// Reads the file behind every variant's `#[asset(path = "...")]`, compresses
/// the concatenation with the codec selected by `#[archive(...)]`, and emits an
/// `impl ::include_assets::AssetEnum` with the compressed bytes inlined.
#[proc_macro_derive(AssetEnum, attributes(archive, asset))]
pub fn derive_asset_enum(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
    // Resolve user-supplied relative paths against the crate being compiled.
    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
    std::env::set_current_dir(manifest_dir).unwrap();

    let e = syn::parse_macro_input!(tokens as syn::ItemEnum);

    let enums::AssetEnumOptions {
        enum_name,
        base_path,
        compression_lit,
        level_lit,
        variant_paths,
    } = enums::check_enum_and_return_options(e);

    let (codec, codec_expr, codec_type) = common::parse_codec(compression_lit, level_lit);

    let file_data = enums::get_files(base_path, variant_paths);
    let checksums_token = common::checksums_tokens(file_data.iter());
    let enums::EnumArchive {
        compressed_data,
        data_end_offsets,
    } = enums::prepare_asset_archive(
        codec.borrow() as &dyn Codec<CompressionError = common::MyError, DecompressionError = common::MyError>,
        file_data,
    );
    let data_token = syn::LitByteStr::new(&compressed_data, proc_macro2::Span::call_site());

    // Fully qualify generated paths with a leading `::` (consistent with the
    // other quote! expansions in this crate) so expansion still works when the
    // caller has a local item named `include_assets`.
    quote::quote! {
        impl ::include_assets::AssetEnum for #enum_name {
            const DATA: &'static [u8] = #data_token;
            const DATA_END_OFFSETS: &'static [u32] = &[#(#data_end_offsets),*];
            const CHECKSUMS: &'static [::include_assets::do_not_use_this_directly::Checksum] = #checksums_token;
            type C = #codec_type;
            const CODEC: Self::C = #codec_expr;
            fn index(self) -> usize {
                self as usize
            }
        }
    }
    .into()
}

A  => include_assets_encode/src/named.rs +123 -0
@@ 1,123 @@
use anyhow::Context as _;

use crate::common::{compress_names, compress_sizes};
use include_assets_decode::checksum::{compute_checksum, Checksum};
use include_assets_decode::codec::Codec;

/// All the pieces of a compressed named-asset archive, ready to be spliced into
/// the generated `CompressedNamedArchive` expression.
pub struct NamedArchive {
    /// Compressed data.
    ///
    /// All assets are concatenated before compression.
    /// The order of asset data must match the order of assets in `compressed_names`.
    pub compressed_data: std::vec::Vec<u8>,
    /// Size of the data after decompression
    pub uncompressed_data_size: u32,
    /// Compressed names of the assets in ascending order, with separating null bytes
    pub compressed_names: std::vec::Vec<u8>,
    /// Size of the uncompressed names (including separating null bytes)
    pub uncompressed_names_size: u32,
    /// Compressed encoding of the asset data sizes, in the same order as `compressed_names`.
    pub compressed_sizes: std::vec::Vec<u8>,
    /// Asset checksums, in the same order as `compressed_names`.
    pub checksums: std::vec::Vec<Checksum>,
}

/// Build a [`NamedArchive`] from `(name, data)` pairs.
///
/// # Errors
///
/// Returns an error if two assets share a name, if any compression step fails,
/// or if the concatenated uncompressed data does not fit into a `u32`.
pub fn prepare_named_archive<C: Codec + ?Sized>(
    codec: &C,
    assets: std::vec::Vec<(smartstring::SmartString<smartstring::LazyCompact>, std::vec::Vec<u8>)>,
) -> anyhow::Result<NamedArchive> {
    // ensure that names are unique — this function returns Result, so report
    // the bad input as an error instead of panicking
    {
        let mut names = std::collections::HashSet::new();
        for (name, _) in assets.iter() {
            let is_new = names.insert(name);
            if !is_new {
                anyhow::bail!("duplicate asset name: {name}");
            }
        }
    }

    // compress asset names, sizes, and compute checksums
    let (compressed_names, uncompressed_names_size) = compress_names(codec, assets.iter().map(|(name, _)| name)).context("couldn't compress asset names")?;
    let compressed_sizes = compress_sizes(codec, assets.iter().map(|(name, data)| (name, data.len()))).context("couldn't compress asset sizes")?;
    let checksums: std::vec::Vec<Checksum> = assets.iter().map(|(_, data)| compute_checksum(data.as_ref())).collect();

    // compress data (all assets concatenated into a single stream)
    let mut uncompressed_data = vec![];
    for (_, asset_data) in assets.iter() {
        uncompressed_data.extend_from_slice(asset_data.as_slice());
    }
    let compressed_data = codec.compress(uncompressed_data.as_slice()).context("couldn't compress asset data")?;

    // ensure that the uncompressed data isn't too big — the generated code
    // stores this size as u32
    let uncompressed_data_size: u32 = uncompressed_data
        .len()
        .try_into()
        .map_err(|_| anyhow::Error::msg(format!("too much data ({} bytes)", uncompressed_data.len())))?;

    Ok(NamedArchive {
        compressed_data,
        uncompressed_data_size,
        compressed_names,
        uncompressed_names_size,
        compressed_sizes,
        checksums,
    })
}

/// How `read_dir` treats symbolic links encountered while walking the asset tree.
#[derive(Clone, Copy)]
pub enum SymlinkRules {
    /// Fail with an error on any symbolic link (the default when no `links` option is given).
    Forbid,
    /// Skip symbolic links silently.
    Ignore,
    /// Follow symbolic links as if they were the linked file/directory.
    Follow,
}

/// Parse the value of the `links = "..."` option into [`SymlinkRules`].
///
/// A missing option defaults to `Forbid`. Anything other than the string
/// literals `"forbid"`, `"ignore"`, or `"follow"` panics, surfacing as a
/// compile error in the proc-macro context.
pub fn parse_symlink_rules(lit: Option<syn::Lit>) -> SymlinkRules {
    match lit {
        None => SymlinkRules::Forbid,
        Some(syn::Lit::Str(s)) => match &s.value()[..] {
            "forbid" => SymlinkRules::Forbid,
            "ignore" => SymlinkRules::Ignore,
            "follow" => SymlinkRules::Follow,
            _ => panic!("invalid/unsupported rule for symbolic links (supported rules are: forbid, ignore, follow)"),
        },
        // same wording as the branch above (previously had a stray space before the colon)
        Some(_) => panic!("invalid/unsupported rule for symbolic links (supported rules are: forbid, ignore, follow)"),
    }
}

/// Recursively collect `(relative_name, contents)` for every regular file under `base`.
///
/// Entries are visited in file-name order so the resulting archive bytes are
/// deterministic. Each name is the file's path relative to `base` and must be
/// valid UTF-8.
///
/// # Errors
///
/// Fails on directory-walk or file-read errors, non-UTF-8 file names, and
/// (under [`SymlinkRules::Forbid`]) on any symbolic link.
pub fn read_dir<P: AsRef<std::path::Path>>(
    base: P,
    symlink_rules: SymlinkRules,
) -> anyhow::Result<std::vec::Vec<(smartstring::SmartString<smartstring::LazyCompact>, std::vec::Vec<u8>)>> {
    // Split the tri-state rule into walkdir's follow flag plus a local
    // "skip silently" flag checked in the symlink branch below.
    let (follow_symlinks, ignore_symlinks) = match symlink_rules {
        SymlinkRules::Forbid => (false, false),
        SymlinkRules::Ignore => (false, true),
        SymlinkRules::Follow => (true, false),
    };
    let mut assets = vec![];
    for dirent in walkdir::WalkDir::new(base.as_ref()).sort_by_file_name().follow_links(follow_symlinks) {
        // Note: sorting by file name is important to ensure the same compressed data independent of the creation/modification order of assets
        let ent = dirent?;
        if ent.file_type().is_dir() {
            continue; // ignore
        } else if ent.file_type().is_file() {
            // Asset name = path relative to the base directory, as UTF-8.
            let filename = ent
                .path()
                .strip_prefix(base.as_ref())
                .expect("child path should have parent as prefix")
                .to_str()
                .with_context(|| format!("Non-UTF-8 file name: '{}'", ent.path().display()))?;
            let data = std::fs::read(ent.path()).with_context(|| format!("Couldn't read file '{}'", ent.path().display()))?;
            assets.push((filename.into(), data))
        } else if ent.file_type().is_symlink() {
            if ignore_symlinks {
                continue; // ignore
            } else {
                return Err(anyhow::Error::msg(format!("Encountered a symbolic link: {}", ent.path().display())));
            }
        } else {
            // e.g. device nodes, sockets, FIFOs — not meaningful as assets
            panic!("File {} is neither directory, file, nor symbolic link.", ent.path().display());
        }
    }
    Ok(assets)
}

A  => include_assets_encode/src/parse.rs +65 -0
@@ 1,65 @@
/// Assignment of the form `identifier = literal`, e.g. `compression = "zstd"`.
pub struct KVIdentLit {
    /// Left-hand side: the option name.
    pub ident: syn::Ident,
    /// Right-hand side: the option value.
    pub lit: syn::Lit,
}

impl syn::parse::Parse for KVIdentLit {
    // Parses `ident = lit`, consuming (and discarding) the `=` token.
    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
        let ident = input.parse::<syn::Ident>()?;
        input.parse::<syn::token::Eq>()?;
        let lit = input.parse::<syn::Lit>()?;
        Ok(Self { ident, lit })
    }
}

/// A literal string, followed by a number of `ident = literal` arguments
/// (the argument list of the `include_dir!` macro).
pub struct IncludeDirArgs {
    /// The directory path literal (first, mandatory argument).
    pub path: syn::LitStr,
    /// The remaining `key = value` options, in source order.
    pub opts: std::vec::Vec<KVIdentLit>,
}

impl syn::parse::Parse for IncludeDirArgs {
    // Parses `"path"` optionally followed by `, key = lit, key = lit, ...`
    // (trailing comma permitted by parse_terminated).
    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
        let path: syn::LitStr = input.parse()?;
        let mut opts = std::vec::Vec::new();
        if input.peek(syn::Token![,]) {
            input.parse::<syn::token::Comma>()?;
            for kv in syn::punctuated::Punctuated::<KVIdentLit, syn::Token![,]>::parse_terminated(input)? {
                opts.push(kv);
            }
        }
        Ok(IncludeDirArgs { path, opts })
    }
}

/// A comma-separated (possibly empty) sequence of `ident = literal` assignments,
/// as found inside `#[archive(...)]` / `#[asset(...)]` attribute parentheses.
pub struct KVList {
    pub kvs: std::vec::Vec<KVIdentLit>,
}

impl syn::parse::Parse for KVList {
    // Accepts zero or more comma-separated `ident = lit` pairs, trailing comma allowed.
    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
        let punctuated = syn::punctuated::Punctuated::<KVIdentLit, syn::Token![,]>::parse_terminated(input)?;
        let kvs = punctuated.into_iter().collect();
        Ok(Self { kvs })
    }
}

pub fn kv_args_to_hashmap<I: Iterator<Item = KVIdentLit>>(kvs: I, allowed: std::collections::HashSet<&str>) -> std::collections::HashMap<&str, syn::Lit> {
    let mut result = std::collections::HashMap::new();
    for kv in kvs {
        let key = kv.ident.to_string();
        if let Some(s) = allowed.get(key.as_str()) {
            let is_new = result.insert(*s, kv.lit).is_none();
            if !is_new {
                panic!("Duplicate option {s}");
            }
        } else {
            panic!("Unknown/invalid option {key}")
        }
    }
    result
}

A  => rustfmt.toml +1 -0
@@ 1,1 @@
max_width=159
\ No newline at end of file