~quf/babel-poc

b9f4b338c07b90dfc8e4861b8059f2738496dff2 — Lukas Himbert 6 months ago trunk
initial
12 files changed, 1206 insertions(+), 0 deletions(-)

A .gitignore
A Cargo.lock
A Cargo.toml
A README.md
A build.rs
A i18n/de.csv
A i18n/en.csv
A i18n/fr.csv
A i18n/ja.csv
A src/babel.rs
A src/fmt.rs
A src/main.rs
A  => .gitignore +1 -0
@@ 1,1 @@
/target

A  => Cargo.lock +673 -0
@@ 1,673 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"

[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
 "libc",
]

[[package]]
name = "anstream"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
dependencies = [
 "anstyle",
 "anstyle-parse",
 "anstyle-query",
 "anstyle-wincon",
 "colorchoice",
 "is-terminal",
 "utf8parse",
]

[[package]]
name = "anstyle"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d"

[[package]]
name = "anstyle-parse"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee"
dependencies = [
 "utf8parse",
]

[[package]]
name = "anstyle-query"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
dependencies = [
 "windows-sys",
]

[[package]]
name = "anstyle-wincon"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
dependencies = [
 "anstyle",
 "windows-sys",
]

[[package]]
name = "anyhow"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"

[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

[[package]]
name = "babel-poc"
version = "0.1.0"
dependencies = [
 "anyhow",
 "chrono",
 "clap",
 "csv",
 "flate2",
 "grid",
 "indexmap",
 "miniz_oxide",
 "prettyplease",
 "proc-macro2",
 "quote",
 "serde",
 "syn",
]

[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

[[package]]
name = "bumpalo"
version = "3.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c6ed94e98ecff0c12dd1b04c15ec0d7d9458ca8fe806cea6f12954efe74c63b"

[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"

[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

[[package]]
name = "chrono"
version = "0.4.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b"
dependencies = [
 "iana-time-zone",
 "js-sys",
 "num-integer",
 "num-traits",
 "time",
 "wasm-bindgen",
 "winapi",
]

[[package]]
name = "clap"
version = "4.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34d21f9bf1b425d2968943631ec91202fe5e837264063503708b83013f8fc938"
dependencies = [
 "clap_builder",
 "clap_derive",
 "once_cell",
]

[[package]]
name = "clap_builder"
version = "4.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "914c8c79fb560f238ef6429439a30023c862f7a28e688c58f7203f12b29970bd"
dependencies = [
 "anstream",
 "anstyle",
 "bitflags",
 "clap_lex",
 "strsim",
]

[[package]]
name = "clap_derive"
version = "4.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4"
dependencies = [
 "heck",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "clap_lex"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1"

[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"

[[package]]
name = "core-foundation-sys"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"

[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
 "cfg-if",
]

[[package]]
name = "csv"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad"
dependencies = [
 "csv-core",
 "itoa",
 "ryu",
 "serde",
]

[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
 "memchr",
]

[[package]]
name = "errno"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
dependencies = [
 "errno-dragonfly",
 "libc",
 "windows-sys",
]

[[package]]
name = "errno-dragonfly"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
dependencies = [
 "cc",
 "libc",
]

[[package]]
name = "flate2"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
dependencies = [
 "crc32fast",
 "miniz_oxide",
]

[[package]]
name = "grid"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eec1c01eb1de97451ee0d60de7d81cf1e72aabefb021616027f3d1c3ec1c723c"

[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"

[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"

[[package]]
name = "hermit-abi"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"

[[package]]
name = "iana-time-zone"
version = "0.1.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c"
dependencies = [
 "android_system_properties",
 "core-foundation-sys",
 "iana-time-zone-haiku",
 "js-sys",
 "wasm-bindgen",
 "windows",
]

[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
 "cc",
]

[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
 "autocfg",
 "hashbrown",
]

[[package]]
name = "io-lifetimes"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220"
dependencies = [
 "hermit-abi",
 "libc",
 "windows-sys",
]

[[package]]
name = "is-terminal"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f"
dependencies = [
 "hermit-abi",
 "io-lifetimes",
 "rustix",
 "windows-sys",
]

[[package]]
name = "itoa"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"

[[package]]
name = "js-sys"
version = "0.3.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790"
dependencies = [
 "wasm-bindgen",
]

[[package]]
name = "libc"
version = "0.2.144"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"

[[package]]
name = "linux-raw-sys"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"

[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
 "cfg-if",
]

[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"

[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
 "adler",
]

[[package]]
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
 "autocfg",
 "num-traits",
]

[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
 "autocfg",
]

[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"

[[package]]
name = "prettyplease"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "617feabb81566b593beb4886fb8c1f38064169dae4dccad0e3220160c3b37203"
dependencies = [
 "proc-macro2",
 "syn",
]

[[package]]
name = "proc-macro2"
version = "1.0.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4ec6d5fe0b140acb27c9a0444118cf55bfbb4e0b259739429abb4521dd67c16"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "quote"
version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "rustix"
version = "0.37.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
dependencies = [
 "bitflags",
 "errno",
 "io-lifetimes",
 "libc",
 "linux-raw-sys",
 "windows-sys",
]

[[package]]
name = "ryu"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"

[[package]]
name = "serde"
version = "1.0.163"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2"
dependencies = [
 "serde_derive",
]

[[package]]
name = "serde_derive"
version = "1.0.163"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"

[[package]]
name = "syn"
version = "2.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "time"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
dependencies = [
 "libc",
 "wasi",
 "winapi",
]

[[package]]
name = "unicode-ident"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"

[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"

[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"

[[package]]
name = "wasm-bindgen"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
dependencies = [
 "cfg-if",
 "wasm-bindgen-macro",
]

[[package]]
name = "wasm-bindgen-backend"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
dependencies = [
 "bumpalo",
 "log",
 "once_cell",
 "proc-macro2",
 "quote",
 "syn",
 "wasm-bindgen-shared",
]

[[package]]
name = "wasm-bindgen-macro"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
dependencies = [
 "quote",
 "wasm-bindgen-macro-support",
]

[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
 "wasm-bindgen-backend",
 "wasm-bindgen-shared",
]

[[package]]
name = "wasm-bindgen-shared"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"

[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
 "winapi-i686-pc-windows-gnu",
 "winapi-x86_64-pc-windows-gnu",
]

[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"

[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

[[package]]
name = "windows"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
dependencies = [
 "windows-targets",
]

[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
 "windows-targets",
]

[[package]]
name = "windows-targets"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5"
dependencies = [
 "windows_aarch64_gnullvm",
 "windows_aarch64_msvc",
 "windows_i686_gnu",
 "windows_i686_msvc",
 "windows_x86_64_gnu",
 "windows_x86_64_gnullvm",
 "windows_x86_64_msvc",
]

[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"

[[package]]
name = "windows_aarch64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"

[[package]]
name = "windows_i686_gnu"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"

[[package]]
name = "windows_i686_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"

[[package]]
name = "windows_x86_64_gnu"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"

[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"

[[package]]
name = "windows_x86_64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"

A  => Cargo.toml +23 -0
@@ 1,23 @@
[package]
name = "babel-poc"
version = "0.1.0"
edition = "2021"

[dependencies]
chrono = "0.4.24"
clap = { version = "4.2.7", features = ["derive"] }
grid = "0.10.0"
miniz_oxide = "0.7.1"
flate2 = "1.0.26"

[build-dependencies]
anyhow = "1.0.71"
csv = "1.2.1"
miniz_oxide = "0.7.1"
prettyplease = "0.2.5"
proc-macro2 = "1.0.57"
quote = "1.0.27"
serde = { version = "1.0.163", features = ["derive"] }
syn = "2.0.16"
indexmap = "1.9.3"
# TODO: compression

A  => README.md +84 -0
@@ 1,84 @@
babel
=====

This is a proof of concept for multi-language text (where the language is chosen at runtime) with compile-time checks:

- If a certain piece of text (or language) is not available or duplicate, this is a compile-time error.
- If a certain piece of text (or language) is available but never used, this is a compile-time warning.

The complete text in all languages is embedded in the binary.

How does it work?
-----------------

The folder "`i18n`" contains several `csv` files with four columns: a language identifier, a text identifier, the text, and (optionally) a comment.

`build.rs` reads and analyzes these files and generates code based on them:

The language identifiers across all `csv` files are collected and an enum `Language` with one variant for each language is created:
If the languages `En`, `Ja`, `De`, and `Fr` are present, `build.rs` will create the enum:

    enum Language {
        De,
        En,
        Fr,
        Ja,
    }

Similarly, the text identifiers are collected and turned into an enum `Text` with one variant per (unique) identifier.

The text for each language and identifier is collected and compressed, and included in the binary.
The offset of each individual text within the whole is also compressed and included in the binary.

Comments are not used and not included in the binary.
They are intended to provide space for clarifications, TODO markers, etc.

At runtime, the text and all offsets are uncompressed at the start.
Then, the `Language` and `Text` enums can be used to look up the location of the corresponding text in the collection.

String interpolation
--------------------

Because static strings are not enough for internationalization, some kind of runtime string interpolation is needed.
Here, an extremely basic version is implemented.

How to use
----------

The program is extremely basic: It prints the current date, asks for your name and then says hello.
Choose the language by handing a command line argument.

    $ cargo run ger
    Heute ist der 16. 5. 2023.

    Wie heißt du?
    > Marvin
    Hallo Marvin.

    $ cargo run 日本語
    今日は2023年5月16日です。

    名前はなんですか。
    > Zaphod
    Zaphod、こんにちは。

    $ cargo run en
    It's 5/16/2023.

    What's your name?
    > Arthur
    Hello Arthur.

Limitations
-----------

For production use, language-specific formatting for dates and numbers (decimal separators) would be required.
This could be done at a higher level by first formatting the date and then interpolating the formatted date into the string.
Here it's done by specifying the date order in the .csv but I'm not sure this approach would fly for bigger projects.

Runtime text interpolation (the user's name) is only checked at runtime.
It would be great to verify at compile time that the value to be interpolated is guaranteed to be present.
Unfortunately, I'm not sure how to do that.
My best idea is a macro `fmt!(dictionary, language, text_id, "key1"=value1, "key2"=value2, "key3"=value3, ...)` that reads the csvs and checks that every key which appears in the text `text_id` for any language also has a corresponding `"key"=value` argument.
However, this would only work if the `text_id` is used verbatim at compile-time (which may be a reasonable assumption).
(Checking that every `"key"=value` argument also appears in the text for _every_ language is probably a bad idea, but checking that every `"key"=value` argument appears in _some_ language may be reasonable.)

A  => build.rs +170 -0
@@ 1,170 @@
use anyhow::Context as _;

use serde::Deserialize;

/// One data record of an `i18n/*.csv` file.
///
/// The field names mirror the CSV header columns (`language,id,text,comment`).
#[derive(Deserialize)]
struct I18nRow {
    /// Language identifier; turned into a variant of the generated `Language` enum.
    language: String,
    /// Text identifier; turned into a variant of the generated `Text` enum.
    id: String,
    /// The text for this language/id pair (may be empty).
    text: String,
    /// Free-form note; in this build script it is only consulted to decide
    /// whether an empty `text` deserves a warning.
    comment: String,
}

/// A fully parsed translation file.
struct I18nFile {
    /// Path of the CSV file as text; used to point at the file in diagnostics.
    name: String,
    /// Every data record of the file, in the order it was read.
    rows: Vec<I18nRow>,
}

/// Where a particular language/id text was first seen, plus the text itself.
///
/// The location is kept so that a later duplicate can be reported together
/// with the position of the first occurrence.
struct IndexEntry {
    /// Name of the CSV file the text came from.
    filename: String,
    /// 1-based index of the data record within that file.
    row: usize,
    /// The text that will be embedded in the binary.
    text: String,
    //comment: String,
}

fn main() -> anyhow::Result<()> {
    println!("cargo:rerun-if-changed=i18n");

    // read all csvs in the i18n directory
    let mut files: std::vec::Vec<I18nFile> = vec![];
    for dirent in std::fs::read_dir("i18n").context("couldn't read")? {
        let path = dirent.context("couldn't read i18n dirent")?.path();
        if !path.is_file() {
            continue;
        }
        if path.extension() != Some(std::ffi::OsStr::new("csv")) {
            continue;
        }

        let f = std::fs::File::open(&path)
            .with_context(|| format!("couldn't open {}", path.display()))?;
        let mut reader = csv::Reader::from_reader(std::io::BufReader::new(f));
        let mut rows = vec![];
        for (i, row) in reader.deserialize().enumerate() {
            rows.push(row.with_context(|| format!("error @ {}:{}", path.display(), i + 1))?);
        }
        files.push(I18nFile {
            name: path.into_os_string().into_string().unwrap(),
            rows,
        });
    }

    // collect all languages and all text ids
    // Note:
    // We need BTreeMap to ensure the same order
    // - of languages across runs: to get consistent error messages
    // - and of different texts across languages: to ensure the indexing works out
    // I've considered reading only a single .CSV, but I imagine it gets too large to handle in a nontrivial project.
    // Splitting the csv up across different languages or across different parts of the program seems very reasonable.
    let mut languages = std::collections::BTreeSet::new(); // hashset of all languages across all files and rows
    let mut ids = std::collections::BTreeSet::new(); // hashset of all text ids across all files and rows
    let mut text = std::collections::BTreeMap::new(); // maps language -> (id -> (filename, row number, text, comment))
    for file in files.iter() {
        for (i, row) in file.rows.iter().enumerate() {
            let row_number = i + 1;
            let new_language = languages.insert(&row.language);
            ids.insert(&row.id);
            if new_language {
                text.insert(&row.language, std::collections::BTreeMap::new());
            }
            if row.text.is_empty() && row.comment.is_empty() {
                println!(
                    "cargo:warning={}:{}: text {}/{} is empty and there is no comment",
                    file.name, row_number, row.language, row.id,
                );
            }
            match text.get_mut(&row.language).unwrap().entry(&row.id) {
                std::collections::btree_map::Entry::Vacant(entry) => {
                    entry.insert(IndexEntry {
                        filename: file.name.clone(),
                        row: row_number,
                        text: row.text.clone(),
                        //comment: row.comment.clone(),
                    });
                }
                std::collections::btree_map::Entry::Occupied(entry) => {
                    panic!("Error: duplicate text {}/{}. The first occurence is at {}:{}, the second at {}:{}", row.language, row.id, entry.get().filename, entry.get().row, file.name, row_number);
                }
            }
        }
    }
    let languages: std::vec::Vec<&String> = languages.into_iter().collect();
    let ids: std::vec::Vec<&String> = ids.into_iter().collect();

    // confirm that each language has every text
    for (language, rows_for_language) in text.iter() {
        for id in ids.iter() {
            if !rows_for_language.contains_key(id) {
                panic!("Error: language '{}' is missing text '{}'", language, id);
            }
        }
    }

    // concatenate text and mark the start of each part
    let mut all_text = vec![];
    let mut all_offsets = vec![];
    for (_, text_for_language) in text.iter() {
        for (_, entry) in text_for_language.iter() {
            let off: u32 = all_text.len().try_into().expect("less than 4 GiB text");
            all_offsets.push(off);
            all_text.extend_from_slice(entry.text.as_bytes());
        }
    }

    // compute text offset deltas for better compression
    let mut offset_deltas = vec![];
    for i in 0..all_offsets.len() {
        let delta = all_offsets[i]
            .checked_sub(all_offsets[i.saturating_sub(1)])
            .unwrap();
        offset_deltas.extend_from_slice(&delta.to_le_bytes());
    }

    // compress text and text offset deltas
    let compressed_text = miniz_oxide::deflate::compress_to_vec(&all_text, 10);
    let data_token = syn::LitByteStr::new(&compressed_text, proc_macro2::Span::call_site());

    let compressed_offset_deltas = miniz_oxide::deflate::compress_to_vec(&offset_deltas, 10);
    let offset_deltas_token =
        syn::LitByteStr::new(&compressed_offset_deltas, proc_macro2::Span::call_site());

    let language_tokens: std::vec::Vec<_> = languages
        .iter()
        .map(|s| syn::Ident::new(s, proc_macro2::Span::call_site()))
        .collect();
    let text_id_tokens: std::vec::Vec<_> = ids
        .iter()
        .map(|s| syn::Ident::new(s, proc_macro2::Span::call_site()))
        .collect();

    let number_of_languages = languages.len();
    let number_of_texts = ids.len();

    let ast = quote::quote! {
        mod internal {
            #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
            pub enum Language {
                #(#language_tokens),*
            }

            #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
            pub enum Text {
                #(#text_id_tokens),*
            }

            pub const COMPRESSED_TEXT: &[u8] = #data_token;
            pub const COMPRESSED_OFFSET_DELTAS: &[u8] = #offset_deltas_token;
            pub const NUMBER_OF_LANGUAGES: usize = #number_of_languages;
            pub const NUMBER_OF_TEXTS: usize = #number_of_texts;
        }
    };

    let s = prettyplease::unparse(&syn::parse_file(&ast.to_string()).unwrap());
    std::fs::write(
        std::path::PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("i18n.rs"),
        s,
    )
    .unwrap();

    Ok(())
}

A  => i18n/de.csv +8 -0
@@ 1,8 @@
language,id,text,comment
De,ShowDate,"Heute ist der {day}. {month}. {year}.
",
De,AskName,Wie heißt du?,
De,Hello,"Hallo {name}.",
Fr,Unused,,This demonstrates using different languages in the same file
De,Unused,,
De,Prompt,> ,

A  => i18n/en.csv +7 -0
@@ 1,7 @@
language,id,text,comment
En,ShowDate,"It's {month}/{day}/{year}.
",sorry brits this is just to show off the different date formats
En,AskName,What's your name?,
En,Hello,Hello {name}.,
En,Unused,whatever,
En,Prompt,> ,

A  => i18n/fr.csv +6 -0
@@ 1,6 @@
language,id,text,comment
Fr,ShowDate,"Aujourd'hui, c'est le {day}. {month}. {year}.
",
Fr,AskName,comment tu t'appelle?,need to look up how to spell this (but really this is just to demonstrate comments)
Fr,Hello,"Bonjour, {name}.",
Fr,Prompt,> ,

A  => i18n/ja.csv +7 -0
@@ 1,7 @@
language,id,text,comment
Ja,ShowDate,"今日は{year}年{month}月{day}日です。
",
Ja,AskName,名前はなんですか。,
Ja,Hello,{name}、こんにちは。,
Ja,Unused,,unused
Ja,Prompt,> ,

A  => src/babel.rs +88 -0
@@ 1,88 @@
use std::io::Read;

include! {concat!(env!("OUT_DIR"), "/i18n.rs")}

pub use internal::{Language, Text};

/// All texts of all languages, decompressed and ready for lookup.
pub struct Dictionary {
    /// Every text concatenated into one string.
    text: String,
    /// Byte offset into `text` at which each individual text starts.
    offsets: Vec<u32>,
}

/// Inflates `internal::COMPRESSED_OFFSET_DELTAS` into a list of `u32` deltas.
///
/// Exactly `NUMBER_OF_LANGUAGES * NUMBER_OF_TEXTS` little-endian values are
/// read from the stream.
///
/// # Panics
///
/// Panics if the item count overflows `usize`, if the stream ends early or
/// cannot be decoded, or if any data is left after the last value.
fn decompress_offset_deltas() -> std::vec::Vec<u32> {
    let expected = internal::NUMBER_OF_LANGUAGES
        .checked_mul(internal::NUMBER_OF_TEXTS)
        .unwrap();
    let mut reader = flate2::read::DeflateDecoder::new(internal::COMPRESSED_OFFSET_DELTAS);

    let mut deltas = std::vec::Vec::<u32>::with_capacity(expected);
    let mut word = [0u8; 4];
    while deltas.len() < expected {
        reader.read_exact(&mut word).unwrap();
        deltas.push(u32::from_le_bytes(word));
    }

    // Reading one more byte must hit end-of-stream; anything else means the
    // embedded table does not have the expected size.
    let mut probe = [0u8];
    let at_eof = matches!(
        reader.read_exact(&mut probe),
        Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof
    );
    if !at_eof {
        panic!("leftover data");
    }
    deltas
}

/// Replaces every element with the running total of all elements up to and
/// including it, and returns the (reused) vector.
///
/// The first element is left untouched; an empty vector is returned as is.
pub fn cumsum<T: std::ops::Add<Output = T> + Copy>(
    mut deltas: std::vec::Vec<T>,
) -> std::vec::Vec<T> {
    let mut slots = deltas.iter_mut();
    if let Some(first) = slots.next() {
        let mut running = *first;
        for slot in slots {
            // Keep the operand order `current + previous total`.
            running = *slot + running;
            *slot = running;
        }
    }
    deltas
}

impl Dictionary {
    /// Decompresses the embedded text and offset table.
    ///
    /// # Panics
    ///
    /// Panics if the embedded text cannot be inflated or is not valid UTF-8,
    /// or if the offset table cannot be decoded.
    pub fn load() -> Self {
        // Inflate the concatenated text first ...
        let raw = miniz_oxide::inflate::decompress_to_vec(internal::COMPRESSED_TEXT).unwrap();
        let text = String::from_utf8(raw).unwrap();
        // ... then turn the stored deltas back into absolute offsets.
        let offsets = cumsum(decompress_offset_deltas());
        Self { text, offsets }
    }

    /// Wraps the dictionary so that texts can be looked up by `Text` alone,
    /// always using `language`.
    pub fn with_default_language(self, language: Language) -> DictionaryWithDefaultLanguage {
        let dictionary = self;
        DictionaryWithDefaultLanguage {
            dictionary,
            language,
        }
    }
}

/// A [`Dictionary`] paired with the language used when indexing by `Text` only.
pub struct DictionaryWithDefaultLanguage {
    /// The wrapped dictionary holding the texts of all languages.
    dictionary: Dictionary,
    /// Language applied to every lookup made through this wrapper.
    language: Language,
}

/// Looks up a text by `(language, id)`.
impl core::ops::Index<(Language, Text)> for Dictionary {
    type Output = str;

    /// Returns the slice of the concatenated text that belongs to the given
    /// language/id pair.
    ///
    /// Panics if the computed slot is outside the offset table or the offsets
    /// do not describe a valid range of the text.
    fn index(&self, (language, id): (Language, Text)) -> &Self::Output {
        // Texts are laid out language by language, `NUMBER_OF_TEXTS` per language.
        let slot = (language as usize) * internal::NUMBER_OF_TEXTS + (id as usize);
        let start: usize = self.offsets[slot].try_into().unwrap();
        // A text ends where the next one starts; the last one runs to the end.
        let end: usize = match self.offsets.get(slot + 1) {
            Some(&next) => next.try_into().unwrap(),
            None => self.text.len(),
        };
        &self.text[start..end]
    }
}

/// Looks up a text by id alone, using the stored default language.
impl core::ops::Index<Text> for DictionaryWithDefaultLanguage {
    type Output = str;

    fn index(&self, id: Text) -> &Self::Output {
        // Forward to the wrapped dictionary's `(Language, Text)` lookup.
        let key = (self.language, id);
        core::ops::Index::index(&self.dictionary, key)
    }
}

A  => src/fmt.rs +84 -0
@@ 1,84 @@
// Quick and dirty runtime string formatting/interpolation/templating

use std::io::Write;

/// A source of values for the `{key}` placeholders of a format string.
///
/// [`write`] calls [`Context::get`] once per placeholder it encounters.
pub trait Context {
    /// Type of the values this context hands out.
    type Value;
    /// Returns the value stored under `key`, or `None` if the context has none.
    fn get(&self, key: &str) -> Option<&Self::Value>;
}

/// Errors returned by the formatting functions in this module.
#[derive(Debug)]
pub enum FmtError {
    /// Format string is malformed (a `{` without a closing `}`)
    MalformedFormatString,
    /// Value that's supposed to be interpolated is missing from context;
    /// carries the key that was looked up
    MissingValue(String),
    /// I/O error reported by the writer (not necessarily a file)
    WriteError(std::io::Error),
    /// std::fmt::Display error
    ValueFmtError,
}

/// Writes `fmt` to `w`, replacing every `{key}` placeholder with the value that `ctx`
/// holds for `key`.
///
/// Everything between a `{` and the next `}` is taken verbatim as the key. There is no
/// escaping: a `{` always opens a placeholder, while a `}` outside a placeholder is
/// copied through like any other character.
///
/// # Errors
///
/// * [`FmtError::MalformedFormatString`] if a `{` has no closing `}`.
/// * [`FmtError::MissingValue`] if `ctx` has no value for a key.
/// * [`FmtError::ValueFmtError`] if a value's `Display` implementation fails.
/// * [`FmtError::WriteError`] if `w` reports an I/O error, whether while writing
///   literal text or an interpolated value.
///
/// Output produced before the error occurred stays written.
pub fn write<C, D, W>(mut w: W, fmt: &str, ctx: &C) -> Result<(), FmtError>
where
    D: std::fmt::Display,
    C: Context<Value = D>,
    W: std::io::Write,
{
    // Scratch buffer reused for every interpolated value.
    let mut rendered = String::new();
    let mut rest = fmt;

    while let Some(open) = rest.find('{') {
        // Emit the literal run in front of the placeholder in one go.
        if open > 0 {
            w.write_all(rest[..open].as_bytes())
                .map_err(FmtError::WriteError)?;
        }

        // we don't bother with escaping
        let after_open = &rest[open + '{'.len_utf8()..];
        let close = after_open
            .find('}')
            .ok_or(FmtError::MalformedFormatString)?;
        let key = &after_open[..close];

        // `ok_or_else` so the key is only copied when the lookup actually fails.
        let value = ctx
            .get(key)
            .ok_or_else(|| FmtError::MissingValue(key.to_owned()))?;

        // Render into the scratch buffer first: that way a failing `Display` impl and a
        // failing writer can be told apart and reported as different errors.
        rendered.clear();
        std::fmt::write(&mut rendered, format_args!("{}", value))
            .map_err(|_| FmtError::ValueFmtError)?;
        w.write_all(rendered.as_bytes())
            .map_err(FmtError::WriteError)?;

        rest = &after_open[close + '}'.len_utf8()..];
    }

    // Trailing literal text after the last placeholder.
    if !rest.is_empty() {
        w.write_all(rest.as_bytes())
            .map_err(FmtError::WriteError)?;
    }
    Ok(())
}

/*
pub fn format<C, D>(fmt: &str, ctx: &C) -> Result<String, FmtError>
where
    D: std::fmt::Display,
    C: Context<Value = D>,
{
    let mut s = vec![0u8; 0];
    write(&mut s, fmt, ctx)?;
    Ok(String::from_utf8(s).unwrap())
}
*/

/// Formats `s` with `ctx` (see [`write`]) and prints the result to stdout, followed by
/// a newline.
///
/// Stdout is locked once for the whole call, so the text and its newline are emitted
/// under a single lock, and it is flushed before returning.
///
/// # Errors
///
/// Returns whatever [`write`] reports for the format string, or
/// [`FmtError::WriteError`] if writing the newline or flushing stdout fails.
pub fn println<C, D>(s: &str, ctx: &C) -> Result<(), FmtError>
where
    D: std::fmt::Display,
    C: Context<Value = D>,
{
    let stdout = std::io::stdout();
    let mut out = stdout.lock();
    write(&mut out, s, ctx)?;
    // A failure here is an I/O problem, so it is reported as such.
    writeln!(out).map_err(FmtError::WriteError)?;
    out.flush().map_err(FmtError::WriteError)
}

/// Lets a `HashMap` keyed by string slices be used directly as a formatting context.
impl<'a, D: std::fmt::Display> Context for std::collections::HashMap<&'a str, D> {
    type Value = D;

    fn get(&self, key: &str) -> Option<&Self::Value> {
        // Spell out the map's own lookup so this does not read like a recursive call.
        std::collections::HashMap::get(self, key)
    }
}

A  => src/main.rs +55 -0
@@ 1,55 @@
use chrono::Datelike as _;
use clap::Parser;
use std::io::Write as _;

mod babel;
mod fmt;

use babel::{Dictionary, Language, Text};
use fmt::println;

// Command-line arguments, parsed through clap's `Parser` derive.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive turns `///` doc
// comments into help text, so editing or adding them changes what the program prints.
#[derive(Parser)]
struct Args {
    /// language
    // Free-form language name or code; `main` turns it into a `Language`.
    language: String,
}

/// Shows `prompt` on stdout (without a trailing newline), reads one line from stdin and
/// returns it with leading and trailing whitespace removed.
///
/// # Panics
///
/// Panics if writing to or flushing stdout fails, or if reading from stdin fails.
fn read_name(prompt: &str) -> String {
    let mut stdout = std::io::stdout();
    // `write_all`, not `write`: a single `write` call may accept only part of the
    // buffer, which would silently truncate the prompt.
    stdout.write_all(prompt.as_bytes()).unwrap();
    // The prompt has no newline, so flush explicitly to make it visible before blocking.
    stdout.flush().unwrap();
    let mut line = String::new();
    std::io::stdin().read_line(&mut line).unwrap();
    line.trim().to_owned()
}

fn main() {
    // parse CLI, this part is not internationalized.
    // in practice, you'd have to query the OS or query the user interactively (while showing all available languages)
    let Args { language } = Args::parse();
    let language = match &language.to_lowercase()[..] {
        "en" | "english" | "eng" => Language::En,
        "de" | "deutsch" | "ger" | "german" => Language::De,
        "fr" | "français" | "fra" | "fre" | "french" => Language::Fr,
        "ja" | "日本語" | "jpn" => Language::Ja,
        _ => panic!(),
    };

    let text = Dictionary::load().with_default_language(language);
    let mut ctx = std::collections::HashMap::<&'static str, Box<dyn std::fmt::Display>>::new();
    let date = chrono::Local::now().date_naive();
    ctx.insert("year", Box::new(date.year()));
    ctx.insert("month", Box::new(date.month()));
    ctx.insert("day", Box::new(date.day()));

    // show date and ask for name
    println(&text[Text::ShowDate], &ctx).unwrap();
    println(&text[Text::AskName], &ctx).unwrap();
    let name = read_name(&text[Text::Prompt]);

    // update context
    ctx.insert("name", Box::new(name));

    // say hello
    println(&text[Text::Hello], &ctx).unwrap();
}