~damien/statement-parser

326ca230e912479c01ffc0e9fd9e66b5e27c8272 — Damien Radtke 2 years ago master
Initial commit
5 files changed, 904 insertions(+), 0 deletions(-)

A .gitignore
A Cargo.lock
A Cargo.toml
A src/fmt.rs
A src/main.rs
A  => .gitignore +1 -0
@@ 1,1 @@
/target

A  => Cargo.lock +681 -0
@@ 1,681 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "adler32"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"

[[package]]
name = "aes"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "884391ef1066acaa41e766ba8f596341b96e93ce34f9a43e7d24bf0a0eaf0561"
dependencies = [
 "aes-soft",
 "aesni",
 "cipher",
]

[[package]]
name = "aes-soft"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be14c7498ea50828a38d0e24a765ed2effe92a705885b57d029cd67d45744072"
dependencies = [
 "cipher",
 "opaque-debug",
]

[[package]]
name = "aesni"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea2e11f5e94c2f7d386164cc2aa1f97823fed6f259e486940a71c174dd01b0ce"
dependencies = [
 "cipher",
 "opaque-debug",
]

[[package]]
name = "aho-corasick"
version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
dependencies = [
 "memchr",
]

[[package]]
name = "arrayvec"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"

[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
 "hermit-abi",
 "libc",
 "winapi",
]

[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

[[package]]
name = "block-buffer"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
dependencies = [
 "generic-array",
]

[[package]]
name = "block-modes"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57a0e8073e8baa88212fb5823574c02ebccb395136ba9a164ab89379ec6072f0"
dependencies = [
 "block-padding",
 "cipher",
]

[[package]]
name = "block-padding"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d696c370c750c948ada61c69a0ee2cbbb9c50b1019ddb86d9317157a99c2cae"

[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"

[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
 "libc",
 "num-integer",
 "num-traits",
 "time",
 "winapi",
]

[[package]]
name = "cipher"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12f8e7987cbd042a63249497f41aed09f8e65add917ea6566effbc56578d6801"
dependencies = [
 "generic-array",
]

[[package]]
name = "clap"
version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d76c22c9b9b215eeb8d016ad3a90417bd13cb24cf8142756e6472445876cab7"
dependencies = [
 "atty",
 "bitflags",
 "clap_derive",
 "indexmap",
 "lazy_static",
 "os_str_bytes",
 "strsim",
 "termcolor",
 "textwrap",
]

[[package]]
name = "clap_derive"
version = "3.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fd1122e63869df2cb309f449da1ad54a7c6dfeb7c7e6ccd8e0825d9eb93bb72"
dependencies = [
 "heck",
 "proc-macro-error",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "cpufeatures"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469"
dependencies = [
 "libc",
]

[[package]]
name = "deflate"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f95bf05dffba6e6cce8dfbb30def788154949ccd9aed761b472119c21e01c70"
dependencies = [
 "adler32",
]

[[package]]
name = "digest"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
dependencies = [
 "generic-array",
]

[[package]]
name = "doc-comment"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"

[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"

[[package]]
name = "fax"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be28317220fbcf14cead021ae0a973a4433df68fd3c3a022bf8a4fb3bdc3ba8e"
dependencies = [
 "fax_derive",
]

[[package]]
name = "fax_derive"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c1d7ffc9f2dc8316348c75281a99c8fdc60c1ddf4f82a366d117bf1b74d5a39"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "generic-array"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803"
dependencies = [
 "typenum",
 "version_check",
]

[[package]]
name = "glob"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"

[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"

[[package]]
name = "heck"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"

[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
 "libc",
]

[[package]]
name = "indexmap"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223"
dependencies = [
 "autocfg",
 "hashbrown",
]

[[package]]
name = "inflate"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cdb29978cc5797bd8dcc8e5bf7de604891df2a8dc576973d71a281e916db2ff"
dependencies = [
 "adler32",
]

[[package]]
name = "itertools"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3"
dependencies = [
 "either",
]

[[package]]
name = "jpeg-decoder"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "229d53d58899083193af11e15917b5640cd40b29ff475a1fe4ef725deb02d0f2"

[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "libc"
version = "0.2.119"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4"

[[package]]
name = "log"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [
 "cfg-if",
]

[[package]]
name = "md5"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

[[package]]
name = "memchr"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"

[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
 "autocfg",
 "num-traits",
]

[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
 "autocfg",
]

[[package]]
name = "once_cell"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"

[[package]]
name = "opaque-debug"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"

[[package]]
name = "ordermap"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91409674c628d07a6b4b79cc877c6b63ba5ccbfbadddd77ca822f55069ed1bd4"

[[package]]
name = "os_str_bytes"
version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
dependencies = [
 "memchr",
]

[[package]]
name = "parser"
version = "0.1.0"
dependencies = [
 "chrono",
 "clap",
 "lazy_static",
 "pdf",
 "regex",
 "rust_decimal",
]

[[package]]
name = "pdf"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f48644b2f4e9c3f3468d7c31fa75e9b823dfb167c8581ec15e90a90c34430d9"
dependencies = [
 "aes",
 "block-modes",
 "byteorder",
 "chrono",
 "deflate",
 "fax",
 "glob",
 "inflate",
 "itertools",
 "jpeg-decoder",
 "log",
 "md5",
 "num-traits",
 "once_cell",
 "ordermap",
 "pdf_derive",
 "sha2",
 "snafu",
 "stringprep",
 "weezl",
]

[[package]]
name = "pdf_derive"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f4007262775d0798de87b15cbc64cf1aed5f7ee87eec847e297b69d8ed4b4f8"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
 "proc-macro-error-attr",
 "proc-macro2",
 "quote",
 "syn",
 "version_check",
]

[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
 "proc-macro2",
 "quote",
 "version_check",
]

[[package]]
name = "proc-macro2"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
dependencies = [
 "unicode-xid",
]

[[package]]
name = "quote"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "regex"
version = "1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-syntax",
]

[[package]]
name = "regex-syntax"
version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"

[[package]]
name = "rust_decimal"
version = "1.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d37baa70cf8662d2ba1c1868c5983dda16ef32b105cce41fb5c47e72936a90b3"
dependencies = [
 "arrayvec",
 "num-traits",
 "serde",
]

[[package]]
name = "serde"
version = "1.0.136"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"

[[package]]
name = "sha2"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800"
dependencies = [
 "block-buffer",
 "cfg-if",
 "cpufeatures",
 "digest",
 "opaque-debug",
]

[[package]]
name = "snafu"
version = "0.6.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eab12d3c261b2308b0d80c26fffb58d17eba81a4be97890101f416b478c79ca7"
dependencies = [
 "doc-comment",
 "snafu-derive",
]

[[package]]
name = "snafu-derive"
version = "0.6.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "stringprep"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ee348cb74b87454fff4b551cbf727025810a004f88aeacae7f85b87f4e9a1c1"
dependencies = [
 "unicode-bidi",
 "unicode-normalization",
]

[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"

[[package]]
name = "syn"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-xid",
]

[[package]]
name = "termcolor"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
dependencies = [
 "winapi-util",
]

[[package]]
name = "textwrap"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80"

[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
 "libc",
 "wasi",
 "winapi",
]

[[package]]
name = "tinyvec"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c1c1d5a42b6245520c249549ec267180beaffcc0615401ac8e31853d4b6d8d2"
dependencies = [
 "tinyvec_macros",
]

[[package]]
name = "tinyvec_macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"

[[package]]
name = "typenum"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"

[[package]]
name = "unicode-bidi"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f"

[[package]]
name = "unicode-normalization"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"
dependencies = [
 "tinyvec",
]

[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"

[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"

[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"

[[package]]
name = "weezl"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b77fdfd5a253be4ab714e4ffa3c49caf146b4de743e97510c0656cf90f1e8e"

[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
 "winapi-i686-pc-windows-gnu",
 "winapi-x86_64-pc-windows-gnu",
]

[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"

[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
 "winapi",
]

[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

A  => Cargo.toml +14 -0
@@ 1,14 @@
[package]
name = "parser"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
pdf = "0.7.2"
clap = { version = "3.1.1", features = ["derive"] }
rust_decimal = "1.22.0"
chrono = "0.4"
regex = "1"
lazy_static = "1.4.0"

A  => src/fmt.rs +14 -0
@@ 1,14 @@
use lazy_static::lazy_static;
use regex::Regex;
use rust_decimal::prelude::*;

pub fn parse_money(str: &str) -> Option<Decimal> {
    Decimal::from_str(&str.replace("$", "")).ok()
}

pub fn is_date(str: &str) -> bool {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\d{2}/\d{2}").unwrap();
    }
    RE.is_match(str)
}

A  => src/main.rs +194 -0
@@ 1,194 @@
use clap::{Parser, Subcommand};
use pdf::error::PdfError;
use pdf::content::Operation;
use pdf::file::File;
use pdf::font::Font;
use pdf::object::{MaybeRef, RcRef, Resolve, PageRc, Page};
use pdf::primitive::{Primitive, PdfString};
use std::collections::HashMap;
use std::iter::Iterator;
use std::result::Result;
use std::slice::Iter;

use rust_decimal::prelude::*;

mod fmt;

#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Args {
    #[clap(subcommand)]
    command: Command,
}

#[derive(Subcommand, Debug)]
enum Command {
    Inspect { path: String },
    Texts { path: String },
    Transactions { path: String },
}

struct TextObjectIter<'a> {
    fonts: &'a HashMap<String, RcRef<Font>>,
    current_font: Option<RcRef<Font>>,
    ops: Iter<'a, Operation>,
    buffer: String,
}

impl<'a> Iterator for TextObjectIter<'a> {
    type Item = String;

    // Logic inspired by: https://adventures.michaelfbryan.com/posts/parsing-pdfs-in-rust/
    fn next(&mut self) -> Option<Self::Item> {
        //println!("Current font: {:?}", &self.current_font);
        while let Some(Operation {operator, operands}) = self.ops.next() {
            match (operator.as_str(), operands.as_slice()) {
                ("BT", _) => {
                    self.buffer.clear();
                }
                ("Tf", [Primitive::Name(name), Primitive::Number(_size)]) => {
                    println!("Selecting font: {}", &name);
                    self.current_font = self.fonts.get(name).map(|rc| rc.clone());
                },
                ("Tj", [Primitive::String(text)]) => {
                    if let Err(err) = decode_text(&self.current_font, text, &mut self.buffer) {
                        println!("Error: {}", err);
                    }
                }
                ("T*", _) => {
                    self.buffer.push_str("\n");
                },
                ("ET", _) => {
                    // TODO: Probably a better way to handle the results
                    return Some(self.buffer.clone());
                }
                _ => continue,
            }
        }
        None
    }
}

fn decode_text(font: &Option<RcRef<Font>>, text: &PdfString, out: &mut String) -> pdf::error::Result<()> {
    match font {
        None => {
            //println!("Decoding using no font");
            if text.data.starts_with(&[0xFE, 0xFF]) {
                unimplemented!("Little-endian BOM");
            } else if text.data.starts_with(&[0xFF, 0xFE]) {
                unimplemented!("Big-endian BOM");
            } else if let Ok(text) = std::str::from_utf8(&text.data) {
                out.push_str(text);
                Ok(())
            } else {
                Err(PdfError::Other{msg: format!("Failed to decode into string: {:?}", text.data)})
            }
        },
        Some(font) => {
            // TODO: need to do this properly
            println!("Decoding with font: {}", &font.name);
            out.push_str(&text.as_str()?);
            Ok(())
        },
    }
}

struct Transaction {
    // TODO: how to parse "12/15" as a date? Need some way to more reliably get the year
    transaction_date: String,
    post_date: String,
    amount: Decimal,
    descriptor: String,
}

// TODO: use a more general error return type here
fn main() -> pdf::error::Result<()> {
    let args = Args::parse();
    match args.command {
        Command::Inspect { path } => inspect(File::open(path)?),
        Command::Texts { path } => show_texts(File::open(path)?),
        Command::Transactions { path } => show_transactions(File::open(path)?),
    }
}

fn foreach_text_object<'a, F>(file: File<Vec<u8>>, mut closure: F) -> pdf::error::Result<()>
    where F: FnMut(String) -> ()
{
    let pages = file.pages().collect::<pdf::error::Result<Vec<PageRc>>>()?;

    let mut fonts = HashMap::new();
    for resources_ref in pages.iter().filter_map(|page| page.resources.as_ref()) {
        match resources_ref {
            MaybeRef::Direct(resources) => {
                for (name, &font_ref) in resources.fonts.iter() {
                    let font = file.get(font_ref)?;
                    fonts.insert(name.clone(), font);
                }
            },
            MaybeRef::Indirect(_resources) => unimplemented!(),
        }
    }

    // There's probably a better way to write this.
    let mut operations = Vec::new();
    for contents in pages.iter().filter_map(|page| page.contents.as_ref()) {
        for op in &contents.operations {
            operations.push(op.clone());
        }
    }

    let texts = TextObjectIter{
        fonts: &fonts,
        ops: operations.iter(),
        current_font: None,
        buffer: String::new(),
    };
    for text in texts {
        closure(text);
    }
    Ok(())
}

fn collect(file: File<Vec<u8>>) -> pdf::error::Result<Vec<String>> {
    let mut texts = Vec::new();
    foreach_text_object(file, |text| texts.push(text.to_string()))?;
    Ok(texts)
}

fn inspect(file: File<Vec<u8>>) -> Result<(), PdfError> {
    for page in file.pages() {
        let page = page?;
        if let Some(contents) = &page.contents {
            for op in &contents.operations {
                println!("{}: {:?}", op.operator, op.operands);
            }
        }
    }
    Ok(())
}

fn show_texts(file: File<Vec<u8>>) -> Result<(), PdfError> {
    let texts = collect(file)?;
    for text in texts {
        println!("{}", text);
    }
    Ok(())
}

fn show_transactions(file: File<Vec<u8>>) -> Result<(), PdfError> {
    let texts = collect(file)?;
    for (i, text) in texts.iter().enumerate() {
        if let Some(amount) = fmt::parse_money(text) {
            let descriptor = texts.get(i-1).unwrap();
            let post_date = texts.get(i-2).unwrap();
            let transaction_date = texts.get(i-3).unwrap();
            if fmt::is_date(post_date) && fmt::is_date(transaction_date) {
                println!("{}\t{}\t{}\t{}", transaction_date, post_date, descriptor, amount);
            }
        }
    }
    Ok(())
}

// TODO: use fmt::parse_money() and others (need to add parse_date) to locate transactions in the
// table.