Compare commits

...

8 Commits

Author SHA1 Message Date
Avril 2404721418
Moved input lines on to stack if input is small. (merged from branch cli.)
2 weeks ago
Avril 9e5257507f
Remove unused req-deps for optional feature
2 weeks ago
Avril 216aa7abd1
Merged new cli options into this branch. Re-added pre-release & `io_uring` feature flag.
2 weeks ago
Avril 5293810b79
Added `--write-only` & `--no-consume` for when loading a chain is all that"s desired. Added 0 line output (default is still 1) for if saving/modifying a chain is all that"s desired.
2 weeks ago
Avril 0a7d530068
format: Added `io_uring::create_write_compressed()`: Spawns a background thread that compresses the written data to a file using the `io_uring` subsystem. Compression & IO operations are performed asynchonously.
2 months ago
Avril 066811444a
Added proper file format for save/load of chain. Added internal ZSTD compression of chain stream.
2 months ago
Avril c4fc2fde1d
Cli: Added basic chain save+load & dynamic num of output lines (1..)
2 months ago
Avril 8b241a41fb
Added TODO: Save+load chains.
3 years ago

667
Cargo.lock generated

@ -1,17 +1,190 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "addr2line"
version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
dependencies = [
"gimli",
]
[[package]]
name = "adler2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "anstream"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
[[package]]
name = "anstyle-parse"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
dependencies = [
"anstyle",
"once_cell",
"windows-sys 0.59.0",
]
[[package]]
name = "async-compression"
version = "0.4.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522"
dependencies = [
"futures-core",
"memchr",
"pin-project-lite",
"tokio",
"zstd",
"zstd-safe",
]
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "backtrace"
version = "0.3.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
dependencies = [
"addr2line",
"cfg-if 1.0.0",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
"windows-targets",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bytes"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
dependencies = [
"serde",
]
[[package]]
name = "cc"
version = "1.0.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17f6e324229dc011159fcc089755d1e2e216a90d43a7dea6853ca740b84f35e7"
dependencies = [
"jobserver",
"libc",
]
[[package]]
name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "4.5.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acebd8ad879283633b343856142139f2da2317c96b05b4dd6181c61e2480184"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ba32cbda51c7e1dfd49acc1457ba1a7dec5b64fe360e828acb13ca8dc9c2f9"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
[[package]]
name = "colorchoice"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "dtoa"
version = "0.4.6"
@ -30,6 +203,83 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d"
[[package]]
name = "futures"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
name = "futures-core"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
[[package]]
name = "futures-io"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
[[package]]
name = "futures-macro"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
[[package]]
name = "futures-task"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-util"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
name = "getopts"
version = "0.2.21"
@ -45,17 +295,41 @@ version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6"
dependencies = [
"cfg-if",
"cfg-if 0.1.10",
"libc",
"wasi",
"wasi 0.9.0+wasi-snapshot-preview1",
]
[[package]]
name = "gimli"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "half"
version = "1.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403"
[[package]]
name = "hashbrown"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hermit-abi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
[[package]]
name = "indexmap"
version = "1.6.0"
@ -66,6 +340,22 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "io-uring"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "595a0399f411a508feb2ec1e970a4a30c249351e30208960d58298de8660b0e5"
dependencies = [
"bitflags",
"libc",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itertools"
version = "0.9.0"
@ -75,11 +365,20 @@ dependencies = [
"either",
]
[[package]]
name = "jobserver"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
dependencies = [
"libc",
]
[[package]]
name = "libc"
version = "0.2.79"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "linked-hash-map"
@ -89,9 +388,21 @@ checksum = "8dd5a6d5999d9907cda8ed67bbd137d3af8085216c2ac62de5be860bd41f304a"
[[package]]
name = "markov"
version = "0.1.1"
version = "0.2.2+1"
dependencies = [
"async-compression",
"bytes",
"clap",
"futures",
"markov 1.1.0",
"num_cpus",
"os_pipe",
"serde",
"serde_cbor",
"smallvec",
"tokio",
"tokio-uring",
"zstd",
]
[[package]]
@ -109,6 +420,67 @@ dependencies = [
"serde_yaml",
]
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "miniz_oxide"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b"
dependencies = [
"adler2",
]
[[package]]
name = "mio"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
dependencies = [
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.52.0",
]
[[package]]
name = "num_cpus"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "object"
version = "0.36.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
[[package]]
name = "os_pipe"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ffd2b0a5634335b135d5728d84c5e0fd726954b87111f7506a61c502280d982"
dependencies = [
"libc",
"windows-sys 0.59.0",
]
[[package]]
name = "petgraph"
version = "0.5.1"
@ -119,6 +491,24 @@ dependencies = [
"indexmap",
]
[[package]]
name = "pin-project-lite"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkg-config"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
[[package]]
name = "ppv-lite86"
version = "0.2.9"
@ -127,18 +517,18 @@ checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20"
[[package]]
name = "proc-macro2"
version = "1.0.24"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
"unicode-xid",
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.7"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
@ -184,17 +574,36 @@ dependencies = [
"rand_core",
]
[[package]]
name = "rustc-demangle"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "serde"
version = "1.0.116"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96fe57af81d28386a513cbc6858332abc6117cfdb5999647c6444b8f43a370a5"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_cbor"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5"
dependencies = [
"half",
"serde",
]
[[package]]
name = "serde_derive"
version = "1.0.116"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f630a6370fd8e457873b4bd2ffdae75408bc291ba72be773772a4c2a065d9ae8"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
dependencies = [
"proc-macro2",
"quote",
@ -213,17 +622,97 @@ dependencies = [
"yaml-rust",
]
[[package]]
name = "slab"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
dependencies = [
"autocfg",
]
[[package]]
name = "smallvec"
version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd"
dependencies = [
"serde",
]
[[package]]
name = "socket2"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "socket2"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "1.0.42"
version = "2.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c51d92969d209b54a98397e1b91c8ae82d8c87a7bb87df0b29aa2ad81454228"
checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.43.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e"
dependencies = [
"backtrace",
"bytes",
"libc",
"mio",
"pin-project-lite",
"socket2 0.5.8",
"windows-sys 0.52.0",
]
[[package]]
name = "tokio-uring"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "748482e3e13584a34664a710168ad5068e8cb1d968aa4ffa887e83ca6dd27967"
dependencies = [
"bytes",
"futures-util",
"io-uring",
"libc",
"slab",
"socket2 0.4.10",
"tokio",
]
[[package]]
name = "unicode-ident"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
[[package]]
name = "unicode-width"
version = "0.1.8"
@ -231,10 +720,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]]
name = "unicode-xid"
version = "0.2.1"
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "wasi"
@ -242,6 +731,116 @@ version = "0.9.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "yaml-rust"
version = "0.4.4"
@ -250,3 +849,31 @@ checksum = "39f0c922f1a334134dc2f7a8b67dc5d25f0735263feec974345ff706bcf20b0d"
dependencies = [
"linked-hash-map",
]
[[package]]
name = "zstd"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.13+zstd.1.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa"
dependencies = [
"cc",
"pkg-config",
]

@ -1,7 +1,7 @@
[package]
name = "markov"
version = "0.1.2"
description = "Generate string of text from Markov chain fed by stdin"
version = "0.2.2+1"
description = "Generate string of text from Markov chain fed by stdin or file(s)"
authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018"
@ -9,8 +9,35 @@ edition = "2018"
[profile.release]
opt-level = 3
lto = "fat"
lto = true
#"fat"
codegen-units = 1
strip = true
[profile.symbols]
inherits = "release"
lto = "fat"
strip = false
[features]
default = ["threads", "io_uring"]
threads = ["zstd/zstdmt", "dep:num_cpus"]
io_uring = ["dep:tokio-uring", "dep:async-compression", "dep:futures", "dep:tokio", "dep:os_pipe"]
unstable = ["smallvec/specialization", "smallvec/may_dangle"]
[dependencies]
chain = {package = "markov", version = "1.1.0"}
async-compression = { version = "0.4.18", features = ["tokio", "zstd", "zstdmt"], optional = true }
bytes = { version = "1.10.0", features = ["serde"] }
chain = {package = "markov", version = "1.1.0" }
clap = { version = "4.5.29", features = ["derive"] }
futures = { version = "0.3.31", default-features = false, optional = true, features = ["alloc", "async-await", "std"] }
num_cpus = { version = "1.16.0", optional = true }
os_pipe = { version = "1.2.1", optional = true }
serde = { version = "1.0.217", features = ["derive"] }
serde_cbor = { version = "0.11.2", features = ["alloc"] }
smallvec = { version = "1.14.0", features = ["union", "const_generics", "const_new", "serde", "write"] }
tokio = { version = "1.43.0", features = ["io-util"], optional = true }
tokio-uring = { version = "0.5.0", optional = true, features = ["bytes"] }
zstd = { version = "0.13.2", features = [] }

@ -0,0 +1 @@
Add ability to save+load chains, append to chains, etc.

@ -0,0 +1,688 @@
//! Handles the chain load/save format
use super::*;
use std::{
io::{
self,
Read, Write, BufRead,
},
fmt,
};
use bytes::{
Buf, BufMut, Bytes,
};
use zstd::{
Encoder, Decoder,
};
/// The chain that can be saved / loaded.
pub type Chain<T = String> = crate::Chain<T>;
/// The version of the encoded format stream
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)]
#[repr(packed)]
pub struct Version(pub u8,pub u8,pub u8,pub u8);
impl fmt::Display for Version
{
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
write!(f, "{}.{}.{}", self.0,self.1, self.2)?;
if self.3 != 0 {
write!(f, "r{}", self.3)
} else {
Ok(())
}
}
}
impl Version {
/// Current save version
pub const CURRENT: Self = Version(0,0,0,0);
/// Current value as a native integer
const CURRENT_VALUE: u32 = Self::CURRENT.as_native();
pub const fn as_native(&self) -> u32 {
u32::from_be_bytes([self.0, self.1, self.2, self.3])
}
#[inline]
pub const fn from_native(value: u32) -> Self {
let [a,b,c,d] = u32::to_be_bytes(value);
Self(a,b,c,d)
}
}
impl Default for Version
{
#[inline]
fn default() -> Self
{
Self::CURRENT
}
}
pub unsafe trait AutoBinaryFormat: Sized {
#[inline]
fn as_raw_for_encode(&self) -> *const [u8] {
let ptr = self as *const Self;
std::ptr::slice_from_raw_parts(ptr as *const u8, std::mem::size_of::<Self>())
}
#[inline]
fn as_raw_for_decode(&mut self) -> *mut [u8] {
let ptr = self as *mut Self;
std::ptr::slice_from_raw_parts_mut(ptr as *mut u8, std::mem::size_of::<Self>())
}
fn raw_format_read_size(&mut self) -> usize {
std::mem::size_of::<Self>()
}
fn raw_format_write_size(&self) -> usize {
std::mem::size_of::<Self>()
}
}
unsafe impl<T, const N: usize> AutoBinaryFormat for [T; N] {}
pub trait BinaryFormat {
fn read_from<S: ?Sized>(&mut self, stream: &mut S) -> io::Result<usize>
where S: io::Read;
fn write_to<S: ?Sized>(&self, stream: &mut S) -> io::Result<usize>
where S: io::Write;
fn binary_format_read_size(&mut self) -> Option<usize>;
fn binary_format_write_size(&self) -> usize;
}
impl<T> BinaryFormat for T
where T: AutoBinaryFormat
{
#[inline(always)]
fn read_from<S: ?Sized>(&mut self, stream: &mut S) -> io::Result<usize>
where S: io::Read {
let ptr = self.as_raw_for_decode();
// SAFETY: The read data is guaranteed to be valid here.
Ok(unsafe {
stream.read_exact(&mut *ptr)?;
(*ptr).len()
})
}
#[inline(always)]
fn write_to<S: ?Sized>(&self, stream: &mut S) -> io::Result<usize>
where S: io::Write {
let ptr = self.as_raw_for_encode();
// SAFETY: The written data is guaranteed to be valid here.
Ok(unsafe {
stream.write_all(&*ptr)?;
(*ptr).len()
})
}
#[inline]
fn binary_format_read_size(&mut self) -> Option<usize> {
Some(self.raw_format_read_size())
}
#[inline]
fn binary_format_write_size(&self) -> usize {
self.raw_format_write_size()
}
}
impl<T> BinaryFormat for [T]
where T: BinaryFormat
{
#[inline]
fn read_from<S: ?Sized>(&mut self, stream: &mut S) -> io::Result<usize>
where S: io::Read {
let mut sz = 0;
for i in self.iter_mut() {
sz += i.read_from(stream)?;
}
Ok(sz)
}
#[inline]
fn write_to<S: ?Sized>(&self, stream: &mut S) -> io::Result<usize>
where S: io::Write {
let mut sz =0;
for i in self.iter() {
sz += i.write_to(stream)?;
}
Ok(sz)
}
#[inline]
fn binary_format_read_size(&mut self) -> Option<usize> {
self.iter_mut().map(|x| x.binary_format_read_size()).try_fold(0, |x, y| {
match (x, y) {
(x, Some(y)) => Some(x + y),
_ => None,
}
})
}
#[inline]
fn binary_format_write_size(&self) -> usize {
self.iter().map(|x| x.binary_format_write_size()).sum()
}
}
impl BinaryFormat for Version {
#[inline]
fn read_from<S: ?Sized>(&mut self, stream: &mut S) -> io::Result<usize>
where S: io::Read {
let mut vi = [0u8; 4];
stream.read_exact(&mut vi[..])?;
Ok(4)
}
#[inline]
fn write_to<S: ?Sized>(&self, stream: &mut S) -> io::Result<usize>
where S: io::Write {
let vi = [self.0,self.1,self.2,self.3];
stream.write_all(&vi[..])?;
Ok(4)
}
#[inline]
fn binary_format_read_size(&mut self) -> Option<usize> {
Some(std::mem::size_of::<u8>() * 4)
}
#[inline]
fn binary_format_write_size(&self) -> usize {
std::mem::size_of::<u8>() * 4
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy, Default)]
#[repr(u32)]
pub enum Compressed {
#[default]
No = 0,
Zstd = 1,
}
impl Compressed {
#[inline]
const fn to_int(&self) -> u32 {
*self as u32
}
#[inline]
fn try_from_int(val: u32) -> Option<Self> {
match val {
// SAFETY: These variants are known
0..=1 => Some(unsafe {
std::mem::transmute(val)
}),
_ => None,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct FormatMetadata {
pub version: Version,
pub compressed: Compressed,
pub chain_size: usize, // NOTE: Unused
pub checksum: u64, // NOTE: Unused
}
impl FormatMetadata {
const MAGIC_NUM: &[u8; 8] = b"MARKOV\x00\xcf";
}
impl BinaryFormat for FormatMetadata
{
fn write_to<S: ?Sized>(&self, mut stream: &mut S) -> io::Result<usize>
where S: io::Write {
let sz = self.version.write_to(&mut stream)?;
let mut obuf = [0u8; std::mem::size_of::<u32>() +std::mem::size_of::<u64>() + std::mem::size_of::<u64>()];
{
let mut obuf = &mut obuf[..];
use std::convert::TryInto;
obuf.put_u32(self.compressed.to_int());
obuf.put_u64(self.chain_size.try_into().map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "Chain size attribute out-of-bounds for format size"))?);
obuf.put_u64(self.checksum);
}
stream.write_all(&obuf[..])?;
Ok(sz + obuf.len())
}
fn read_from<S: ?Sized>(&mut self, mut stream: &mut S) -> io::Result<usize>
where S: io::Read {
let sz = self.version.read_from(&mut stream)?;
if self.version > Version::CURRENT {
return Err(io::Error::new(io::ErrorKind::Unsupported, format!("Unknown format version {}", self.version)));
}
let mut ibuf = [0u8; std::mem::size_of::<u32>() +std::mem::size_of::<u64>() + std::mem::size_of::<u64>()];
stream.read_exact(&mut ibuf[..])?;
{
let mut ibuf = &ibuf[..];
use std::convert::TryInto;
self.compressed = Compressed::try_from_int(ibuf.get_u32()).ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Invalid compression attribute"))?;
self.chain_size = ibuf.get_u64().try_into().map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Chain size attribute out-of-bounds for native size"))?;
self.checksum = ibuf.get_u64();
}
Ok(sz + ibuf.len())
}
#[inline]
fn binary_format_read_size(&mut self) -> Option<usize> {
let szm = self.version.binary_format_read_size()?;
Some(szm + std::mem::size_of::<u32>()
+ std::mem::size_of::<u64>()
+ std::mem::size_of::<u64>())
}
#[inline(always)]
fn binary_format_write_size(&self) -> usize {
self.version.binary_format_write_size()
+ std::mem::size_of::<u32>()
+ std::mem::size_of::<u64>()
+ std::mem::size_of::<u64>()
}
}
/// Load a chain from a stream
#[inline]
pub fn load_chain_from_sync<S>(stream: &mut S) -> io::Result<Chain<String>>
where S: io::Read + ?Sized
{
let mut stream = io::BufReader::new(stream);
{
let mut magic = FormatMetadata::MAGIC_NUM.clone();
stream.read_exact(&mut magic[..])?;
if &magic != FormatMetadata::MAGIC_NUM {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid file header tag magic number"));
}
}
let metadata = {
let mut metadata = FormatMetadata::default();
metadata.read_from(&mut stream)?;
metadata
};
match metadata.version {
Version::CURRENT => {
let read = |read: &mut (dyn io::Read)| serde_cbor::from_reader(read).expect("Failed to read chain from input stream"); // TODO: Error type
match metadata.compressed {
Compressed::No =>
Ok(read(&mut stream)),
Compressed::Zstd => {
let mut stream = zstd::Decoder::with_buffer(stream)?;
//#[cfg(feature="threads")]
//stream.multithread(num_cpus::get() as i32);
//NOTE: Not required here: //stream.finish()?;
Ok(read(&mut stream))
},
}
},
unsup => {
return Err(io::Error::new(io::ErrorKind::Unsupported, format!("Unsupported payload version {}", unsup)));
},
}
}
/// Save a chain to a stream with optional compression.
#[inline]
pub fn save_chain_to_sync<S>(stream: &mut S, chain: &Chain<String>, compress: bool) -> io::Result<()>
where S: io::Write + ?Sized
{
let mut stream = io::BufWriter::new(stream);
let metadata = FormatMetadata {
compressed: compress
.then_some(Compressed::Zstd)
.unwrap_or(Compressed::No),
..Default::default()
};
stream.write_all(FormatMetadata::MAGIC_NUM)?;
metadata.write_to(&mut stream)?;
let write = |stream: &mut (dyn io::Write)| serde_cbor::to_writer(stream, chain).expect("Failed to write chain to output stream"); // TODO: Error type
let mut stream = match metadata.compressed {
Compressed::No => {
write(&mut stream);
stream
},
Compressed::Zstd => {
let mut stream = zstd::Encoder::new(stream, 22)?;
#[cfg(feature="threads")]
stream.multithread(num_cpus::get() as u32)?;
write(&mut stream);
// XXX: Should we flush after write here..?
// NOTE: Required here.
stream.finish()?
},
};
stream.flush()
}
#[cfg(feature="io_uring")]
#[deprecated = "Runs through compression *twice* (one on the sync side, even) see below. Must be re-written to properly use `io_uring::*`"]
pub fn save_chain_to_file(output_file: std::fs::File, chain: &Chain<String>, compress: bool) -> io::Result<()>
{
compile_error!("TODO: Make `save_to_chain() that takes callback for write stream to create compression stream & write to it, since io_uring backing takes care of that, but we want the uncompressed metadata writing to be the same.");
// let output_file = fs::OpenOptions::new()
// .create_new(! force)
// .create(true)
// .truncate(force)
// .write(true)
// .open(path);
let (mut stream, bg) = io_uring::create_write_compressed(output_file)?;
todo!("^^^ Ehh... No, this isn't right... `stream` is the *encoder* stream, we still need to do the metadata thing done in `_sync`()... XXX: Can we factor that out to something that will take a callback maybe??? Idk... im tired...");
save_chain_to_sync(&mut stream, chain, compress)?;
stream.flush()?;
drop(stream); // NOTE: Drop the sending side so the recv side can know there is no more data. **MUST** be done before the `.join()`.
bg.join().expect("Fatal error in background I/O thread")?;
Ok(())
}
#[cfg(feature="io_uring")]
mod io_uring {
use super::*;
use std::{
path::Path,
};
use futures::{
prelude::*,
io::{
//AsyncRead,
//AsyncBufRead,
//AsyncWrite,
AllowStdIo,
},
};
use tokio::{
io::{
AsyncRead,
AsyncBufRead,
AsyncWrite,
ReadHalf, WriteHalf,
SimplexStream,
simplex,
AsyncReadExt,
AsyncWriteExt,
},
};
use tokio_uring::{
fs,
buf,
};
use async_compression::{
Level,
zstd::CParameter,
tokio::{
bufread::{
ZstdDecoder,
ZstdEncoder,
},
write as zstd_write,
},
};
// Add `tokio_uring` version of `save_chain_to_file()`/`load_chain_from_file()` that spawns the `tokio_uring` runtime internally to queue reads/writes from/to a file. (Maybe default-feature-gate this?)
/// Creates an future that, when executed, reads all data from `reader` via `io_uring`, into generic byte sink `writer`.
///
/// # Task dispatch
/// This function *creates* the async function, it should be executed via `spawn()` *inside* a `tokio_uring` context.
///
/// # Returns
/// The future polls to return an `io::Result<usize>` of the number of bytes read from `reader` and written to `writer`.
#[inline]
#[must_use]
fn create_read_pump<'f, B>(reader: &'f fs::File, writer: B) -> impl Future<Output = std::io::Result<usize>> + use<'f, B>
where B: AsyncWrite
{
async move {
let mut pos = 0;
let mut buf = Vec::with_capacity(4096);
futures::pin_mut!(writer);
loop {
let (bytes, nbuf) = reader.read_at(buf, pos as u64).await;
match bytes? {
0 => break,
bytes => {
// Push `&nbuf[..bytes]` to pump.
writer.write_all(&nbuf[..bytes]).await?;
pos += bytes;
buf = nbuf;
},
}
}
Ok::<_, std::io::Error>(pos)
}//).map(|_| ())
}
/// Creates an future that, when executed, reads all data from generic byte source `reader` and writes them into `writer` through `io_uring`.
///
/// # Task dispatch
/// This function *creates* the async function, it should be executed via `spawn()` *inside* a `tokio_uring` context.
///
/// # Returns
/// The future polls to return an `io::Result<usize>` of the number of bytes read from `reader` and written to `writer`.
fn create_write_pump<'f, B>(reader: B, writer: &'f fs::File) -> impl Future<Output = std::io::Result<usize>> + use<'f, B>
where B: AsyncRead
{
async move {
let mut pos =0;
let mut buf = vec![0u8; 4096];
futures::pin_mut!(reader);
loop {
let sz = reader.read(&mut buf[..]).await?;
if sz == 0 {
break;
}
buf.truncate(sz);
let (res, nbuf) = writer.write_all_at(buf, pos as u64).await;
res?;
pos += sz;
buf = nbuf;
}
Ok::<usize, std::io::Error>(pos)
}
}
/// Spawns a task that reads all bytes from `reader` via `io_uring` and writes them into generic byte sink `writer`.
/// Returns a handle to the async task that completes when the task has completed.
///
/// # Runtime
/// The file is closed after the operation completes, ownership is transfered into the background task.
///
/// __NOTE__: **Must** be called from within an active `tokio_uring` runtime.
fn spawn_read_pump<B>(reader: fs::File, writer: B) -> impl Future<Output = std::io::Result<usize>> + Unpin + 'static
where B: AsyncWrite + Send + 'static
{
tokio_uring::spawn(async move {
let sz = create_read_pump(&reader, writer).await?;
let _ = reader.sync_data().await;
reader.close().await.map(move |_| sz)
}).map(|handle| handle.expect("Background reading task panic"))
}
/// Spawns a task that reads all bytes from generic byte source `reader` and writes them into `writer` via `io_uring`.
/// Returns a handle to the async task that completes when the task has completed.
///
/// # Runtime
/// The file is closed after the operation completes, ownership is transfered into the background task.
///
/// __NOTE__: **Must** be called from within an active `tokio_uring` runtime.
fn spawn_write_pump<B>(reader: B, writer: fs::File) -> impl Future<Output = std::io::Result<usize>> + Unpin + 'static
where B: AsyncRead + Send + 'static
{
tokio_uring::spawn(async move {
let sz = create_write_pump(reader, &writer).await?;
let _ = writer.sync_data().await;
writer.close().await.map(move |_| sz)
}).map(|handle| handle.expect("Background writing task panic"))
}
/// Create a wrapper over `output_stream` in which bytes written are compressed via `zstd`.
fn create_write_encoder<S>(output_stream: S) -> zstd_write::ZstdEncoder<S>
where S: AsyncWrite
{
use zstd_write::*;
if cfg!(feature="threads") {
ZstdEncoder::with_quality_and_params(output_stream, Level::Precise(22), &[CParameter::nb_workers(num_cpus::get() as u32)])
} else {
ZstdEncoder::with_quality(output_stream, Level::Precise(22))
}
}
/// Creates a wrapper over `output_stream` in which bytes written to are **de**compressed via `zstd`.
fn create_write_decoder<S>(output_stream: S) -> zstd_write::ZstdDecoder<S>
where S: AsyncWrite
{
use zstd_write::*;
ZstdDecoder::new(output_stream)
}
/// Create a wrapper over `output_stream` in which bytes read are compressed via `zstd`.
fn create_read_encoder<S>(output_stream: S) -> ZstdEncoder<S>
where S: AsyncBufRead
{
if cfg!(feature="threads") {
ZstdEncoder::with_quality_and_params(output_stream, Level::Precise(22), &[CParameter::nb_workers(num_cpus::get() as u32)])
} else {
ZstdEncoder::with_quality(output_stream, Level::Precise(22))
}
}
/// Creates a wrapper over `output_stream` in which bytes read from are **de**compressed via `zstd`.
fn create_read_decoder<S>(output_stream: S) -> ZstdDecoder<S>
where S: AsyncBufRead
{
ZstdDecoder::new(output_stream)
}
pub fn pipe_async_uring() -> std::io::Result<(fs::File, fs::File)>
{
use std::os::unix::io::*;
let (rx, tx) = os_pipe::pipe()?;
let tx = unsafe {
std::fs::File::from_raw_fd(tx.into_raw_fd())
};
let rx = unsafe {
std::fs::File::from_raw_fd(rx.into_raw_fd())
};
Ok((fs::File::from_std(rx), fs::File::from_std(tx)))
}
/// Create a pipe *reader* that decompresses the bytes read from `input_file` on a backing task.
///
/// Must be called within a `tokio_uring` context.
///
/// # Returns
/// A tuple of:
/// - Backing task join-handle. It will complete when all bytes have been copied (__XXX__: and the returned read stream has been closed? Does it?)
/// - Read end of async pipe. When all data has been read, it should be closed and then the handle should be awaited to ensure the data is flushed.
fn pipe_to_decoder(input_file: fs::File) -> (impl Future<Output = std::io::Result<usize>> + Unpin + Send + 'static, ReadHalf<SimplexStream>)
{
let (rx, tx) = simplex(4096 << 1);
let tx = create_write_decoder(tx);
let pump = spawn_read_pump(input_file, tx);
(pump, rx)
}
/// Create a pipe *writer* that compresses the bytes written into `output_file` on a backing task.
///
/// Must be called within a `tokio_uring` context.
///
/// # Returns
/// A tuple of:
/// - Backing task join-handle. It will complete when all bytes have been copied and the returned write stream has been closed.
/// - Write end of async pipe. When all data has been written, it should be closed and then the handle should be awaited to ensure the data is flushed.
fn pipe_to_encoder(output_file: fs::File) -> (impl Future<Output = std::io::Result<usize>> + Unpin + Send + 'static, WriteHalf<SimplexStream>)
{
let (rx, tx) = simplex(4096 << 1);
let rx = create_read_encoder(tokio::io::BufReader::new(rx));
let pump = spawn_write_pump(rx, output_file);
(pump, tx)
}
/// Write `data` to file `output` using `io_uring` (overwriting if `force` is `true`.)
///
/// # Synchonicity
/// This runs on a background thread, the encoding of the object bytes into the stream can be done on the current thread, the compression and writing to the file is done on a backing thread.
///
/// # Returns
/// A synchonous writer to write the data to, and a `JoinHandle` to the backing thread that completes to the number of bytes written to the file.
pub fn create_write_compressed(output_file: std::fs::File) -> std::io::Result<(os_pipe::PipeWriter, std::thread::JoinHandle<std::io::Result<usize>>)>
{
let (rx, tx) = os_pipe::pipe()?;
let b = std::thread::spawn(move || {
tokio_uring::start(async move {
// let output_file = fs::OpenOptions::new()
// .create_new(! force)
// .create(true)
// .truncate(force)
// .write(true)
// .open(output).await?;
let output_file = fs::File::from_std(output_file);
let (bg, tx) = pipe_to_encoder(output_file);
let rx = fs::File::from_std(unsafe {
use std::os::unix::io::*;
std::fs::File::from_raw_fd(rx.into_raw_fd())
});
let bgt = spawn_read_pump(rx, tx);
// Await the two spawned tasks together.
let (sz, szf) = futures::future::try_join(bgt, bg).await?;
if sz != szf {
//XXX: Should we expect these to be the same?
panic!("Invalid size transfer! {} bytes sent to backing, {} bytes sent to file.", sz, szf);
}
Ok::<_, std::io::Error>(szf)
})
});
Ok((tx, b))
}
//TODO: create_read_compressed()
}

@ -1,13 +1,51 @@
//#[macro_use] extern crate serde;
use smallvec::SmallVec;
use clap::{
Parser, Subcommand, ValueEnum,
};
use chain::{
Chain,
};
use std::{
num::NonZeroUsize,
path::{
PathBuf, Path,
},
io::{
BufRead,
self,
},
};
#[derive(Debug, Parser)]
#[command(name = env!("CARGO_PKG_NAME"), version, about = env!("CARGO_PKG_DESCRIPTION"), long_about = None)]
pub struct Cli {
/// Save the chain to the provided output file name after appending to it.
#[arg(short, long)]
save: Option<PathBuf>,
/// Load the chain from the provided output file name before appending to it.
#[arg(short, long)]
load: Option<PathBuf>, // TODO: Group with `save` & add `-F/--file` opt for save & load from same file.
/// Force over-writes of files, and force output of binary data to TTY.
#[arg(short, long)]
force: bool,
/// Do not read into chain from `stdin` if there is a loaded chain that is not empty.
#[arg(short, long)]
no_consume: bool,
/// Do not read into chain from `stdin` ever.
#[arg(short, long="write")]
write_only: bool, // XXX: Should we group this with `no_consume`?
/// The number of lines to output from the chain. Default is 1.
///
/// If 0, generation from chain is skipped.
#[arg(default_value_t = 1)]
lines: usize,
}
fn buffered_read_all_lines<T: BufRead+?Sized, F: FnMut(&str) -> io::Result<()>>(input: &mut T, mut then: F) -> io::Result<usize>
{
let mut buffer = String::new();
@ -23,27 +61,82 @@ fn buffered_read_all_lines<T: BufRead+?Sized, F: FnMut(&str) -> io::Result<()>>(
Ok(total)
}
#[inline]
fn parse_cli() -> Cli {
use clap::Parser;
Cli::parse()
}
mod format;
fn load_chain<S>(stream: &mut S) -> io::Result<Chain<String>>
where S: io::Read + ?Sized
{
format::load_chain_from_sync(stream)
}
fn save_chain<S>(stream: &mut S, chain: &Chain<String>) -> io::Result<()>
where S: io::Write + ?Sized
{
format::save_chain_to_sync(stream, chain, true) //TODO: Change compression to be off for small chains...? We will need to store the chain size info somewhere else.
}
fn create_chain(cli: &Cli) -> Chain<String>
{
if let Some(load) = &cli.load {
let mut input = std::fs::OpenOptions::new()
.read(true)
.open(&load).expect("Failed to open chain load file");
load_chain(&mut input).expect("Failed to load chain from file")
} else {
Chain::new()
}
}
fn complete_chain(cli: &Cli, chain: Chain<String>) -> io::Result<()>
{
if let Some(save) = &cli.save {
let mut output = std::fs::OpenOptions::new()
.create_new(! cli.force)
.create(cli.force)
.write(true)
.truncate(cli.force)
.open(&save).expect("Failed to open chain save file");
save_chain(&mut output, &chain).expect("Failed to save chain to file") // TODO: Error type
}
Ok(())
}
fn main() {
let cli = parse_cli();
let stdin = io::stdin();
let mut stdin = stdin.lock();
let mut chain = Chain::new();
let mut chain = create_chain(&cli);
buffered_read_all_lines(&mut stdin, |string| {
chain.feed(&string.split_whitespace()
.filter(|word| !word.is_empty())
.map(|s| s.to_owned()).collect::<Vec<_>>());
Ok(())
}).expect("Failed to read from stdin");
if !(cli.write_only || (cli.no_consume && ! chain.is_empty())) {
buffered_read_all_lines(&mut stdin, |string| {
chain.feed(&string.split_whitespace()
.filter(|word| !word.is_empty())
.map(|s| s.to_owned()).collect::<SmallVec<[_; 16]>>());
Ok(())
}).expect("Failed to read from stdin");
}
if !chain.is_empty() {
if let Some(num) = std::env::args().skip(1).next() {
let sz: usize = num.parse().expect("Cannot parse number of tokens to generate");
for string in chain.str_iter_for(sz) {
println!("{}", string);
}
} else {
println!("{}", chain.generate_str());
}
match cli.lines {
0 => (),
1 => println!("{}", chain.generate_str()),
lines => {
for string in chain.str_iter_for(lines) {
println!("{}", string);
}
},
};
}
complete_chain(&cli, chain).unwrap();
}

Loading…
Cancel
Save