From 19a3943012308a3619ee1ed29714a9ae45362a39 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 05:28:13 +0100 Subject: [PATCH 01/50] working example --- Cargo.lock | 1446 +++++++++++++++++++++++++++++++++++++++++++++++---- Cargo.toml | 5 +- src/main.rs | 100 ++-- 3 files changed, 1411 insertions(+), 140 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3d3df23..3308926 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,17 +1,152 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86" +dependencies = [ + "memchr", +] + +[[package]] +name = "arc-swap" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d25d88fd6b8041580a654f9d0c581a047baee2b3efee13275f2fc392fc75034" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "autocfg" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" + [[package]] name = "autocfg" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "base64" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff" + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "block-buffer" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" +dependencies = [ + "block-padding", + "byte-tools", + "byteorder", + "generic-array 0.12.3", +] + +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array 0.14.4", +] + +[[package]] +name = "block-padding" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" +dependencies = [ + "byte-tools", +] + +[[package]] +name = "buf_redux" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b953a6887648bb07a535631f2bc00fbdb2a2216f135552cb3f534ed136b9c07f" +dependencies = [ + "memchr", + "safemem", +] + +[[package]] +name = "byte-tools" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" + +[[package]] +name = "byteorder" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" + +[[package]] +name = "bytes" +version = "0.5.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38" + [[package]] name = "cfg-if" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +[[package]] +name = "cloudabi" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +dependencies = [ + "bitflags", +] + +[[package]] +name = "cpuid-bool" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634" + +[[package]] +name = "digest" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" +dependencies = [ + "generic-array 0.12.3", +] + +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array 0.14.4", +] + [[package]] name = "dtoa" version = "0.4.6" @@ -24,6 +159,25 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "env_logger" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "fake-simd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" + [[package]] name = "fixedbitset" version = "0.2.0" @@ -31,216 +185,1296 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" [[package]] -name = "getopts" -version = "0.2.21" +name = "fnv" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + +[[package]] +name = "fuchsia-zircon" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" dependencies = [ - "unicode-width", + "bitflags", + "fuchsia-zircon-sys", ] [[package]] -name = "getrandom" -version = "0.1.15" +name = "fuchsia-zircon-sys" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6" +checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" + +[[package]] +name = "futures" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d8e3078b7b2a8a671cb7a3d17b4760e4181ea243227776ba83fd043b4ca034e" dependencies = [ - "cfg-if", - "libc", - "wasi", + "futures-channel", + 
"futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", ] [[package]] -name = "hashbrown" -version = "0.9.1" +name = "futures-channel" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +checksum = "a7a4d35f7401e948629c9c3d6638fb9bf94e0b2121e96c3b428cc4e631f3eb74" +dependencies = [ + "futures-core", + "futures-sink", +] [[package]] -name = "indexmap" -version = "1.6.0" +name = "futures-core" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55e2e4c765aa53a0424761bf9f41aa7a6ac1efa87238f59560640e27fca028f2" +checksum = "d674eaa0056896d5ada519900dbf97ead2e46a7b6621e8160d79e2f2e1e2784b" + +[[package]] +name = "futures-io" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc94b64bb39543b4e432f1790b6bf18e3ee3b74653c5449f63310e9a74b123c" + +[[package]] +name = "futures-sink" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8764258ed64ebc5d9ed185cf86a95db5cac810269c5d20ececb32e0088abbd" + +[[package]] +name = "futures-task" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dd26820a9f3637f1302da8bceba3ff33adbe53464b54ca24d4e2d4f1db30f94" dependencies = [ - "autocfg", - "hashbrown", + "once_cell", ] [[package]] -name = "itertools" -version = "0.9.0" +name = "futures-util" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +checksum = "8a894a0acddba51a2d49a6f4263b1e64b8c579ece8af50fa86503d52cd1eea34" dependencies = [ - "either", + "futures-core", + "futures-sink", + "futures-task", + "pin-project", + "pin-utils", + "slab", ] [[package]] -name = "libc" -version = "0.2.79" +name = "generic-array" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743" +checksum = "c68f0274ae0e023facc3c97b2e00f076be70e254bc851d972503b328db79b2ec" +dependencies = [ + "typenum", +] [[package]] -name = "linked-hash-map" -version = "0.5.3" +name = "generic-array" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8dd5a6d5999d9907cda8ed67bbd137d3af8085216c2ac62de5be860bd41f304a" +checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" +dependencies = [ + "typenum", + "version_check", +] [[package]] -name = "markov" -version = "0.1.1" +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" dependencies = [ - "markov 1.1.0", + "unicode-width", ] [[package]] -name = "markov" -version = "1.1.0" +name = "getrandom" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc6ad68e26d51a9558f65e93b9795c9422630d0932717a3235668bb9ab71e3fd" +checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6" dependencies = [ - "getopts", - "itertools", - "petgraph", - "rand", - "serde", - "serde_derive", - "serde_yaml", + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", ] [[package]] -name = "petgraph" -version = "0.5.1" +name = "h2" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" +checksum = "993f9e0baeed60001cf565546b0d3dbe6a6ad23f2bd31644a133c641eccf6d53" dependencies = [ - "fixedbitset", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", ] [[package]] -name = "ppv-lite86" -version = "0.2.9" +name = "hashbrown" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20" +checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" [[package]] -name = "proc-macro2" -version = "1.0.24" +name = "headers" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +checksum = "ed18eb2459bf1a09ad2d6b1547840c3e5e62882fa09b9a6a20b1de8e3228848f" dependencies = [ - "unicode-xid", + "base64", + "bitflags", + "bytes", + "headers-core", + "http", + "mime", + "sha-1 0.8.2", + "time", ] [[package]] -name = "quote" -version = "1.0.7" +name = "headers-core" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" dependencies = [ - "proc-macro2", + "http", ] [[package]] -name = "rand" -version = "0.7.3" +name = "hermit-abi" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8" dependencies = [ - "getrandom", "libc", - "rand_chacha", - "rand_core", - "rand_hc", ] [[package]] -name = "rand_chacha" -version = "0.2.2" +name = "http" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +checksum = "28d569972648b2c512421b5f2a405ad6ac9666547189d0c5477a3f200f3e02f9" dependencies = [ - "ppv-lite86", - "rand_core", + "bytes", + "fnv", + "itoa", ] [[package]] -name = "rand_core" -version = "0.5.1" +name = "http-body" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +checksum = "13d5ff830006f7646652e057693569bfe0d51760c0085a071769d142a205111b" dependencies = [ - "getrandom", + "bytes", + "http", ] [[package]] -name = "rand_hc" +name = "httparse" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd179ae861f0c2e53da70d892f5f3029f9594be0c41dc5269cd371691b1dc2f9" + +[[package]] +name = "httpdate" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "494b4d60369511e7dea41cf646832512a94e542f68bb9c49e54518e0f468eb47" + +[[package]] +name = "humantime" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" +dependencies = [ + "quick-error", +] + +[[package]] +name = "hyper" +version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f3afcfae8af5ad0576a31e768415edb627824129e8e5a29b8bfccb2f234e835" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", 
+ "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "idna" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +checksum = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9" dependencies = [ - "rand_core", + "matches", + "unicode-bidi", + "unicode-normalization", ] [[package]] -name = "serde" -version = "1.0.116" +name = "indexmap" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96fe57af81d28386a513cbc6858332abc6117cfdb5999647c6444b8f43a370a5" +checksum = "55e2e4c765aa53a0424761bf9f41aa7a6ac1efa87238f59560640e27fca028f2" +dependencies = [ + "autocfg 1.0.1", + "hashbrown", +] [[package]] -name = "serde_derive" -version = "1.0.116" +name = "input_buffer" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f630a6370fd8e457873b4bd2ffdae75408bc291ba72be773772a4c2a065d9ae8" +checksum = "19a8a95243d5a0398cae618ec29477c6e3cb631152be5c19481f80bc71559754" dependencies = [ - "proc-macro2", - "quote", - "syn", + "bytes", ] [[package]] -name = "serde_yaml" -version = "0.8.13" +name = "iovec" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae3e2dd40a7cdc18ca80db804b7f461a39bb721160a85c9a1fa30134bf3c02a5" +checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" dependencies = [ - "dtoa", - "linked-hash-map", - "serde", - "yaml-rust", + "libc", ] [[package]] -name = "syn" -version = "1.0.42" +name = "itertools" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c51d92969d209b54a98397e1b91c8ae82d8c87a7bb87df0b29aa2ad81454228" +checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", + "either", ] [[package]] -name = "unicode-width" -version = "0.1.8" +name = "itoa" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" +checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" [[package]] -name = "unicode-xid" -version = "0.2.1" +name = "kernel32-sys" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" +checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] [[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" +name = "lazy_static" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743" + +[[package]] +name = "linked-hash-map" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8dd5a6d5999d9907cda8ed67bbd137d3af8085216c2ac62de5be860bd41f304a" + +[[package]] +name = "log" +version = "0.4.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "markov" +version = "0.1.1" +dependencies = [ + "markov 1.1.0", + "pretty_env_logger", + "tokio", + "warp", +] + +[[package]] +name = "markov" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6ad68e26d51a9558f65e93b9795c9422630d0932717a3235668bb9ab71e3fd" +dependencies = [ + "getopts", + "itertools", + "petgraph", + "rand 0.7.3", + "serde", + "serde_derive", + "serde_yaml", +] + +[[package]] +name = "matches" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" + +[[package]] +name = "memchr" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" + +[[package]] +name = "mime" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" + +[[package]] +name = "mime_guess" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2684d4c2e97d99848d30b324b00c8fcc7e5c897b7cbb5819b09e7c90e8baf212" +dependencies = [ + "mime", + "unicase", +] + +[[package]] +name = "mio" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fce347092656428bc8eaf6201042cb551b8d67855af7374542a92a0fbfcac430" +dependencies = [ + "cfg-if", + "fuchsia-zircon", + "fuchsia-zircon-sys", + "iovec", + "kernel32-sys", + "libc", + "log", + "miow 0.2.1", + "net2", + "slab", + "winapi 0.2.8", +] + +[[package]] +name = "mio-named-pipes" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656" +dependencies = [ + "log", + "mio", + "miow 0.3.5", + "winapi 0.3.9", +] + +[[package]] +name = "mio-uds" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0" +dependencies = [ + "iovec", + "libc", + "mio", +] + +[[package]] +name = "miow" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c1f2f3b1cf331de6896aabf6e9d55dca90356cc9960cca7eaaf408a355ae919" +dependencies = [ + "kernel32-sys", + "net2", + "winapi 0.2.8", + "ws2_32-sys", +] + +[[package]] +name = "miow" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07b88fb9795d4d36d62a012dfbf49a8f5cf12751f36d31a9dbe66d528e58979e" +dependencies = [ + "socket2", + "winapi 0.3.9", +] + +[[package]] +name = "multipart" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8209c33c951f07387a8497841122fc6f712165e3f9bda3e6be4645b58188f676" +dependencies = [ + "buf_redux", + "httparse", + "log", + "mime", + "mime_guess", + "quick-error", + "rand 0.6.5", + "safemem", + "tempfile", + "twoway", +] + +[[package]] +name = "net2" +version = "0.2.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ebc3ec692ed7c9a255596c67808dee269f64655d8baf7b4f0638e51ba1d6853" +dependencies = [ + "cfg-if", + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "260e51e7efe62b592207e9e13a68e43692a7a279171d6ba57abd208bf23645ad" + +[[package]] +name = "opaque-debug" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" + +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "petgraph" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "pin-project" +version = "0.4.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13fbdfd6bdee3dc9be46452f86af4a4072975899cf8592466668620bebfbcc17" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "0.4.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c82fb1329f632c3552cf352d14427d57a511b1cf41db93b3a7d77906a82dcc8e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pin-project-lite" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e555d9e657502182ac97b539fb3dae8b79cda19e3e4f8ffb5e8de4f18df93c95" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "ppv-lite86" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20" + +[[package]] +name = "pretty_env_logger" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d" +dependencies = [ + "env_logger", + "log", +] + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" +dependencies = [ + "autocfg 0.1.7", + "libc", + "rand_chacha 
0.1.1", + "rand_core 0.4.2", + "rand_hc 0.1.0", + "rand_isaac", + "rand_jitter", + "rand_os", + "rand_pcg", + "rand_xorshift", + "winapi 0.3.9", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc 0.2.0", +] + +[[package]] +name = "rand_chacha" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" +dependencies = [ + "autocfg 0.1.7", + "rand_core 0.3.1", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_hc" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_isaac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "rand_jitter" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b" +dependencies = [ + "libc", + "rand_core 0.4.2", + "winapi 0.3.9", +] + +[[package]] +name = "rand_os" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" +dependencies = [ + "cloudabi", + "fuchsia-cprng", + "libc", + "rand_core 0.4.2", + "rdrand", + "winapi 0.3.9", +] + +[[package]] +name = "rand_pcg" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44" +dependencies = [ + "autocfg 0.1.7", + "rand_core 0.4.2", +] + +[[package]] +name = "rand_xorshift" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "rdrand" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + +[[package]] +name = "regex" +version = "1.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-syntax" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "safemem" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" + +[[package]] +name = "scoped-tls" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2" + +[[package]] +name = "serde" +version = "1.0.116" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96fe57af81d28386a513cbc6858332abc6117cfdb5999647c6444b8f43a370a5" + +[[package]] +name = "serde_derive" +version = "1.0.116" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f630a6370fd8e457873b4bd2ffdae75408bc291ba72be773772a4c2a065d9ae8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a230ea9107ca2220eea9d46de97eddcb04cd00e92d13dda78e478dd33fa82bd4" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97" +dependencies = [ + "dtoa", + "itoa", + "serde", + "url", +] + +[[package]] +name = "serde_yaml" +version = "0.8.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae3e2dd40a7cdc18ca80db804b7f461a39bb721160a85c9a1fa30134bf3c02a5" +dependencies = [ + "dtoa", + "linked-hash-map", + "serde", + "yaml-rust", +] + +[[package]] +name = "sha-1" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df" +dependencies = [ + "block-buffer 0.7.3", + "digest 0.8.1", + "fake-simd", + "opaque-debug 0.2.3", +] + +[[package]] +name = "sha-1" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170a36ea86c864a3f16dd2687712dd6646f7019f301e57537c7f4dc9f5916770" +dependencies = [ + "block-buffer 0.9.0", + "cfg-if", + "cpuid-bool", + "digest 0.9.0", + "opaque-debug 
0.3.0", +] + +[[package]] +name = "signal-hook-registry" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e12110bc539e657a646068aaf5eb5b63af9d0c1f7b29c97113fad80e15f035" +dependencies = [ + "arc-swap", + "libc", +] + +[[package]] +name = "slab" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" + +[[package]] +name = "socket2" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1fa70dc5c8104ec096f4fe7ede7a221d35ae13dcd19ba1ad9a81d2cab9a1c44" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "winapi 0.3.9", +] + +[[package]] +name = "syn" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c51d92969d209b54a98397e1b91c8ae82d8c87a7bb87df0b29aa2ad81454228" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tempfile" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" +dependencies = [ + "cfg-if", + "libc", + "rand 0.7.3", + "redox_syscall", + "remove_dir_all", + "winapi 0.3.9", +] + +[[package]] +name = "termcolor" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thread_local" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", + "winapi 0.3.9", +] + +[[package]] +name = "tinyvec" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "238ce071d267c5710f9d31451efec16c5ee22de34df17cc05e56cbc92e967117" + +[[package]] +name = "tokio" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d34ca54d84bf2b5b4d7d31e901a8464f7b60ac145a284fba25ceb801f2ddccd" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "iovec", + "lazy_static", + "libc", + "memchr", + "mio", + "mio-named-pipes", + "mio-uds", + "num_cpus", + "pin-project-lite", + "signal-hook-registry", + "slab", + "tokio-macros", + "winapi 0.3.9", +] + +[[package]] +name = "tokio-macros" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3acc6aa564495a0f2e1d59fab677cd7f81a19994cfc7f3ad0e64301560389" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-tungstenite" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d9e878ad426ca286e4dcae09cbd4e1973a7f8987d97570e2469703dd7f5720c" +dependencies = [ + "futures-util", + "log", + "pin-project", + "tokio", + "tungstenite", +] + +[[package]] +name = "tokio-util" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be8242891f2b6cbef26a2d7e8605133c2c554cd35b3e4948ea892d6d68436499" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + 
"log", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tower-service" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860" + +[[package]] +name = "tracing" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27" +dependencies = [ + "cfg-if", + "log", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f50de3927f93d202783f4513cda820ab47ef17f624b03c096e86ef00c67e6b5f" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "tracing-futures" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab7bb6f14721aa00656086e9335d363c5c8747bae02ebe32ea2c7dece5689b4c" +dependencies = [ + "pin-project", + "tracing", +] + +[[package]] +name = "try-lock" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" + +[[package]] +name = "tungstenite" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0308d80d86700c5878b9ef6321f020f29b1bb9d5ff3cab25e75e23f3a492a23" +dependencies = [ + "base64", + "byteorder", + "bytes", + "http", + "httparse", + "input_buffer", + "log", + "rand 0.7.3", + "sha-1 0.9.1", + "url", + "utf-8", +] + +[[package]] +name = "twoway" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b11b2b5241ba34be09c3cc85a36e56e48f9888862e19cedf23336d35316ed1" +dependencies = [ + "memchr", +] + +[[package]] +name = "typenum" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33" + +[[package]] +name = "unicase" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +dependencies = [ + "version_check", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" +dependencies = [ + "matches", +] + +[[package]] +name = "unicode-normalization" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb19cf769fa8c6a80a162df694621ebeb4dafb606470b2b2fce0be40a98a977" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "url" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb" +dependencies = [ + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "urlencoding" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c9232eb53352b4442e40d7900465dfc534e8cb2dc8f18656fcb2ac16112b5593" + +[[package]] +name = "utf-8" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" + +[[package]] +name = "version_check" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" + +[[package]] +name = "want" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +dependencies = [ + "log", + "try-lock", +] + +[[package]] +name = "warp" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f41be6df54c97904af01aa23e613d4521eed7ab23537cede692d4058f6449407" +dependencies = [ + "bytes", + "futures", + "headers", + "http", + "hyper", + "log", + "mime", + "mime_guess", + "multipart", + "pin-project", + "scoped-tls", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-tungstenite", + "tower-service", + "tracing", + "tracing-futures", + "urlencoding", +] + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "ws2_32-sys" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] [[package]] name = "yaml-rust" diff --git a/Cargo.toml b/Cargo.toml index 7a76f0b..b8a9e57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,4 +8,7 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -chain = {package 
= "markov", version = "1.1.0"} \ No newline at end of file +chain = {package = "markov", version = "1.1.0"} +tokio = {version = "0.2", features=["full"]} +warp = "0.2" +pretty_env_logger = "0.4.0" diff --git a/src/main.rs b/src/main.rs index 22f352d..d5c3f68 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,47 +1,81 @@ use chain::{ Chain, }; +use warp::{ + Filter, + Buf, +}; use std::{ - io::{ - BufRead, - self, + sync::Arc, +}; +use tokio::{ + sync::{ + RwLock, }, + stream::{Stream,StreamExt,}, + prelude::*, }; -fn buffered_read_all_lines io::Result<()>>(input: &mut T, mut then: F) -> io::Result -{ - let mut buffer = String::new(); - let mut read; - let mut total=0; - while {read = input.read_line(&mut buffer)?; read!=0} { - then(&buffer[..])?; - buffer.clear(); - total += read; +const MAX_CONTENT_LENGTH: u64 = 1024 * 16; +const MAX_GEN_SIZE: usize = 256; + +async fn full_body(chain: &mut Chain, mut body: impl Unpin + Stream>) -> Result<(), Box> { + let mut buffer = Vec::new(); + + while let Some(buf) = body.next().await { + let mut body = buf?; + while body.has_remaining() { + buffer.extend_from_slice(body.bytes()); + let cnt = body.bytes().len(); + body.advance(cnt); + } } - Ok(total) + + let buffer = std::str::from_utf8(&buffer[..])?; + chain.feed_str(buffer); + Ok(()) } -fn main() { - let stdin = io::stdin(); - let mut stdin = stdin.lock(); - let mut chain = Chain::new(); +#[tokio::main] +async fn main() { + pretty_env_logger::init(); + + let chain = Arc::new(RwLock::new(Chain::new())); + let chain = warp::any().map(move || Arc::clone(&chain)); - buffered_read_all_lines(&mut stdin, |string| { - chain.feed(&string.split_whitespace() - .filter(|word| !word.is_empty()) - .map(|s| s.to_owned()).collect::>()); - - Ok(()) - }).expect("Failed to read from stdin"); + let push = warp::put() + .and(chain.clone()) + .and(warp::path("put")) + .and(warp::body::content_length_limit(MAX_CONTENT_LENGTH)) + .and(warp::body::stream()) + .and_then(|chain: Arc>>, buf| { + async move { + use std::ops::DerefMut; + let res = format!("{:?}", full_body(chain.write().await.deref_mut(), buf).await); + Ok::(res) + } + }); - if !chain.is_empty() { - if let Some(num) = std::env::args().skip(1).next() { - let sz: usize = num.parse().expect("Cannot parse number of tokens to generate"); - for string in chain.str_iter_for(sz) { - println!("{}", string); + let read = warp::get() + .and(chain.clone()) + .and(warp::path("get")) + .and(warp::path::param().map(|opt: usize| Some(opt)).or(warp::any().map(|| Option::::None)).unify()) + .and_then(|chain: Arc>>, num: Option| { + async move { + let chain = chain.read().await; + if chain.is_empty() { + Ok(String::default()) + } else { + match num { + Some(num) if num < MAX_GEN_SIZE => Ok(chain.str_iter_for(num).collect()), + _ => Ok::(chain.generate_str()), + } + } } - } else { - println!("{}", chain.generate_str()); - } - } + }); + + warp::serve(push + .or(read)) + .bind_with_graceful_shutdown(([127,0,0,1], 8777), async { tokio::signal::ctrl_c().await.unwrap(); }).1 + .await } From ff80dc4730b644e6609e5590ff8a6537d4aa91c7 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 06:40:07 +0100 Subject: [PATCH 02/50] web server working --- Cargo.lock | 25 ++++++--- Cargo.toml | 7 +++ README | 18 ++++-- src/forwarded_list.rs | 74 +++++++++++++++++++++++++ src/main.rs | 125 ++++++++++++++++++++++++++++++++++-------- 5 files changed, 214 insertions(+), 35 deletions(-) create mode 100644 src/forwarded_list.rs diff --git a/Cargo.lock b/Cargo.lock index 3308926..3c8e63a 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -114,6 +114,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "cloudabi" version = "0.0.3" @@ -311,7 +317,7 @@ version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", "libc", "wasi 0.9.0+wasi-snapshot-preview1", ] @@ -529,13 +535,16 @@ version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", ] [[package]] name = "markov" version = "0.1.1" dependencies = [ + "cfg-if 1.0.0", + "hyper", + "log", "markov 1.1.0", "pretty_env_logger", "tokio", @@ -591,7 +600,7 @@ version = "0.6.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fce347092656428bc8eaf6201042cb551b8d67855af7374542a92a0fbfcac430" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", "fuchsia-zircon", "fuchsia-zircon-sys", "iovec", @@ -673,7 +682,7 @@ version = "0.2.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ebc3ec692ed7c9a255596c67808dee269f64655d8baf7b4f0638e51ba1d6853" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", "libc", "winapi 0.3.9", ] @@ -1072,7 +1081,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "170a36ea86c864a3f16dd2687712dd6646f7019f301e57537c7f4dc9f5916770" dependencies = [ "block-buffer 0.9.0", - "cfg-if", + "cfg-if 0.1.10", "cpuid-bool", "digest 0.9.0", "opaque-debug 0.3.0", @@ -1100,7 +1109,7 @@ version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1fa70dc5c8104ec096f4fe7ede7a221d35ae13dcd19ba1ad9a81d2cab9a1c44" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", "libc", "redox_syscall", "winapi 0.3.9", @@ -1123,7 +1132,7 @@ version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", "libc", "rand 0.7.3", "redox_syscall", @@ -1240,7 +1249,7 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", "log", "pin-project-lite", "tracing-core", diff --git a/Cargo.toml b/Cargo.toml index b8a9e57..60d719e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,15 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +# Trust X-Forwarded-For as real IP(s) +trust-x-forwarded-for = [] + [dependencies] chain = {package = "markov", version = "1.1.0"} tokio = {version = "0.2", features=["full"]} warp = "0.2" pretty_env_logger = "0.4.0" +hyper = "0.13.8" +log = "0.4.11" +cfg-if = "1.0.0" diff --git a/README b/README index 5c0318f..c17a163 100644 --- a/README +++ b/README @@ -1,6 +1,16 @@ -Generate strings from markov chain of stdin +HTTP server connecting to a Markov chain -Usage: +Feeding: +# PUT /put +Request body is fed to the chain -$ cat corpus | markov -$ cat 
corpus | markov +NOTE: Strings fed to the chain must be valid UTF-8 and below 16 KB in size + +Generating: +# GET /get +Generate a string from the chain + +# GET /get/ +Generate strings from the chain + +NOTE: Number must be lower than 256 \ No newline at end of file diff --git a/src/forwarded_list.rs b/src/forwarded_list.rs new file mode 100644 index 0000000..302be9f --- /dev/null +++ b/src/forwarded_list.rs @@ -0,0 +1,74 @@ +use std::{ + net::{ + IpAddr, + AddrParseError, + }, + str, + error, + fmt, +}; + +#[derive(Debug)] +pub struct XFormatError; + +impl error::Error for XFormatError{} + +impl fmt::Display for XFormatError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "X-Forwarded-For was not in the correct format") + } +} + +#[derive(Debug, Clone, PartialOrd, PartialEq, Eq, Default)] +pub struct XForwardedFor(Vec); + +impl XForwardedFor +{ + pub fn new() -> Self + { + Self(Vec::new()) + } + pub fn single(ip: impl Into) -> Self + { + Self(vec![ip.into()]) + } + pub fn addrs(&self) -> &[IpAddr] + { + &self.0[..] + } + + pub fn into_first(self) -> Option + { + self.0.into_iter().next() + } + + pub fn into_addrs(self) -> Vec + { + self.0 + } +} + +impl str::FromStr for XForwardedFor +{ + type Err = XFormatError; + + fn from_str(s: &str) -> Result { + let mut output = Vec::new(); + for next in s.split(',') + { + output.push(next.trim().parse()?) + } + Ok(Self(output)) + } +} + +impl From for XFormatError +{ + #[inline(always)] fn from(_: AddrParseError) -> Self + { + Self + } +} + diff --git a/src/main.rs b/src/main.rs index d5c3f68..c4de034 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,38 +1,103 @@ +#![allow(dead_code)] + +#[macro_use] extern crate log; + use chain::{ Chain, }; use warp::{ Filter, Buf, + reply::Response, }; +use hyper::Body; use std::{ sync::Arc, + fmt, + error, + net::{ + SocketAddr, + IpAddr, + }, }; use tokio::{ sync::{ RwLock, + mpsc, }, stream::{Stream,StreamExt,}, - prelude::*, }; +use cfg_if::cfg_if; + +#[cfg(feature="trust-x-forwarded-for")] +mod forwarded_list; +#[cfg(feature="trust-x-forwarded-for")] +use forwarded_list::XForwardedFor; const MAX_CONTENT_LENGTH: u64 = 1024 * 16; const MAX_GEN_SIZE: usize = 256; -async fn full_body(chain: &mut Chain, mut body: impl Unpin + Stream>) -> Result<(), Box> { +#[derive(Debug)] +pub struct FillBodyError; + +impl error::Error for FillBodyError{} +impl warp::reject::Reject for FillBodyError{} +impl fmt::Display for FillBodyError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "failed to feed chain with this data") + } +} + + +async fn full_body(who: &IpAddr, chain: Arc>>, mut body: impl Unpin + Stream>) -> Result { let mut buffer = Vec::new(); - + + let mut written = 0usize; while let Some(buf) = body.next().await { - let mut body = buf?; + let mut body = buf.map_err(|_| FillBodyError)?; while body.has_remaining() { buffer.extend_from_slice(body.bytes()); let cnt = body.bytes().len(); body.advance(cnt); + written += cnt; } } - let buffer = std::str::from_utf8(&buffer[..])?; + let buffer = std::str::from_utf8(&buffer[..]).map_err(|_| FillBodyError)?; + info!("{} -> {:?}", who, buffer); + let mut chain = chain.write().await; chain.feed_str(buffer); + Ok(written) +} + +#[derive(Debug)] +pub struct GenBodyError(String); + +impl error::Error for GenBodyError{} +impl fmt::Display for GenBodyError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "failed to write {:?} to body", self.0) + } +} + + +async fn gen_body(chain: Arc>>, num: 
Option, mut output: mpsc::Sender) -> Result<(), GenBodyError> +{ + let chain = chain.read().await; + if !chain.is_empty() { + match num { + Some(num) if num < MAX_GEN_SIZE => { + for string in chain.str_iter_for(num) { + output.send(string).await.map_err(|e| GenBodyError(e.0))?; + } + }, + _ => output.send(chain.generate_str()).await.map_err(|e| GenBodyError(e.0))?, + } + } Ok(()) } @@ -43,39 +108,53 @@ async fn main() { let chain = Arc::new(RwLock::new(Chain::new())); let chain = warp::any().map(move || Arc::clone(&chain)); + cfg_if!{ + if #[cfg(feature="trust-x-forwarded-for")] { + let client_ip = + warp::header("x-forwarded-for") + .map(|ip: XForwardedFor| ip) + .and_then(|x: XForwardedFor| async move { x.into_first().ok_or_else(|| warp::reject::not_found()) }) + .or(warp::filters::addr::remote() + .and_then(|x: Option| async move { x.map(|x| x.ip()).ok_or_else(|| warp::reject::not_found()) })) + .unify(); + } else { + let client_ip = warp::filters::addr::remote().and_then(|x: Option| async move {x.map(|x| x.ip()).ok_or_else(|| warp::reject::not_found())}); + } + } let push = warp::put() .and(chain.clone()) .and(warp::path("put")) + .and(client_ip.clone()) .and(warp::body::content_length_limit(MAX_CONTENT_LENGTH)) .and(warp::body::stream()) - .and_then(|chain: Arc>>, buf| { + .and_then(|chain: Arc>>, host: IpAddr, buf| { async move { - use std::ops::DerefMut; - let res = format!("{:?}", full_body(chain.write().await.deref_mut(), buf).await); - Ok::(res) + full_body(&host, chain, buf).await + .map(|x| format!("{} bytes fed", x)) + .map_err(warp::reject::custom) } }); let read = warp::get() .and(chain.clone()) .and(warp::path("get")) + .and(client_ip.clone()) .and(warp::path::param().map(|opt: usize| Some(opt)).or(warp::any().map(|| Option::::None)).unify()) - .and_then(|chain: Arc>>, num: Option| { + .and_then(|chain: Arc>>, host: IpAddr, num: Option| { async move { - let chain = chain.read().await; - if chain.is_empty() { - Ok(String::default()) - } else { - match num { - Some(num) if num < MAX_GEN_SIZE => Ok(chain.str_iter_for(num).collect()), - _ => Ok::(chain.generate_str()), - } - } + let (tx, rx) = mpsc::channel(16); + tokio::spawn(gen_body(chain, num, tx)); + Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.map(move |x| { + info!("{} <- {:?}", host, x); + Ok::<_, std::convert::Infallible>(x) + })))) } }); - warp::serve(push - .or(read)) - .bind_with_graceful_shutdown(([127,0,0,1], 8777), async { tokio::signal::ctrl_c().await.unwrap(); }).1 - .await + let (addr, server) = warp::serve(push + .or(read)) + .bind_with_graceful_shutdown(([127,0,0,1], 8777), async { tokio::signal::ctrl_c().await.unwrap(); }); + + println!("Server bound on {:?}", addr); + server.await } From f25677f2431d8247d687a6f3153d2908f57f319d Mon Sep 17 00:00:00 2001 From: Flan's server Date: Thu, 8 Oct 2020 05:49:41 +0000 Subject: [PATCH 03/50] change default port --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index c4de034..bdd832a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -153,7 +153,7 @@ async fn main() { let (addr, server) = warp::serve(push .or(read)) - .bind_with_graceful_shutdown(([127,0,0,1], 8777), async { tokio::signal::ctrl_c().await.unwrap(); }); + .bind_with_graceful_shutdown(([127,0,0,1], 8001), async { tokio::signal::ctrl_c().await.unwrap(); }); println!("Server bound on {:?}", addr); server.await From 20656ce80e7f80a283b3b309c89c021d4884a7bf Mon Sep 17 00:00:00 2001 From: Flan's server Date: Thu, 8 Oct 2020 07:12:52 
+0000 Subject: [PATCH 04/50] 201 --- src/main.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index bdd832a..d376aea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,7 @@ use warp::{ Filter, Buf, reply::Response, + http, }; use hyper::Body; use std::{ @@ -133,7 +134,12 @@ async fn main() { .map(|x| format!("{} bytes fed", x)) .map_err(warp::reject::custom) } - }); + }) + .map(|x| http::Response::builder() + .status(201) + .body(x) + .unwrap()) + .with(warp::log("markov::put")); let read = warp::get() .and(chain.clone()) @@ -149,7 +155,8 @@ async fn main() { Ok::<_, std::convert::Infallible>(x) })))) } - }); + }) + .with(warp::log("markov::read")); let (addr, server) = warp::serve(push .or(read)) From 7fe14a8e187289296e2cab98e16ab1df1af4fee4 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 08:19:05 +0100 Subject: [PATCH 05/50] empty body --- src/main.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index d376aea..0625ee9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,7 +9,6 @@ use warp::{ Filter, Buf, reply::Response, - http, }; use hyper::Body; use std::{ @@ -30,6 +29,12 @@ use tokio::{ }; use cfg_if::cfg_if; +macro_rules! status { + ($code:expr) => { + ::warp::http::status::StatusCode::from_u16($code).unwrap() + }; +} + #[cfg(feature="trust-x-forwarded-for")] mod forwarded_list; #[cfg(feature="trust-x-forwarded-for")] @@ -131,14 +136,10 @@ async fn main() { .and_then(|chain: Arc>>, host: IpAddr, buf| { async move { full_body(&host, chain, buf).await - .map(|x| format!("{} bytes fed", x)) + .map(|_| warp::reply::with_status(warp::reply(), status!(201))) .map_err(warp::reject::custom) } }) - .map(|x| http::Response::builder() - .status(201) - .body(x) - .unwrap()) .with(warp::log("markov::put")); let read = warp::get() From 16db9ca7f10b98c8e50c21a562aa705d629b1064 Mon Sep 17 00:00:00 2001 From: Flan's server Date: Thu, 8 Oct 2020 08:14:56 +0000 Subject: [PATCH 06/50] update max size --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 0625ee9..7fdf177 100644 --- a/src/main.rs +++ b/src/main.rs @@ -40,7 +40,7 @@ mod forwarded_list; #[cfg(feature="trust-x-forwarded-for")] use forwarded_list::XForwardedFor; -const MAX_CONTENT_LENGTH: u64 = 1024 * 16; +const MAX_CONTENT_LENGTH: u64 = 1024 * 1024 * 4; //4MB const MAX_GEN_SIZE: usize = 256; #[derive(Debug)] From 4afdba44c56046ab1b9caa3c6e17583ea10464e6 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 09:18:44 +0100 Subject: [PATCH 07/50] makefile --- LICENSE | 674 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ Makefile | 8 + default | 1 + 3 files changed, 683 insertions(+) create mode 100644 LICENSE create mode 100644 Makefile create mode 120000 default diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
[... remainder of the standard GNU GPL version 3 license text omitted; the LICENSE file added by this patch is the unmodified GPL-3.0 text ...]
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..11eb097 --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ + +markov: + cargo build --release --features trust-x-forwarded-for + strip target/release/markov + +install: build + rc-service restart markov + curl -X PUT -d @default http://127.0.0.1:8001/put diff --git a/default b/default new file mode 120000 index 0000000..7a694c9 --- /dev/null +++ b/default @@ -0,0 +1 @@ +LICENSE \ No newline at end of file From f1a1a5079f0d1c5c46a900c2cd2e4fdc6247b966 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 09:21:48 +0100 Subject: [PATCH 08/50] makefile --- Makefile | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 11eb097..2e920d3 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,13 @@ - markov: cargo build --release --features trust-x-forwarded-for strip target/release/markov -install: build - rc-service restart markov +install: uninstall + cp -f target/release/markov /usr/local/bin/markov + rc-service start markov + sleep 1 curl -X PUT -d @default http://127.0.0.1:8001/put + +uninstall: + -rc-service stop markov + rm /usr/local/bin/markov From 87c8c525cd527e86c6c1f010fef556f20c81f36f Mon Sep 17 00:00:00 2001 From: Flan's server Date: Thu, 8 Oct 2020 08:23:18 +0000 Subject: [PATCH 09/50] installing makefile --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 2e920d3..7872576 100644 --- a/Makefile +++ b/Makefile @@ -4,10 +4,10 @@ markov: install: uninstall cp -f target/release/markov /usr/local/bin/markov - rc-service start markov - sleep 1 + rc-service markov start + sleep 0.2 curl -X PUT -d @default http://127.0.0.1:8001/put uninstall: - -rc-service stop markov + -rc-service markov stop rm /usr/local/bin/markov From 4ead6b5a92727d67523ea2b09e73f1c51489c0fe Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 09:52:49 +0100 Subject: [PATCH 10/50] increase mpsc cap --- Cargo.toml | 2 +- src/main.rs | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 60d719e..886cb6e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.1.1" +version = "0.1.2" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/src/main.rs b/src/main.rs index 7fdf177..478f236 100644 --- a/src/main.rs +++ b/src/main.rs @@ -93,17 +93,18 @@ impl fmt::Display for GenBodyError async fn gen_body(chain: Arc>>, num: Option, mut output: mpsc::Sender) -> Result<(), GenBodyError> { - let chain = chain.read().await; - if !chain.is_empty() { - match num { - Some(num) if num < MAX_GEN_SIZE => { - for string in chain.str_iter_for(num) { - output.send(string).await.map_err(|e| GenBodyError(e.0))?; - } - }, - _ => output.send(chain.generate_str()).await.map_err(|e| GenBodyError(e.0))?, + let chain = chain.read().await; + if !chain.is_empty() { + match num { + Some(num) if num < MAX_GEN_SIZE => { + //This could DoS `full_body` and writes, potentially. 
+ for string in chain.str_iter_for(num) { + output.send(string).await.map_err(|e| GenBodyError(e.0))?; + } + }, + _ => output.send(chain.generate_str()).await.map_err(|e| GenBodyError(e.0))?, + } } - } Ok(()) } @@ -149,7 +150,7 @@ async fn main() { .and(warp::path::param().map(|opt: usize| Some(opt)).or(warp::any().map(|| Option::::None)).unify()) .and_then(|chain: Arc>>, host: IpAddr, num: Option| { async move { - let (tx, rx) = mpsc::channel(16); + let (tx, rx) = mpsc::channel(MAX_GEN_SIZE); tokio::spawn(gen_body(chain, num, tx)); Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.map(move |x| { info!("{} <- {:?}", host, x); From 35f1de6c5e2086c22fd4374d6b47f58639ba262a Mon Sep 17 00:00:00 2001 From: Flan's server Date: Thu, 8 Oct 2020 08:54:07 +0000 Subject: [PATCH 11/50] fix makefile --- Cargo.lock | 2 +- Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3c8e63a..1a9bde9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -540,7 +540,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.1.1" +version = "0.1.2" dependencies = [ "cfg-if 1.0.0", "hyper", diff --git a/Makefile b/Makefile index 7872576..8ff5952 100644 --- a/Makefile +++ b/Makefile @@ -10,4 +10,4 @@ install: uninstall uninstall: -rc-service markov stop - rm /usr/local/bin/markov + rm -f /usr/local/bin/markov From ecc8854e44221dabb0fa8f2fa5d64d1ad15ac67c Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 13:06:02 +0100 Subject: [PATCH 12/50] config! saveload! --- .gitignore | 1 + Cargo.lock | 106 +++++++++++++++++++++++++++- Cargo.toml | 11 +-- markov.toml | 6 ++ src/config.rs | 108 ++++++++++++++++++++++++++++ src/main.rs | 192 ++++++++++++++++++++++++++++++++------------------ src/save.rs | 89 +++++++++++++++++++++++ src/state.rs | 74 +++++++++++++++++++ 8 files changed, 511 insertions(+), 76 deletions(-) create mode 100644 markov.toml create mode 100644 src/config.rs create mode 100644 src/save.rs create mode 100644 src/state.rs diff --git a/.gitignore b/.gitignore index e2a3069..ba830fc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target *~ +chain.dat diff --git a/Cargo.lock b/Cargo.lock index 1a9bde9..583238c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -108,6 +108,15 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38" +[[package]] +name = "cc" +version = "1.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef611cc68ff783f18535d77ddd080185275713d852c4f5cbb6122c462a7a825c" +dependencies = [ + "jobserver", +] + [[package]] name = "cfg-if" version = "0.1.10" @@ -226,6 +235,7 @@ checksum = "5d8e3078b7b2a8a671cb7a3d17b4760e4181ea243227776ba83fd043b4ca034e" dependencies = [ "futures-channel", "futures-core", + "futures-executor", "futures-io", "futures-sink", "futures-task", @@ -248,12 +258,35 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d674eaa0056896d5ada519900dbf97ead2e46a7b6621e8160d79e2f2e1e2784b" +[[package]] +name = "futures-executor" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc709ca1da6f66143b8c9bec8e6260181869893714e9b5a490b169b0414144ab" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5fc94b64bb39543b4e432f1790b6bf18e3ee3b74653c5449f63310e9a74b123c" +[[package]] +name = "futures-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f57ed14da4603b2554682e9f2ff3c65d7567b53188db96cb71538217fc64581b" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.6" @@ -275,11 +308,17 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a894a0acddba51a2d49a6f4263b1e64b8c579ece8af50fa86503d52cd1eea34" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", "futures-sink", "futures-task", + "memchr", "pin-project", "pin-utils", + "proc-macro-hack", + "proc-macro-nested", "slab", ] @@ -341,6 +380,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d36fab90f82edc3c747f9d438e06cf0a491055896f2a279638bb5beed6c40177" + [[package]] name = "hashbrown" version = "0.9.1" @@ -501,6 +546,15 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" +[[package]] +name = "jobserver" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c71313ebb9439f74b00d9d2dcec36440beaf57a6aa0623068441dd7cd81a7f2" +dependencies = [ + "libc", +] + [[package]] name = "kernel32-sys" version = "0.2.2" @@ -538,16 +592,32 @@ dependencies = [ "cfg-if 0.1.10", ] +[[package]] +name = "lzzzz" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ba777d9f7fe8793f196dcc7b6cd43a74fb94a98e9e01d5c4f14753a589f9029" +dependencies = [ + "cc", + "pin-project", + "tokio", +] + [[package]] name = "markov" -version = "0.1.2" +version = "0.2.0" dependencies = [ "cfg-if 1.0.0", + "futures", "hyper", "log", + "lzzzz", "markov 1.1.0", "pretty_env_logger", + "serde", + "serde_cbor", "tokio", + "toml", "warp", ] @@ -779,6 +849,18 @@ dependencies = [ "log", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99c605b9a0adc77b7211c6b1f722dcb613d68d66859a44f3d485a6da332b0598" + +[[package]] +name = "proc-macro-nested" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eba180dafb9038b050a4c280019bbedf9f2467b61e5d892dcad585bb57aadc5a" + [[package]] name = "proc-macro2" version = "1.0.24" @@ -1015,6 +1097,19 @@ name = "serde" version = "1.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96fe57af81d28386a513cbc6858332abc6117cfdb5999647c6444b8f43a370a5" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_cbor" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" +dependencies = [ + "half", + "serde", +] [[package]] name = "serde_derive" @@ -1237,6 +1332,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffc92d160b1eef40665be3a05630d003936a3bc7da7421277846c2613e92c71a" +dependencies = [ + "serde", +] + [[package]] name = "tower-service" version = "0.3.0" diff --git a/Cargo.toml b/Cargo.toml index 886cb6e..0bd60bc 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -1,16 +1,12 @@ [package] name = "markov" -version = "0.1.2" +version = "0.2.0" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[features] -# Trust X-Forwarded-For as real IP(s) -trust-x-forwarded-for = [] - [dependencies] chain = {package = "markov", version = "1.1.0"} tokio = {version = "0.2", features=["full"]} @@ -19,3 +15,8 @@ pretty_env_logger = "0.4.0" hyper = "0.13.8" log = "0.4.11" cfg-if = "1.0.0" +futures = "0.3.6" +serde_cbor = "0.11.1" +lzzzz = {version = "0.2", features=["tokio-io"]} +serde = {version ="1.0", features=["derive"]} +toml = "0.5.6" diff --git a/markov.toml b/markov.toml new file mode 100644 index 0000000..6090632 --- /dev/null +++ b/markov.toml @@ -0,0 +1,6 @@ +bindpoint = '127.0.0.1:8001' +file = 'chain.dat' +max_content_length = 4194304 +max_gen_size = 256 +#save_interval_secs = 2 +trust_x_forwarded_for = false diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..2940f49 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,108 @@ +//! Server config +use super::*; +use std::{ + net::SocketAddr, + path::Path, + io, + borrow::Cow, + num::NonZeroU64, +}; +use tokio::{ + fs::OpenOptions, + prelude::*, + time::Duration, + io::BufReader, +}; + +pub const DEFAULT_FILE_LOCATION: &'static str = "markov.toml"; + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash, Serialize, Deserialize)] +pub struct Config +{ + pub bindpoint: SocketAddr, + pub file: String, + pub max_content_length: u64, + pub max_gen_size: usize, + pub save_interval_secs: Option, + pub trust_x_forwarded_for: bool, +} + +impl Default for Config +{ + #[inline] + fn default() -> Self + { + Self { + bindpoint: ([127,0,0,1], 8001).into(), + file: "chain.dat".to_owned(), + max_content_length: 1024 * 1024 * 4, + max_gen_size: 256, + save_interval_secs: Some(unsafe{NonZeroU64::new_unchecked(2)}), + trust_x_forwarded_for: false, + } + } +} + +impl Config +{ + pub fn save_interval(&self) -> Option + { + self.save_interval_secs.map(|x| Duration::from_secs(x.into())) + } + pub async fn load(from: impl AsRef) -> io::Result + { + let file = OpenOptions::new() + .read(true) + .open(from).await?; + + let mut buffer= String::new(); + let reader = BufReader::new(file); + let mut lines = reader.lines(); + while let Some(line) = lines.next_line().await? { + buffer.push_str(&line[..]); + buffer.push('\n'); + } + toml::de::from_str(&buffer[..]).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e)) + } + + pub async fn save(&self, to: impl AsRef) -> io::Result<()> + { + let config = toml::ser::to_string_pretty(self).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + let mut file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(to).await?; + file.write_all(config.as_bytes()).await?; + file.shutdown().await?; + Ok(()) + } +} + +/// Try to load config file specified by args, or default config file +pub fn load() -> impl futures::future::Future> +{ + load_args(std::env::args().skip(1)) +} + +async fn load_args>(mut from: I) -> Option +{ + let place = if let Some(arg) = from.next() { + trace!("File {:?} provided", arg); + Cow::Owned(arg) + } else { + warn!("No config file provided. 
Using default location {:?}", DEFAULT_FILE_LOCATION); + Cow::Borrowed(DEFAULT_FILE_LOCATION) + }; + + match Config::load(place.as_ref()).await { + Ok(cfg) => { + info!("Loaded config file {:?}", place); + Some(cfg) + }, + Err(err) => { + error!("Failed to load config file from {:?}: {}", place, err); + None + }, + } +} diff --git a/src/main.rs b/src/main.rs index 478f236..42652d7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -24,10 +24,14 @@ use tokio::{ sync::{ RwLock, mpsc, + Notify, }, stream::{Stream,StreamExt,}, }; -use cfg_if::cfg_if; +use serde::{ + Serialize, + Deserialize +}; macro_rules! status { ($code:expr) => { @@ -35,14 +39,13 @@ macro_rules! status { }; } -#[cfg(feature="trust-x-forwarded-for")] +mod config; +mod state; +use state::State; +mod save; mod forwarded_list; -#[cfg(feature="trust-x-forwarded-for")] use forwarded_list::XForwardedFor; -const MAX_CONTENT_LENGTH: u64 = 1024 * 1024 * 4; //4MB -const MAX_GEN_SIZE: usize = 256; - #[derive(Debug)] pub struct FillBodyError; @@ -57,7 +60,7 @@ impl fmt::Display for FillBodyError } -async fn full_body(who: &IpAddr, chain: Arc>>, mut body: impl Unpin + Stream>) -> Result { +async fn full_body(who: &IpAddr, state: State, mut body: impl Unpin + Stream>) -> Result { let mut buffer = Vec::new(); let mut written = 0usize; @@ -73,8 +76,12 @@ async fn full_body(who: &IpAddr, chain: Arc>>, mut body: im let buffer = std::str::from_utf8(&buffer[..]).map_err(|_| FillBodyError)?; info!("{} -> {:?}", who, buffer); - let mut chain = chain.write().await; - chain.feed_str(buffer); + let mut chain = state.chain().write().await; + chain.feed(&buffer.split_whitespace() + .filter(|word| !word.is_empty()) + .map(|s| s.to_owned()).collect::>()); + + state.notify_save(); Ok(written) } @@ -91,79 +98,124 @@ impl fmt::Display for GenBodyError } -async fn gen_body(chain: Arc>>, num: Option, mut output: mpsc::Sender) -> Result<(), GenBodyError> +async fn gen_body(state: State, num: Option, mut output: mpsc::Sender) -> Result<(), GenBodyError> { - let chain = chain.read().await; - if !chain.is_empty() { - match num { - Some(num) if num < MAX_GEN_SIZE => { - //This could DoS `full_body` and writes, potentially. - for string in chain.str_iter_for(num) { - output.send(string).await.map_err(|e| GenBodyError(e.0))?; - } - }, - _ => output.send(chain.generate_str()).await.map_err(|e| GenBodyError(e.0))?, - } + let chain = state.chain().read().await; + if !chain.is_empty() { + match num { + Some(num) if num < state.config().max_gen_size => { + //This could DoS `full_body` and writes, potentially. 
+ for string in chain.str_iter_for(num) { + output.send(string).await.map_err(|e| GenBodyError(e.0))?; + } + }, + _ => output.send(chain.generate_str()).await.map_err(|e| GenBodyError(e.0))?, } + } Ok(()) } - #[tokio::main] async fn main() { pretty_env_logger::init(); - - let chain = Arc::new(RwLock::new(Chain::new())); - let chain = warp::any().map(move || Arc::clone(&chain)); - cfg_if!{ - if #[cfg(feature="trust-x-forwarded-for")] { - let client_ip = - warp::header("x-forwarded-for") + let config = match config::load().await { + Some(v) => v, + _ => { + let cfg = config::Config::default(); + #[cfg(debug_assertions)] + { + if let Err(err) = cfg.save(config::DEFAULT_FILE_LOCATION).await { + error!("Failed to create default config file: {}", err); + } + } + cfg + }, + }; + trace!("Using config {:?}", config); + + let chain = Arc::new(RwLock::new(match save::load(&config.file).await { + Ok(chain) => { + info!("Loaded chain from {:?}", config.file); + chain + }, + Err(e) => { + warn!("Failed to load chain, creating new"); + trace!("Error: {}", e); + Chain::new() + }, + })); + { + let (state, chain, saver) = { + let save_when = Arc::new(Notify::new()); + + let state = State::new(config, + Arc::clone(&chain), + Arc::clone(&save_when)); + let state2 = state.clone(); + let saver = tokio::spawn(save::host(state.clone())); + let chain = warp::any().map(move || state.clone()); + (state2, chain, saver) + }; + + let client_ip = if state.config().trust_x_forwarded_for { + warp::header("x-forwarded-for") .map(|ip: XForwardedFor| ip) .and_then(|x: XForwardedFor| async move { x.into_first().ok_or_else(|| warp::reject::not_found()) }) .or(warp::filters::addr::remote() .and_then(|x: Option| async move { x.map(|x| x.ip()).ok_or_else(|| warp::reject::not_found()) })) - .unify(); + .unify().boxed() } else { - let client_ip = warp::filters::addr::remote().and_then(|x: Option| async move {x.map(|x| x.ip()).ok_or_else(|| warp::reject::not_found())}); + warp::filters::addr::remote().and_then(|x: Option| async move {x.map(|x| x.ip()).ok_or_else(|| warp::reject::not_found())}).boxed() + }; + + let push = warp::put() + .and(chain.clone()) + .and(warp::path("put")) + .and(client_ip.clone()) + .and(warp::body::content_length_limit(state.config().max_content_length)) + .and(warp::body::stream()) + .and_then(|state: State, host: IpAddr, buf| { + async move { + full_body(&host, state, buf).await + .map(|_| warp::reply::with_status(warp::reply(), status!(201))) + .map_err(warp::reject::custom) + } + }) + .with(warp::log("markov::put")); + + let read = warp::get() + .and(chain.clone()) + .and(warp::path("get")) + .and(client_ip.clone()) + .and(warp::path::param().map(|opt: usize| Some(opt)).or(warp::any().map(|| Option::::None)).unify()) + .and_then(|state: State, host: IpAddr, num: Option| { + async move { + let (tx, rx) = mpsc::channel(state.config().max_gen_size); + tokio::spawn(gen_body(state, num, tx)); + Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.map(move |x| { + info!("{} <- {:?}", host, x); + Ok::<_, std::convert::Infallible>(x) + })))) + } + }) + .with(warp::log("markov::read")); + + let (addr, server) = warp::serve(push + .or(read)) + .bind_with_graceful_shutdown(state.config().bindpoint, async move { + tokio::signal::ctrl_c().await.unwrap(); + state.shutdown(); + }); + + info!("Server bound on {:?}", addr); + server.await; + + // Cleanup + async move { + trace!("Cleanup"); + + saver.await.expect("Saver panicked"); } - } - let push = warp::put() - .and(chain.clone()) - 
.and(warp::path("put")) - .and(client_ip.clone()) - .and(warp::body::content_length_limit(MAX_CONTENT_LENGTH)) - .and(warp::body::stream()) - .and_then(|chain: Arc<RwLock<Chain<String>>>, host: IpAddr, buf| { - async move { - full_body(&host, chain, buf).await - .map(|_| warp::reply::with_status(warp::reply(), status!(201))) - .map_err(warp::reject::custom) - } - }) - .with(warp::log("markov::put")); - - let read = warp::get() - .and(chain.clone()) - .and(warp::path("get")) - .and(client_ip.clone()) - .and(warp::path::param().map(|opt: usize| Some(opt)).or(warp::any().map(|| Option::<usize>::None)).unify()) - .and_then(|chain: Arc<RwLock<Chain<String>>>, host: IpAddr, num: Option<usize>| { - async move { - let (tx, rx) = mpsc::channel(MAX_GEN_SIZE); - tokio::spawn(gen_body(chain, num, tx)); - Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.map(move |x| { - info!("{} <- {:?}", host, x); - Ok::<_, std::convert::Infallible>(x) - })))) - } - }) - .with(warp::log("markov::read")); - - let (addr, server) = warp::serve(push - .or(read)) - .bind_with_graceful_shutdown(([127,0,0,1], 8001), async { tokio::signal::ctrl_c().await.unwrap(); }); - - println!("Server bound on {:?}", addr); - server.await + }.await; + info!("Shut down gracefully") }
diff --git a/src/save.rs b/src/save.rs new file mode 100644 index 0000000..b7f8b8a --- /dev/null +++ b/src/save.rs @@ -0,0 +1,89 @@ +//! Saving and loading chain +use super::*; +use std::{ + sync::Arc, + path::{ + Path, + }, + io, +}; +use tokio::{ + time::{ + self, + Duration, + }, + fs::{ + OpenOptions, + }, + prelude::*, +}; +use futures::{ + future::{ + OptionFuture, + }, +}; +use lzzzz::{ + lz4f::{ + self, + AsyncWriteCompressor, + PreferencesBuilder, + AsyncReadDecompressor, + }, +}; + +const SAVE_INTERVAL: Option<Duration> = Some(Duration::from_secs(2)); + +pub async fn save_now(chain: &Chain<String>, to: impl AsRef<Path>) -> io::Result<()> +{ + debug!("Saving chain to {:?}", to.as_ref()); + let file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(to).await?; + let chain = serde_cbor::to_vec(chain).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + + let mut file = AsyncWriteCompressor::new(file, PreferencesBuilder::new() + .compression_level(lz4f::CLEVEL_HIGH).build())?; + file.write_all(&chain[..]).await?; + file.shutdown().await?; + Ok(()) +} + +/// Start the save loop for this chain +pub async fn host(state: State) +{ + let to = &state.config().file; + let interval = state.config().save_interval(); + while Arc::strong_count(state.when()) > 1 { + { + let chain = state.chain().read().await; + use std::ops::Deref; + if let Err(e) = save_now(chain.deref(), &to).await { + error!("Failed to save chain: {}", e); + } else { + info!("Saved chain to {:?}", to); + } + } + if state.has_shutdown() { + break; + } + OptionFuture::from(interval.map(|interval| time::delay_for(interval))).await; + state.when().notified().await; + } + trace!("Saver exiting"); +} + +/// Try to load a chain from this path +pub async fn load(from: impl AsRef<Path>) -> io::Result<Chain<String>> +{ + debug!("Loading chain from {:?}", from.as_ref()); + let file = OpenOptions::new() + .read(true) + .open(from).await?; + let mut whole = Vec::new(); + let mut file = AsyncReadDecompressor::new(file)?; + tokio::io::copy(&mut file, &mut whole).await?; + serde_cbor::from_slice(&whole[..]) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e)) +}
diff --git a/src/state.rs b/src/state.rs new file mode 100644 index 0000000..5cc8d56 --- /dev/null +++ b/src/state.rs @@ -0,0 +1,74 @@ +//! State +use super::*; +use tokio::{ + sync::{ + watch, + }, +}; +use config::Config; + +#[derive(Debug, Clone)] +pub struct State +{ + config: Arc<Config>, //to avoid cloning config + chain: Arc<RwLock<Chain<String>>>, + save: Arc<Notify>, + + shutdown: Arc<watch::Sender<bool>>, + shutdown_recv: watch::Receiver<bool>, +} + +impl State +{ + pub fn new(config: Config, chain: Arc<RwLock<Chain<String>>>, save: Arc<Notify>) -> Self + { + let (shutdown, shutdown_recv) = watch::channel(false); + Self { + config: Arc::new(config), + chain, + save, + shutdown: Arc::new(shutdown), + shutdown_recv, + } + } + + pub fn config(&self) -> &Config + { + self.config.as_ref() + } + + pub fn notify_save(&self) + { + self.save.notify(); + } + + pub fn chain(&self) -> &RwLock<Chain<String>> + { + &self.chain.as_ref() + } + + pub fn when(&self) -> &Arc<Notify> + { + &self.save + } + + pub fn shutdown(self) + { + self.shutdown.broadcast(true).expect("Failed to communicate shutdown"); + self.save.notify(); + } + + pub fn has_shutdown(&self) -> bool + { + *self.shutdown_recv.borrow() + } + + pub async fn on_shutdown(mut self) + { + if !self.has_shutdown() { + while let Some(false) = self.shutdown_recv.recv().await { + + } + } + } +}
From 57c74bd4117fbd9c42bbbafa17652bba34f0e290 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 13:19:03 +0100 Subject: [PATCH 13/50] reorganise --- src/feed.rs | 49 +++++++++++++++++++++++++++++++++++ src/gen.rs | 33 ++++++++++++++++++++++++ src/main.rs | 73 +++-------------------------------------------------- 3 files changed, 86 insertions(+), 69 deletions(-) create mode 100644 src/feed.rs create mode 100644 src/gen.rs
diff --git a/src/feed.rs b/src/feed.rs new file mode 100644 index 0000000..8c849e3 --- /dev/null +++ b/src/feed.rs @@ -0,0 +1,49 @@ +//! Feeding the chain +use super::*; + +fn feed(chain: &mut Chain<String>, what: impl AsRef<str>) +{ + chain.feed(what.as_ref().split_whitespace() + .filter(|word| !word.is_empty()) + .map(|s| s.to_owned()).collect::<Vec<String>>()); +} + +pub async fn full(who: &IpAddr, state: State, mut body: impl Unpin + Stream>) -> Result<usize, FillBodyError> { + let mut buffer = Vec::new(); + + let mut written = 0usize; + while let Some(buf) = body.next().await { + let mut body = buf.map_err(|_| FillBodyError)?; + while body.has_remaining() { + buffer.extend_from_slice(body.bytes()); + let cnt = body.bytes().len(); + body.advance(cnt); + written += cnt; + } + } + if !buffer.ends_with(&[b'\n']) { // probably useless eh? + buffer.push(b'\n'); + } + let buffer = std::str::from_utf8(&buffer[..]).map_err(|_| FillBodyError)?; + info!("{} -> {:?}", who, buffer); + let mut chain = state.chain().write().await; + feed(&mut chain, buffer); + + state.notify_save(); + Ok(written) +} + + +#[derive(Debug)] +pub struct FillBodyError; + +impl error::Error for FillBodyError{} +impl warp::reject::Reject for FillBodyError{} +impl fmt::Display for FillBodyError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "failed to feed chain with this data") + } +} +
diff --git a/src/gen.rs b/src/gen.rs new file mode 100644 index 0000000..6c1e4e5 --- /dev/null +++ b/src/gen.rs @@ -0,0 +1,33 @@ +//! Generating the strings +use super::*; + +#[derive(Debug)] +pub struct GenBodyError(String); + +impl error::Error for GenBodyError{} +impl fmt::Display for GenBodyError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "failed to write {:?} to body", self.0) + } +} + + +pub async fn body(state: State, num: Option<usize>, mut output: mpsc::Sender<String>) -> Result<(), GenBodyError> +{ + let chain = state.chain().read().await; + if !chain.is_empty() { + match num { + Some(num) if num < state.config().max_gen_size => { + //This could DoS `full_body` and writes, potentially. + for string in chain.str_iter_for(num) { + output.send(string).await.map_err(|e| GenBodyError(e.0))?; + } + }, + _ => output.send(chain.generate_str()).await.map_err(|e| GenBodyError(e.0))?, + } + } + Ok(()) +} +
diff --git a/src/main.rs b/src/main.rs index 42652d7..1eb0a8f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -46,74 +46,9 @@ mod save; mod forwarded_list; use forwarded_list::XForwardedFor; -#[derive(Debug)] -pub struct FillBodyError; +mod feed; +mod gen; -impl error::Error for FillBodyError{} -impl warp::reject::Reject for FillBodyError{} -impl fmt::Display for FillBodyError -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result - { - write!(f, "failed to feed chain with this data") - } -} - - -async fn full_body(who: &IpAddr, state: State, mut body: impl Unpin + Stream>) -> Result<usize, FillBodyError> { - let mut buffer = Vec::new(); - - let mut written = 0usize; - while let Some(buf) = body.next().await { - let mut body = buf.map_err(|_| FillBodyError)?; - while body.has_remaining() { - buffer.extend_from_slice(body.bytes()); - let cnt = body.bytes().len(); - body.advance(cnt); - written += cnt; - } - } - - let buffer = std::str::from_utf8(&buffer[..]).map_err(|_| FillBodyError)?; - info!("{} -> {:?}", who, buffer); - let mut chain = state.chain().write().await; - chain.feed(&buffer.split_whitespace() - .filter(|word| !word.is_empty()) - .map(|s| s.to_owned()).collect::<Vec<String>>()); - - state.notify_save(); - Ok(written) -} - -#[derive(Debug)] -pub struct GenBodyError(String); - -impl error::Error for GenBodyError{} -impl fmt::Display for GenBodyError -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result - { - write!(f, "failed to write {:?} to body", self.0) - } -} - - -async fn gen_body(state: State, num: Option<usize>, mut output: mpsc::Sender<String>) -> Result<(), GenBodyError> -{ - let chain = state.chain().read().await; - if !chain.is_empty() { - match num { - Some(num) if num < state.config().max_gen_size => { - //This could DoS `full_body` and writes, potentially.
- for string in chain.str_iter_for(num) { - output.send(string).await.map_err(|e| GenBodyError(e.0))?; - } - }, - _ => output.send(chain.generate_str()).await.map_err(|e| GenBodyError(e.0))?, - } - } - Ok(()) -} #[tokio::main] async fn main() { pretty_env_logger::init(); @@ -176,7 +111,7 @@ async fn main() { .and(warp::body::stream()) .and_then(|state: State, host: IpAddr, buf| { async move { - full_body(&host, state, buf).await + feed::full(&host, state, buf).await .map(|_| warp::reply::with_status(warp::reply(), status!(201))) .map_err(warp::reject::custom) } @@ -191,7 +126,7 @@ async fn main() { .and_then(|state: State, host: IpAddr, num: Option| { async move { let (tx, rx) = mpsc::channel(state.config().max_gen_size); - tokio::spawn(gen_body(state, num, tx)); + tokio::spawn(gen::body(state, num, tx)); Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.map(move |x| { info!("{} <- {:?}", host, x); Ok::<_, std::convert::Infallible>(x) From 66470d6be51d43cac3e933fcea4e77d7bd50516c Mon Sep 17 00:00:00 2001 From: Flan's server Date: Thu, 8 Oct 2020 12:29:42 +0000 Subject: [PATCH 14/50] update makefile --- Makefile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 8ff5952..80c617d 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,17 @@ markov: - cargo build --release --features trust-x-forwarded-for + cargo build --release strip target/release/markov -install: uninstall +install: + rc-service markov stop + cp -f target/release/markov /usr/local/bin/markov + rc-service markov start + +reinstall: uninstall cp -f target/release/markov /usr/local/bin/markov rc-service markov start sleep 0.2 + rm -f /var/nginx/markov.dat curl -X PUT -d @default http://127.0.0.1:8001/put uninstall: From 45a93939de4ba378008c17dd7e503f7727069a22 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 13:57:23 +0100 Subject: [PATCH 15/50] signal handling --- Cargo.toml | 2 +- src/main.rs | 26 +++++++++++++++++----- src/save.rs | 13 +++++++++-- src/signals.rs | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/state.rs | 2 +- 5 files changed, 93 insertions(+), 9 deletions(-) create mode 100644 src/signals.rs diff --git a/Cargo.toml b/Cargo.toml index 0bd60bc..5d8526a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.2.0" +version = "0.3.0" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/src/main.rs b/src/main.rs index 1eb0a8f..248d603 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,6 +32,14 @@ use serde::{ Serialize, Deserialize }; +use futures::{ + future::{ + FutureExt, + BoxFuture, + join_all, + }, +}; + macro_rules! status { ($code:expr) => { @@ -39,6 +47,8 @@ macro_rules! 
status { }; } +#[cfg(target_family="unix")] +mod signals; mod config; mod state; use state::State; @@ -80,7 +90,8 @@ async fn main() { }, })); { - let (state, chain, saver) = { + let mut tasks = Vec::>::new(); + let (state, chain) = { let save_when = Arc::new(Notify::new()); let state = State::new(config, @@ -89,7 +100,9 @@ async fn main() { let state2 = state.clone(); let saver = tokio::spawn(save::host(state.clone())); let chain = warp::any().map(move || state.clone()); - (state2, chain, saver) + + tasks.push(saver.map(|res| res.expect("Saver panicked")).boxed()); + (state2, chain) }; let client_ip = if state.config().trust_x_forwarded_for { @@ -134,6 +147,10 @@ async fn main() { } }) .with(warp::log("markov::read")); + + + #[cfg(target_family="unix")] + tasks.push(tokio::spawn(signals::handle(state.clone())).map(|res| res.expect("Signal handler panicked")).boxed()); let (addr, server) = warp::serve(push .or(read)) @@ -141,15 +158,14 @@ async fn main() { tokio::signal::ctrl_c().await.unwrap(); state.shutdown(); }); - info!("Server bound on {:?}", addr); server.await; // Cleanup async move { trace!("Cleanup"); - - saver.await.expect("Saver panicked"); + + join_all(tasks).await; } }.await; info!("Shut down gracefully") diff --git a/src/save.rs b/src/save.rs index b7f8b8a..fa8461c 100644 --- a/src/save.rs +++ b/src/save.rs @@ -33,7 +33,16 @@ use lzzzz::{ const SAVE_INTERVAL: Option = Some(Duration::from_secs(2)); -pub async fn save_now(chain: &Chain, to: impl AsRef) -> io::Result<()> + +pub async fn save_now(state: &State) -> io::Result<()> +{ + let chain = state.chain().read().await; + use std::ops::Deref; + let to = &state.config().file; + save_now_to(chain.deref(),to).await +} + +async fn save_now_to(chain: &Chain, to: impl AsRef) -> io::Result<()> { debug!("Saving chain to {:?}", to.as_ref()); let file = OpenOptions::new() @@ -59,7 +68,7 @@ pub async fn host(state: State) { let chain = state.chain().read().await; use std::ops::Deref; - if let Err(e) = save_now(chain.deref(), &to).await { + if let Err(e) = save_now_to(chain.deref(), &to).await { error!("Failed to save chain: {}", e); } else { info!("Saved chain to {:?}", to); diff --git a/src/signals.rs b/src/signals.rs new file mode 100644 index 0000000..8a8a4d0 --- /dev/null +++ b/src/signals.rs @@ -0,0 +1,59 @@ +//! Unix signals +use super::*; +use tokio::{ + signal::unix::{ + self, + SignalKind, + }, +}; + + + +pub async fn handle(mut state: State) +{ + let mut usr1 = unix::signal(SignalKind::user_defined1()).expect("Failed to hook SIGUSR1"); + let mut usr2 = unix::signal(SignalKind::user_defined2()).expect("Failed to hook SIGUSR2"); + let mut quit = unix::signal(SignalKind::quit()).expect("Failed to hook SIGQUIT"); + + loop { + tokio::select! { + _ = state.on_shutdown() => { + break; + } + _ = usr1.recv() => { + info!("Got SIGUSR1. Saving chain immediately."); + if let Err(e) = save::save_now(&state).await { + error!("Failed to save chain: {}", e); + } else{ + trace!("Saved chain okay"); + } + }, + _ = usr2.recv() => { + info!("Got SIGUSR1. Loading chain immediately."); + match save::load(&state.config().file).await { + Ok(new) => { + { + let mut chain = state.chain().write().await; + *chain = new; + } + trace!("Replaced with read chain"); + }, + Err(e) => { + error!("Failed to load chain from file, keeping current: {}", e); + }, + } + }, + _ = quit.recv() => { + warn!("Got SIGQUIT. 
Saving chain then aborting."); + if let Err(e) = save::save_now(&state).await { + error!("Failed to save chain: {}", e); + } else{ + trace!("Saved chain okay."); + } + error!("Aborting"); + std::process::abort() + }, + } + } + trace!("Graceful shutdown"); +} diff --git a/src/state.rs b/src/state.rs index 5cc8d56..6ec7883 100644 --- a/src/state.rs +++ b/src/state.rs @@ -63,7 +63,7 @@ impl State *self.shutdown_recv.borrow() } - pub async fn on_shutdown(mut self) + pub async fn on_shutdown(&mut self) { if !self.has_shutdown() { while let Some(false) = self.shutdown_recv.recv().await { From d97e23c94f20c7083c849de12456bf6227f9b512 Mon Sep 17 00:00:00 2001 From: Flan's server Date: Thu, 8 Oct 2020 13:25:36 +0000 Subject: [PATCH 16/50] update makefile --- Cargo.lock | 2 +- Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 583238c..928fb90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -605,7 +605,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.2.0" +version = "0.3.0" dependencies = [ "cfg-if 1.0.0", "futures", diff --git a/Makefile b/Makefile index 80c617d..b2f0cfb 100644 --- a/Makefile +++ b/Makefile @@ -9,9 +9,9 @@ install: reinstall: uninstall cp -f target/release/markov /usr/local/bin/markov + rm -f /var/nginx/markov.dat rc-service markov start sleep 0.2 - rm -f /var/nginx/markov.dat curl -X PUT -d @default http://127.0.0.1:8001/put uninstall: From e5f1595b98345df6a7cc60fa42a483767704d85e Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 14:26:52 +0100 Subject: [PATCH 17/50] flush --- Cargo.lock | 2 +- src/save.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 583238c..928fb90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -605,7 +605,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.2.0" +version = "0.3.0" dependencies = [ "cfg-if 1.0.0", "futures", diff --git a/src/save.rs b/src/save.rs index fa8461c..a824774 100644 --- a/src/save.rs +++ b/src/save.rs @@ -55,6 +55,7 @@ async fn save_now_to(chain: &Chain, to: impl AsRef) -> io::Result< let mut file = AsyncWriteCompressor::new(file, PreferencesBuilder::new() .compression_level(lz4f::CLEVEL_HIGH).build())?; file.write_all(&chain[..]).await?; + file.flush().await?; file.shutdown().await?; Ok(()) } From 77001e9c78fefada2ac0e042db519d00a8628329 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 14:29:06 +0100 Subject: [PATCH 18/50] flush --- Cargo.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5d8526a..71e6e0a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,17 @@ [package] name = "markov" -version = "0.3.0" +version = "0.3.1" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[profile.release] +opt-level = 3 +lto = "fat" +codegen-units = 1 + [dependencies] chain = {package = "markov", version = "1.1.0"} tokio = {version = "0.2", features=["full"]} From 8b5b753e9b1570e92c97fe9f1172bff47068a8a0 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 14:38:05 +0100 Subject: [PATCH 19/50] profile --- .gitignore | 1 + Cargo.lock | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ba830fc..7751635 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target *~ chain.dat +flamegraph.svg diff --git a/Cargo.lock b/Cargo.lock index 
928fb90..19caf01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -605,7 +605,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.3.0" +version = "0.3.1" dependencies = [ "cfg-if 1.0.0", "futures", From c5993b54e94a476344ba33294079797d11b3fd00 Mon Sep 17 00:00:00 2001 From: Flan's server Date: Thu, 8 Oct 2020 14:06:34 +0000 Subject: [PATCH 20/50] FUCK FUCK YOU FUCK --- Cargo.lock | 2 +- Cargo.toml | 2 +- Makefile | 2 +- src/save.rs | 14 +++++++++----- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 19caf01..f5175d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -605,7 +605,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.3.1" +version = "0.3.2" dependencies = [ "cfg-if 1.0.0", "futures", diff --git a/Cargo.toml b/Cargo.toml index 71e6e0a..a16c3bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.3.1" +version = "0.3.2" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/Makefile b/Makefile index b2f0cfb..ccf8d3d 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ markov: strip target/release/markov install: - rc-service markov stop + -rc-service markov shutdown && sleep 0.6 cp -f target/release/markov /usr/local/bin/markov rc-service markov start diff --git a/src/save.rs b/src/save.rs index a824774..ec09cf8 100644 --- a/src/save.rs +++ b/src/save.rs @@ -45,16 +45,19 @@ pub async fn save_now(state: &State) -> io::Result<()> async fn save_now_to(chain: &Chain, to: impl AsRef) -> io::Result<()> { debug!("Saving chain to {:?}", to.as_ref()); - let file = OpenOptions::new() + let mut file = OpenOptions::new() .write(true) .create(true) .truncate(true) .open(to).await?; let chain = serde_cbor::to_vec(chain).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - - let mut file = AsyncWriteCompressor::new(file, PreferencesBuilder::new() - .compression_level(lz4f::CLEVEL_HIGH).build())?; - file.write_all(&chain[..]).await?; + { + let mut file = AsyncWriteCompressor::new(&mut file, PreferencesBuilder::new() + .compression_level(lz4f::CLEVEL_HIGH).build())?; + file.write_all(&chain[..]).await?; + file.flush().await?; + file.shutdown().await?; + } file.flush().await?; file.shutdown().await?; Ok(()) @@ -94,6 +97,7 @@ pub async fn load(from: impl AsRef) -> io::Result> let mut whole = Vec::new(); let mut file = AsyncReadDecompressor::new(file)?; tokio::io::copy(&mut file, &mut whole).await?; + whole.flush().await?; serde_cbor::from_slice(&whole[..]) .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e)) } From 7bc8f9837aedfbc93ae917df04119cd3d0e4c57d Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 15:36:39 +0100 Subject: [PATCH 21/50] temporarily disable compression --- .#Cargo.toml | 1 + Cargo.toml | 5 ++++- src/save.rs | 26 +++++++++++++++++++------- 3 files changed, 24 insertions(+), 8 deletions(-) create mode 120000 .#Cargo.toml diff --git a/.#Cargo.toml b/.#Cargo.toml new file mode 120000 index 0000000..1f74c0e --- /dev/null +++ b/.#Cargo.toml @@ -0,0 +1 @@ +avril@eientei.869:1602150887 \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index a16c3bd..2e28023 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,9 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +compress-chain = ["lzzzz"] + [profile.release] opt-level = 3 lto = "fat" @@ -22,6 +25,6 @@ log = "0.4.11" cfg-if = "1.0.0" futures = "0.3.6" 
serde_cbor = "0.11.1" -lzzzz = {version = "0.2", features=["tokio-io"]} +lzzzz = {version = "0.2", features=["tokio-io"], optional=true} serde = {version ="1.0", features=["derive"]} toml = "0.5.6" diff --git a/src/save.rs b/src/save.rs index ec09cf8..809cb5b 100644 --- a/src/save.rs +++ b/src/save.rs @@ -22,6 +22,7 @@ use futures::{ OptionFuture, }, }; +#[cfg(feature="compress-chain")] use lzzzz::{ lz4f::{ self, @@ -51,11 +52,14 @@ async fn save_now_to(chain: &Chain, to: impl AsRef) -> io::Result< .truncate(true) .open(to).await?; let chain = serde_cbor::to_vec(chain).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - { + { + #[cfg(feature="compress-chain")] let mut file = AsyncWriteCompressor::new(&mut file, PreferencesBuilder::new() - .compression_level(lz4f::CLEVEL_HIGH).build())?; + .compression_level(lz4f::CLEVEL_HIGH).build())?; file.write_all(&chain[..]).await?; + #[cfg(feature="compress-chain")] file.flush().await?; + #[cfg(feature="compress-chain")] file.shutdown().await?; } file.flush().await?; @@ -64,9 +68,9 @@ async fn save_now_to(chain: &Chain, to: impl AsRef) -> io::Result< } /// Start the save loop for this chain -pub async fn host(state: State) +pub async fn host(mut state: State) { - let to = &state.config().file; + let to = state.config().file.to_owned(); let interval = state.config().save_interval(); while Arc::strong_count(state.when()) > 1 { { @@ -78,11 +82,17 @@ pub async fn host(state: State) info!("Saved chain to {:?}", to); } } + + tokio::select!{ + _ = OptionFuture::from(interval.map(|interval| time::delay_for(interval))) => {}, + _ = state.on_shutdown() => { + break; + } + } + state.when().notified().await; if state.has_shutdown() { break; } - OptionFuture::from(interval.map(|interval| time::delay_for(interval))).await; - state.when().notified().await; } trace!("Saver exiting"); } @@ -91,10 +101,12 @@ pub async fn host(state: State) pub async fn load(from: impl AsRef) -> io::Result> { debug!("Loading chain from {:?}", from.as_ref()); - let file = OpenOptions::new() + #[allow(unused_mut)] + let mut file = OpenOptions::new() .read(true) .open(from).await?; let mut whole = Vec::new(); + #[cfg(feature="compress-chain")] let mut file = AsyncReadDecompressor::new(file)?; tokio::io::copy(&mut file, &mut whole).await?; whole.flush().await?; From d2bb1cac00d12a53c29fbf83902a0762da962e33 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 15:36:46 +0100 Subject: [PATCH 22/50] temporarily disable compression --- .#Cargo.toml | 1 - Cargo.toml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 120000 .#Cargo.toml diff --git a/.#Cargo.toml b/.#Cargo.toml deleted file mode 120000 index 1f74c0e..0000000 --- a/.#Cargo.toml +++ /dev/null @@ -1 +0,0 @@ -avril@eientei.869:1602150887 \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 2e28023..2e7379e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.3.2" +version = "0.3.3" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" From 32a4964232bc432d74d3029fa9c3ef5715b02e29 Mon Sep 17 00:00:00 2001 From: Flan's server Date: Thu, 8 Oct 2020 14:44:19 +0000 Subject: [PATCH 23/50] eh --- Cargo.lock | 2 +- Makefile | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index f5175d6..4d1af37 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -605,7 +605,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.3.2" +version = "0.3.3" dependencies = [ 
"cfg-if 1.0.0", "futures", diff --git a/Makefile b/Makefile index ccf8d3d..ec2ae24 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ markov: install: -rc-service markov shutdown && sleep 0.6 + -rc-service markov stop cp -f target/release/markov /usr/local/bin/markov rc-service markov start From 93e1d335dc19250b20d8309826dca6637188e8c2 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 15:53:46 +0100 Subject: [PATCH 24/50] fix dumb shit --- Cargo.lock | 41 +++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 6 ++++-- src/save.rs | 32 +++++++++++++++++++++----------- 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4d1af37..6719278 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,6 +15,19 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d25d88fd6b8041580a654f9d0c581a047baee2b3efee13275f2fc392fc75034" +[[package]] +name = "async-compression" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9021768bcce77296b64648cc7a7460e3df99979b97ed5c925c38d1cc83778d98" +dependencies = [ + "bzip2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", +] + [[package]] name = "atty" version = "0.2.14" @@ -108,6 +121,27 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38" +[[package]] +name = "bzip2" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42b7c3cbf0fa9c1b82308d57191728ca0256cb821220f4e2fd410a72ade26e3b" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.9+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad3b39a260062fca31f7b0b12f207e8f2590a67d32ec7d59c20484b07ea7285e" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "cc" version = "1.0.60" @@ -607,6 +641,7 @@ dependencies = [ name = "markov" version = "0.3.3" dependencies = [ + "async-compression", "cfg-if 1.0.0", "futures", "hyper", @@ -833,6 +868,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d36492546b6af1463394d46f0c834346f31548646f6ba10849802c9c9a27ac33" + [[package]] name = "ppv-lite86" version = "0.2.9" diff --git a/Cargo.toml b/Cargo.toml index 2e7379e..2e31c38 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.3.3" +version = "0.3.4" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" @@ -8,7 +8,8 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -compress-chain = ["lzzzz"] +default = ["compress-chain"] +compress-chain = ["async-compression"] [profile.release] opt-level = 3 @@ -28,3 +29,4 @@ serde_cbor = "0.11.1" lzzzz = {version = "0.2", features=["tokio-io"], optional=true} serde = {version ="1.0", features=["derive"]} toml = "0.5.6" +async-compression = {version = "0.3.5", features=["tokio-02", "bzip2"], optional=true} diff --git a/src/save.rs b/src/save.rs index 809cb5b..c7e3c41 100644 --- a/src/save.rs +++ b/src/save.rs @@ -23,18 +23,24 @@ use futures::{ }, }; 
#[cfg(feature="compress-chain")] -use lzzzz::{ - lz4f::{ - self, - AsyncWriteCompressor, - PreferencesBuilder, - AsyncReadDecompressor, +use async_compression::{ + tokio_02::{ + write::{ + BzEncoder, + BzDecoder, + }, }, }; const SAVE_INTERVAL: Option = Some(Duration::from_secs(2)); +#[cfg(feature="compress-chain")] +type Compressor = BzEncoder; +#[cfg(feature="compress-chain")] +type Decompressor = BzDecoder; + + pub async fn save_now(state: &State) -> io::Result<()> { let chain = state.chain().read().await; @@ -54,12 +60,11 @@ async fn save_now_to(chain: &Chain, to: impl AsRef) -> io::Result< let chain = serde_cbor::to_vec(chain).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; { #[cfg(feature="compress-chain")] - let mut file = AsyncWriteCompressor::new(&mut file, PreferencesBuilder::new() - .compression_level(lz4f::CLEVEL_HIGH).build())?; + let mut file = Compressor::new(&mut file); file.write_all(&chain[..]).await?; - #[cfg(feature="compress-chain")] + #[cfg(feature="compress-chain")] file.flush().await?; - #[cfg(feature="compress-chain")] + #[cfg(feature="compress-chain")] file.shutdown().await?; } file.flush().await?; @@ -105,11 +110,16 @@ pub async fn load(from: impl AsRef) -> io::Result> let mut file = OpenOptions::new() .read(true) .open(from).await?; + #[allow(unused_mut)] let mut whole = Vec::new(); #[cfg(feature="compress-chain")] - let mut file = AsyncReadDecompressor::new(file)?; + let mut whole = Decompressor::new(whole); tokio::io::copy(&mut file, &mut whole).await?; whole.flush().await?; + #[cfg(feature="compress-chain")] + whole.shutdown().await?; + #[cfg(feature="compress-chain")] + let whole = whole.into_inner(); serde_cbor::from_slice(&whole[..]) .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e)) } From d44fa5c40baae70fcf20e8224917d45ac8e972e5 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 16:03:06 +0100 Subject: [PATCH 25/50] wtf is wrong with the space eating --- Cargo.lock | 2 +- src/feed.rs | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6719278..11417aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.3.3" +version = "0.3.4" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/src/feed.rs b/src/feed.rs index 8c849e3..0cbc5f0 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -15,10 +15,12 @@ pub async fn full(who: &IpAddr, state: State, mut body: impl Unpin + Stream 0 { + buffer.extend_from_slice(body.bytes()); // XXX: what the fuck is wrong with this??? why it eat spaces???? + let cnt = body.bytes().len(); + body.advance(cnt); + written += cnt; + } } } if !buffer.ends_with(&[b'\n']) { // probably useless eh? 
From abd9d2c9413c02dda5c38f4b6d6723fc6efc5c63 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 17:46:12 +0100 Subject: [PATCH 26/50] added api --- Cargo.toml | 7 ++++- src/api.rs | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/feed.rs | 2 +- src/main.rs | 40 ++++++++++++++++++++++++-- 4 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 src/api.rs diff --git a/Cargo.toml b/Cargo.toml index 2e31c38..91e2b5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.3.4" +version = "0.4.0" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" @@ -9,8 +9,13 @@ edition = "2018" [features] default = ["compress-chain"] + +# Compress the chain data file when saved to disk compress-chain = ["async-compression"] +# Enable the /api/ route +api = [] + [profile.release] opt-level = 3 lto = "fat" diff --git a/src/api.rs b/src/api.rs new file mode 100644 index 0000000..95b1415 --- /dev/null +++ b/src/api.rs @@ -0,0 +1,83 @@ +//! For API calls if enabled +use super::*; +use std::{ + fmt, + error, + iter, + convert::Infallible, +}; +use futures::{ + stream::{ + self, + BoxStream, + StreamExt, + }, +}; + +fn aggregate(mut body: impl Buf) -> Result +{ + let mut output = Vec::new(); + while body.has_remaining() { + let bytes = body.bytes(); + output.extend_from_slice(&bytes[..]); + let cnt = bytes.len(); + body.advance(cnt); + } + + String::from_utf8(output) +} + +pub async fn single(host: IpAddr, num: Option, body: impl Buf) -> Result +{ + single_stream(host, num, body).await + .map(|rx| Response::new(Body::wrap_stream(rx.map(move |x| { + info!("{} <- {:?}", host, x); + x + })))) + .map_err(warp::reject::custom) +} + +async fn single_stream(host: IpAddr, num: Option, body: impl Buf) -> Result>, ApiError> +{ + let body = aggregate(body)?; + info!("{} <- {:?}", host, &body[..]); + + let mut chain = Chain::new(); + feed::feed(&mut chain, body); + match num { + None => Ok(stream::iter(iter::once(Ok(chain.generate_str()))).boxed()), + Some(num) => { + let (mut tx, rx) = mpsc::channel(num); + tokio::spawn(async move { + for string in chain.str_iter_for(num) { + tx.send(string).await.expect("Failed to send string to body"); + } + }); + Ok(StreamExt::map(rx, |x| Ok::<_, Infallible>(x)).boxed()) + } + } +} + +#[derive(Debug)] +pub enum ApiError { + Body, +} +impl warp::reject::Reject for ApiError{} +impl error::Error for ApiError{} +impl std::fmt::Display for ApiError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + match self { + Self::Body => write!(f, "invalid data in request body"), + } + } +} + +impl From for ApiError +{ + fn from(_: std::string::FromUtf8Error) -> Self + { + Self::Body + } +} diff --git a/src/feed.rs b/src/feed.rs index 0cbc5f0..2b90da3 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -1,7 +1,7 @@ //! Feeding the chain use super::*; -fn feed(chain: &mut Chain, what: impl AsRef) +pub fn feed(chain: &mut Chain, what: impl AsRef) { chain.feed(what.as_ref().split_whitespace() .filter(|word| !word.is_empty()) diff --git a/src/main.rs b/src/main.rs index 248d603..278db34 100644 --- a/src/main.rs +++ b/src/main.rs @@ -39,14 +39,15 @@ use futures::{ join_all, }, }; - +use cfg_if::cfg_if; macro_rules! 
status { ($code:expr) => { ::warp::http::status::StatusCode::from_u16($code).unwrap() }; } - +#[cfg(feature="api")] +mod api; #[cfg(target_family="unix")] mod signals; mod config; @@ -131,6 +132,37 @@ async fn main() { }) .with(warp::log("markov::put")); + + cfg_if!{ + if #[cfg(feature="api")] { + let api = { + let api_single = { + let msz = state.config().max_gen_size; + warp::post() + .and(warp::path("single")) + .and(client_ip.clone()) + .and(warp::path::param() + .map(move |sz: usize| { + if sz == 0 || (2..=msz).contains(&sz) { + Some(sz) + } else { + None + } + }) + .or(warp::any().map(|| None)) + .unify()) + .and(warp::body::content_length_limit(state.config().max_content_length)) + .and(warp::body::aggregate()) + .and_then(api::single) + .with(warp::log("markov::api::single")) + }; + + warp::path("api") + .and(api_single) + }; + } + } + let read = warp::get() .and(chain.clone()) .and(warp::path("get")) @@ -147,7 +179,9 @@ async fn main() { } }) .with(warp::log("markov::read")); - + + #[cfg(feature="api")] + let read = read.or(api); #[cfg(target_family="unix")] tasks.push(tokio::spawn(signals::handle(state.clone())).map(|res| res.expect("Signal handler panicked")).boxed()); From cfbe9f385de4941913c73e2d676e4e1833c4274d Mon Sep 17 00:00:00 2001 From: Flan's server Date: Thu, 8 Oct 2020 16:54:04 +0000 Subject: [PATCH 27/50] update makefile --- Cargo.lock | 2 +- Makefile | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 11417aa..4f82c9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.3.4" +version = "0.4.0" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/Makefile b/Makefile index ec2ae24..32c7e43 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,7 @@ +FEATURES:="api" + markov: - cargo build --release + cargo build --release --features $(FEATURES) strip target/release/markov install: From daca2fb6ea8086e1bba2faa6c302d67e318cc994 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 8 Oct 2020 18:54:17 +0100 Subject: [PATCH 28/50] todo --- TODO | 1 + 1 file changed, 1 insertion(+) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 0000000..4b9b689 --- /dev/null +++ b/TODO @@ -0,0 +1 @@ +Allow Unix domain socket for bind From bd7e36b4f84025c31422323704e28af7cb36877b Mon Sep 17 00:00:00 2001 From: Avril Date: Fri, 9 Oct 2020 18:11:14 +0100 Subject: [PATCH 29/50] split-newlines --- Cargo.toml | 3 +++ Makefile | 2 +- src/api.rs | 10 +++++++++- src/feed.rs | 25 +++++++++++++++++-------- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 91e2b5c..3e74899 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,9 @@ default = ["compress-chain"] # Compress the chain data file when saved to disk compress-chain = ["async-compression"] +# Treat each new line as a new set to feed instead of feeding the whole data at once +split-newlines = [] + # Enable the /api/ route api = [] diff --git a/Makefile b/Makefile index 32c7e43..892a3a0 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ reinstall: uninstall rm -f /var/nginx/markov.dat rc-service markov start sleep 0.2 - curl -X PUT -d @default http://127.0.0.1:8001/put + curl -X PUT --data-binary @default http://127.0.0.1:8001/put uninstall: -rc-service markov stop diff --git a/src/api.rs b/src/api.rs index 95b1415..54e36ba 100644 --- a/src/api.rs +++ b/src/api.rs @@ -43,7 +43,15 @@ async fn single_stream(host: IpAddr, num: Option, body: impl Buf) -> Resu 
info!("{} <- {:?}", host, &body[..]); let mut chain = Chain::new(); - feed::feed(&mut chain, body); + cfg_if!{ + if #[cfg(feature="split-newlines")] { + for body in body.split('\n').filter(|line| !line.trim().is_empty()) { + feed::feed(&mut chain, body); + } + }else { + feed::feed(&mut chain, body); + } + } match num { None => Ok(stream::iter(iter::once(Ok(chain.generate_str()))).boxed()), Some(num) => { diff --git a/src/feed.rs b/src/feed.rs index 2b90da3..4b2281a 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -3,34 +3,43 @@ use super::*; pub fn feed(chain: &mut Chain, what: impl AsRef) { - chain.feed(what.as_ref().split_whitespace() - .filter(|word| !word.is_empty()) - .map(|s| s.to_owned()).collect::>()); + let map = what.as_ref().split_whitespace() + .filter(|word| !word.is_empty()) + .map(|s| s.to_owned()).collect::>(); + if map.len() > 0 { + chain.feed(map); + } } pub async fn full(who: &IpAddr, state: State, mut body: impl Unpin + Stream>) -> Result { let mut buffer = Vec::new(); let mut written = 0usize; + //TODO: Change to pushing lines to mpsc channel, instead of manually aggregating. while let Some(buf) = body.next().await { let mut body = buf.map_err(|_| FillBodyError)?; while body.has_remaining() { if body.bytes().len() > 0 { - buffer.extend_from_slice(body.bytes()); // XXX: what the fuck is wrong with this??? why it eat spaces???? + buffer.extend_from_slice(body.bytes()); let cnt = body.bytes().len(); body.advance(cnt); written += cnt; } } } - if !buffer.ends_with(&[b'\n']) { // probably useless eh? - buffer.push(b'\n'); - } let buffer = std::str::from_utf8(&buffer[..]).map_err(|_| FillBodyError)?; info!("{} -> {:?}", who, buffer); let mut chain = state.chain().write().await; - feed(&mut chain, buffer); + cfg_if! { + if #[cfg(feature="split-newlines")] { + for buffer in buffer.split('\n').filter(|line| !line.trim().is_empty()) { + feed(&mut chain, buffer); + } + } else { + feed(&mut chain, buffer); + } + } state.notify_save(); Ok(written) } From 9dac784dbe69bbac2b0b409b530750958878789f Mon Sep 17 00:00:00 2001 From: Avril Date: Fri, 9 Oct 2020 18:22:44 +0100 Subject: [PATCH 30/50] bump --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3e74899..e3bac99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.4.0" +version = "0.4.1" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" From ff42a0830c752dfd20385ba772657a31cd88064b Mon Sep 17 00:00:00 2001 From: Flan's server Date: Fri, 9 Oct 2020 17:25:15 +0000 Subject: [PATCH 31/50] update makefile --- Cargo.lock | 2 +- Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4f82c9c..931bb1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.4.0" +version = "0.4.1" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/Makefile b/Makefile index 892a3a0..ddfe99b 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -FEATURES:="api" +FEATURES:="api,split-newlines" markov: cargo build --release --features $(FEATURES) From df7c8df1b74fd414ac33e91bba263ca7cb511acd Mon Sep 17 00:00:00 2001 From: Avril Date: Sat, 10 Oct 2020 00:59:23 +0100 Subject: [PATCH 32/50] added streamreader --- Cargo.lock | 2 + Cargo.toml | 2 + src/bytes.rs | 28 ++++++++ src/chunking.rs | 173 ++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 3 + 5 files changed, 208 
insertions(+) create mode 100644 src/bytes.rs create mode 100644 src/chunking.rs diff --git a/Cargo.lock b/Cargo.lock index 931bb1d..efeb4d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -645,9 +645,11 @@ dependencies = [ "cfg-if 1.0.0", "futures", "hyper", + "libc", "log", "lzzzz", "markov 1.1.0", + "pin-project", "pretty_env_logger", "serde", "serde_cbor", diff --git a/Cargo.toml b/Cargo.toml index e3bac99..a256ae6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,3 +38,5 @@ lzzzz = {version = "0.2", features=["tokio-io"], optional=true} serde = {version ="1.0", features=["derive"]} toml = "0.5.6" async-compression = {version = "0.3.5", features=["tokio-02", "bzip2"], optional=true} +pin-project = "0.4.26" +libc = "0.2.79" diff --git a/src/bytes.rs b/src/bytes.rs new file mode 100644 index 0000000..06838ab --- /dev/null +++ b/src/bytes.rs @@ -0,0 +1,28 @@ +use libc::{ + c_void, +}; +/// Copy slice of bytes only +/// +/// # Notes +/// `dst` and `src` must not overlap. See [move_slice]. +pub fn copy_slice(dst: &mut [u8], src: &[u8]) -> usize +{ + let sz = std::cmp::min(dst.len(),src.len()); + unsafe { + libc::memcpy(&mut dst[0] as *mut u8 as *mut c_void, &src[0] as *const u8 as *const c_void, sz); + } + sz +} + +/// Move slice of bytes only +/// +/// # Notes +/// `dst` and `src` can overlap. +pub fn move_slice(dst: &mut [u8], src: &[u8]) -> usize +{ + let sz = std::cmp::min(dst.len(),src.len()); + unsafe { + libc::memmove(&mut dst[0] as *mut u8 as *mut c_void, &src[0] as *const u8 as *const c_void, sz); + } + sz +} diff --git a/src/chunking.rs b/src/chunking.rs new file mode 100644 index 0000000..b43f358 --- /dev/null +++ b/src/chunking.rs @@ -0,0 +1,173 @@ +//! Stream related things +use super::*; +use std::{ + task::{ + Poll, + Context, + }, + pin::Pin, +}; +use tokio::{ + io::{ + AsyncBufRead, + AsyncRead, + }, + prelude::*, +}; +use futures::{ + stream::{ + Stream, + }, +}; +use pin_project::pin_project; + +/// Converts a stream of byte-containing objects into an `AsyncRead` and `AsyncBufRead`er. +#[pin_project] +pub struct StreamReader +where I: Stream +{ + #[pin] + source: I, + buffer: Vec, +} + +impl StreamReader +where I: Stream, + T: AsRef<[u8]> +{ + /// The current buffer + pub fn buffer(&self) -> &[u8] + { + &self.buffer[..] + } + /// Consume into the original stream + pub fn into_inner(self) -> I + { + self.source + } + /// Create a new instance with a buffer capacity + pub fn with_capacity(source: I, cap: usize) -> Self + { + Self { + source, + buffer: Vec::with_capacity(cap) + } + } + /// Create a new instance from this stream + pub fn new(source: I) -> Self + { + Self { + source, + buffer: Vec::new(), + } + } + /// Attempt to add to this buffer + #[cold] fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll + { + let this = self.project(); + match this.source.poll_next(cx) { + Poll::Ready(None) => Poll::Ready(0), + Poll::Ready(Some(buf)) if buf.as_ref().len() > 0 => { + let buf = buf.as_ref(); + this.buffer.extend_from_slice(buf); + Poll::Ready(buf.len()) + }, + _ => Poll::Pending, + } + } +} + +impl, I: Stream> AsyncRead for StreamReader +{ + fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + let this = self.project(); + if this.buffer.len() != 0 { + // We can fill the whole buffer, do it. 
+ Poll::Ready(Ok(bytes::copy_slice(buf, this.buffer.drain(..buf.len()).as_slice()))) + } else { + // Buffer is empty, try to fill it + match match this.source.poll_next(cx) { + Poll::Ready(None) => Poll::Ready(0), + Poll::Ready(Some(buf)) if buf.as_ref().len() > 0 => { + let buf = buf.as_ref(); + this.buffer.extend_from_slice(buf); + Poll::Ready(buf.len()) + }, + _ => Poll::Pending, + } { + Poll::Ready(0) => Poll::Ready(Ok(0)), + Poll::Ready(x) => { + // x has been written + Poll::Ready(Ok(bytes::copy_slice(buf, this.buffer.drain(..x).as_slice()))) + }, + _ => Poll::Pending, + } + } + + } + +} + +impl, I: Stream> AsyncBufRead for StreamReader +{ + fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.project(); + if this.buffer.len() < 1 { + // Fetch more into buffer + match match this.source.poll_next(cx) { + Poll::Ready(None) => Poll::Ready(0), + Poll::Ready(Some(buf)) if buf.as_ref().len() > 0 => { + let buf = buf.as_ref(); + this.buffer.extend_from_slice(buf); + Poll::Ready(buf.len()) + }, + _ => Poll::Pending, + } { + Poll::Ready(0) => Poll::Ready(Ok(&[])), // should we return EOF error here? + Poll::Ready(x) => Poll::Ready(Ok(&this.buffer[..x])), + _ => Poll::Pending + } + } else { + Poll::Ready(Ok(&this.buffer[..])) + } + } + fn consume(self: Pin<&mut Self>, amt: usize) { + self.project().buffer.drain(..amt); + } +} + +#[cfg(test)] +mod tests +{ + use super::*; + use tokio::{ + sync::{ + mpsc, + }, + }; + #[tokio::test] + async fn stream_of_vec() + { + let (mut tx, rx) = mpsc::channel(16); + let sender = tokio::spawn(async move { + tx.send("Hello ").await.unwrap(); + tx.send("world").await.unwrap(); + tx.send("\n").await.unwrap(); + tx.send("How ").await.unwrap(); + tx.send("are ").await.unwrap(); + tx.send("you").await.unwrap(); + }); + + let mut reader = StreamReader::new(rx); + + let mut output = String::new(); + let mut read; + while {read = reader.read_line(&mut output).await.expect("Failed to read"); read!=0} { + println!("Read: {}", read); + } + + println!("Done: {:?}", output); + sender.await.expect("Child panic"); + assert_eq!(&output[..], "Hello world\nHow are you"); + } +} diff --git a/src/main.rs b/src/main.rs index 278db34..d85414a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -46,6 +46,9 @@ macro_rules! 
status { ::warp::http::status::StatusCode::from_u16($code).unwrap() }; } + +mod bytes; +mod chunking; #[cfg(feature="api")] mod api; #[cfg(target_family="unix")] From 206d38300ad727ac0005dde87a3ee2a6fd67cf44 Mon Sep 17 00:00:00 2001 From: Avril Date: Sat, 10 Oct 2020 01:46:48 +0100 Subject: [PATCH 33/50] chunking feed lines feature --- Cargo.toml | 22 ++++++++++++++-- Makefile | 2 +- src/api.rs | 30 ++++++++++++++-------- src/feed.rs | 73 ++++++++++++++++++++++++++++++++++++++--------------- src/main.rs | 10 ++++++++ 5 files changed, 102 insertions(+), 35 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a256ae6..66c22fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.4.1" +version = "0.5.0" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" @@ -8,7 +8,7 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["compress-chain"] +default = ["compress-chain", "split-newlines"] # Compress the chain data file when saved to disk compress-chain = ["async-compression"] @@ -16,6 +16,24 @@ compress-chain = ["async-compression"] # Treat each new line as a new set to feed instead of feeding the whole data at once split-newlines = [] +# Always aggregate incoming buffer instead of streaming them +# This will make feeds faster but allocate full buffers for the aggregated body +# +# Large write: ~95ms +# +# NOTE: This does nothing if `split-newlines` is not enabled +always-aggregate = [] + +# Feeds will hog the buffer lock until the whole body has been fed, instead of acquiring lock every time +# This will make feeds of many lines faster but can potentially cause DoS +# +# With: ~169ms +# Without: ~195ms +# +# NOTE: +# This does nothing if `always-aggregate` is enabled and/or `split-newlines` is not enabled +hog-buffer = [] + # Enable the /api/ route api = [] diff --git a/Makefile b/Makefile index ddfe99b..23a29d5 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -FEATURES:="api,split-newlines" +FEATURES:="api,always-aggregate" markov: cargo build --release --features $(FEATURES) diff --git a/src/api.rs b/src/api.rs index 54e36ba..98cee79 100644 --- a/src/api.rs +++ b/src/api.rs @@ -14,17 +14,17 @@ use futures::{ }, }; -fn aggregate(mut body: impl Buf) -> Result +#[inline] fn aggregate(mut body: impl Buf) -> Result { - let mut output = Vec::new(); + /*let mut output = Vec::new(); while body.has_remaining() { - let bytes = body.bytes(); - output.extend_from_slice(&bytes[..]); - let cnt = bytes.len(); - body.advance(cnt); - } + let bytes = body.bytes(); + output.extend_from_slice(&bytes[..]); + let cnt = bytes.len(); + body.advance(cnt); +}*/ - String::from_utf8(output) + std::str::from_utf8(&body.to_bytes()).map(ToOwned::to_owned) } pub async fn single(host: IpAddr, num: Option, body: impl Buf) -> Result @@ -37,13 +37,18 @@ pub async fn single(host: IpAddr, num: Option, body: impl Buf) -> Result< .map_err(warp::reject::custom) } +//TODO: Change to stream impl like normal `feed` has, instead of taking aggregate? async fn single_stream(host: IpAddr, num: Option, body: impl Buf) -> Result>, ApiError> { let body = aggregate(body)?; info!("{} <- {:?}", host, &body[..]); let mut chain = Chain::new(); - cfg_if!{ + + if_debug! { + let timer = std::time::Instant::now(); + } + cfg_if! 
{ if #[cfg(feature="split-newlines")] { for body in body.split('\n').filter(|line| !line.trim().is_empty()) { feed::feed(&mut chain, body); @@ -52,6 +57,9 @@ async fn single_stream(host: IpAddr, num: Option, body: impl Buf) -> Resu feed::feed(&mut chain, body); } } + if_debug!{ + trace!("Write took {}ms", timer.elapsed().as_millis()); + } match num { None => Ok(stream::iter(iter::once(Ok(chain.generate_str()))).boxed()), Some(num) => { @@ -82,9 +90,9 @@ impl std::fmt::Display for ApiError } } -impl From for ApiError +impl From for ApiError { - fn from(_: std::string::FromUtf8Error) -> Self + fn from(_: std::str::Utf8Error) -> Self { Self::Body } diff --git a/src/feed.rs b/src/feed.rs index 4b2281a..fee939c 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -11,37 +11,68 @@ pub fn feed(chain: &mut Chain, what: impl AsRef) } } -pub async fn full(who: &IpAddr, state: State, mut body: impl Unpin + Stream>) -> Result { - let mut buffer = Vec::new(); +pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream>) -> Result { let mut written = 0usize; - //TODO: Change to pushing lines to mpsc channel, instead of manually aggregating. - while let Some(buf) = body.next().await { - let mut body = buf.map_err(|_| FillBodyError)?; - while body.has_remaining() { - if body.bytes().len() > 0 { - buffer.extend_from_slice(body.bytes()); - let cnt = body.bytes().len(); - body.advance(cnt); - written += cnt; - } - } + if_debug! { + let timer = std::time::Instant::now(); } - let buffer = std::str::from_utf8(&buffer[..]).map_err(|_| FillBodyError)?; - info!("{} -> {:?}", who, buffer); - let mut chain = state.chain().write().await; - cfg_if! { - if #[cfg(feature="split-newlines")] { - for buffer in buffer.split('\n').filter(|line| !line.trim().is_empty()) { - feed(&mut chain, buffer); + cfg_if!{ + if #[cfg(any(not(feature="split-newlines"), feature="always-aggregate"))] { + let mut body = body; + let mut buffer = Vec::new(); + while let Some(buf) = body.next().await { + let mut body = buf.map_err(|_| FillBodyError)?; + while body.has_remaining() { + if body.bytes().len() > 0 { + buffer.extend_from_slice(body.bytes()); + let cnt = body.bytes().len(); + body.advance(cnt); + written += cnt; + } + } + } + let buffer = std::str::from_utf8(&buffer[..]).map_err(|_| FillBodyError)?; + info!("{} -> {:?}", who, buffer); + let mut chain = state.chain().write().await; + cfg_if! { + if #[cfg(feature="split-newlines")] { + for buffer in buffer.split('\n').filter(|line| !line.trim().is_empty()) { + feed(&mut chain, buffer); + } + } else { + feed(&mut chain, buffer); + + } } } else { - feed(&mut chain, buffer); + use tokio::prelude::*; + let reader = chunking::StreamReader::new(body.map(|x| x.map(|mut x| x.to_bytes()).unwrap_or_default())); + let mut lines = reader.lines(); + + #[cfg(feature="hog-buffer")] + let mut chain = state.chain().write().await; + while let Some(line) = lines.next_line().await.map_err(|_| FillBodyError)? { + let line = line.trim(); + if !line.is_empty() { + #[cfg(not(feature="hog-buffer"))] + let mut chain = state.chain().write().await; // Acquire mutex once per line? Is this right? + + feed(&mut chain, line); + info!("{} -> {:?}", who, line); + } + written+=line.len(); + } } } + + if_debug!{ + trace!("Write took {}ms", timer.elapsed().as_millis()); + } state.notify_save(); Ok(written) + } diff --git a/src/main.rs b/src/main.rs index d85414a..6925d8d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -41,6 +41,16 @@ use futures::{ }; use cfg_if::cfg_if; +macro_rules! 
if_debug { + ($($tt:tt)*) => { + cfg_if::cfg_if!{ + if #[cfg(debug_assertions)] { + $($tt)* + } + } + } +} + macro_rules! status { ($code:expr) => { ::warp::http::status::StatusCode::from_u16($code).unwrap() From f2565088b4bc2cc0eaebcfc8807514e329b13341 Mon Sep 17 00:00:00 2001 From: Avril Date: Sat, 10 Oct 2020 21:20:33 +0100 Subject: [PATCH 34/50] fix fuse error --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/api.rs | 4 ++-- src/chunking.rs | 10 ++++++---- src/feed.rs | 19 ++++++++++++++----- src/main.rs | 2 +- 6 files changed, 25 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index efeb4d7..0eeb643 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.4.1" +version = "0.5.0" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/Cargo.toml b/Cargo.toml index 66c22fd..661860e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.5.0" +version = "0.5.1" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/src/api.rs b/src/api.rs index 98cee79..c6eeb7c 100644 --- a/src/api.rs +++ b/src/api.rs @@ -51,10 +51,10 @@ async fn single_stream(host: IpAddr, num: Option, body: impl Buf) -> Resu cfg_if! { if #[cfg(feature="split-newlines")] { for body in body.split('\n').filter(|line| !line.trim().is_empty()) { - feed::feed(&mut chain, body); + feed::feed(&mut chain, body, 1..); } }else { - feed::feed(&mut chain, body); + feed::feed(&mut chain, body, 1..); } } if_debug!{ diff --git a/src/chunking.rs b/src/chunking.rs index b43f358..47dcc30 100644 --- a/src/chunking.rs +++ b/src/chunking.rs @@ -17,6 +17,8 @@ use tokio::{ use futures::{ stream::{ Stream, + StreamExt, + Fuse, }, }; use pin_project::pin_project; @@ -27,7 +29,7 @@ pub struct StreamReader where I: Stream { #[pin] - source: I, + source: Fuse, buffer: Vec, } @@ -43,13 +45,13 @@ where I: Stream, /// Consume into the original stream pub fn into_inner(self) -> I { - self.source + self.source.into_inner() } /// Create a new instance with a buffer capacity pub fn with_capacity(source: I, cap: usize) -> Self { Self { - source, + source: source.fuse(), buffer: Vec::with_capacity(cap) } } @@ -57,7 +59,7 @@ where I: Stream, pub fn new(source: I) -> Self { Self { - source, + source: source.fuse(), buffer: Vec::new(), } } diff --git a/src/feed.rs b/src/feed.rs index fee939c..6f16027 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -1,13 +1,22 @@ //! 
Feeding the chain use super::*; -pub fn feed(chain: &mut Chain, what: impl AsRef) +const FEED_BOUNDS: std::ops::RangeFrom = 2..; //TODO: Add to config somehow + + +pub fn feed(chain: &mut Chain, what: impl AsRef, bounds: impl std::ops::RangeBounds) -> bool { let map = what.as_ref().split_whitespace() .filter(|word| !word.is_empty()) .map(|s| s.to_owned()).collect::>(); - if map.len() > 0 { + debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds"); + if bounds.contains(&map.len()) { chain.feed(map); + true + } + else { + debug!("Ignoring feed of invalid length {}", map.len()); + false } } @@ -38,10 +47,10 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream {:?}", who, line); } written+=line.len(); diff --git a/src/main.rs b/src/main.rs index 6925d8d..338a772 100644 --- a/src/main.rs +++ b/src/main.rs @@ -140,7 +140,7 @@ async fn main() { async move { feed::full(&host, state, buf).await .map(|_| warp::reply::with_status(warp::reply(), status!(201))) - .map_err(warp::reject::custom) + .map_err(|_| warp::reject::not_found()) //(warp::reject::custom) //TODO: Recover rejection filter down below for custom error return } }) .with(warp::log("markov::put")); From a4fd1ddfbb2a6a664b1fd00e2a16898fd7500f10 Mon Sep 17 00:00:00 2001 From: Avril Date: Sat, 10 Oct 2020 21:21:59 +0100 Subject: [PATCH 35/50] fix fuse error --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 0eeb643..67345b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.5.0" +version = "0.5.1" dependencies = [ "async-compression", "cfg-if 1.0.0", From 51a5d0aeba279dc0d35602a00ab8a17056468955 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 03:12:41 +0100 Subject: [PATCH 36/50] start sentance, sanitise --- Cargo.toml | 2 +- markov.toml | 2 +- src/api.rs | 99 -------------------------------------------- src/api/error.rs | 55 ++++++++++++++++++++++++ src/api/mod.rs | 32 ++++++++++++++ src/api/sentance.rs | 2 + src/api/single.rs | 47 +++++++++++++++++++++ src/feed.rs | 2 +- src/main.rs | 8 +++- src/sanitise/mod.rs | 37 +++++++++++++++++ src/sanitise/word.rs | 45 ++++++++++++++++++++ 11 files changed, 227 insertions(+), 104 deletions(-) delete mode 100644 src/api.rs create mode 100644 src/api/error.rs create mode 100644 src/api/mod.rs create mode 100644 src/api/sentance.rs create mode 100644 src/api/single.rs create mode 100644 src/sanitise/mod.rs create mode 100644 src/sanitise/word.rs diff --git a/Cargo.toml b/Cargo.toml index 661860e..2ad06f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["compress-chain", "split-newlines"] +default = ["compress-chain", "split-newlines", "api"] # Compress the chain data file when saved to disk compress-chain = ["async-compression"] diff --git a/markov.toml b/markov.toml index 6090632..dc0a048 100644 --- a/markov.toml +++ b/markov.toml @@ -2,5 +2,5 @@ bindpoint = '127.0.0.1:8001' file = 'chain.dat' max_content_length = 4194304 max_gen_size = 256 -#save_interval_secs = 2 +#save_interval_secs = 15 trust_x_forwarded_for = false diff --git a/src/api.rs b/src/api.rs deleted file mode 100644 index c6eeb7c..0000000 --- a/src/api.rs +++ /dev/null @@ -1,99 +0,0 @@ -//! 
For API calls if enabled -use super::*; -use std::{ - fmt, - error, - iter, - convert::Infallible, -}; -use futures::{ - stream::{ - self, - BoxStream, - StreamExt, - }, -}; - -#[inline] fn aggregate(mut body: impl Buf) -> Result -{ - /*let mut output = Vec::new(); - while body.has_remaining() { - let bytes = body.bytes(); - output.extend_from_slice(&bytes[..]); - let cnt = bytes.len(); - body.advance(cnt); -}*/ - - std::str::from_utf8(&body.to_bytes()).map(ToOwned::to_owned) -} - -pub async fn single(host: IpAddr, num: Option, body: impl Buf) -> Result -{ - single_stream(host, num, body).await - .map(|rx| Response::new(Body::wrap_stream(rx.map(move |x| { - info!("{} <- {:?}", host, x); - x - })))) - .map_err(warp::reject::custom) -} - -//TODO: Change to stream impl like normal `feed` has, instead of taking aggregate? -async fn single_stream(host: IpAddr, num: Option, body: impl Buf) -> Result>, ApiError> -{ - let body = aggregate(body)?; - info!("{} <- {:?}", host, &body[..]); - - let mut chain = Chain::new(); - - if_debug! { - let timer = std::time::Instant::now(); - } - cfg_if! { - if #[cfg(feature="split-newlines")] { - for body in body.split('\n').filter(|line| !line.trim().is_empty()) { - feed::feed(&mut chain, body, 1..); - } - }else { - feed::feed(&mut chain, body, 1..); - } - } - if_debug!{ - trace!("Write took {}ms", timer.elapsed().as_millis()); - } - match num { - None => Ok(stream::iter(iter::once(Ok(chain.generate_str()))).boxed()), - Some(num) => { - let (mut tx, rx) = mpsc::channel(num); - tokio::spawn(async move { - for string in chain.str_iter_for(num) { - tx.send(string).await.expect("Failed to send string to body"); - } - }); - Ok(StreamExt::map(rx, |x| Ok::<_, Infallible>(x)).boxed()) - } - } -} - -#[derive(Debug)] -pub enum ApiError { - Body, -} -impl warp::reject::Reject for ApiError{} -impl error::Error for ApiError{} -impl std::fmt::Display for ApiError -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result - { - match self { - Self::Body => write!(f, "invalid data in request body"), - } - } -} - -impl From for ApiError -{ - fn from(_: std::str::Utf8Error) -> Self - { - Self::Body - } -} diff --git a/src/api/error.rs b/src/api/error.rs new file mode 100644 index 0000000..97aa6e0 --- /dev/null +++ b/src/api/error.rs @@ -0,0 +1,55 @@ +//! API errors +//use super::*; +use std::{ + error, + fmt, +}; +use warp::{ + Rejection, + Reply, +}; + +#[derive(Debug)] +pub enum ApiError { + Body, +} + +impl ApiError +{ + #[inline] fn error_code(&self) -> warp::http::StatusCode + { + status!(match self { + Self::Body => 422, + }) + } +} + +impl warp::reject::Reject for ApiError{} +impl error::Error for ApiError{} +impl std::fmt::Display for ApiError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + match self { + Self::Body => write!(f, "invalid data in request body"), + } + } +} + +impl From for ApiError +{ + fn from(_: std::str::Utf8Error) -> Self + { + Self::Body + } +} + +// Handles API rejections +pub async fn rejection(err: Rejection) -> Result +{ + if let Some(api) = err.find::() { + Ok(warp::reply::with_status(format!("ApiError: {}\n", api), api.error_code())) + } else { + Err(err) + } +} diff --git a/src/api/mod.rs b/src/api/mod.rs new file mode 100644 index 0000000..cfd7fb1 --- /dev/null +++ b/src/api/mod.rs @@ -0,0 +1,32 @@ +//! 
For API calls if enabled +use super::*; +use std::{ + iter, + convert::Infallible, +}; +use futures::{ + stream::{ + self, + BoxStream, + StreamExt, + }, +}; +pub mod error; +use error::ApiError; + +mod single; + +#[inline] fn aggregate(mut body: impl Buf) -> Result +{ + std::str::from_utf8(&body.to_bytes()).map(ToOwned::to_owned) +} + +pub async fn single(host: IpAddr, num: Option, body: impl Buf) -> Result +{ + single::single_stream(host, num, body).await + .map(|rx| Response::new(Body::wrap_stream(rx.map(move |x| { + info!("{} <- {:?}", host, x); + x + })))) + .map_err(warp::reject::custom) +} diff --git a/src/api/sentance.rs b/src/api/sentance.rs new file mode 100644 index 0000000..9f8f411 --- /dev/null +++ b/src/api/sentance.rs @@ -0,0 +1,2 @@ +//! /sentance/ +use super::*; diff --git a/src/api/single.rs b/src/api/single.rs new file mode 100644 index 0000000..572656b --- /dev/null +++ b/src/api/single.rs @@ -0,0 +1,47 @@ +//! Handler for /single/ +use super::*; + +//TODO: Change to stream impl like normal `feed` has, instead of taking aggregate? +pub async fn single_stream(host: IpAddr, num: Option, body: impl Buf) -> Result>, ApiError> +{ + let body = aggregate(body)?; + info!("{} <- {:?}", host, &body[..]); + + let mut chain = Chain::new(); + + if_debug! { + let timer = std::time::Instant::now(); + } + cfg_if! { + if #[cfg(feature="split-newlines")] { + for body in body.split('\n').filter(|line| !line.trim().is_empty()) { + feed::feed(&mut chain, body, 1..); + } + }else { + feed::feed(&mut chain, body, 1..); + } + } + if_debug!{ + trace!("Write took {}ms", timer.elapsed().as_millis()); + } + if chain.is_empty() { + Ok(stream::empty().boxed()) + } else { + match num { + + None => Ok(stream::iter(iter::once(Ok(chain.generate_str()))).boxed()), + Some(num) => { + let (mut tx, rx) = mpsc::channel(num); + tokio::spawn(async move { + for string in chain.str_iter_for(num) { + if let Err(e) = tx.send(string).await { + error!("Failed to send string to body, aborting: {:?}", e.0); + break; + } + } + }); + Ok(StreamExt::map(rx, |x| Ok::<_, Infallible>(x)).boxed()) + } + } + } +} diff --git a/src/feed.rs b/src/feed.rs index 6f16027..0a9eb8f 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -57,7 +57,7 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream(from: &mut T, to: &mut U) -> Result +{ + todo!() +} + + +#[derive(Debug)] +pub enum Error { + +} +impl error::Error for Error{} +impl fmt::Display for Error +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + Ok(()) + } +} + diff --git a/src/sanitise/word.rs b/src/sanitise/word.rs new file mode 100644 index 0000000..0730282 --- /dev/null +++ b/src/sanitise/word.rs @@ -0,0 +1,45 @@ +//! 
Word splitting +use super::*; +use std::{ + borrow::Borrow, +}; + +#[derive(Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct Word(str); + +impl Word +{ + pub fn new<'a>(from: &'a str) -> &'a Self + { + unsafe { + std::mem::transmute(from) + } + } +} + +impl<'a> From<&'a str> for &'a Word +{ + fn from(from: &'a str) -> Self + { + Word::new(from) + } +} + +impl AsRef for Word +{ + fn as_ref(&self) -> &str + { + &self.0 + } +} + +impl AsRef for str +{ + fn as_ref(&self) -> &Word + { + Word::new(self) + } +} + +//impl Borrow<> From ecf7ff6f07368a05a8facef88f8a3a1aea693760 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 05:21:54 +0100 Subject: [PATCH 37/50] sentances --- Cargo.lock | 35 ++++++++++ Cargo.toml | 7 ++ src/feed.rs | 44 ++++++++---- src/main.rs | 3 +- src/sanitise/mod.rs | 62 +++++++++++++---- src/sanitise/sentance.rs | 146 +++++++++++++++++++++++++++++++++++++++ src/sanitise/word.rs | 109 +++++++++++++++++++++++++++-- 7 files changed, 373 insertions(+), 33 deletions(-) create mode 100644 src/sanitise/sentance.rs diff --git a/Cargo.lock b/Cargo.lock index 67345b2..ae411b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -645,6 +645,7 @@ dependencies = [ "cfg-if 1.0.0", "futures", "hyper", + "lazy_static", "libc", "log", "lzzzz", @@ -653,6 +654,7 @@ dependencies = [ "pretty_env_logger", "serde", "serde_cbor", + "smallmap", "tokio", "toml", "warp", @@ -1117,6 +1119,15 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + [[package]] name = "ryu" version = "1.0.5" @@ -1135,6 +1146,21 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2" +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + [[package]] name = "serde" version = "1.0.116" @@ -1241,6 +1267,15 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" +[[package]] +name = "smallmap" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2adda73259bbc3ff84f711425ebfb8c90e9dd32a12b05c6528dd49244ea8230f" +dependencies = [ + "rustc_version", +] + [[package]] name = "socket2" version = "0.3.15" diff --git a/Cargo.toml b/Cargo.toml index 2ad06f5..0b23ed6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,11 @@ compress-chain = ["async-compression"] # Treat each new line as a new set to feed instead of feeding the whole data at once split-newlines = [] +# Feed each sentance seperately, instead of just each line / whole body +# Maybe better without `split-newlines`? 
+# Kinda experimental +split-sentance = [] + # Always aggregate incoming buffer instead of streaming them # This will make feeds faster but allocate full buffers for the aggregated body # @@ -58,3 +63,5 @@ toml = "0.5.6" async-compression = {version = "0.3.5", features=["tokio-02", "bzip2"], optional=true} pin-project = "0.4.26" libc = "0.2.79" +smallmap = "1.1.3" +lazy_static = "1.4.0" diff --git a/src/feed.rs b/src/feed.rs index 0a9eb8f..7b119e8 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -1,22 +1,42 @@ //! Feeding the chain use super::*; +use sanitise::Sentance; const FEED_BOUNDS: std::ops::RangeFrom = 2..; //TODO: Add to config somehow -pub fn feed(chain: &mut Chain, what: impl AsRef, bounds: impl std::ops::RangeBounds) -> bool +pub fn feed(chain: &mut Chain, what: impl AsRef, bounds: impl std::ops::RangeBounds) { - let map = what.as_ref().split_whitespace() - .filter(|word| !word.is_empty()) - .map(|s| s.to_owned()).collect::>(); - debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds"); - if bounds.contains(&map.len()) { - chain.feed(map); - true - } - else { - debug!("Ignoring feed of invalid length {}", map.len()); - false + cfg_if! { + if #[cfg(feature="split-sentance")] { + let map = Sentance::new_iter(&what) //get each sentance in string + .map(|what| what.split_whitespace() // .words() here will remove the punctuation. + .filter(|word| !word.is_empty()) + .map(|s| s.to_owned()).collect::>()); + debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds"); + for map in map {// feed each sentance seperately + if bounds.contains(&map.len()) { + chain.feed(map); + } + else { + debug!("Ignoring feed of invalid length {}", map.len()); + } + } + } else { + let map = Sentance::new_iter(&what) //get each sentance in string + .map(|what| what.split_whitespace() // .words() here will remove the punctuation. + .filter(|word| !word.is_empty())) + .flatten() // add all into one buffer + .map(|s| s.to_owned()).collect::>(); + debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds"); + if bounds.contains(&map.len()) { + chain.feed(map); + } + else { + debug!("Ignoring feed of invalid length {}", map.len()); + } + + } } } diff --git a/src/main.rs b/src/main.rs index 124eb63..7d7200e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -39,6 +39,7 @@ use futures::{ join_all, }, }; +use lazy_static::lazy_static; use cfg_if::cfg_if; macro_rules! if_debug { @@ -171,7 +172,7 @@ async fn main() { .with(warp::log("markov::api::single")) }; let sentance = warp::post() - .and(warp::path("sentance")); + .and(warp::path("sentance")); //TODO: sanitise::Sentance::new_iter the body line warp::path("api") .and(single) diff --git a/src/sanitise/mod.rs b/src/sanitise/mod.rs index 34ba611..3608cc0 100644 --- a/src/sanitise/mod.rs +++ b/src/sanitise/mod.rs @@ -1,37 +1,73 @@ //! 
Sanitisers use super::*; use std::{ - marker::Unpin, error, fmt, }; -use tokio::{ - prelude::*, - io::{ - AsyncRead, - AsyncBufRead - }, -}; - +mod sentance; +pub use sentance::*; mod word; pub use word::*; - +/* pub fn take_sentance(from: &mut T, to: &mut U) -> Result { todo!() -} +}*/ + #[derive(Debug)] pub enum Error { - + Word(WordError), + Sentance(SentanceError), } + impl error::Error for Error{} + impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - Ok(()) + match self { + Self::Word(_) => write!(f, "couldn't extract word"), + Self::Sentance(_) => write!(f, "couldn't extract sentance"), + } + } +} + +impl From for Error +{ + #[inline] fn from(from: WordError) -> Self + { + Self::Word(from) } } +impl From for Error +{ + #[inline] fn from(from: SentanceError) -> Self + { + Self::Sentance(from) + } +} + +#[cfg(test)] +mod tests +{ + use super::*; + #[test] + fn sentance() + { + let string = r#"Hello world. +I am a string, that is a string. Strings, I love them!!! + +Owo uwu"#; + let sentances = Sentance::new_iter(string); + for sentance in sentances { + let words = Word::new(sentance); + println!("Word in {:?} -> {:?}", sentance, words); + } + + + } +} diff --git a/src/sanitise/sentance.rs b/src/sanitise/sentance.rs new file mode 100644 index 0000000..9144240 --- /dev/null +++ b/src/sanitise/sentance.rs @@ -0,0 +1,146 @@ +//! Sentance splitting +use super::*; +use std::{ + borrow::{ + Borrow, + ToOwned, + }, + ops::{ + Deref,DerefMut, + }, +}; + +#[derive(Debug)] +pub struct SentanceError; + +/// A sentance +#[derive(Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct Sentance(str); + + +macro_rules! new { + ($str:expr) => { + unsafe {Sentance::new_unchecked($str)} + }; +} + +const DEFAULT_BOUNDARIES: &[char] = &['\n', '.', ':', '!']; + +lazy_static! { + static ref BOUNDARIES: smallmap::Map = { + let mut map = smallmap::Map::new(); + for &chr in DEFAULT_BOUNDARIES.iter() { + map.insert(chr, ()); + } + map + }; +} + +#[inline] pub fn is_sentance_boundary(chr: char) -> bool +{ + BOUNDARIES.contains_key(&chr) +} + +impl Sentance +{ + /// Create a new word reference without checking for sentance boundaries + pub unsafe fn new_unchecked<'a>(from: &'a str) -> &'a Self + { + std::mem::transmute(from) + } + + /// Create a single sentance + pub fn single<'a>(from: &'a (impl AsRef + 'a + ?Sized)) -> Result<&'a Self, SentanceError> + { + let from = from.as_ref(); + match from.find(is_sentance_boundary) { + Some(_) => Err(SentanceError), + _ => Ok(new!(from)), + } + } + + /// Create a new section of sentances from this string + #[inline] pub fn new<'a>(from: &'a (impl AsRef + 'a + ?Sized)) -> Vec<&'a Self> + { + Self::new_iter(from) + .collect() + } + + /// Create a new iterator over sentances from this string. 
+ pub fn new_iter<'a>(from: &'a (impl AsRef +'a + ?Sized)) -> impl Iterator + { + let from = from.as_ref(); + from.split(is_sentance_boundary) + .map(|x| new!(x.trim())) + .filter(|x| !x.is_empty()) + } + + /// Get the words in this sentance + pub fn words(&self) -> impl Iterator + { + Word::new_iter(self) + } +} + +impl<'a> From<&'a str> for &'a Sentance +{ + fn from(from: &'a str) -> Self + { + new!(from) + } +} + +impl AsRef for Sentance +{ + fn as_ref(&self) -> &str + { + &self.0 + } +} + +impl AsRef for str +{ + fn as_ref(&self) -> &Sentance + { + new!(self) + } +} + +impl Borrow for String +{ + fn borrow(&self) -> &Sentance { + new!(&self[..]) + } +} + +impl ToOwned for Sentance +{ + type Owned = String; + fn to_owned(&self) -> Self::Owned { + self.0.to_owned() + } +} + +impl Deref for Sentance +{ + type Target = str; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Sentance +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl AsRef for Sentance +{ + #[inline] fn as_ref(&self) -> &Sentance + { + self + } +} diff --git a/src/sanitise/word.rs b/src/sanitise/word.rs index 0730282..365c11d 100644 --- a/src/sanitise/word.rs +++ b/src/sanitise/word.rs @@ -1,28 +1,87 @@ //! Word splitting use super::*; use std::{ - borrow::Borrow, + borrow::{ + Borrow, + ToOwned, + }, + ops::{ + Deref,DerefMut, + }, }; +#[derive(Debug)] +pub struct WordError; + +/// A word is a non-whitespace containing string representing part of a sentance #[derive(Debug, PartialEq, Eq)] #[repr(transparent)] pub struct Word(str); + +macro_rules! new { + ($str:expr) => { + unsafe {Word::new_unchecked($str)} + }; +} + +const DEFAULT_BOUNDARIES: &[char] = &['!', '.', ',']; + +lazy_static! { + static ref BOUNDARIES: smallmap::Map = { + let mut map = smallmap::Map::new(); + for &chr in DEFAULT_BOUNDARIES.iter() { + map.insert(chr, ()); + } + map + }; +} + +#[inline] pub fn is_word_boundary(chr: char) -> bool +{ + chr.is_whitespace() || BOUNDARIES.contains_key(&chr) +} + impl Word { - pub fn new<'a>(from: &'a str) -> &'a Self + /// Create a new word reference without checking for whitespace + pub unsafe fn new_unchecked<'a>(from: &'a str) -> &'a Self { - unsafe { - std::mem::transmute(from) + std::mem::transmute(from) + } + + /// Create a single word + pub fn single<'a>(from: &'a (impl AsRef +?Sized +'a)) -> Result<&'a Self, WordError> + { + let from = from.as_ref(); + match from.find(is_word_boundary) { + Some(_) => Err(WordError), + _ => Ok(new!(from)), } } + + /// Create a new section of words from this sentance + pub fn new<'a>(from: &'a (impl AsRef +?Sized+'a)) -> Vec<&'a Self> + { + Self::new_iter(from) + .collect() + } + + /// Create a new iterator over words from this sentance. 
+ pub fn new_iter<'a>(from: &'a (impl AsRef +?Sized+'a)) -> impl Iterator + { + let from = from.as_ref(); + from.split(is_word_boundary) + .filter(|x| !x.is_empty()) + .map(|x| new!(x)) + } } impl<'a> From<&'a str> for &'a Word { fn from(from: &'a str) -> Self { - Word::new(from) + new!(from) } } @@ -38,8 +97,44 @@ impl AsRef for str { fn as_ref(&self) -> &Word { - Word::new(self) + new!(self) } } -//impl Borrow<> +impl Borrow for String +{ + fn borrow(&self) -> &Word { + new!(&self[..]) + } +} + +impl ToOwned for Word +{ + type Owned = String; + fn to_owned(&self) -> Self::Owned { + self.0.to_owned() + } +} + +impl Deref for Word +{ + type Target = str; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Word +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl AsRef for Word +{ + #[inline] fn as_ref(&self) -> &Word + { + self + } +} From 51d72ec7bd101420b465d0a60fa23dda4a14624a Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 05:22:19 +0100 Subject: [PATCH 38/50] bump ver --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0b23ed6..1184a3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.5.1" +version = "0.5.2" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" From 5b56fcffd5e61a29f352f6cec1079f5a27a59023 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 05:28:30 +0100 Subject: [PATCH 39/50] fix punctuation eating bug. requiring nightly for now... --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/feed.rs | 4 ++-- src/main.rs | 2 ++ src/sanitise/sentance.rs | 2 +- src/sanitise/word.rs | 3 ++- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae411b7..819b64b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.5.1" +version = "0.5.2" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/Cargo.toml b/Cargo.toml index 1184a3b..0c04205 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.5.2" +version = "0.5.3" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/src/feed.rs b/src/feed.rs index 7b119e8..3db4c67 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -10,7 +10,7 @@ pub fn feed(chain: &mut Chain, what: impl AsRef, bounds: impl std:: cfg_if! { if #[cfg(feature="split-sentance")] { let map = Sentance::new_iter(&what) //get each sentance in string - .map(|what| what.split_whitespace() // .words() here will remove the punctuation. + .map(|what| what.words() .filter(|word| !word.is_empty()) .map(|s| s.to_owned()).collect::>()); debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds"); @@ -24,7 +24,7 @@ pub fn feed(chain: &mut Chain, what: impl AsRef, bounds: impl std:: } } else { let map = Sentance::new_iter(&what) //get each sentance in string - .map(|what| what.split_whitespace() // .words() here will remove the punctuation. 
+ .map(|what| what.words() .filter(|word| !word.is_empty())) .flatten() // add all into one buffer .map(|s| s.to_owned()).collect::>(); diff --git a/src/main.rs b/src/main.rs index 7d7200e..c516086 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +#![feature(split_inclusive)] + #![allow(dead_code)] #[macro_use] extern crate log; diff --git a/src/sanitise/sentance.rs b/src/sanitise/sentance.rs index 9144240..2734234 100644 --- a/src/sanitise/sentance.rs +++ b/src/sanitise/sentance.rs @@ -71,7 +71,7 @@ impl Sentance pub fn new_iter<'a>(from: &'a (impl AsRef +'a + ?Sized)) -> impl Iterator { let from = from.as_ref(); - from.split(is_sentance_boundary) + from.split_inclusive(is_sentance_boundary) .map(|x| new!(x.trim())) .filter(|x| !x.is_empty()) } diff --git a/src/sanitise/word.rs b/src/sanitise/word.rs index 365c11d..320fe88 100644 --- a/src/sanitise/word.rs +++ b/src/sanitise/word.rs @@ -71,7 +71,8 @@ impl Word pub fn new_iter<'a>(from: &'a (impl AsRef +?Sized+'a)) -> impl Iterator { let from = from.as_ref(); - from.split(is_word_boundary) + from.split_inclusive(is_word_boundary) + .map(|x| x.trim()) .filter(|x| !x.is_empty()) .map(|x| new!(x)) } From 032e5d74dc0adc8cdda5810d1febfb9ee559353d Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 05:30:10 +0100 Subject: [PATCH 40/50] fix punctuation eating bug. requiring nightly for now... --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 819b64b..74ac5c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.5.2" +version = "0.5.3" dependencies = [ "async-compression", "cfg-if 1.0.0", From 77d4c2da730b6165093b7d3cd4dc1b6bbf72b036 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 05:32:55 +0100 Subject: [PATCH 41/50] removed redundant filter --- Cargo.toml | 2 +- src/feed.rs | 4 +--- src/sanitise/sentance.rs | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0c04205..cd93c39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.5.3" +version = "0.5.4" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/src/feed.rs b/src/feed.rs index 3db4c67..866c1bf 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -11,7 +11,6 @@ pub fn feed(chain: &mut Chain, what: impl AsRef, bounds: impl std:: if #[cfg(feature="split-sentance")] { let map = Sentance::new_iter(&what) //get each sentance in string .map(|what| what.words() - .filter(|word| !word.is_empty()) .map(|s| s.to_owned()).collect::>()); debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds"); for map in map {// feed each sentance seperately @@ -24,8 +23,7 @@ pub fn feed(chain: &mut Chain, what: impl AsRef, bounds: impl std:: } } else { let map = Sentance::new_iter(&what) //get each sentance in string - .map(|what| what.words() - .filter(|word| !word.is_empty())) + .map(|what| what.words()) .flatten() // add all into one buffer .map(|s| s.to_owned()).collect::>(); debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds"); diff --git a/src/sanitise/sentance.rs b/src/sanitise/sentance.rs index 2734234..cc1c364 100644 --- a/src/sanitise/sentance.rs +++ b/src/sanitise/sentance.rs @@ -77,7 +77,7 @@ impl Sentance } /// Get the words in this sentance - pub fn words(&self) -> impl Iterator + #[inline] pub fn words(&self) -> impl Iterator { Word::new_iter(self) } From 
31ee30db9e1a04afe1a4ab150022c62b0ab793d2 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 05:37:37 +0100 Subject: [PATCH 42/50] update todo --- Cargo.lock | 2 +- TODO | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 74ac5c0..cc9c496 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.5.3" +version = "0.5.4" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/TODO b/TODO index 4b9b689..eeabf8c 100644 --- a/TODO +++ b/TODO @@ -1 +1,3 @@ +Maybe see if `split-sentance` is stable enough to be enabled in prod now? Allow Unix domain socket for bind + From 633466b901636f704359a74485d7fc8058f998ad Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 10:55:44 +0100 Subject: [PATCH 43/50] added /sentance/ chunking --- Cargo.lock | 5 +- Cargo.toml | 5 +- markov.toml | 5 +- src/api/sentance.rs | 2 - src/config.rs | 23 ++++ src/ext.rs | 27 ++++ src/feed.rs | 18 ++- src/gen.rs | 2 +- src/main.rs | 41 +++++- src/sanitise/.#filter.rs | 1 + src/sanitise/filter.rs | 260 +++++++++++++++++++++++++++++++++++++++ src/sanitise/mod.rs | 3 + src/sentance.rs | 25 ++++ src/state.rs | 9 +- src/util.rs | 41 ++++++ 15 files changed, 449 insertions(+), 18 deletions(-) delete mode 100644 src/api/sentance.rs create mode 100644 src/ext.rs create mode 120000 src/sanitise/.#filter.rs create mode 100644 src/sanitise/filter.rs create mode 100644 src/sentance.rs create mode 100644 src/util.rs diff --git a/Cargo.lock b/Cargo.lock index cc9c496..c366482 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -650,6 +650,7 @@ dependencies = [ "log", "lzzzz", "markov 1.1.0", + "once_cell", "pin-project", "pretty_env_logger", "serde", @@ -1269,9 +1270,9 @@ checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" [[package]] name = "smallmap" -version = "1.1.3" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2adda73259bbc3ff84f711425ebfb8c90e9dd32a12b05c6528dd49244ea8230f" +checksum = "97ce78b988fb0df3b438d106942c0c2438849ecf40e3418af55044f96d27514d" dependencies = [ "rustc_version", ] diff --git a/Cargo.toml b/Cargo.toml index cd93c39..c51f2df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.5.4" +version = "0.6" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" @@ -63,5 +63,6 @@ toml = "0.5.6" async-compression = {version = "0.3.5", features=["tokio-02", "bzip2"], optional=true} pin-project = "0.4.26" libc = "0.2.79" -smallmap = "1.1.3" +smallmap = "1.1.5" lazy_static = "1.4.0" +once_cell = "1.4.1" diff --git a/markov.toml b/markov.toml index dc0a048..e2064b8 100644 --- a/markov.toml +++ b/markov.toml @@ -2,5 +2,8 @@ bindpoint = '127.0.0.1:8001' file = 'chain.dat' max_content_length = 4194304 max_gen_size = 256 -#save_interval_secs = 15 +#save_interval_secs = 2 trust_x_forwarded_for = false + +[filter] +exclude = "<>)([]/" diff --git a/src/api/sentance.rs b/src/api/sentance.rs deleted file mode 100644 index 9f8f411..0000000 --- a/src/api/sentance.rs +++ /dev/null @@ -1,2 +0,0 @@ -//! 
/sentance/ -use super::*; diff --git a/src/config.rs b/src/config.rs index 2940f49..ebae65e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -25,6 +25,28 @@ pub struct Config pub max_gen_size: usize, pub save_interval_secs: Option, pub trust_x_forwarded_for: bool, + #[serde(default)] + pub filter: FilterConfig, +} + +#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Hash, Serialize, Deserialize)] +pub struct FilterConfig +{ + exclude: String, + +} + +impl FilterConfig +{ + pub fn get_filter(&self) -> sanitise::filter::Filter + { + let filt: sanitise::filter::Filter = self.exclude.parse().unwrap(); + if !filt.is_empty() + { + warn!("Loaded exclude filter: {:?}", filt.iter().collect::()); + } + filt + } } impl Default for Config @@ -39,6 +61,7 @@ impl Default for Config max_gen_size: 256, save_interval_secs: Some(unsafe{NonZeroU64::new_unchecked(2)}), trust_x_forwarded_for: false, + filter: Default::default(), } } } diff --git a/src/ext.rs b/src/ext.rs new file mode 100644 index 0000000..eb9b5ce --- /dev/null +++ b/src/ext.rs @@ -0,0 +1,27 @@ +//! Extensions +use std::{ + iter, +}; + +pub trait StringJoinExt: Sized +{ + fn join>(self, sep: P) -> String; +} + +impl StringJoinExt for I +where I: IntoIterator, + T: AsRef +{ + fn join>(self, sep: P) -> String + { + let mut string = String::new(); + for (first, s) in iter::successors(Some(true), |_| Some(false)).zip(self.into_iter()) + { + if !first { + string.push_str(sep.as_ref()); + } + string.push_str(s.as_ref()); + } + string + } +} diff --git a/src/feed.rs b/src/feed.rs index 866c1bf..48150c2 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -44,6 +44,15 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream { + { + let buffer = $buffer; + feed($chain, &buffer, $bounds) + } + } + } + cfg_if!{ if #[cfg(any(not(feature="split-newlines"), feature="always-aggregate"))] { let mut body = body; @@ -60,15 +69,17 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream {:?}", who, buffer); let mut chain = state.chain().write().await; cfg_if! { if #[cfg(feature="split-newlines")] { for buffer in buffer.split('\n').filter(|line| !line.trim().is_empty()) { - feed(&mut chain, buffer, FEED_BOUNDS); + feed!(&mut chain, buffer, FEED_BOUNDS); + } } else { - feed(&mut chain, buffer, FEED_BOUNDS); + feed!(&mut chain, buffer, FEED_BOUNDS); } } @@ -81,12 +92,13 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream {:?}", who, line); } written+=line.len(); diff --git a/src/gen.rs b/src/gen.rs index 6c1e4e5..911f2e0 100644 --- a/src/gen.rs +++ b/src/gen.rs @@ -2,7 +2,7 @@ use super::*; #[derive(Debug)] -pub struct GenBodyError(String); +pub struct GenBodyError(pub String); impl error::Error for GenBodyError{} impl fmt::Display for GenBodyError diff --git a/src/main.rs b/src/main.rs index c516086..46ad529 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ #![feature(split_inclusive)] +#![feature(min_const_generics)] #![allow(dead_code)] @@ -60,6 +61,9 @@ macro_rules! 
status { }; } +mod ext; +use ext::*; +mod util; mod sanitise; mod bytes; mod chunking; @@ -76,6 +80,7 @@ use forwarded_list::XForwardedFor; mod feed; mod gen; +mod sentance; #[tokio::main] async fn main() { @@ -135,8 +140,8 @@ async fn main() { }; let push = warp::put() - .and(chain.clone()) .and(warp::path("put")) + .and(chain.clone()) .and(client_ip.clone()) .and(warp::body::content_length_limit(state.config().max_content_length)) .and(warp::body::stream()) @@ -173,9 +178,6 @@ async fn main() { .and_then(api::single) .with(warp::log("markov::api::single")) }; - let sentance = warp::post() - .and(warp::path("sentance")); //TODO: sanitise::Sentance::new_iter the body line - warp::path("api") .and(single) .recover(api::error::rejection) @@ -183,11 +185,12 @@ async fn main() { } } + let read = warp::get() .and(chain.clone()) - .and(warp::path("get")) .and(client_ip.clone()) - .and(warp::path::param().map(|opt: usize| Some(opt)).or(warp::any().map(|| Option::::None)).unify()) + .and(warp::path::param().map(|opt: usize| Some(opt)) + .or(warp::path::end().map(|| Option::::None)).unify()) .and_then(|state: State, host: IpAddr, num: Option| { async move { let (tx, rx) = mpsc::channel(state.config().max_gen_size); @@ -199,7 +202,33 @@ async fn main() { } }) .with(warp::log("markov::read")); + + let sentance = warp::get() + .and(warp::path("sentance")) //TODO: sanitise::Sentance::new_iter the body line + .and(chain.clone()) + .and(client_ip.clone()) + .and(warp::path::param().map(|opt: usize| Some(opt)) + .or(warp::path::end().map(|| Option::::None)).unify()) + .and_then(|state: State, host: IpAddr, num: Option| { + async move { + let (tx, rx) = mpsc::channel(state.config().max_gen_size); + tokio::spawn(sentance::body(state, num, tx)); + Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.map(move |mut x| { + info!("{} (sentance) <- {:?}", host, x); + // match x.chars().last() { + // Some(chr) if sanitise::is_sentance_boundary(chr) => { + // x.push(' '); + // }, + // _ => (), + // } + x.push(' '); + Ok::<_, std::convert::Infallible>(x) + })))) + } + }) + .with(warp::log("markov::read::sentance")); + let read = warp::path("get").and(read.or(sentance)); #[cfg(feature="api")] let read = read.or(api); diff --git a/src/sanitise/.#filter.rs b/src/sanitise/.#filter.rs new file mode 120000 index 0000000..e39f68a --- /dev/null +++ b/src/sanitise/.#filter.rs @@ -0,0 +1 @@ +avril@eientei.880:1602382403 \ No newline at end of file diff --git a/src/sanitise/filter.rs b/src/sanitise/filter.rs new file mode 100644 index 0000000..c570dd7 --- /dev/null +++ b/src/sanitise/filter.rs @@ -0,0 +1,260 @@ +//! 
Filter out characters and such +use smallmap::Map as SmallMap; +use std::{ + borrow::Cow, + fmt, + iter::{ + self, + FromIterator, + }, + str, +}; +use once_cell::sync::OnceCell; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Filter(SmallMap); + +impl From<[char; N]> for Filter +{ + fn from(from: [char; N]) -> Self + { + let mut map = SmallMap::with_capacity(1 + (N / 256)); + for &chr in from.iter() + { + map.insert(chr, ()); + } + Self(map) + } +} + +impl<'a> From<&'a [char]> for Filter +{ + fn from(from: &'a [char]) -> Self + { + let mut map = SmallMap::new(); + for &chr in from.iter() + { + map.insert(chr, ()); + } + Self(map) + } +} +impl<'a> From<&'a str> for Filter +{ + fn from(from: &'a str) -> Self + { + let mut output = Self::new(); + output.insert(from.chars()); + output + } +} + +impl str::FromStr for Filter +{ + type Err = std::convert::Infallible; + fn from_str(s: &str) -> Result { + Ok(Self::from(s)) + } +} + +impl fmt::Display for Filter +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + use std::fmt::Write; + for chr in self.iter() + { + f.write_char(chr)?; + } + Ok(()) + } +} + +pub struct FilterKeyIter<'a>(smallmap::iter::Iter<'a, char, ()>, usize); + +impl<'a> Iterator for FilterKeyIter<'a> +{ + type Item = char; + fn next(&mut self) -> Option + { + self.0.next().map(|&(x, _)| x) + } + fn size_hint(&self) -> (usize, Option) { + (self.1, Some(self.1)) + } +} +impl<'a> iter::FusedIterator for FilterKeyIter<'a>{} +impl<'a> iter::ExactSizeIterator for FilterKeyIter<'a>{} + +impl Filter +{ + pub fn new() -> Self + { + Self(SmallMap::new()) + } + pub fn insert>(&mut self, from: I) + { + for from in from.into_iter() + { + self.0.insert(from, ()); + } + } + + pub fn remove>(&mut self, from: I) + { + for from in from.into_iter() + { + self.0.remove(&from); + } + } + + pub fn len(&self) -> usize + { + self.0.len() + } + + pub fn is_empty(&self) -> bool + { + //TODO: impl this in smallmap itself + self.len() == 0 + } + + pub fn iter(&self) -> impl Iterator + '_ + { + self.0.iter() + .copied() + .map(|(x, _)| x) + //FilterKeyIter(self.0.iter(), self.0.len()) + } + + /// Should this character be filtered? + #[inline] pub fn check(&self, chr: char) -> bool + { + self.0.get(&chr).is_some() + } + + pub fn filter<'a, I: IntoIterator>(&'a self, from_iter: I) -> FilterIter<'a, I::IntoIter> + where I::IntoIter: 'a + { + FilterIter(&self, from_iter.into_iter().fuse()) + } + + pub fn filter_cow<'a>(&self, string: &'a (impl AsRef + 'a + ?Sized)) -> Cow<'a, str> + { + let string = string.as_ref(); + + if self.is_empty() { + return Cow::Borrowed(string); + } + + let mut output = Cow::Borrowed(string); + let mut i=0; + for chr in string.chars() + { + if self.check(chr) { + output.to_mut().remove(i); + } else { + i+=1; + } + } + + output + } + + pub fn filter_str<'a, T: AsRef+'a>(&'a self, string: &'a T) -> FilterStr<'a> + { + FilterStr(string.as_ref(), self, OnceCell::new()) + } +} + +impl FromIterator for Filter +{ + fn from_iter>(iter: I) -> Self + { + let mut output= Self::new(); + output.insert(iter); + output + } +} + +impl<'a> FilterStr<'a> +{ + pub fn as_str(&self) -> &str + { + fn fmt(this: &FilterStr<'_>) -> String + { + let chars = this.0.chars(); + let mut f: String = crate::util::hint_cap(&chars); + for chr in chars { + if !this.1.check(chr) { + f.push(chr); + } + } + f + } + &self.2.get_or_init(|| fmt(&self))[..] 
+ } +} + +pub struct FilterStr<'a>(&'a str, &'a Filter, OnceCell); +impl<'a> fmt::Display for FilterStr<'a> +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "{}", self.as_str()) + } +} +impl<'a> FilterStr<'a> +{ + pub fn filter(&self) -> &Filter + { + &self.1 + } +} + +pub struct FilterIter<'a, I>(&'a Filter, iter::Fuse); + +impl<'a, I: Iterator> Iterator for FilterIter<'a, I> +{ + type Item = char; + fn next(&mut self) -> Option + { + loop { + break match self.1.next() { + Some(chr) if !self.0.check(chr) => Some(chr), + None => None, + _ => continue, + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let (_, high) = self.1.size_hint(); + (0, high) + } +} +impl<'a, I> FilterIter<'a, I> +{ + pub fn filter(&self) -> &Filter + { + self.0 + } +} + +impl<'a, I: Iterator> iter::FusedIterator for FilterIter<'a, I>{} + +#[cfg(test)] +mod tests +{ + use super::*; + #[test] + fn filter_cow() + { + let filter: Filter = " hi".chars().collect(); + + let string = "abcdef ghi jk1\nhian"; + + assert_eq!(filter.filter_str(&string).to_string(), filter.filter_cow(&string).to_string()); + assert_eq!(filter.filter_cow(&string).to_string(), filter.filter(string.chars()).collect::()); + } +} diff --git a/src/sanitise/mod.rs b/src/sanitise/mod.rs index 3608cc0..8f5fae1 100644 --- a/src/sanitise/mod.rs +++ b/src/sanitise/mod.rs @@ -8,6 +8,9 @@ mod sentance; pub use sentance::*; mod word; pub use word::*; + +pub mod filter; + /* pub fn take_sentance(from: &mut T, to: &mut U) -> Result { diff --git a/src/sentance.rs b/src/sentance.rs new file mode 100644 index 0000000..b7aee69 --- /dev/null +++ b/src/sentance.rs @@ -0,0 +1,25 @@ +//! /sentance/ +use super::*; + +pub async fn body(state: State, num: Option, mut output: mpsc::Sender) -> Result<(), gen::GenBodyError> +{ + let string = { + let chain = state.chain().read().await; + if chain.is_empty() { + return Ok(()); + } + + match num { + None => chain.generate_str(), + Some(num) => (0..num).map(|_| chain.generate_str()).join("\n"), + } + }; + + debug!("Taking {:?} from {:?}" ,num, string); + for sen in sanitise::Sentance::new_iter(&string).take(num.unwrap_or(1)) + { + output.send(sen.to_owned()).await.map_err(|e| gen::GenBodyError(e.0))?; + } + Ok(()) +} + diff --git a/src/state.rs b/src/state.rs index 6ec7883..3bebfc8 100644 --- a/src/state.rs +++ b/src/state.rs @@ -11,6 +11,7 @@ use config::Config; pub struct State { config: Arc, //to avoid cloning config + exclude: Arc, chain: Arc>>, save: Arc, @@ -20,11 +21,17 @@ pub struct State impl State { + pub fn filter(&self) -> &sanitise::filter::Filter + { + &self.exclude + } + pub fn new(config: Config, chain: Arc>>, save: Arc) -> Self { let (shutdown, shutdown_recv) = watch::channel(false); Self { - config: Arc::new(config), + exclude: Arc::new(config.filter.get_filter()), + config: Arc::new(config), chain, save, shutdown: Arc::new(shutdown), diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..2a5d5a3 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,41 @@ +//! 
Utils + +pub trait NewCapacity: Sized +{ + fn new() -> Self; + fn with_capacity(cap: usize) -> Self; +} + +impl NewCapacity for String +{ + fn new() -> Self + { + Self::new() + } + + fn with_capacity(cap: usize) -> Self + { + Self::with_capacity(cap) + } +} + +impl NewCapacity for Vec +{ + fn new() -> Self + { + Self::new() + } + + fn with_capacity(cap: usize) -> Self + { + Self::with_capacity(cap) + } +} + +pub fn hint_cap(iter: &I) -> T +{ + match iter.size_hint() { + (0, Some(0)) | (0, None) => T::new(), + (_, Some(x)) | (x, _) => T::with_capacity(x) + } +} From 2f9f4dbb3a7bb6d74e6b5ffb5e75d02e39232f71 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 10:56:40 +0100 Subject: [PATCH 44/50] added /sentance/ chunking --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/sanitise/.#filter.rs | 1 - src/sanitise/filter.rs | 10 +++++----- 4 files changed, 7 insertions(+), 8 deletions(-) delete mode 120000 src/sanitise/.#filter.rs diff --git a/Cargo.lock b/Cargo.lock index c366482..1411a9d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.5.4" +version = "0.6.0" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/Cargo.toml b/Cargo.toml index c51f2df..0ad5f3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.6" +version = "0.6.0" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/src/sanitise/.#filter.rs b/src/sanitise/.#filter.rs deleted file mode 120000 index e39f68a..0000000 --- a/src/sanitise/.#filter.rs +++ /dev/null @@ -1 +0,0 @@ -avril@eientei.880:1602382403 \ No newline at end of file diff --git a/src/sanitise/filter.rs b/src/sanitise/filter.rs index c570dd7..686b1b5 100644 --- a/src/sanitise/filter.rs +++ b/src/sanitise/filter.rs @@ -119,12 +119,12 @@ impl Filter self.len() == 0 } - pub fn iter(&self) -> impl Iterator + '_ + pub fn iter(&self) -> FilterKeyIter<'_> //impl Iterator + '_ { - self.0.iter() - .copied() - .map(|(x, _)| x) - //FilterKeyIter(self.0.iter(), self.0.len()) + //self.0.iter() + // .copied() + // .map(|(x, _)| x) + FilterKeyIter(self.0.iter(), self.0.len()) } /// Should this character be filtered? 
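A note on the `Filter` type the last two patches introduce and then tune: it keeps the excluded characters in a `smallmap::Map<char, ()>`, is built straight from the `exclude` string in markov.toml (`FilterConfig::get_filter` simply calls `.parse()`, and the `FromStr` impl cannot fail), and strips characters from an iterator (`filter`), lazily from a borrowed string (`filter_str`), or copy-on-write (`filter_cow`). A minimal sketch of the behaviour at this point in the series, written as crate-internal test code with purely illustrative sample strings:

#[test]
fn exclude_filter_sketch() {
    use crate::sanitise::filter::Filter;

    // Same characters as the `exclude` example in markov.toml.
    let filter: Filter = "<>)([]/".parse().unwrap(); // Err = Infallible, the unwrap never fires
    assert!(filter.check('['));  // '[' is excluded
    assert!(!filter.check('a')); // ordinary characters pass through
    // `filter_str` wraps the input and renders it with the excluded characters removed.
    assert_eq!(filter.filter_str(&"a[b]c/d").to_string(), "abcd");
}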
From 988829c9b941222c0aaa337da1376165b9789a58 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 11:20:56 +0100 Subject: [PATCH 45/50] added outbound filter --- Cargo.toml | 2 +- markov.toml | 3 +- src/config.rs | 20 +++++++++--- src/ext.rs | 69 ++++++++++++++++++++++++++++++++++++++++++ src/feed.rs | 4 +-- src/gen.rs | 7 +++-- src/main.rs | 29 ++++++++++-------- src/sanitise/filter.rs | 18 ++++++++++- src/sentance.rs | 3 +- src/state.rs | 13 +++++--- 10 files changed, 137 insertions(+), 31 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0ad5f3b..d7b82b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.6.0" +version = "0.6.1" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/markov.toml b/markov.toml index e2064b8..df5fde0 100644 --- a/markov.toml +++ b/markov.toml @@ -6,4 +6,5 @@ max_gen_size = 256 trust_x_forwarded_for = false [filter] -exclude = "<>)([]/" +inbound = "<>)([]/" +outbound = "*" diff --git a/src/config.rs b/src/config.rs index ebae65e..a820eb8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -32,18 +32,28 @@ pub struct Config #[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Hash, Serialize, Deserialize)] pub struct FilterConfig { - exclude: String, - + inbound: String, + #[serde(default)] + outbound: String, } impl FilterConfig { - pub fn get_filter(&self) -> sanitise::filter::Filter + pub fn get_inbound_filter(&self) -> sanitise::filter::Filter + { + let filt: sanitise::filter::Filter = self.inbound.parse().unwrap(); + if !filt.is_empty() + { + info!("Loaded inbound filter: {:?}", filt.iter().collect::()); + } + filt + } + pub fn get_outbound_filter(&self) -> sanitise::filter::Filter { - let filt: sanitise::filter::Filter = self.exclude.parse().unwrap(); + let filt: sanitise::filter::Filter = self.outbound.parse().unwrap(); if !filt.is_empty() { - warn!("Loaded exclude filter: {:?}", filt.iter().collect::()); + info!("Loaded outbound filter: {:?}", filt.iter().collect::()); } filt } diff --git a/src/ext.rs b/src/ext.rs index eb9b5ce..bc44889 100644 --- a/src/ext.rs +++ b/src/ext.rs @@ -1,6 +1,7 @@ //! 
Extensions use std::{ iter, + ops::Range, }; pub trait StringJoinExt: Sized @@ -25,3 +26,71 @@ where I: IntoIterator, string } } + +pub trait FindSliceBounds +{ + type SliceType: ?Sized; + fn slice_bounds(&self, from: &Self::SliceType) -> Range; +} + +impl> FindSliceBounds for T +{ + type SliceType = str; + fn slice_bounds(&self, from: &Self::SliceType) -> Range{ + let this = self.as_ref(); + unsafe { + let sptr = from.as_ptr(); + let eptr = sptr.add(from.len()); + + let ssptr = this.as_ptr(); + let septr = ssptr.add(this.len()); + + let sptr = sptr as usize; + let ssptr = ssptr as usize; + let eptr = eptr as usize; + let septr = septr as usize; + + assert!(sptr >= ssptr && sptr <= septr, "Start index of slice is outside the bounds of self"); + assert!(eptr >= ssptr && eptr <= septr, "End index of slice is outside the bounds of self"); + + (sptr - ssptr)..(eptr - ssptr) + } + } +} + +pub trait SliceInPlace +{ + fn drain_inverse>(&mut self, slice: R); +} + +impl SliceInPlace for String +{ + fn drain_inverse>(&mut self, slice: R) + { + use std::ops::Bound; + match slice.end_bound() { + Bound::Excluded(&ex) => drop(self.drain(ex..)), + Bound::Included(&inc) => drop(self.drain(inc+1..)), + _ => (), + }; + match slice.start_bound() { + Bound::Included(&ex) => drop(self.drain(..ex)), + Bound::Excluded(&ex) => drop(..ex+1), + _ => () + }; + } +} + +pub trait TrimInPlace +{ + fn trim_in_place(&mut self) -> &mut Self; +} + +impl TrimInPlace for String +{ + fn trim_in_place(&mut self) -> &mut Self { + let bounds = self.slice_bounds(self.trim()); + self.drain_inverse(bounds); + self + } +} diff --git a/src/feed.rs b/src/feed.rs index 48150c2..2ac1031 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -69,7 +69,7 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream {:?}", who, buffer); let mut chain = state.chain().write().await; cfg_if! { @@ -92,7 +92,7 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream, mut output: mpsc::Sender) -> Result<(), GenBodyError> { let chain = state.chain().read().await; - if !chain.is_empty() { + if !chain.is_empty() { + let filter = state.outbound_filter(); match num { Some(num) if num < state.config().max_gen_size => { //This could DoS `full_body` and writes, potentially. 
for string in chain.str_iter_for(num) { - output.send(string).await.map_err(|e| GenBodyError(e.0))?; + output.send(filter.filter_owned(string)).await.map_err(|e| GenBodyError(e.0))?; } }, - _ => output.send(chain.generate_str()).await.map_err(|e| GenBodyError(e.0))?, + _ => output.send(filter.filter_owned(chain.generate_str())).await.map_err(|e| GenBodyError(e.0))?, } } Ok(()) diff --git a/src/main.rs b/src/main.rs index 46ad529..6472f53 100644 --- a/src/main.rs +++ b/src/main.rs @@ -195,9 +195,14 @@ async fn main() { async move { let (tx, rx) = mpsc::channel(state.config().max_gen_size); tokio::spawn(gen::body(state, num, tx)); - Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.map(move |x| { - info!("{} <- {:?}", host, x); - Ok::<_, std::convert::Infallible>(x) + Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.filter_map(move |mut x| { + if x.trim_in_place().len() != 0 { + info!("{} <- {:?}", host, x); + x.push('\n'); + Some(Ok::<_, std::convert::Infallible>(x)) + } else { + None + } })))) } }) @@ -213,16 +218,14 @@ async fn main() { async move { let (tx, rx) = mpsc::channel(state.config().max_gen_size); tokio::spawn(sentance::body(state, num, tx)); - Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.map(move |mut x| { - info!("{} (sentance) <- {:?}", host, x); - // match x.chars().last() { - // Some(chr) if sanitise::is_sentance_boundary(chr) => { - // x.push(' '); - // }, - // _ => (), - // } - x.push(' '); - Ok::<_, std::convert::Infallible>(x) + Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.filter_map(move |mut x| { + if x.trim_in_place().len() != 0 { + info!("{} (sentance) <- {:?}", host, x); + x.push(' '); + Some(Ok::<_, std::convert::Infallible>(x)) + } else { + None + } })))) } }) diff --git a/src/sanitise/filter.rs b/src/sanitise/filter.rs index 686b1b5..5288d08 100644 --- a/src/sanitise/filter.rs +++ b/src/sanitise/filter.rs @@ -133,7 +133,23 @@ impl Filter self.0.get(&chr).is_some() } - pub fn filter<'a, I: IntoIterator>(&'a self, from_iter: I) -> FilterIter<'a, I::IntoIter> + pub fn filter_owned(&self, input: impl Into) -> String + { + let mut input = input.into(); + self.filter(&mut input); + input + } + + pub fn filter<'a>(&self, output: &'a mut String) -> &'a mut String + { + if self.is_empty() { + return output; + } + output.retain(|chr| !self.check(chr)); + output + } + + pub fn filter_iter<'a, I: IntoIterator>(&'a self, from_iter: I) -> FilterIter<'a, I::IntoIter> where I::IntoIter: 'a { FilterIter(&self, from_iter.into_iter().fuse()) diff --git a/src/sentance.rs b/src/sentance.rs index b7aee69..5ffeacb 100644 --- a/src/sentance.rs +++ b/src/sentance.rs @@ -16,9 +16,10 @@ pub async fn body(state: State, num: Option, mut output: mpsc::Sender, //to avoid cloning config - exclude: Arc, + exclude: Arc<(sanitise::filter::Filter, sanitise::filter::Filter)>, chain: Arc>>, save: Arc, @@ -21,16 +21,21 @@ pub struct State impl State { - pub fn filter(&self) -> &sanitise::filter::Filter + pub fn inbound_filter(&self) -> &sanitise::filter::Filter { - &self.exclude + &self.exclude.0 + } + pub fn outbound_filter(&self) -> &sanitise::filter::Filter + { + &self.exclude.1 } pub fn new(config: Config, chain: Arc>>, save: Arc) -> Self { let (shutdown, shutdown_recv) = watch::channel(false); Self { - exclude: Arc::new(config.filter.get_filter()), + exclude: Arc::new((config.filter.get_inbound_filter(), + config.filter.get_outbound_filter())), config: Arc::new(config), chain, save, From 
c1e60503a64431250008014a9c79f3f224374d5f Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 11:23:52 +0100 Subject: [PATCH 46/50] remove const generics dependancy --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/main.rs | 1 - src/sanitise/filter.rs | 3 ++- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1411a9d..cd688c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.6.0" +version = "0.6.2" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/Cargo.toml b/Cargo.toml index d7b82b2..1831803 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.6.1" +version = "0.6.2" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/src/main.rs b/src/main.rs index 6472f53..837069c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,4 @@ #![feature(split_inclusive)] -#![feature(min_const_generics)] #![allow(dead_code)] diff --git a/src/sanitise/filter.rs b/src/sanitise/filter.rs index 5288d08..ddea8c0 100644 --- a/src/sanitise/filter.rs +++ b/src/sanitise/filter.rs @@ -14,6 +14,7 @@ use once_cell::sync::OnceCell; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Filter(SmallMap); +/* impl From<[char; N]> for Filter { fn from(from: [char; N]) -> Self @@ -25,7 +26,7 @@ impl From<[char; N]> for Filter } Self(map) } -} +}*/ impl<'a> From<&'a [char]> for Filter { From 1d482029f3e50d964e3766ab25c5f1cd551996d7 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 11:27:54 +0100 Subject: [PATCH 47/50] sentance delimit --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/sanitise/sentance.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cd688c2..2306f39 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.6.2" +version = "0.6.3" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/Cargo.toml b/Cargo.toml index 1831803..97381a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.6.2" +version = "0.6.3" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/src/sanitise/sentance.rs b/src/sanitise/sentance.rs index cc1c364..8da3d7d 100644 --- a/src/sanitise/sentance.rs +++ b/src/sanitise/sentance.rs @@ -25,7 +25,7 @@ macro_rules! new { }; } -const DEFAULT_BOUNDARIES: &[char] = &['\n', '.', ':', '!']; +const DEFAULT_BOUNDARIES: &[char] = &['\n', '.', ':', '!', '?']; lazy_static! 
{ static ref BOUNDARIES: smallmap::Map = { From ca16c97629fa5b91ad8c651a941024a77516cfe5 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 11 Oct 2020 11:50:08 +0100 Subject: [PATCH 48/50] change how no-num /sentance works --- Cargo.toml | 2 +- src/sanitise/filter.rs | 2 +- src/sanitise/sentance.rs | 2 +- src/sentance.rs | 14 +++++++++++--- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 97381a1..92748c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.6.3" +version = "0.6.4" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/src/sanitise/filter.rs b/src/sanitise/filter.rs index ddea8c0..cbc896b 100644 --- a/src/sanitise/filter.rs +++ b/src/sanitise/filter.rs @@ -178,7 +178,7 @@ impl Filter output } - pub fn filter_str<'a, T: AsRef+'a>(&'a self, string: &'a T) -> FilterStr<'a> + pub fn filter_str<'a, T: AsRef+'a +?Sized>(&'a self, string: &'a T) -> FilterStr<'a> { FilterStr(string.as_ref(), self, OnceCell::new()) } diff --git a/src/sanitise/sentance.rs b/src/sanitise/sentance.rs index 8da3d7d..edae602 100644 --- a/src/sanitise/sentance.rs +++ b/src/sanitise/sentance.rs @@ -68,7 +68,7 @@ impl Sentance } /// Create a new iterator over sentances from this string. - pub fn new_iter<'a>(from: &'a (impl AsRef +'a + ?Sized)) -> impl Iterator + pub fn new_iter<'a>(from: &'a (impl AsRef +'a + ?Sized)) -> impl Iterator + Clone { let from = from.as_ref(); from.split_inclusive(is_sentance_boundary) diff --git a/src/sentance.rs b/src/sentance.rs index 5ffeacb..41dd16d 100644 --- a/src/sentance.rs +++ b/src/sentance.rs @@ -17,9 +17,17 @@ pub async fn body(state: State, num: Option, mut output: mpsc::Sender x, + #[cold] None => return Ok(()), + }.to_owned())).await.map_err(|e| gen::GenBodyError(e.0))?; } Ok(()) } From ef5dc3cbf1bdc83c0437e546c1efcbcfbf2c450a Mon Sep 17 00:00:00 2001 From: Avril Date: Mon, 12 Oct 2020 04:15:06 +0100 Subject: [PATCH 49/50] accepts AF_UNIX --- Cargo.lock | 2 +- Cargo.toml | 5 +- TODO | 3 - markov.toml | 6 +- src/bind.rs | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/config.rs | 4 +- src/ext.rs | 58 ++++++++++++++++- src/main.rs | 56 ++++++++++++++--- 8 files changed, 283 insertions(+), 22 deletions(-) delete mode 100644 TODO create mode 100644 src/bind.rs diff --git a/Cargo.lock b/Cargo.lock index 2306f39..25d5512 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.6.3" +version = "0.7.0" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/Cargo.toml b/Cargo.toml index 92748c0..b7431b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,10 @@ [package] name = "markov" -version = "0.6.4" +version = "0.7.0" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" +license = "gpl-3.0-or-later" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -16,7 +17,7 @@ compress-chain = ["async-compression"] # Treat each new line as a new set to feed instead of feeding the whole data at once split-newlines = [] -# Feed each sentance seperately, instead of just each line / whole body +# Feed each sentance seperately with default /get api, instead of just each line / whole body # Maybe better without `split-newlines`? 
# Kinda experimental split-sentance = [] diff --git a/TODO b/TODO deleted file mode 100644 index eeabf8c..0000000 --- a/TODO +++ /dev/null @@ -1,3 +0,0 @@ -Maybe see if `split-sentance` is stable enough to be enabled in prod now? -Allow Unix domain socket for bind - diff --git a/markov.toml b/markov.toml index df5fde0..f85b87f 100644 --- a/markov.toml +++ b/markov.toml @@ -2,9 +2,9 @@ bindpoint = '127.0.0.1:8001' file = 'chain.dat' max_content_length = 4194304 max_gen_size = 256 -#save_interval_secs = 2 +save_interval_secs = 2 trust_x_forwarded_for = false [filter] -inbound = "<>)([]/" -outbound = "*" +inbound = '' +outbound = '' diff --git a/src/bind.rs b/src/bind.rs new file mode 100644 index 0000000..810ec44 --- /dev/null +++ b/src/bind.rs @@ -0,0 +1,171 @@ +//! For binding to sockets +use super::*; +use futures::{ + prelude::*, +}; +use std::{ + marker::{ + Send, + Unpin, + }, + fmt, + error, + path::{ + Path, + PathBuf, + }, +}; +use tokio::{ + io::{ + self, + AsyncRead, + AsyncWrite, + }, +}; + +#[derive(Debug)] +pub enum BindError +{ + IO(io::Error), + Warp(warp::Error), + Other(E), +} + +impl error::Error for BindError +{ + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + Some(match &self { + Self::IO(io) => io, + Self::Other(o) => o, + Self::Warp(w) => w, + }) + } +} +impl fmt::Display for BindError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + match self { + Self::IO(io) => write!(f, "io error: {}", io), + Self::Other(other) => write!(f, "{}", other), + Self::Warp(warp) => write!(f, "server error: {}", warp), + } + } +} + + +#[derive(Debug)] +pub struct BindpointParseError; + +impl error::Error for BindpointParseError{} +impl fmt::Display for BindpointParseError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "Failed to parse bindpoint as IP or unix domain socket") + } +} + + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd)] +pub enum Bindpoint +{ + Unix(PathBuf), + TCP(SocketAddr), +} + +impl fmt::Display for Bindpoint +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + match self { + Self::Unix(unix) => write!(f, "unix:/{}", unix.to_string_lossy()), + Self::TCP(tcp) => write!(f, "{}", tcp), + } + } +} + +impl std::str::FromStr for Bindpoint +{ + type Err = BindpointParseError; + fn from_str(s: &str) -> Result { + Ok(if let Ok(ip) = s.parse::() { + Self::TCP(ip) + } else if s.starts_with("unix:/") { + Self::Unix(PathBuf::from(&s[6..].to_owned())) + } else { + return Err(BindpointParseError); + }) + } +} + +fn bind_unix(to: impl AsRef) -> io::Result>>> +{ + debug!("Binding to AF_UNIX: {:?}", to.as_ref()); + let listener = tokio::net::UnixListener::bind(to)?; + Ok(listener) +} + +pub fn serve(server: warp::Server, bind: Bindpoint, signal: impl Future + Send + 'static) -> Result<(Bindpoint, BoxFuture<'static, ()>), BindError> +where F: Filter + Clone + Send + Sync + 'static, +::Ok: warp::Reply, +{ + Ok(match bind { + Bindpoint::TCP(sock) => server.try_bind_with_graceful_shutdown(sock, signal).map(|(sock, fut)| (Bindpoint::TCP(sock), fut.boxed())).map_err(BindError::Warp)?, + Bindpoint::Unix(unix) => { + (Bindpoint::Unix(unix.clone()), + server.serve_incoming_with_graceful_shutdown(bind_unix(unix).map_err(BindError::IO)?, signal).boxed()) + }, + }) +} + +impl From for Bindpoint +{ + fn from(from: SocketAddr) -> Self + { + Self::TCP(from) + } +} + +pub fn try_serve(server: warp::Server, bind: impl TryBindpoint, signal: impl Future + Send + 'static) -> Result<(Bindpoint, 
BoxFuture<'static, ()>), BindError> +where F: Filter + Clone + Send + Sync + 'static, +::Ok: warp::Reply, +{ + serve(server, bind.try_parse().map_err(BindError::Other)?, signal).map_err(BindError::coerce) +} + +pub trait TryBindpoint: Sized +{ + type Err: error::Error + 'static; + fn try_parse(self) -> Result; +} + +impl TryBindpoint for Bindpoint +{ + type Err = std::convert::Infallible; + fn try_parse(self) -> Result + { + Ok(self) + } +} + +impl> TryBindpoint for T +{ + type Err = BindpointParseError; + fn try_parse(self) -> Result + { + self.as_ref().parse() + } +} + +impl BindError +{ + pub fn coerce(self) -> BindError + { + match self { + Self::Warp(w) => BindError::Warp(w), + Self::IO(w) => BindError::IO(w), + #[cold] _ => unreachable!(), + } + } +} diff --git a/src/config.rs b/src/config.rs index a820eb8..b69d0bf 100644 --- a/src/config.rs +++ b/src/config.rs @@ -19,7 +19,7 @@ pub const DEFAULT_FILE_LOCATION: &'static str = "markov.toml"; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash, Serialize, Deserialize)] pub struct Config { - pub bindpoint: SocketAddr, + pub bindpoint: String, pub file: String, pub max_content_length: u64, pub max_gen_size: usize, @@ -65,7 +65,7 @@ impl Default for Config fn default() -> Self { Self { - bindpoint: ([127,0,0,1], 8001).into(), + bindpoint: SocketAddr::from(([127,0,0,1], 8001)).to_string(), file: "chain.dat".to_owned(), max_content_length: 1024 * 1024 * 4, max_gen_size: 256, diff --git a/src/ext.rs b/src/ext.rs index bc44889..ad80d79 100644 --- a/src/ext.rs +++ b/src/ext.rs @@ -1,7 +1,14 @@ //! Extensions use std::{ iter, - ops::Range, + ops::{ + Range, + Deref,DerefMut, + }, + marker::{ + PhantomData, + Send, + }, }; pub trait StringJoinExt: Sized @@ -94,3 +101,52 @@ impl TrimInPlace for String self } } + +pub trait MapTuple2 +{ + fn map (V,W)>(self, fun: F) -> (V,W); +} + +impl MapTuple2 for (T,U) +{ + #[inline] fn map (V,W)>(self, fun: F) -> (V,W) + { + fun(self) + } +} + +/// To make sure we don't keep this data across an `await` boundary. 
+#[repr(transparent)] +pub struct AssertNotSend(pub T, PhantomData<*const T>); + +impl AssertNotSend +{ + pub const fn new(from :T) -> Self + { + Self(from, PhantomData) + } + pub fn into_inner(self) -> T + { + self.0 + } +} + +/// Require a future is Send +#[inline(always)] pub fn require_send(t: T) -> T +{ + t +} + +impl Deref for AssertNotSend +{ + type Target = T; + fn deref(&self) -> &Self::Target { + &self.0 + } +} +impl DerefMut for AssertNotSend +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/src/main.rs b/src/main.rs index 837069c..80af06f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -81,10 +81,29 @@ mod feed; mod gen; mod sentance; -#[tokio::main] -async fn main() { +const DEFAULT_LOG_LEVEL: &str = "warn"; + +fn init_log() +{ + let level = match std::env::var_os("RUST_LOG") { + None => { + std::env::set_var("RUST_LOG", DEFAULT_LOG_LEVEL); + std::borrow::Cow::Borrowed(std::ffi::OsStr::new(DEFAULT_LOG_LEVEL)) + }, + Some(w) => std::borrow::Cow::Owned(w), + }; pretty_env_logger::init(); + trace!("Initialising `genmarkov` ({}) v{} with log level {:?}.\n\tMade by {} with <3.\n\tLicensed with GPL v3 or later", + std::env::args().next().unwrap(), + env!("CARGO_PKG_VERSION"), + level, + env!("CARGO_PKG_AUTHORS")); +} +#[tokio::main] +async fn main() { + init_log(); + let config = match config::load().await { Some(v) => v, _ => { @@ -237,14 +256,29 @@ async fn main() { #[cfg(target_family="unix")] tasks.push(tokio::spawn(signals::handle(state.clone())).map(|res| res.expect("Signal handler panicked")).boxed()); - let (addr, server) = warp::serve(push - .or(read)) - .bind_with_graceful_shutdown(state.config().bindpoint, async move { - tokio::signal::ctrl_c().await.unwrap(); - state.shutdown(); - }); - info!("Server bound on {:?}", addr); - server.await; + require_send(async { + let server = { + let s2 = AssertNotSend::new(state.clone()); //temp clone the Arcs here for shutdown if server fails to bind, assert they cannot remain cloned across an await boundary. 
+ match bind::try_serve(warp::serve(push + .or(read)), + state.config().bindpoint.clone(), + async move { + tokio::signal::ctrl_c().await.unwrap(); + state.shutdown(); + }) { + Ok((addr, server)) => { + info!("Server bound on {:?}", addr); + server + }, + Err(err) => { + error!("Failed to bind server: {}", err); + s2.into_inner().shutdown(); + return; + }, + } + }; + server.await; + }).await; // Cleanup async move { @@ -255,3 +289,5 @@ async fn main() { }.await; info!("Shut down gracefully") } + +mod bind; From 053f41ee50900786cfa55a63ba4da5092ba9c928 Mon Sep 17 00:00:00 2001 From: Avril Date: Mon, 12 Oct 2020 04:48:30 +0100 Subject: [PATCH 50/50] update readme --- README | 16 ------------ README.md | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 16 deletions(-) delete mode 100644 README create mode 100644 README.md diff --git a/README b/README deleted file mode 100644 index c17a163..0000000 --- a/README +++ /dev/null @@ -1,16 +0,0 @@ -HTTP server connecting to a Markov chain - -Feeding: -# PUT /put -Request body is fed to the chain - -NOTE: Strings fed to the chain must be valid UTF-8 and below 16 KB in size - -Generating: -# GET /get -Generate a string from the chain - -# GET /get/ -Generate strings from the chain - -NOTE: Number must be lower than 256 \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e550035 --- /dev/null +++ b/README.md @@ -0,0 +1,75 @@ +# genmarkov +HTTP server connecting to a Markov chain + +# Build requirements +Unix & Rust nightly are currently requirements to build, for now. + +# Configuration +When ran with no arguments, `markov` will attempt to load the config file at `markov.toml`. If it does not exist, it will use the default configuration. (On debug builds, it will also create the default `markov.toml`.) + +An example default configuration file is provided at [./markov.toml](markov.toml). + +When ran with an argument specifying the config file however, it will attempt to load that. If it fails to load the file, the default will be used. + +## Config file entries +| Name | Description | Default | Optional | +|-------------------------|---------------------------------------------------------|------------------|----------| +| `bindpoint` | Address or Unix domain socket for the server to bind to | `127.0.0.1:8001` | No | +| `file` | File to save and load the chain from | `chain.dat` | No | +| `max_content_length` | Max request body length to allow | `4194304` (4MB) | No | +| `max_gen_size` | Max number of strings for a request to generate at once | `256` | No | +| `save_interval_secs` | Number of seconds to ensure waiting before saving chain | `2` | Yes | +| `trust_x_forwarded_for` | Trust the `X-Forwarded-For` HTTP header | `false` | No | +| `filter` | Remove characters from incoming and/or outgoing text | None | Yes | + +### AF_UNIX note +When binding to a Unix domain socket, prefix the path with `unix:/` (e.g. `unix://var/markov.socket`) +The server will not attempt to remove already existing sockets at the path, so ensure there isn't one before launching. + +## Logging + +Set the `RUST_LOG` environment variable to one of the following to switch runtime logging levels. 
+* `trace` - Most verbose
+* `debug` - Verbose
+* `info` - Show input and output to/from the chain and requests
+* `warn` - Only show warnings (default)
+* `error` - Only show errors
+
+## Signals
+On Unix systems at runtime, some signals are trapped:
+
+| Signal | Description |
+|-----------|------------------------------------------------------------------------|
+| `SIGUSR1` | Immediately save the chain |
+| `SIGUSR2` | Immediately load the chain |
+| `SIGQUIT` | Ensure the chain is properly saved and then immediately call `abort()` |
+| `SIGINT` | Perform a full graceful shutdown |
+
+# Usage
+The server exposes several paths for accessing the chain
+
+## Feeding
+### `PUT /put`
+Request body is fed to the chain
+
+#### NOTE
+Strings fed to the chain must be valid UTF-8 and below the size specified in the config file.
+
+## Generating
+### `GET /get`
+Generate a string from the chain
+
+### `GET /get/<num>`
+Generate `<num>` strings from the chain
+
+### `GET /get/sentance`
+Generate a single sentence from the chain
+
+### `GET /get/sentance/<num>`
+Generate `<num>` sentences from the chain
+
+#### NOTE
+The number of strings/sentences must be lower than the value specified in the config file.
+
+# License
+GPL'd with <3
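
The `unix:/` bindpoint convention documented in the README's AF_UNIX note corresponds to the `FromStr` implementation for `Bindpoint` added in `src/bind.rs` (patch 49). The following is a minimal, self-contained sketch of that parsing rule only — simplified names, not the project's actual module:

```rust
use std::net::SocketAddr;
use std::path::PathBuf;

/// Simplified stand-in for the `Bindpoint` enum from src/bind.rs.
#[derive(Debug)]
enum Bindpoint {
    Unix(PathBuf),
    Tcp(SocketAddr),
}

/// Parse a config `bindpoint` string: a plain socket address selects TCP,
/// while the `unix:/` prefix selects an AF_UNIX socket path.
fn parse_bindpoint(s: &str) -> Option<Bindpoint> {
    if let Ok(addr) = s.parse::<SocketAddr>() {
        Some(Bindpoint::Tcp(addr))
    } else if let Some(path) = s.strip_prefix("unix:/") {
        Some(Bindpoint::Unix(PathBuf::from(path)))
    } else {
        None
    }
}

fn main() {
    // "127.0.0.1:8001" parses as a TCP bindpoint.
    println!("{:?}", parse_bindpoint("127.0.0.1:8001"));
    // "unix://var/markov.socket" drops the `unix:/` prefix, leaving the
    // socket path "/var/markov.socket", as in the README example.
    println!("{:?}", parse_bindpoint("unix://var/markov.socket"));
}
```

Under this rule a plain address such as `127.0.0.1:8001` binds a TCP listener, while `unix://var/markov.socket` is read as the socket path `/var/markov.socket`, matching the AF_UNIX note above.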
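Patch 49 also guards the new bind path against accidentally keeping cloned `Arc`s alive across an `.await`: `src/ext.rs` adds `AssertNotSend` (a wrapper made non-`Send` through a raw-pointer `PhantomData`) and a `require_send` check. A reduced sketch of that pattern, with the generic bounds assumed from context rather than copied from the source, is:

```rust
use std::marker::PhantomData;

/// Reduced version of `AssertNotSend` from src/ext.rs: the `PhantomData<*const T>`
/// field strips the `Send` auto-trait from the wrapper.
struct AssertNotSend<T>(T, PhantomData<*const T>);

impl<T> AssertNotSend<T> {
    fn new(from: T) -> Self { Self(from, PhantomData) }
    fn into_inner(self) -> T { self.0 }
}

/// Compile-time check that a value (e.g. a future) is `Send`.
fn require_send<T: Send>(t: T) -> T { t }

fn main() {
    let wrapped = AssertNotSend::new(String::from("state"));
    // The wrapper itself is not `Send`, so it cannot be held across an
    // `.await` inside a future that `require_send` is applied to:
    // require_send(wrapped);              // <- would not compile
    let inner = wrapped.into_inner();      // unwrap before the boundary instead
    println!("{}", require_send(inner));
}
```

This mirrors how `main.rs` wraps the temporarily cloned state in `AssertNotSend` before binding, then wraps the whole server future in `require_send` so the compiler rejects the clone surviving past the bind step.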