Compare commits
87 Commits
@@ -1,3 +1,5 @@
/target
chain.dat
*~
chain.dat
flamegraph.svg
File diff suppressed because it is too large
@@ -1,16 +1,75 @@
[package]
name = "markov"
version = "0.1.2"
version = "0.9.1"
description = "Generate string of text from Markov chain fed by stdin"
authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018"
license = "gpl-3.0-or-later"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = ["compress-chain", "split-newlines", "api"]

# Compress the chain data file when saved to disk
compress-chain = ["async-compression", "bzip2-sys"]

# Treat each new line as a new set to feed instead of feeding the whole data at once
split-newlines = []

# Feed each sentance seperately with default /get api, instead of just each line / whole body
#
# Note that this happens after `split-newlines`.
feed-sentance = ["split-sentance"]

# Split input buffer's to feed by sentance as well as word boundaries.
#
# Note that this happens after `split-newlines`.
# This feature does nothing if `feed-sentance` is enabled.
split-sentance = []

# Always aggregate incoming buffer instead of streaming them
# This will make feeds faster but allocate full buffers for the aggregated body
#
# Large write: ~95ms
#
# NOTE: This does nothing if `split-newlines` is not enabled
always-aggregate = []

# Does nothing on versions 9.0+
hog-buffer = []

# Enable the /api/ route
api = []

# Do not wait 2 seconds before starting worker tasks after server
instant-init = []

[profile.release]
opt-level = 3
lto = "fat"
codegen-units = 1
strip=true

[dependencies]
chain = {package = "markov", version = "1.1.0"}
tokio = {version = "0.2", features=["full"]}
warp = "0.2"
pretty_env_logger = "0.4.0"
hyper = "0.13.8"
log = "0.4.11"
cfg-if = "1.0.0"
futures = "0.3.6"
serde_cbor = "0.11.1"
serde = {version ="1.0", features=["derive"]}
toml = "0.5.6"
async-compression = {version = "0.3.5", features=["tokio-02", "bzip2"], optional=true}
pin-project = "0.4"
smallmap = "1.1.5"
lazy_static = "1.4.0"
once_cell = "1.4.1"
bzip2-sys = {version = "0.1.9", optional = true}
cidr = {version = "0.1.1", features = ["serde"]}

[build-dependencies]
rustc_version = "0.2"
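The feature flags above gate compile-time behaviour of the crate. As a rough, hypothetical sketch of how such a flag is typically consumed with the `cfg-if` dependency (this is not the project's actual code, and `compress_bz2` is a made-up placeholder):

```rust
use cfg_if::cfg_if;

/// Serialize-and-save helper, compressing only when the
/// `compress-chain` feature is enabled (illustrative only).
fn encode_chain_bytes(raw: &[u8]) -> Vec<u8> {
    cfg_if! {
        if #[cfg(feature = "compress-chain")] {
            // A real build would run the bytes through a bzip2 encoder
            // (e.g. via `async-compression`); stubbed here.
            compress_bz2(raw)
        } else {
            raw.to_vec()
        }
    }
}

#[cfg(feature = "compress-chain")]
fn compress_bz2(raw: &[u8]) -> Vec<u8> {
    // Placeholder standing in for an actual bzip2 stream.
    raw.to_vec()
}

fn main() {
    let bytes = encode_chain_bytes(b"some chain data");
    println!("{} bytes to write", bytes.len());
}
```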
@@ -0,0 +1,674 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
@@ -0,0 +1,29 @@
FEATURES:="api,split-sentance"
VERSION:=`cargo read-manifest | rematch - 'version":"([0-9\.]+)"' 1`

markov:
	cargo build --release --features $(FEATURES)
	strip target/release/markov

install:
	-rc-service markov shutdown && sleep 0.6
	-rc-service markov stop
	cp -f target/release/markov /usr/local/bin/markov
	rc-service markov start

reinstall: uninstall
	cp -f target/release/markov /usr/local/bin/markov
	rm -f /var/nginx/markov.dat
	rc-service markov start
	sleep 0.2
	curl -X PUT --data-binary @default http://127.0.0.1:8001/put

uninstall:
	-rc-service markov stop
	rm -f /usr/local/bin/markov

package:
	git add .
	-git commit -S -m "Packaging version $(VERSION)"
	cargo package
	mv ./target/package/markov-$(VERSION).crate{,.gz}
@@ -1,6 +0,0 @@
Generate strings from markov chain of stdin

Usage:

$ cat corpus | markov
$ cat corpus | markov <n of outputs to generate>
@@ -0,0 +1,75 @@
# genmarkov
HTTP server connecting to a Markov chain

# Build requirements
Unix & Rust nightly are currently required to build, for now.

# Configuration
When run with no arguments, `markov` will attempt to load the config file at `markov.toml`. If it does not exist, it will use the default configuration. (On debug builds, it will also create the default `markov.toml`.)

An example default configuration file is provided at [./markov.toml](markov.toml).

When run with an argument specifying the config file, however, it will attempt to load that. If it fails to load the file, the default will be used.

## Config file entries
| Name                    | Description                                              | Default          | Optional |
|-------------------------|----------------------------------------------------------|------------------|----------|
| `bindpoint`             | Address or Unix domain socket for the server to bind to  | `127.0.0.1:8001` | No       |
| `file`                  | File to save and load the chain from                     | `chain.dat`      | No       |
| `max_content_length`    | Max request body length to allow                         | `4194304` (4MB)  | No       |
| `max_gen_size`          | Max number of strings for a request to generate at once  | `256`            | No       |
| `save_interval_secs`    | Number of seconds to ensure waiting before saving chain  | `2`              | Yes      |
| `trust_x_forwarded_for` | Trust the `X-Forwarded-For` HTTP header                  | `false`          | No       |
| `filter`                | Remove characters from incoming and/or outgoing text     | None             | Yes      |
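A minimal sketch of how these entries could map onto a `serde`-deserializable struct loaded with the `toml` crate (both are dependencies); the struct name, field types and defaults shown in comments are assumptions taken from the table above, not the server's actual definition:

```rust
use serde::Deserialize;

/// Hypothetical mirror of the documented `markov.toml` entries.
#[derive(Debug, Deserialize)]
struct Config {
    bindpoint: String,               // e.g. "127.0.0.1:8001" or "unix://var/markov.socket"
    file: String,                    // chain save/load path, e.g. "chain.dat"
    max_content_length: u64,         // default 4194304 (4MB)
    max_gen_size: usize,             // default 256
    save_interval_secs: Option<u64>, // optional, default 2
    trust_x_forwarded_for: bool,     // default false
    filter: Option<String>,          // optional character filter
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let text = std::fs::read_to_string("markov.toml")?;
    let config: Config = toml::from_str(&text)?;
    println!("{:?}", config);
    Ok(())
}
```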
### AF_UNIX note
When binding to a Unix domain socket, prefix the path with `unix:/` (e.g. `unix://var/markov.socket`).
The server will not attempt to remove an already existing socket at the path, so ensure there isn't one before launching.

## Logging

Set the `RUST_LOG` environment variable to one of the following to switch runtime logging levels:
* `trace` - Most verbose
* `debug` - Verbose
* `info` - Show input and output to/from the chain and requests
* `warn` - Only show warnings (default)
* `error` - Only show errors

## Signals
On Unix systems at runtime, some signals are trapped:

| Signal    | Description                                                             |
|-----------|-------------------------------------------------------------------------|
| `SIGUSR1` | Immediately save the chain                                              |
| `SIGUSR2` | Immediately load the chain                                              |
| `SIGQUIT` | Ensure the chain is properly saved and then immediately call `abort()`  |
| `SIGINT`  | Perform a full graceful shutdown                                        |
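For illustration only, trapping one of these signals with tokio 0.2 (the async runtime in `Cargo.toml`) might look like the sketch below; `save_chain` is a hypothetical stand-in for the real save routine:

```rust
use tokio::signal::unix::{signal, SignalKind};

async fn save_chain() {
    // Hypothetical: persist the chain to the configured file.
    println!("chain saved");
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // SIGUSR1 -> immediately save the chain, as described above.
    let mut usr1 = signal(SignalKind::user_defined1())?;
    tokio::spawn(async move {
        while usr1.recv().await.is_some() {
            save_chain().await;
        }
    });

    // ...the rest of the server would run here...
    Ok(())
}
```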
# Usage
The server exposes several paths for accessing the chain.

## Feeding
### `PUT /put`
The request body is fed to the chain.

#### NOTE
Strings fed to the chain must be valid UTF-8 and have a size below the value specified in the config file.

## Generating
### `GET /get`
Generate a string from the chain.

### `GET /get/<number>`
Generate `<number>` strings from the chain.

### `GET /get/sentance`
Generate a single sentence from the chain.

### `GET /get/sentance/<number>`
Generate `<number>` sentences from the chain.

#### NOTE
The number of strings/sentences must be lower than the value specified in the config file.
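A rough sketch of wiring such routes up with `warp` 0.2 (the HTTP framework listed in `Cargo.toml`); the chain logic is stubbed out and none of this is the server's real implementation (`bytes` 0.5 is assumed as a direct dependency for the body type):

```rust
use warp::Filter;

#[tokio::main]
async fn main() {
    // GET /get -> respond with one generated string (stubbed).
    let get_one = warp::path!("get")
        .and(warp::get())
        .map(|| "generated text\n");

    // PUT /put -> feed the request body to the chain (stubbed).
    let put = warp::path!("put")
        .and(warp::put())
        .and(warp::body::bytes())
        .map(|body: bytes::Bytes| {
            println!("fed {} bytes", body.len());
            warp::reply()
        });

    warp::serve(get_one.or(put))
        .run(([127, 0, 0, 1], 8001))
        .await;
}
```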
# License
GPL'd with <3
@@ -0,0 +1 @@
Disallow exact same map input buffers by keeping hashes of input buffers.
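A minimal sketch of the idea in this TODO — keep a hash of every buffer already fed and reject exact repeats; the names and structure are illustrative only:

```rust
use std::collections::HashSet;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Remembers hashes of buffers that were already fed to the chain.
#[derive(Default)]
struct SeenBuffers {
    hashes: HashSet<u64>,
}

impl SeenBuffers {
    /// Returns true only the first time an exact buffer is seen.
    fn insert(&mut self, buf: &str) -> bool {
        let mut h = DefaultHasher::new();
        buf.hash(&mut h);
        self.hashes.insert(h.finish())
    }
}

fn main() {
    let mut seen = SeenBuffers::default();
    assert!(seen.insert("hello world"));
    assert!(!seen.insert("hello world")); // exact repeat is skipped
}
```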
@@ -0,0 +1,26 @@

extern crate rustc_version;
use rustc_version::{version, version_meta, Channel};

fn main() {
    // Assert we haven't travelled back in time
    assert!(version().unwrap().major >= 1);

    // Set cfg flags depending on release channel
    match version_meta().unwrap().channel {
        Channel::Stable => {
            println!("cargo:rustc-cfg=stable");
        }
        Channel::Beta => {
            println!("cargo:rustc-cfg=beta");
        }
        Channel::Nightly => {
            println!("cargo:rustc-cfg=nightly");
        }
        Channel::Dev => {
            println!("cargo:rustc-cfg=dev");
        }
    }

    //println!("cargo:rustc-link-lib=static=bz2"); // TODO: Make this conditional for `compress-chain`
}
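The build script emits `cargo:rustc-cfg` flags for the detected compiler channel; a small, generic illustration (not code from this crate) of how such cfgs are consumed in the source:

```rust
// Compiled only when build.rs printed `cargo:rustc-cfg=nightly`.
#[cfg(nightly)]
fn channel() -> &'static str {
    "nightly"
}

#[cfg(not(nightly))]
fn channel() -> &'static str {
    "stable, beta or dev"
}

fn main() {
    println!("built with a {} toolchain", channel());
}
```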
@@ -0,0 +1,217 @@
# Copyright 2017-2020 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

# Auto-Generated by cargo-ebuild 0.3.1

EAPI=7

CRATES="
aho-corasick-0.7.13
arc-swap-0.4.7
async-compression-0.3.5
atty-0.2.14
autocfg-0.1.7
autocfg-1.0.1
base64-0.12.3
bitflags-1.2.1
block-buffer-0.7.3
block-buffer-0.9.0
block-padding-0.1.5
buf_redux-0.8.4
byte-tools-0.3.1
byteorder-1.3.4
bytes-0.5.6
bzip2-0.3.3
bzip2-sys-0.1.9+1.0.8
cc-1.0.60
cfg-if-0.1.10
cfg-if-1.0.0
cloudabi-0.0.3
cpuid-bool-0.1.2
digest-0.8.1
digest-0.9.0
dtoa-0.4.6
either-1.6.1
env_logger-0.7.1
fake-simd-0.1.2
fixedbitset-0.2.0
fnv-1.0.7
fuchsia-cprng-0.1.1
fuchsia-zircon-0.3.3
fuchsia-zircon-sys-0.3.3
futures-0.3.6
futures-channel-0.3.6
futures-core-0.3.6
futures-executor-0.3.6
futures-io-0.3.6
futures-macro-0.3.6
futures-sink-0.3.6
futures-task-0.3.6
futures-util-0.3.6
generic-array-0.12.3
generic-array-0.14.4
getopts-0.2.21
getrandom-0.1.15
h2-0.2.6
half-1.6.0
hashbrown-0.9.1
headers-0.3.2
headers-core-0.2.0
hermit-abi-0.1.17
http-0.2.1
http-body-0.3.1
httparse-1.3.4
httpdate-0.3.2
humantime-1.3.0
hyper-0.13.8
idna-0.2.0
indexmap-1.6.0
input_buffer-0.3.1
iovec-0.1.4
itertools-0.9.0
itoa-0.4.6
kernel32-sys-0.2.2
lazy_static-1.4.0
libc-0.2.79
linked-hash-map-0.5.3
log-0.4.11
markov-1.1.0
matches-0.1.8
memchr-2.3.3
mime-0.3.16
mime_guess-2.0.3
mio-0.6.22
mio-named-pipes-0.1.7
mio-uds-0.6.8
miow-0.2.1
miow-0.3.5
multipart-0.17.0
net2-0.2.35
num_cpus-1.13.0
once_cell-1.4.1
opaque-debug-0.2.3
opaque-debug-0.3.0
percent-encoding-2.1.0
petgraph-0.5.1
pin-project-0.4.26
pin-project-internal-0.4.26
pin-project-lite-0.1.10
pin-utils-0.1.0
pkg-config-0.3.18
ppv-lite86-0.2.9
pretty_env_logger-0.4.0
proc-macro-hack-0.5.18
proc-macro-nested-0.1.6
proc-macro2-1.0.24
quick-error-1.2.3
quote-1.0.7
rand-0.6.5
rand-0.7.3
rand_chacha-0.1.1
rand_chacha-0.2.2
rand_core-0.3.1
rand_core-0.4.2
rand_core-0.5.1
rand_hc-0.1.0
rand_hc-0.2.0
rand_isaac-0.1.1
rand_jitter-0.1.4
rand_os-0.1.3
rand_pcg-0.1.2
rand_xorshift-0.1.1
rdrand-0.4.0
redox_syscall-0.1.57
regex-1.3.9
regex-syntax-0.6.18
remove_dir_all-0.5.3
rustc_version-0.2.3
ryu-1.0.5
safemem-0.3.3
scoped-tls-1.0.0
semver-0.9.0
semver-parser-0.7.0
serde-1.0.116
serde_cbor-0.11.1
serde_derive-1.0.116
serde_json-1.0.58
serde_urlencoded-0.6.1
serde_yaml-0.8.13
sha-1-0.8.2
sha-1-0.9.1
signal-hook-registry-1.2.1
slab-0.4.2
smallmap-1.1.5
socket2-0.3.15
syn-1.0.42
tempfile-3.1.0
termcolor-1.1.0
thread_local-1.0.1
time-0.1.44
tinyvec-0.3.4
tokio-0.2.22
tokio-macros-0.2.5
tokio-tungstenite-0.11.0
tokio-util-0.3.1
toml-0.5.6
tower-service-0.3.0
tracing-0.1.21
tracing-core-0.1.17
tracing-futures-0.2.4
try-lock-0.2.3
tungstenite-0.11.1
twoway-0.1.8
typenum-1.12.0
unicase-2.6.0
unicode-bidi-0.3.4
unicode-normalization-0.1.13
unicode-width-0.1.8
unicode-xid-0.2.1
url-2.1.1
urlencoding-1.1.1
utf-8-0.7.5
version_check-0.9.2
want-0.3.0
warp-0.2.5
wasi-0.10.0+wasi-snapshot-preview1
wasi-0.9.0+wasi-snapshot-preview1
winapi-0.2.8
winapi-0.3.9
winapi-build-0.1.1
winapi-i686-pc-windows-gnu-0.4.0
winapi-util-0.1.5
winapi-x86_64-pc-windows-gnu-0.4.0
ws2_32-sys-0.2.1
yaml-rust-0.4.4
"

inherit cargo

DESCRIPTION="Generate string of text from Markov chain fed by stdin"
# Double check the homepage as the cargo_metadata crate
# does not provide this value so instead repository is used
HOMEPAGE="https://flanchan.moe/markov/"
SRC_URI="$(cargo_crate_uris ${CRATES}) https://git.flanchan.moe/attachments/cf0b9095-2403-465b-b3aa-61b121134c84 -> markov-0.7.1.crate"
RESTRICT="mirror"
# License set may be more restrictive as OR is not respected
# use cargo-license for a more accurate license picture
LICENSE="GPL-3+"
SLOT="0"
KEYWORDS="~amd64"
IUSE="+compress-chain +split-newlines +api split-sentance always-aggregate hog-buffer"

DEPEND="compress-chain? ( app-arch/bzip2 )"
RDEPEND=""

src_configure() {
	local myfeatures=(
		$(usev compress-chain)
		$(usev split-newlines)
		$(usev api)
		$(usev split-sentance)
		$(usev always-aggregate)
		$(usev hog-buffer)
	)
	#TODO: This hack slows compilation down I think, but without it ld fails so... We should add cargo buildscript to do this instead
	use compress-chain && export RUSTFLAGS="${RUSTFLAGS} -ldylib=bz2"
	cargo_src_configure --no-default-features
}
@ -0,0 +1,217 @@
|
||||
# Copyright 2017-2020 Gentoo Authors
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
# Auto-Generated by cargo-ebuild 0.3.1
|
||||
|
||||
EAPI=7
|
||||
CRATES="
|
||||
aho-corasick-0.7.13
|
||||
arc-swap-0.4.7
|
||||
async-compression-0.3.5
|
||||
atty-0.2.14
|
||||
autocfg-0.1.7
|
||||
autocfg-1.0.1
|
||||
base64-0.12.3
|
||||
bitflags-1.2.1
|
||||
block-buffer-0.7.3
|
||||
block-buffer-0.9.0
|
||||
block-padding-0.1.5
|
||||
buf_redux-0.8.4
|
||||
byte-tools-0.3.1
|
||||
byteorder-1.3.4
|
||||
bytes-0.5.6
|
||||
bzip2-0.3.3
|
||||
bzip2-sys-0.1.9+1.0.8
|
||||
cc-1.0.60
|
||||
cfg-if-0.1.10
|
||||
cfg-if-1.0.0
|
||||
cloudabi-0.0.3
|
||||
cpuid-bool-0.1.2
|
||||
digest-0.8.1
|
||||
digest-0.9.0
|
||||
dtoa-0.4.6
|
||||
either-1.6.1
|
||||
env_logger-0.7.1
|
||||
fake-simd-0.1.2
|
||||
fixedbitset-0.2.0
|
||||
fnv-1.0.7
|
||||
fuchsia-cprng-0.1.1
|
||||
fuchsia-zircon-0.3.3
|
||||
fuchsia-zircon-sys-0.3.3
|
||||
futures-0.3.6
|
||||
futures-channel-0.3.6
|
||||
futures-core-0.3.6
|
||||
futures-executor-0.3.6
|
||||
futures-io-0.3.6
|
||||
futures-macro-0.3.6
|
||||
futures-sink-0.3.6
|
||||
futures-task-0.3.6
|
||||
futures-util-0.3.6
|
||||
generic-array-0.12.3
|
||||
generic-array-0.14.4
|
||||
getopts-0.2.21
|
||||
getrandom-0.1.15
|
||||
h2-0.2.6
|
||||
half-1.6.0
|
||||
hashbrown-0.9.1
|
||||
headers-0.3.2
|
||||
headers-core-0.2.0
|
||||
hermit-abi-0.1.17
|
||||
http-0.2.1
|
||||
http-body-0.3.1
|
||||
httparse-1.3.4
|
||||
httpdate-0.3.2
|
||||
humantime-1.3.0
|
||||
hyper-0.13.8
|
||||
idna-0.2.0
|
||||
indexmap-1.6.0
|
||||
input_buffer-0.3.1
|
||||
iovec-0.1.4
|
||||
itertools-0.9.0
|
||||
itoa-0.4.6
|
||||
kernel32-sys-0.2.2
|
||||
lazy_static-1.4.0
|
||||
libc-0.2.79
|
||||
linked-hash-map-0.5.3
|
||||
log-0.4.11
|
||||
markov-1.1.0
|
||||
matches-0.1.8
|
||||
memchr-2.3.3
|
||||
mime-0.3.16
|
||||
mime_guess-2.0.3
|
||||
mio-0.6.22
|
||||
mio-named-pipes-0.1.7
|
||||
mio-uds-0.6.8
|
||||
miow-0.2.1
|
||||
miow-0.3.5
|
||||
multipart-0.17.0
|
||||
net2-0.2.35
|
||||
num_cpus-1.13.0
|
||||
once_cell-1.4.1
|
||||
opaque-debug-0.2.3
|
||||
opaque-debug-0.3.0
|
||||
percent-encoding-2.1.0
|
||||
petgraph-0.5.1
|
||||
pin-project-0.4.26
|
||||
pin-project-internal-0.4.26
|
||||
pin-project-lite-0.1.10
|
||||
pin-utils-0.1.0
|
||||
pkg-config-0.3.18
|
||||
ppv-lite86-0.2.9
|
||||
pretty_env_logger-0.4.0
|
||||
proc-macro-hack-0.5.18
|
||||
proc-macro-nested-0.1.6
|
||||
proc-macro2-1.0.24
|
||||
quick-error-1.2.3
|
||||
quote-1.0.7
|
||||
rand-0.6.5
|
||||
rand-0.7.3
|
||||
rand_chacha-0.1.1
|
||||
rand_chacha-0.2.2
|
||||
rand_core-0.3.1
|
||||
rand_core-0.4.2
|
||||
rand_core-0.5.1
|
||||
rand_hc-0.1.0
|
||||
rand_hc-0.2.0
|
||||
rand_isaac-0.1.1
|
||||
rand_jitter-0.1.4
|
||||
rand_os-0.1.3
|
||||
rand_pcg-0.1.2
|
||||
rand_xorshift-0.1.1
|
||||
rdrand-0.4.0
|
||||
redox_syscall-0.1.57
|
||||
regex-1.3.9
|
||||
regex-syntax-0.6.18
|
||||
remove_dir_all-0.5.3
|
||||
rustc_version-0.2.3
|
||||
ryu-1.0.5
|
||||
safemem-0.3.3
|
||||
scoped-tls-1.0.0
|
||||
semver-0.9.0
|
||||
semver-parser-0.7.0
|
||||
serde-1.0.116
|
||||
serde_cbor-0.11.1
|
||||
serde_derive-1.0.116
|
||||
serde_json-1.0.58
|
||||
serde_urlencoded-0.6.1
|
||||
serde_yaml-0.8.13
|
||||
sha-1-0.8.2
|
||||
sha-1-0.9.1
|
||||
signal-hook-registry-1.2.1
|
||||
slab-0.4.2
|
||||
smallmap-1.1.5
|
||||
socket2-0.3.15
|
||||
syn-1.0.42
|
||||
tempfile-3.1.0
|
||||
termcolor-1.1.0
|
||||
thread_local-1.0.1
|
||||
time-0.1.44
|
||||
tinyvec-0.3.4
|
||||
tokio-0.2.22
|
||||
tokio-macros-0.2.5
|
||||
tokio-tungstenite-0.11.0
|
||||
tokio-util-0.3.1
|
||||
toml-0.5.6
|
||||
tower-service-0.3.0
|
||||
tracing-0.1.21
|
||||
tracing-core-0.1.17
|
||||
tracing-futures-0.2.4
|
||||
try-lock-0.2.3
|
||||
tungstenite-0.11.1
|
||||
twoway-0.1.8
|
||||
typenum-1.12.0
|
||||
unicase-2.6.0
|
||||
unicode-bidi-0.3.4
|
||||
unicode-normalization-0.1.13
|
||||
unicode-width-0.1.8
|
||||
unicode-xid-0.2.1
|
||||
url-2.1.1
|
||||
urlencoding-1.1.1
|
||||
utf-8-0.7.5
|
||||
version_check-0.9.2
|
||||
want-0.3.0
|
||||
warp-0.2.5
|
||||
wasi-0.10.0+wasi-snapshot-preview1
|
||||
wasi-0.9.0+wasi-snapshot-preview1
|
||||
winapi-0.2.8
|
||||
winapi-0.3.9
|
||||
winapi-build-0.1.1
|
||||
winapi-i686-pc-windows-gnu-0.4.0
|
||||
winapi-util-0.1.5
|
||||
winapi-x86_64-pc-windows-gnu-0.4.0
|
||||
ws2_32-sys-0.2.1
|
||||
yaml-rust-0.4.4
|
||||
"
|
||||
|
||||
inherit cargo
|
||||
|
||||
DESCRIPTION="Generate string of text from Markov chain fed by stdin"
|
||||
# Double check the homepage as the cargo_metadata crate
|
||||
# does not provide this value so instead repository is used
|
||||
HOMEPAGE="https://flanchan.moe/markov/"
|
||||
SRC_URI="$(cargo_crate_uris ${CRATES}) https://git.flanchan.moe/attachments/c6f37bfc-afd8-462f-807f-ab9f95197680 -> markov-0.8.1.crate"
|
||||
RESTRICT="mirror"
|
||||
# License set may be more restrictive as OR is not respected
|
||||
# use cargo-license for a more accurate license picture
|
||||
LICENSE="GPL-3+"
|
||||
SLOT="0"
|
||||
KEYWORDS="~amd64"
|
||||
IUSE="+compress-chain +split-newlines +api split-sentance feed-sentance always-aggregate hog-buffer"
|
||||
|
||||
DEPEND="compress-chain? ( app-arch/bzip2 )"
|
||||
RDEPEND=""
|
||||
|
||||
src_configure() {
|
||||
local myfeatures=(
|
||||
$(usev compress-chain)
|
||||
$(usev split-newlines)
|
||||
$(usev api)
|
||||
$(usev split-sentance)
|
||||
$(usev feed-sentance)
|
||||
$(usev always-aggregate)
|
||||
$(usev hog-buffer)
|
||||
)
|
||||
#TODO: This hack likely slows compilation down, but without it ld fails; a cargo build script should set this up instead
|
||||
use compress-chain && export RUSTFLAGS="${RUSTFLAGS} -ldylib=bz2"
|
||||
cargo_src_configure --no-default-features
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
|
||||
<pkgmetadata>
|
||||
<maintainer type="person">
|
||||
<email>flanchan@cumallover.me</email>
|
||||
</maintainer>
|
||||
<use>
|
||||
<flag name="compress-chain">Compress chain when saving/loading</flag>
|
||||
<flag name="split-newlines">Treat each new line as a new set to feed</flag>
|
||||
<flag name="api">Enable /api route</flag>
|
||||
<flag name="feed-sentance">Further split buffers by sentance, feeding a new one for each.</flag>
|
||||
<flag name="split-sentance">Split by sentance as well as word boundaries</flag>
|
||||
<flag name="always-aggregate">Always operate on aggregated request body (can speed up writes at the cost of memory)</flag>
|
||||
<flag name="hog-buffer">Acquire chain mutex write lock while streaming body (can speed up writes, but can also allow for DoS)</flag></use>
|
||||
</pkgmetadata>
|
@ -0,0 +1,203 @@
|
||||
# Copyright 2017-2020 Gentoo Authors
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
# Auto-Generated by cargo-ebuild 0.3.1
|
||||
|
||||
EAPI=7
|
||||
|
||||
CRATES="
|
||||
aho-corasick-0.7.13
|
||||
arc-swap-0.4.7
|
||||
async-compression-0.3.5
|
||||
atty-0.2.14
|
||||
autocfg-0.1.7
|
||||
autocfg-1.0.1
|
||||
base64-0.12.3
|
||||
bitflags-1.2.1
|
||||
block-buffer-0.7.3
|
||||
block-buffer-0.9.0
|
||||
block-padding-0.1.5
|
||||
buf_redux-0.8.4
|
||||
byte-tools-0.3.1
|
||||
byteorder-1.3.4
|
||||
bytes-0.5.6
|
||||
bzip2-0.3.3
|
||||
bzip2-sys-0.1.9+1.0.8
|
||||
cc-1.0.60
|
||||
cfg-if-0.1.10
|
||||
cfg-if-1.0.0
|
||||
cloudabi-0.0.3
|
||||
cpuid-bool-0.1.2
|
||||
digest-0.8.1
|
||||
digest-0.9.0
|
||||
dtoa-0.4.6
|
||||
either-1.6.1
|
||||
env_logger-0.7.1
|
||||
fake-simd-0.1.2
|
||||
fixedbitset-0.2.0
|
||||
fnv-1.0.7
|
||||
fuchsia-cprng-0.1.1
|
||||
fuchsia-zircon-0.3.3
|
||||
fuchsia-zircon-sys-0.3.3
|
||||
futures-0.3.6
|
||||
futures-channel-0.3.6
|
||||
futures-core-0.3.6
|
||||
futures-executor-0.3.6
|
||||
futures-io-0.3.6
|
||||
futures-macro-0.3.6
|
||||
futures-sink-0.3.6
|
||||
futures-task-0.3.6
|
||||
futures-util-0.3.6
|
||||
generic-array-0.12.3
|
||||
generic-array-0.14.4
|
||||
getopts-0.2.21
|
||||
getrandom-0.1.15
|
||||
h2-0.2.6
|
||||
half-1.6.0
|
||||
hashbrown-0.9.1
|
||||
headers-0.3.2
|
||||
headers-core-0.2.0
|
||||
hermit-abi-0.1.17
|
||||
http-0.2.1
|
||||
http-body-0.3.1
|
||||
httparse-1.3.4
|
||||
httpdate-0.3.2
|
||||
humantime-1.3.0
|
||||
hyper-0.13.8
|
||||
idna-0.2.0
|
||||
indexmap-1.6.0
|
||||
input_buffer-0.3.1
|
||||
iovec-0.1.4
|
||||
itertools-0.9.0
|
||||
itoa-0.4.6
|
||||
kernel32-sys-0.2.2
|
||||
lazy_static-1.4.0
|
||||
libc-0.2.79
|
||||
linked-hash-map-0.5.3
|
||||
log-0.4.11
|
||||
markov-1.1.0
|
||||
matches-0.1.8
|
||||
memchr-2.3.3
|
||||
mime-0.3.16
|
||||
mime_guess-2.0.3
|
||||
mio-0.6.22
|
||||
mio-named-pipes-0.1.7
|
||||
mio-uds-0.6.8
|
||||
miow-0.2.1
|
||||
miow-0.3.5
|
||||
multipart-0.17.0
|
||||
net2-0.2.35
|
||||
num_cpus-1.13.0
|
||||
once_cell-1.4.1
|
||||
opaque-debug-0.2.3
|
||||
opaque-debug-0.3.0
|
||||
percent-encoding-2.1.0
|
||||
petgraph-0.5.1
|
||||
pin-project-0.4.26
|
||||
pin-project-internal-0.4.26
|
||||
pin-project-lite-0.1.10
|
||||
pin-utils-0.1.0
|
||||
pkg-config-0.3.18
|
||||
ppv-lite86-0.2.9
|
||||
pretty_env_logger-0.4.0
|
||||
proc-macro-hack-0.5.18
|
||||
proc-macro-nested-0.1.6
|
||||
proc-macro2-1.0.24
|
||||
quick-error-1.2.3
|
||||
quote-1.0.7
|
||||
rand-0.6.5
|
||||
rand-0.7.3
|
||||
rand_chacha-0.1.1
|
||||
rand_chacha-0.2.2
|
||||
rand_core-0.3.1
|
||||
rand_core-0.4.2
|
||||
rand_core-0.5.1
|
||||
rand_hc-0.1.0
|
||||
rand_hc-0.2.0
|
||||
rand_isaac-0.1.1
|
||||
rand_jitter-0.1.4
|
||||
rand_os-0.1.3
|
||||
rand_pcg-0.1.2
|
||||
rand_xorshift-0.1.1
|
||||
rdrand-0.4.0
|
||||
redox_syscall-0.1.57
|
||||
regex-1.3.9
|
||||
regex-syntax-0.6.18
|
||||
remove_dir_all-0.5.3
|
||||
rustc_version-0.2.3
|
||||
ryu-1.0.5
|
||||
safemem-0.3.3
|
||||
scoped-tls-1.0.0
|
||||
semver-0.9.0
|
||||
semver-parser-0.7.0
|
||||
serde-1.0.116
|
||||
serde_cbor-0.11.1
|
||||
serde_derive-1.0.116
|
||||
serde_json-1.0.58
|
||||
serde_urlencoded-0.6.1
|
||||
serde_yaml-0.8.13
|
||||
sha-1-0.8.2
|
||||
sha-1-0.9.1
|
||||
signal-hook-registry-1.2.1
|
||||
slab-0.4.2
|
||||
smallmap-1.1.5
|
||||
socket2-0.3.15
|
||||
syn-1.0.42
|
||||
tempfile-3.1.0
|
||||
termcolor-1.1.0
|
||||
thread_local-1.0.1
|
||||
time-0.1.44
|
||||
tinyvec-0.3.4
|
||||
tokio-0.2.22
|
||||
tokio-macros-0.2.5
|
||||
tokio-tungstenite-0.11.0
|
||||
tokio-util-0.3.1
|
||||
toml-0.5.6
|
||||
tower-service-0.3.0
|
||||
tracing-0.1.21
|
||||
tracing-core-0.1.17
|
||||
tracing-futures-0.2.4
|
||||
try-lock-0.2.3
|
||||
tungstenite-0.11.1
|
||||
twoway-0.1.8
|
||||
typenum-1.12.0
|
||||
unicase-2.6.0
|
||||
unicode-bidi-0.3.4
|
||||
unicode-normalization-0.1.13
|
||||
unicode-width-0.1.8
|
||||
unicode-xid-0.2.1
|
||||
url-2.1.1
|
||||
urlencoding-1.1.1
|
||||
utf-8-0.7.5
|
||||
version_check-0.9.2
|
||||
want-0.3.0
|
||||
warp-0.2.5
|
||||
wasi-0.10.0+wasi-snapshot-preview1
|
||||
wasi-0.9.0+wasi-snapshot-preview1
|
||||
winapi-0.2.8
|
||||
winapi-0.3.9
|
||||
winapi-build-0.1.1
|
||||
winapi-i686-pc-windows-gnu-0.4.0
|
||||
winapi-util-0.1.5
|
||||
winapi-x86_64-pc-windows-gnu-0.4.0
|
||||
ws2_32-sys-0.2.1
|
||||
yaml-rust-0.4.4
|
||||
"
|
||||
|
||||
inherit cargo
|
||||
|
||||
DESCRIPTION="Generate string of text from Markov chain fed by stdin"
|
||||
# Double check the homepage as the cargo_metadata crate
|
||||
# does not provide this value so instead repository is used
|
||||
HOMEPAGE="homepage field in Cargo.toml inaccessible to cargo metadata"
|
||||
SRC_URI="$(cargo_crate_uris ${CRATES})"
|
||||
RESTRICT="mirror"
|
||||
# License set may be more restrictive as OR is not respected
|
||||
# use cargo-license for a more accurate license picture
|
||||
LICENSE="Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 ISC MIT Unlicense Zlib gpl-3.0-or-later"
|
||||
SLOT="0"
|
||||
KEYWORDS="~amd64"
|
||||
IUSE=""
|
||||
|
||||
DEPEND=""
|
||||
RDEPEND=""
|
@ -0,0 +1,19 @@
|
||||
bindpoint = '127.0.0.1:8001'
|
||||
file = 'chain.dat'
|
||||
max_content_length = 4194304
|
||||
max_gen_size = 256
|
||||
save_interval_secs = 2
|
||||
trust_x_forwarded_for = false
|
||||
feed_bounds = '2..'
|
||||
|
||||
[filter]
|
||||
inbound = ''
|
||||
outbound = ''
|
||||
|
||||
[writer]
|
||||
backlog = 32
|
||||
internal_backlog = 8
|
||||
capacity = 4
|
||||
|
||||
[mask]
|
||||
default = 'Accept'
|
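The example markov.toml above maps onto the `Config` struct added in config.rs later in this diff. A minimal loading sketch (not part of the source; it assumes that struct and the `toml` crate are in scope, and mirrors the error mapping of `Config::load` below):

use std::io;

// Sketch only: read the example config and parse it the same way Config::load does.
fn load_example() -> io::Result<Config> {
    let text = std::fs::read_to_string("markov.toml")?;
    toml::de::from_str(&text).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))
}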
@ -0,0 +1,55 @@
|
||||
//! API errors
|
||||
//use super::*;
|
||||
use std::{
|
||||
error,
|
||||
fmt,
|
||||
};
|
||||
use warp::{
|
||||
Rejection,
|
||||
Reply,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ApiError {
|
||||
Body,
|
||||
}
|
||||
|
||||
impl ApiError
|
||||
{
|
||||
#[inline] fn error_code(&self) -> warp::http::StatusCode
|
||||
{
|
||||
status!(match self {
|
||||
Self::Body => 422,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl warp::reject::Reject for ApiError{}
|
||||
impl error::Error for ApiError{}
|
||||
impl std::fmt::Display for ApiError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
match self {
|
||||
Self::Body => write!(f, "invalid data in request body"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::str::Utf8Error> for ApiError
|
||||
{
|
||||
fn from(_: std::str::Utf8Error) -> Self
|
||||
{
|
||||
Self::Body
|
||||
}
|
||||
}
|
||||
|
||||
// Handles API rejections
|
||||
pub async fn rejection(err: Rejection) -> Result<impl Reply, Rejection>
|
||||
{
|
||||
if let Some(api) = err.find::<ApiError>() {
|
||||
Ok(warp::reply::with_status(format!("ApiError: {}\n", api), api.error_code()))
|
||||
} else {
|
||||
Err(err)
|
||||
}
|
||||
}
|
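A sketch (not from this repository) of how the rejection handler above is typically attached to a route with warp's `Filter::recover`; the route body here is a placeholder, only the `.recover(rejection)` wiring is the point:

use warp::Filter;

// Sketch only: any ApiError rejected inside the route is turned into a
// plain-text reply with the matching status code by `rejection`.
async fn serve_api_sketch() {
    let route = warp::path!("api" / "single")
        .and_then(|| async { Err::<String, warp::Rejection>(warp::reject::custom(ApiError::Body)) })
        .recover(rejection);
    warp::serve(route).run(([127, 0, 0, 1], 8001)).await;
}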
@ -0,0 +1,32 @@
|
||||
//! For API calls if enabled
|
||||
use super::*;
|
||||
use std::{
|
||||
iter,
|
||||
convert::Infallible,
|
||||
};
|
||||
use futures::{
|
||||
stream::{
|
||||
self,
|
||||
BoxStream,
|
||||
StreamExt,
|
||||
},
|
||||
};
|
||||
pub mod error;
|
||||
use error::ApiError;
|
||||
|
||||
mod single;
|
||||
|
||||
#[inline] fn aggregate(mut body: impl Buf) -> Result<String, std::str::Utf8Error>
|
||||
{
|
||||
std::str::from_utf8(&body.to_bytes()).map(ToOwned::to_owned)
|
||||
}
|
||||
|
||||
pub async fn single(host: IpAddr, num: Option<usize>, body: impl Buf) -> Result<impl warp::Reply, warp::reject::Rejection>
|
||||
{
|
||||
single::single_stream(host, num, body).await
|
||||
.map(|rx| Response::new(Body::wrap_stream(rx.map(move |x| {
|
||||
info!("{} <- {:?}", host, x);
|
||||
x
|
||||
}))))
|
||||
.map_err(warp::reject::custom)
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
//! Handler for /single/
|
||||
use super::*;
|
||||
|
||||
//TODO: Change to stream impl like normal `feed` has, instead of taking aggregate?
|
||||
pub async fn single_stream(host: IpAddr, num: Option<usize>, body: impl Buf) -> Result<BoxStream<'static, Result<String, Infallible>>, ApiError>
|
||||
{
|
||||
let body = aggregate(body)?;
|
||||
info!("{} <- {:?}", host, &body[..]);
|
||||
|
||||
let mut chain = Chain::new();
|
||||
|
||||
if_debug! {
|
||||
let timer = std::time::Instant::now();
|
||||
}
|
||||
cfg_if! {
|
||||
if #[cfg(feature="split-newlines")] {
|
||||
for body in body.split('\n').filter(|line| !line.trim().is_empty()) {
|
||||
feed::feed(&mut chain, body, 1..);
|
||||
}
|
||||
}else {
|
||||
feed::feed(&mut chain, body, 1..);
|
||||
}
|
||||
}
|
||||
if_debug!{
|
||||
trace!("Write took {}ms", timer.elapsed().as_millis());
|
||||
}
|
||||
if chain.is_empty() {
|
||||
Ok(stream::empty().boxed())
|
||||
} else {
|
||||
match num {
|
||||
|
||||
None => Ok(stream::iter(iter::once(Ok(chain.generate_str()))).boxed()),
|
||||
Some(num) => {
|
||||
let (mut tx, rx) = mpsc::channel(num);
|
||||
tokio::spawn(async move {
|
||||
for string in chain.str_iter_for(num) {
|
||||
if let Err(e) = tx.send(string).await {
|
||||
error!("Failed to send string to body, aborting: {:?}", e.0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
Ok(StreamExt::map(rx, |x| Ok::<_, Infallible>(x)).boxed())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,171 @@
|
||||
//! For binding to sockets
|
||||
use super::*;
|
||||
use futures::{
|
||||
prelude::*,
|
||||
};
|
||||
use std::{
|
||||
marker::{
|
||||
Send,
|
||||
Unpin,
|
||||
},
|
||||
fmt,
|
||||
error,
|
||||
path::{
|
||||
Path,
|
||||
PathBuf,
|
||||
},
|
||||
};
|
||||
use tokio::{
|
||||
io::{
|
||||
self,
|
||||
AsyncRead,
|
||||
AsyncWrite,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BindError<E>
|
||||
{
|
||||
IO(io::Error),
|
||||
Warp(warp::Error),
|
||||
Other(E),
|
||||
}
|
||||
|
||||
impl<E: error::Error + 'static> error::Error for BindError<E>
|
||||
{
|
||||
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
|
||||
Some(match &self {
|
||||
Self::IO(io) => io,
|
||||
Self::Other(o) => o,
|
||||
Self::Warp(w) => w,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl<E: fmt::Display> fmt::Display for BindError<E>
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
match self {
|
||||
Self::IO(io) => write!(f, "io error: {}", io),
|
||||
Self::Other(other) => write!(f, "{}", other),
|
||||
Self::Warp(warp) => write!(f, "server error: {}", warp),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BindpointParseError;
|
||||
|
||||
impl error::Error for BindpointParseError{}
|
||||
impl fmt::Display for BindpointParseError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
write!(f, "Failed to parse bindpoint as IP or unix domain socket")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd)]
|
||||
pub enum Bindpoint
|
||||
{
|
||||
Unix(PathBuf),
|
||||
TCP(SocketAddr),
|
||||
}
|
||||
|
||||
impl fmt::Display for Bindpoint
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
match self {
|
||||
Self::Unix(unix) => write!(f, "unix:/{}", unix.to_string_lossy()),
|
||||
Self::TCP(tcp) => write!(f, "{}", tcp),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for Bindpoint
|
||||
{
|
||||
type Err = BindpointParseError;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(if let Ok(ip) = s.parse::<SocketAddr>() {
|
||||
Self::TCP(ip)
|
||||
} else if s.starts_with("unix:/") {
|
||||
Self::Unix(PathBuf::from(&s[6..]))
|
||||
} else {
|
||||
return Err(BindpointParseError);
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn bind_unix(to: impl AsRef<Path>) -> io::Result<impl TryStream<Ok = impl AsyncRead + AsyncWrite + Send + Unpin + 'static, Error = impl Into<Box<dyn std::error::Error + Send + Sync>>>>
|
||||
{
|
||||
debug!("Binding to AF_UNIX: {:?}", to.as_ref());
|
||||
let listener = tokio::net::UnixListener::bind(to)?;
|
||||
Ok(listener)
|
||||
}
|
||||
|
||||
pub fn serve<F>(server: warp::Server<F>, bind: Bindpoint, signal: impl Future<Output=()> + Send + 'static) -> Result<(Bindpoint, BoxFuture<'static, ()>), BindError<std::convert::Infallible>>
|
||||
where F: Filter + Clone + Send + Sync + 'static,
|
||||
<F::Future as TryFuture>::Ok: warp::Reply,
|
||||
{
|
||||
Ok(match bind {
|
||||
Bindpoint::TCP(sock) => server.try_bind_with_graceful_shutdown(sock, signal).map(|(sock, fut)| (Bindpoint::TCP(sock), fut.boxed())).map_err(BindError::Warp)?,
|
||||
Bindpoint::Unix(unix) => {
|
||||
(Bindpoint::Unix(unix.clone()),
|
||||
server.serve_incoming_with_graceful_shutdown(bind_unix(unix).map_err(BindError::IO)?, signal).boxed())
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
impl From<SocketAddr> for Bindpoint
|
||||
{
|
||||
fn from(from: SocketAddr) -> Self
|
||||
{
|
||||
Self::TCP(from)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_serve<F>(server: warp::Server<F>, bind: impl TryBindpoint, signal: impl Future<Output=()> + Send + 'static) -> Result<(Bindpoint, BoxFuture<'static, ()>), BindError<impl error::Error + 'static>>
|
||||
where F: Filter + Clone + Send + Sync + 'static,
|
||||
<F::Future as TryFuture>::Ok: warp::Reply,
|
||||
{
|
||||
serve(server, bind.try_parse().map_err(BindError::Other)?, signal).map_err(BindError::coerce)
|
||||
}
|
||||
|
||||
pub trait TryBindpoint: Sized
|
||||
{
|
||||
type Err: error::Error + 'static;
|
||||
fn try_parse(self) -> Result<Bindpoint, Self::Err>;
|
||||
}
|
||||
|
||||
impl TryBindpoint for Bindpoint
|
||||
{
|
||||
type Err = std::convert::Infallible;
|
||||
fn try_parse(self) -> Result<Bindpoint, Self::Err>
|
||||
{
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<str>> TryBindpoint for T
|
||||
{
|
||||
type Err = BindpointParseError;
|
||||
fn try_parse(self) -> Result<Bindpoint, Self::Err>
|
||||
{
|
||||
self.as_ref().parse()
|
||||
}
|
||||
}
|
||||
|
||||
impl BindError<std::convert::Infallible>
|
||||
{
|
||||
pub fn coerce<T>(self) -> BindError<T>
|
||||
{
|
||||
match self {
|
||||
Self::Warp(w) => BindError::Warp(w),
|
||||
Self::IO(w) => BindError::IO(w),
|
||||
/*#[cold]*/ _ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
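A small sketch (not from the source) of the two bindpoint forms the `FromStr` impl above accepts; the unix socket path is just an illustrative example:

// Sketch only: TCP socket addresses parse directly; anything prefixed with
// "unix:/" becomes a unix domain socket path.
fn bindpoint_parse_sketch() {
    let tcp: Bindpoint = "127.0.0.1:8001".parse().unwrap();
    assert_eq!(tcp.to_string(), "127.0.0.1:8001");

    let unix: Bindpoint = "unix:/run/markov.sock".parse().unwrap();
    assert_eq!(unix.to_string(), "unix:/run/markov.sock");
}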
@ -0,0 +1,29 @@
|
||||
use std::ptr;
|
||||
|
||||
/// Copy slice of bytes only
|
||||
///
|
||||
/// # Notes
|
||||
/// `dst` and `src` must not overlap. See [move_slice].
|
||||
pub fn copy_slice(dst: &mut [u8], src: &[u8]) -> usize
|
||||
{
|
||||
let sz = std::cmp::min(dst.len(),src.len());
|
||||
unsafe {
|
||||
//libc::memcpy(&mut dst[0] as *mut u8 as *mut c_void, &src[0] as *const u8 as *const c_void, sz);
|
||||
ptr::copy_nonoverlapping(&src[0] as *const u8, &mut dst[0] as *mut u8, sz);
|
||||
}
|
||||
sz
|
||||
}
|
||||
|
||||
/// Move slice of bytes only
|
||||
///
|
||||
/// # Notes
|
||||
/// `dst` and `src` can overlap.
|
||||
pub fn move_slice(dst: &mut [u8], src: &[u8]) -> usize
|
||||
{
|
||||
let sz = std::cmp::min(dst.len(),src.len());
|
||||
unsafe {
|
||||
//libc::memmove(&mut dst[0] as *mut u8 as *mut c_void, &src[0] as *const u8 as *const c_void, sz);
|
||||
ptr::copy(&src[0] as *const u8, &mut dst[0] as *mut u8, sz);
|
||||
}
|
||||
sz
|
||||
}
|
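A short usage sketch (not from the source) for the helpers above: both copy at most the shorter slice's length and return the number of bytes copied:

fn copy_slice_sketch() {
    let mut dst = [0u8; 4];
    // Only min(dst.len(), src.len()) = 4 bytes are copied.
    let n = copy_slice(&mut dst, b"hello");
    assert_eq!(n, 4);
    assert_eq!(&dst, b"hell");
}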
@ -0,0 +1,282 @@
|
||||
//! Stream related things
|
||||
use super::*;
|
||||
use std::{
|
||||
task::{
|
||||
Poll,
|
||||
Context,
|
||||
},
|
||||
pin::Pin,
|
||||
marker::PhantomData,
|
||||
};
|
||||
use tokio::{
|
||||
io::{
|
||||
AsyncBufRead,
|
||||
AsyncRead,
|
||||
},
|
||||
prelude::*,
|
||||
};
|
||||
use futures::{
|
||||
stream::{
|
||||
Stream,
|
||||
StreamExt,
|
||||
Fuse,
|
||||
},
|
||||
};
|
||||
use pin_project::pin_project;
|
||||
|
||||
/// Converts a stream of byte-containing objects into an `AsyncRead` and `AsyncBufRead`er.
|
||||
#[pin_project]
|
||||
pub struct StreamReader<I, T>
|
||||
where I: Stream<Item=T>
|
||||
{
|
||||
#[pin]
|
||||
source: Fuse<I>,
|
||||
buffer: Vec<u8>,
|
||||
}
|
||||
|
||||
impl<T, I> StreamReader<I, T>
|
||||
where I: Stream<Item=T>,
|
||||
T: AsRef<[u8]>
|
||||
{
|
||||
/// The current buffer
|
||||
pub fn buffer(&self) -> &[u8]
|
||||
{
|
||||
&self.buffer[..]
|
||||
}
|
||||
/// Consume into the original stream
|
||||
pub fn into_inner(self) -> I
|
||||
{
|
||||
self.source.into_inner()
|
||||
}
|
||||
/// Create a new instance with a buffer capacity
|
||||
pub fn with_capacity(source: I, cap: usize) -> Self
|
||||
{
|
||||
Self {
|
||||
source: source.fuse(),
|
||||
buffer: Vec::with_capacity(cap)
|
||||
}
|
||||
}
|
||||
/// Create a new instance from this stream
|
||||
pub fn new(source: I) -> Self
|
||||
{
|
||||
Self {
|
||||
source: source.fuse(),
|
||||
buffer: Vec::new(),
|
||||
}
|
||||
}
|
||||
/// Attempt to add to this buffer
|
||||
#[cold] fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<usize>
|
||||
{
|
||||
let this = self.project();
|
||||
match this.source.poll_next(cx) {
|
||||
Poll::Ready(None) => Poll::Ready(0),
|
||||
Poll::Ready(Some(buf)) if buf.as_ref().len() > 0 => {
|
||||
let buf = buf.as_ref();
|
||||
this.buffer.extend_from_slice(buf);
|
||||
Poll::Ready(buf.len())
|
||||
},
|
||||
_ => Poll::Pending,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]>, I: Stream<Item=T>> AsyncRead for StreamReader<I,T>
|
||||
{
|
||||
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll<io::Result<usize>> {
|
||||
let this = self.project();
|
||||
if this.buffer.len() != 0 {
|
||||
// We have buffered data: copy as much of it as fits into `buf`.
|
||||
let len = std::cmp::min(buf.len(), this.buffer.len());
Poll::Ready(Ok(bytes::copy_slice(buf, this.buffer.drain(..len).as_slice())))
|
||||
} else {
|
||||
// Buffer is empty, try to fill it
|
||||
match match this.source.poll_next(cx) {
|
||||
Poll::Ready(None) => Poll::Ready(0),
|
||||
Poll::Ready(Some(buf)) if buf.as_ref().len() > 0 => {
|
||||
let buf = buf.as_ref();
|
||||
this.buffer.extend_from_slice(buf);
|
||||
Poll::Ready(buf.len())
|
||||
},
|
||||
_ => Poll::Pending,
|
||||
} {
|
||||
Poll::Ready(0) => Poll::Ready(Ok(0)),
|
||||
Poll::Ready(x) => {
|
||||
// x has been written
|
||||
Poll::Ready(Ok(bytes::copy_slice(buf, this.buffer.drain(..x).as_slice())))
|
||||
},
|
||||
_ => Poll::Pending,
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]>, I: Stream<Item=T>> AsyncBufRead for StreamReader<I,T>
|
||||
{
|
||||
fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> {
|
||||
let this = self.project();
|
||||
if this.buffer.len() < 1 {
|
||||
// Fetch more into buffer
|
||||
match match this.source.poll_next(cx) {
|
||||
Poll::Ready(None) => Poll::Ready(0),
|
||||
Poll::Ready(Some(buf)) if buf.as_ref().len() > 0 => {
|
||||
let buf = buf.as_ref();
|
||||
this.buffer.extend_from_slice(buf);
|
||||
Poll::Ready(buf.len())
|
||||
},
|
||||
_ => Poll::Pending,
|
||||
} {
|
||||
Poll::Ready(0) => Poll::Ready(Ok(&[])), // should we return EOF error here?
|
||||
Poll::Ready(x) => Poll::Ready(Ok(&this.buffer[..x])),
|
||||
_ => Poll::Pending
|
||||
}
|
||||
} else {
|
||||
Poll::Ready(Ok(&this.buffer[..]))
|
||||
}
|
||||
}
|
||||
fn consume(self: Pin<&mut Self>, amt: usize) {
|
||||
self.project().buffer.drain(..amt);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests
|
||||
{
|
||||
use super::*;
|
||||
use tokio::{
|
||||
sync::{
|
||||
mpsc,
|
||||
},
|
||||
};
|
||||
#[tokio::test]
|
||||
async fn stream_of_vec()
|
||||
{
|
||||
let (mut tx, rx) = mpsc::channel(16);
|
||||
let sender = tokio::spawn(async move {
|
||||
tx.send("Hello ").await.unwrap();
|
||||
tx.send("world").await.unwrap();
|
||||
tx.send("\n").await.unwrap();
|
||||
tx.send("How ").await.unwrap();
|
||||
tx.send("are ").await.unwrap();
|
||||
tx.send("you").await.unwrap();
|
||||
});
|
||||
|
||||
let mut reader = StreamReader::new(rx);
|
||||
|
||||
let mut output = String::new();
|
||||
let mut read;
|
||||
while {read = reader.read_line(&mut output).await.expect("Failed to read"); read!=0} {
|
||||
println!("Read: {}", read);
|
||||
}
|
||||
|
||||
println!("Done: {:?}", output);
|
||||
sender.await.expect("Child panic");
|
||||
assert_eq!(&output[..], "Hello world\nHow are you");
|
||||
}
|
||||
}
|
||||
|
||||
/// A stream that chunks its input.
|
||||
#[pin_project]
|
||||
pub struct ChunkingStream<S, T, Into=Vec<T>>
|
||||
{
|
||||
#[pin] stream: Fuse<S>,
|
||||
buf: Vec<T>,
|
||||
cap: usize,
|
||||
_output: PhantomData<Into>,
|
||||
|
||||
push_now: bool,
|
||||
}
|
||||
|
||||
|
||||
impl<S, T, Into> ChunkingStream<S,T, Into>
|
||||
where S: Stream<Item=T>,
|
||||
Into: From<Vec<T>>
|
||||
{
|
||||
pub fn new(stream: S, sz: usize) -> Self
|
||||
{
|
||||
Self {
|
||||
stream: stream.fuse(),
|
||||
buf: Vec::with_capacity(sz),
|
||||
cap: sz,
|
||||
_output: PhantomData,
|
||||
push_now: false,
|
||||
}
|
||||
}
|
||||
pub fn into_inner(self) -> S
|
||||
{
|
||||
self.stream.into_inner()
|
||||
}
|
||||
pub fn cap(&self) -> usize
|
||||
{
|
||||
self.cap
|
||||
}
|
||||
pub fn buffer(&self) -> &[T]
|
||||
{
|
||||
&self.buf[..]
|
||||
}
|
||||
|
||||
pub fn get_ref(&self) -> &S
|
||||
{
|
||||
self.stream.get_ref()
|
||||
}
|
||||
|
||||
pub fn get_mut(&mut self)-> &mut S
|
||||
{
|
||||
self.stream.get_mut()
|
||||
}
|
||||
|
||||
/// Force the next read to send the buffer even if it's not full.
|
||||
///
|
||||
/// # Note
|
||||
/// The buffer still won't send if it's empty.
|
||||
pub fn push_now(&mut self)
|
||||
{
|
||||
self.push_now= true;
|
||||
}
|
||||
|
||||
/// Consume into the current held buffer
|
||||
pub fn into_buffer(self) -> Vec<T>
|
||||
{
|
||||
self.buf
|
||||
}
|
||||
|
||||
/// Take the buffer now
|
||||
pub fn take_now(&mut self) -> Into
|
||||
{
|
||||
std::mem::replace(&mut self.buf, Vec::with_capacity(self.cap)).into()
|
||||
}
|
||||
}
|
||||
|
||||
impl<S, T, Into> Stream for ChunkingStream<S,T, Into>
|
||||
where S: Stream<Item=T>,
|
||||
Into: From<Vec<T>>
|
||||
{
|
||||
type Item = Into;
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
while !(self.push_now && !self.buf.is_empty()) && self.buf.len() < self.cap {
|
||||
// Buffer isn't full, keep filling
|
||||
let this = self.as_mut().project();
|
||||
|
||||
match this.stream.poll_next(cx) {
|
||||
Poll::Ready(None) => {
|
||||
// Stream is over
|
||||
break;
|
||||
},
|
||||
Poll::Ready(Some(item)) => {
|
||||
this.buf.push(item);
|
||||
},
|
||||
_ => return Poll::Pending,
|
||||
}
|
||||
}
|
||||
debug!("Sending buffer of {} (cap {})", self.buf.len(), self.cap);
|
||||
// Buffer is full or we reach end of stream
|
||||
Poll::Ready(if self.buf.len() == 0 {
|
||||
None
|
||||
} else {
|
||||
let this = self.project();
|
||||
*this.push_now = false;
|
||||
let output = std::mem::replace(this.buf, Vec::with_capacity(*this.cap));
|
||||
Some(output.into())
|
||||
})
|
||||
}
|
||||
}
|
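A usage sketch (not from the source) of `ChunkingStream` above: items are grouped into `Vec`s of at most `cap` elements, and a short final chunk is still delivered:

use futures::stream::{self, StreamExt};

#[tokio::test]
async fn chunking_sketch() {
    let mut chunks = ChunkingStream::<_, u32, Vec<u32>>::new(stream::iter(1u32..=7), 3);
    assert_eq!(chunks.next().await, Some(vec![1, 2, 3]));
    assert_eq!(chunks.next().await, Some(vec![4, 5, 6]));
    assert_eq!(chunks.next().await, Some(vec![7]));
    assert_eq!(chunks.next().await, None);
}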
@ -0,0 +1,282 @@
|
||||
//! Server config
|
||||
use super::*;
|
||||
use std::{
|
||||
net::SocketAddr,
|
||||
path::Path,
|
||||
io,
|
||||
borrow::Cow,
|
||||
num::NonZeroU64,
|
||||
error,
|
||||
fmt,
|
||||
};
|
||||
use tokio::{
|
||||
fs::OpenOptions,
|
||||
prelude::*,
|
||||
time::Duration,
|
||||
io::BufReader,
|
||||
};
|
||||
use ipfilt::IpFilter;
|
||||
|
||||
pub const DEFAULT_FILE_LOCATION: &'static str = "markov.toml";
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
pub struct Config
|
||||
{
|
||||
pub bindpoint: String,
|
||||
pub file: String,
|
||||
pub max_content_length: u64,
|
||||
pub max_gen_size: usize,
|
||||
pub save_interval_secs: Option<NonZeroU64>,
|
||||
pub trust_x_forwarded_for: bool,
|
||||
#[serde(default)]
|
||||
pub feed_bounds: String,
|
||||
#[serde(default)]
|
||||
pub filter: FilterConfig,
|
||||
#[serde(default)]
|
||||
pub writer: WriterConfig,
|
||||
#[serde(default)]
|
||||
pub mask: IpFilter,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
pub struct FilterConfig
|
||||
{
|
||||
#[serde(default)]
|
||||
inbound: String,
|
||||
#[serde(default)]
|
||||
outbound: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
pub struct WriterConfig
|
||||
{
|
||||
pub backlog: usize,
|
||||
pub internal_backlog: usize,
|
||||
pub capacity: usize,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub throttle_ms: Option<u64>,
|
||||
}
|
||||
|
||||
impl Default for WriterConfig
|
||||
{
|
||||
#[inline]
|
||||
fn default() -> Self
|
||||
{
|
||||
Self {
|
||||
backlog: 32,
|
||||
internal_backlog: 8,
|
||||
capacity: 4,
|
||||
timeout_ms: None,
|
||||
throttle_ms: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WriterConfig
|
||||
{
|
||||
fn create_settings(self, bounds: range::DynRange<usize>) -> handle::Settings
|
||||
{
|
||||
|
||||
handle::Settings{
|
||||
backlog: self.backlog,
|
||||
internal_backlog: self.internal_backlog,
|
||||
capacity: self.capacity,
|
||||
timeout: self.timeout_ms.map(tokio::time::Duration::from_millis).unwrap_or(handle::DEFAULT_TIMEOUT),
|
||||
throttle: self.throttle_ms.map(tokio::time::Duration::from_millis),
|
||||
bounds,
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl FilterConfig
|
||||
{
|
||||
fn get_inbound_filter(&self) -> sanitise::filter::Filter
|
||||
{
|
||||
let filt: sanitise::filter::Filter = self.inbound.parse().unwrap();
|
||||
if !filt.is_empty()
|
||||
{
|
||||
info!("Loaded inbound filter: {:?}", filt.iter().collect::<String>());
|
||||
}
|
||||
filt
|
||||
}
|
||||
fn get_outbound_filter(&self) -> sanitise::filter::Filter
|
||||
{
|
||||
let filt: sanitise::filter::Filter = self.outbound.parse().unwrap();
|
||||
if !filt.is_empty()
|
||||
{
|
||||
info!("Loaded outbound filter: {:?}", filt.iter().collect::<String>());
|
||||
}
|
||||
filt
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Config
|
||||
{
|
||||
#[inline]
|
||||
fn default() -> Self
|
||||
{
|
||||
Self {
|
||||
bindpoint: SocketAddr::from(([127,0,0,1], 8001)).to_string(),
|
||||
file: "chain.dat".to_owned(),
|
||||
max_content_length: 1024 * 1024 * 4,
|
||||
max_gen_size: 256,
|
||||
save_interval_secs: Some(unsafe{NonZeroU64::new_unchecked(2)}),
|
||||
trust_x_forwarded_for: false,
|
||||
filter: Default::default(),
|
||||
feed_bounds: "2..".to_owned(),
|
||||
writer: Default::default(),
|
||||
mask: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config
|
||||
{
|
||||
/// Try to generate a config cache for this instance.
|
||||
pub fn try_gen_cache(&self) -> Result<Cache, InvalidConfigError>
|
||||
{
|
||||
macro_rules! section {
|
||||
($name:literal, $expr:expr) => {
|
||||
match $expr {
|
||||
Ok(v) => Ok(v),
|
||||
Err(e) => Err(InvalidConfigError($name, Box::new(e))),
|
||||
}
|
||||
}
|
||||
}
|
||||
use std::ops::RangeBounds;
|
||||
|
||||
let feed_bounds = section!("feed_bounds", self.parse_feed_bounds()).and_then(|bounds| if bounds.contains(&0) {
|
||||
Err(InvalidConfigError("feed_bounds", Box::new(opaque_error!("Bounds not allowed to contains 0 (they were `{}`)", bounds))))
|
||||
} else {
|
||||
Ok(bounds)
|
||||
})?;
|
||||
Ok(Cache {
|
||||
handler_settings: self.writer.create_settings(feed_bounds.clone()),
|
||||
feed_bounds,
|
||||
inbound_filter: self.filter.get_inbound_filter(),
|
||||
outbound_filter: self.filter.get_outbound_filter(),
|
||||
})
|
||||
}
|
||||
/// Try to parse the `feed_bounds`
|
||||
fn parse_feed_bounds(&self) -> Result<range::DynRange<usize>, range::ParseError>
|
||||
{
|
||||
if self.feed_bounds.len() == 0 {
|
||||
Ok(feed::DEFAULT_FEED_BOUNDS.into())
|
||||
} else {
|
||||
self.feed_bounds.parse()
|
||||
}
|
||||
}
|
||||
pub fn save_interval(&self) -> Option<Duration>
|
||||
{
|
||||
self.save_interval_secs.map(|x| Duration::from_secs(x.into()))
|
||||
}
|
||||
pub async fn load(from: impl AsRef<Path>) -> io::Result<Self>
|
||||
{
|
||||
let file = OpenOptions::new()
|
||||
.read(true)
|
||||
.open(from).await?;
|
||||
|
||||
let mut buffer= String::new();
|
||||
let reader = BufReader::new(file);
|
||||
let mut lines = reader.lines();
|
||||
while let Some(line) = lines.next_line().await? {
|
||||
buffer.push_str(&line[..]);
|
||||
buffer.push('\n');
|
||||
}
|
||||
toml::de::from_str(&buffer[..]).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))
|
||||
}
|
||||
|
||||
pub async fn save(&self, to: impl AsRef<Path>) -> io::Result<()>
|
||||
{
|
||||
let config = toml::ser::to_string_pretty(self).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
|
||||
let mut file = OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.open(to).await?;
|
||||
file.write_all(config.as_bytes()).await?;
|
||||
file.shutdown().await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to load config file specified by args, or default config file
|
||||
pub fn load() -> impl futures::future::Future<Output =Option<Config>>
|
||||
{
|
||||
load_args(std::env::args().skip(1))
|
||||
}
|
||||
|
||||
async fn load_args<I: Iterator<Item=String>>(mut from: I) -> Option<Config>
|
||||
{
|
||||
let place = if let Some(arg) = from.next() {
|
||||
trace!("File {:?} provided", arg);
|
||||
Cow::Owned(arg)
|
||||
} else {
|
||||
warn!("No config file provided. Using default location {:?}", DEFAULT_FILE_LOCATION);
|
||||
Cow::Borrowed(DEFAULT_FILE_LOCATION)
|
||||
};
|
||||
|
||||
match Config::load(place.as_ref()).await {
|
||||
Ok(cfg) => {
|
||||
info!("Loaded config file {:?}", place);
|
||||
Some(cfg)
|
||||
},
|
||||
Err(err) => {
|
||||
error!("Failed to load config file from {:?}: {}", place, err);
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct InvalidConfigError(&'static str, Box<dyn error::Error+ 'static>);
|
||||
|
||||
impl InvalidConfigError
|
||||
{
|
||||
pub fn field(&self) -> &str
|
||||
{
|
||||
&self.0[..]
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for InvalidConfigError
|
||||
{
|
||||
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
|
||||
Some(self.1.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for InvalidConfigError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
write!(f,"failed to parse field `{}`: {}", self.0, self.1)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Caches some parsed config arguments
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub struct Cache
|
||||
{
|
||||
pub feed_bounds: range::DynRange<usize>,
|
||||
pub inbound_filter: sanitise::filter::Filter,
|
||||
pub outbound_filter: sanitise::filter::Filter,
|
||||
pub handler_settings: handle::Settings,
|
||||
}
|
||||
|
||||
impl fmt::Debug for Cache
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
f.debug_struct("Cache")
|
||||
.field("feed_bounds", &self.feed_bounds)
|
||||
.field("inbound_filter", &self.inbound_filter.iter().collect::<String>())
|
||||
.field("outbound_filter", &self.outbound_filter.iter().collect::<String>())
|
||||
.field("handler_settings", &self.handler_settings)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,183 @@
|
||||
//! Extensions
|
||||
use super::*;
|
||||
use std::{
|
||||
iter,
|
||||
ops::{
|
||||
Range,
|
||||
Deref,DerefMut,
|
||||
},
|
||||
marker::{
|
||||
PhantomData,
|
||||
Send,
|
||||
},
|
||||
};
|
||||
|
||||
pub trait StringJoinExt: Sized
|
||||
{
|
||||
fn join<P: AsRef<str>>(self, sep: P) -> String;
|
||||
}
|
||||
|
||||
impl<I,T> StringJoinExt for I
|
||||
where I: IntoIterator<Item=T>,
|
||||
T: AsRef<str>
|
||||
{
|
||||
fn join<P: AsRef<str>>(self, sep: P) -> String
|
||||
{
|
||||
let mut string = String::new();
|
||||
for (first, s) in iter::successors(Some(true), |_| Some(false)).zip(self.into_iter())
|
||||
{
|
||||
if !first {
|
||||
string.push_str(sep.as_ref());
|
||||
}
|
||||
string.push_str(s.as_ref());
|
||||
}
|
||||
string
|
||||
}
|
||||
}
|
||||
|
||||
pub trait FindSliceBounds
|
||||
{
|
||||
type SliceType: ?Sized;
|
||||
fn slice_bounds(&self, from: &Self::SliceType) -> Range<usize>;
|
||||
}
|
||||
|
||||
impl<T: ?Sized + AsRef<str>> FindSliceBounds for T
|
||||
{
|
||||
type SliceType = str;
|
||||
fn slice_bounds(&self, from: &Self::SliceType) -> Range<usize>{
|
||||
let this = self.as_ref();
|
||||
unsafe {
|
||||
let sptr = from.as_ptr();
|
||||
let eptr = sptr.add(from.len());
|
||||
|
||||
let ssptr = this.as_ptr();
|
||||
let septr = ssptr.add(this.len());
|
||||
|
||||
let sptr = sptr as usize;
|
||||
let ssptr = ssptr as usize;
|
||||
let eptr = eptr as usize;
|
||||
let septr = septr as usize;
|
||||
|
||||
assert!(sptr >= ssptr && sptr <= septr, "Start index of slice is outside the bounds of self");
|
||||
assert!(eptr >= ssptr && eptr <= septr, "End index of slice is outside the bounds of self");
|
||||
|
||||
(sptr - ssptr)..(eptr - ssptr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait SliceInPlace
|
||||
{
|
||||
fn drain_inverse<R: std::ops::RangeBounds<usize>>(&mut self, slice: R);
|
||||
}
|
||||
|
||||
impl SliceInPlace for String
|
||||
{
|
||||
fn drain_inverse<R: std::ops::RangeBounds<usize>>(&mut self, slice: R)
|
||||
{
|
||||
use std::ops::Bound;
|
||||
match slice.end_bound() {
|
||||
Bound::Excluded(&ex) => drop(self.drain(ex..)),
|
||||
Bound::Included(&inc) => drop(self.drain(inc+1..)),
|
||||
_ => (),
|
||||
};
|
||||
match slice.start_bound() {
|
||||
Bound::Included(&ex) => drop(self.drain(..ex)),
|
||||
Bound::Excluded(&ex) => drop(self.drain(..ex+1)),
|
||||
_ => ()
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
pub trait TrimInPlace
|
||||
{
|
||||
fn trim_in_place(&mut self) -> &mut Self;
|
||||
}
|
||||
|
||||
impl TrimInPlace for String
|
||||
{
|
||||
fn trim_in_place(&mut self) -> &mut Self {
|
||||
let bounds = self.slice_bounds(self.trim());
|
||||
self.drain_inverse(bounds);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub trait MapTuple2<T,U>
|
||||
{
|
||||
fn map<V,W, F: FnOnce((T,U)) -> (V,W)>(self, fun: F) -> (V,W);
|
||||
}
|
||||
|
||||
impl<T,U> MapTuple2<T,U> for (T,U)
|
||||
{
|
||||
#[inline] fn map<V,W, F: FnOnce((T,U)) -> (V,W)>(self, fun: F) -> (V,W)
|
||||
{
|
||||
fun(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// To make sure we don't keep this data across an `await` boundary.
|
||||
#[repr(transparent)]
|
||||
pub struct AssertNotSend<T>(pub T, PhantomData<*const T>);
|
||||
|
||||
impl<T> AssertNotSend<T>
|
||||
{
|
||||
pub const fn new(from :T) -> Self
|
||||
{
|
||||
Self(from, PhantomData)
|
||||
}
|
||||
pub fn into_inner(self) -> T
|
||||
{
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Require a future is Send
|
||||
#[inline(always)] pub fn require_send<T: Send>(t: T) -> T
|
||||
{
|
||||
t
|
||||
}
|
||||
|
||||
/// Require a value implements a specific trait
|
||||
#[macro_export] macro_rules! require_impl {
|
||||
($t:path: $val:expr) => {
|
||||
{
|
||||
#[inline(always)] fn require_impl<T: $t >(val: T) -> T {
|
||||
val
|
||||
}
|
||||
require_impl($val)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Deref for AssertNotSend<T>
|
||||
{
|
||||
type Target = T;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
impl<T> DerefMut for AssertNotSend<T>
|
||||
{
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ChunkStreamExt<T>: Sized
|
||||
{
|
||||
fn chunk_into<I: From<Vec<T>>>(self, sz: usize) -> chunking::ChunkingStream<Self,T,I>;
|
||||
fn chunk(self, sz: usize) -> chunking::ChunkingStream<Self, T>
|
||||
{
|
||||
self.chunk_into(sz)
|
||||
}
|
||||
}
|
||||
|
||||
impl<S, T> ChunkStreamExt<T> for S
|
||||
where S: Stream<Item=T>
|
||||
{
|
||||
fn chunk_into<I: From<Vec<T>>>(self, sz: usize) -> chunking::ChunkingStream<Self,T,I>
|
||||
{
|
||||
chunking::ChunkingStream::new(self, sz)
|
||||
}
|
||||
}
|
@ -0,0 +1,160 @@
|
||||
//! Feeding the chain
|
||||
use super::*;
|
||||
#[cfg(any(feature="feed-sentance", feature="split-sentance"))]
|
||||
use sanitise::Sentance;
|
||||
#[allow(unused_imports)]
|
||||
use futures::stream;
|
||||
|
||||
|
||||
pub const DEFAULT_FEED_BOUNDS: std::ops::RangeFrom<usize> = 2..;
|
||||
|
||||
/// Feed `what` into `chain`, at least `bounds` tokens.
|
||||
///
|
||||
/// # Tokenising
|
||||
/// How the tokens are split within this function (which operates on single buffers) is determined largely by the `split-sentance` and `feed-sentance` features, which control the use of the sentance API.
|
||||
///
|
||||
/// ## Pipeline
|
||||
/// Since this is called on single buffers, it happens after the `split-newlines` tokenising if that is enabled, and thus the sentance API only operates on each separate line when that feature is enabled, regardless of `always-aggregate`, `feed-sentance` or `split-sentance`.
|
||||
/// The pipeline within this function, after the newline split (if enabled), is:
|
||||
///
|
||||
/// * `feed-sentance`
///   * Feed the buffer through the sentance split tokeniser
///   * Feed the sentances through the word split tokeniser
///   * Feed each collection of words into the chain separately
/// * `split-sentance`
///   * Feed the buffer through the sentance split tokeniser
///   * Feed the sentances through the word split tokeniser
///   * Feed the flattened collection into the chain once, concatenated.
/// * Neither
///   * Feed the buffer through the word split tokeniser
///   * Feed the collection into the chain
///
/// A short usage sketch follows after this function.
|
||||
pub fn feed(chain: &mut Chain<String>, what: impl AsRef<str>, bounds: impl std::ops::RangeBounds<usize>)
|
||||
{
|
||||
cfg_if! {
|
||||
if #[cfg(feature="feed-sentance")] {
|
||||
let map = Sentance::new_iter(&what) //get each sentance in string
|
||||
.map(|what| what.words()
|
||||
.map(|s| s.to_owned()).collect::<Vec<_>>());
|
||||
debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds");
|
||||
for map in map { // feed each sentance separately
|
||||
if bounds.contains(&map.len()) {
|
||||
debug!("Feeding chain {} items", map.len());
|
||||
chain.feed(map);
|
||||
}
|
||||
else {
|
||||
debug!("Ignoring feed of invalid length {}: {:?}", map.len(), map);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
cfg_if!{
|
||||
if #[cfg(feature="split-sentance")] {
|
||||
let map = Sentance::new_iter(&what) //get each sentance in string
|
||||
.map(|what| what.words())
|
||||
.flatten() // add all into one buffer
|
||||
.map(|s| s.to_owned()).collect::<Vec<_>>();
|
||||
} else {
|
||||
let map: Vec<_> = sanitise::words(what.as_ref()).map(ToOwned::to_owned)
|
||||
.collect();
|
||||
}
|
||||
}
|
||||
debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds");
|
||||
if bounds.contains(&map.len()) {
|
||||
//debug!("Feeding chain {} items", map.len());
|
||||
chain.feed(map);
|
||||
}
|
||||
else {
|
||||
debug!("Ignoring feed of invalid length {}: {:?}", map.len(), map);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
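// Sketch only (not part of the original source): driving `feed` by hand and then
// generating text with the same markov `Chain` API used in api/single.rs above.
#[test]
fn feed_sketch() {
    let mut chain = Chain::new();
    // With `split-newlines` semantics each line is fed separately; bounds of `2..`
    // reject single-word lines such as "hi".
    for line in "hello there world\nhi".split('\n') {
        feed(&mut chain, line, 2..);
    }
    assert!(!chain.is_empty());
    println!("{}", chain.generate_str());
}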
pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream<Item = Result<impl Buf, impl std::error::Error + 'static>>) -> Result<usize, FillBodyError> {
|
||||
|
||||
let mut written = 0usize;
|
||||
if_debug! {
|
||||
let timer = std::time::Instant::now();
|
||||
}
|
||||
//let bounds = &state.config_cache().feed_bounds;
|
||||
macro_rules! feed {
|
||||
($buffer:expr) => {
|
||||
{
|
||||
let buffer = $buffer;
|
||||
state.chain_write(buffer).await.map_err(|_| FillBodyError)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cfg_if!{
|
||||
if #[cfg(any(not(feature="split-newlines"), feature="always-aggregate"))] {
|
||||
let mut body = body;
|
||||
let mut buffer = Vec::new();
|
||||
while let Some(buf) = body.next().await {
|
||||
let mut body = buf.map_err(|_| FillBodyError)?;
|
||||
while body.has_remaining() {
|
||||
if body.bytes().len() > 0 {
|
||||
buffer.extend_from_slice(body.bytes());
|
||||
let cnt = body.bytes().len();
|
||||
body.advance(cnt);
|
||||
written += cnt;
|
||||
}
|
||||
}
|
||||
}
|
||||
let buffer = std::str::from_utf8(&buffer[..]).map_err(|_| FillBodyError)?;
|
||||
let buffer = state.inbound_filter().filter_cow(buffer);
|
||||
info!("{} -> {:?}", who, buffer);
|
||||
cfg_if! {
|
||||
if #[cfg(feature="split-newlines")] {
|
||||
feed!(stream::iter(buffer.split('\n').filter(|line| !line.trim().is_empty())
|
||||
.map(|x| x.to_owned())))
|
||||
} else {
|
||||
feed!(stream::once(async move{buffer.into_owned()}));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
use tokio::prelude::*;
|
||||
|
||||
let reader = chunking::StreamReader::new(body.filter_map(|x| x.map(|mut x| x.to_bytes()).ok()));
|
||||
let lines = reader.lines();
|
||||
|
||||
feed!(lines.filter_map(|x| x.ok().and_then(|line| {
|
||||
let line = state.inbound_filter().filter_cow(&line);
|
||||
let line = line.trim();
|
||||
|
||||
if !line.is_empty() {
|
||||
//#[cfg(not(feature="hog-buffer"))]
|
||||
//let mut chain = state.chain().write().await; // Acquire mutex once per line? Is this right?
|
||||
|
||||
info!("{} -> {:?}", who, line);
|
||||
written+=line.len();
|
||||
Some(line.to_owned())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
||||
})));
|
||||
}
|
||||
}
|
||||
|
||||
if_debug! {
|
||||
trace!("Write took {}ms", timer.elapsed().as_millis());
|
||||
}
|
||||
Ok(written)
|
||||
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FillBodyError;
|
||||
|
||||
impl error::Error for FillBodyError{}
|
||||
impl warp::reject::Reject for FillBodyError{}
|
||||
impl fmt::Display for FillBodyError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
write!(f, "failed to feed chain with this data")
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,74 @@
|
||||
use std::{
|
||||
net::{
|
||||
IpAddr,
|
||||
AddrParseError,
|
||||
},
|
||||
str,
|
||||
error,
|
||||
fmt,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct XFormatError;
|
||||
|
||||
impl error::Error for XFormatError{}
|
||||
|
||||
impl fmt::Display for XFormatError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
write!(f, "X-Forwarded-For was not in the correct format")
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialOrd, PartialEq, Eq, Default)]
|
||||
pub struct XForwardedFor(Vec<IpAddr>);
|
||||
|
||||
impl XForwardedFor
|
||||
{
|
||||
pub fn new() -> Self
|
||||
{
|
||||
Self(Vec::new())
|
||||
}
|
||||
pub fn single(ip: impl Into<IpAddr>) -> Self
|
||||
{
|
||||
Self(vec![ip.into()])
|
||||
}
|
||||
pub fn addrs(&self) -> &[IpAddr]
|
||||
{
|
||||
&self.0[..]
|
||||
}
|
||||
|
||||
pub fn into_first(self) -> Option<IpAddr>
|
||||
{
|
||||
self.0.into_iter().next()
|
||||
}
|
||||
|
||||
pub fn into_addrs(self) -> Vec<IpAddr>
|
||||
{
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl str::FromStr for XForwardedFor
|
||||
{
|
||||
type Err = XFormatError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let mut output = Vec::new();
|
||||
for next in s.split(',')
|
||||
{
|
||||
output.push(next.trim().parse()?)
|
||||
}
|
||||
Ok(Self(output))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<AddrParseError> for XFormatError
|
||||
{
|
||||
#[inline(always)] fn from(_: AddrParseError) -> Self
|
||||
{
|
||||
Self
|
||||
}
|
||||
}
|
||||
|
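A parsing sketch (not from the source) for the `FromStr` impl above; the addresses are documentation examples:

#[test]
fn forwarded_parse_sketch() {
    let fwd: XForwardedFor = "203.0.113.7, 10.0.0.1".parse().unwrap();
    assert_eq!(fwd.addrs().len(), 2);
    assert_eq!(fwd.into_first(), Some("203.0.113.7".parse().unwrap()));
}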
@ -0,0 +1,46 @@
|
||||
//! Generating the strings
|
||||
use super::*;
|
||||
use tokio::sync::mpsc::error::SendError;
|
||||
use futures::StreamExt;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct GenBodyError(Option<String>);
|
||||
|
||||
impl error::Error for GenBodyError{}
|
||||
impl fmt::Display for GenBodyError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
if let Some(z) = &self.0 {
|
||||
write!(f, "failed to write read string {:?} to body", z)
|
||||
} else {
|
||||
write!(f, "failed to read string from chain. it might be empty.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub async fn body(state: State, num: Option<usize>, mut output: mpsc::Sender<String>) -> Result<(), GenBodyError>
|
||||
{
|
||||
let mut chain = state.chain_read();
|
||||
let filter = state.outbound_filter();
|
||||
match num {
|
||||
Some(num) if num < state.config().max_gen_size => {
|
||||
let mut chain = chain.take(num);
|
||||
while let Some(string) = chain.next().await {
|
||||
output.send(filter.filter_owned(string)).await?;
|
||||
}
|
||||
},
|
||||
_ => output.send(filter.filter_owned(chain.next().await.ok_or_else(GenBodyError::default)?)).await?,
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
impl From<SendError<String>> for GenBodyError
|
||||
{
|
||||
#[inline] fn from(from: SendError<String>) -> Self
|
||||
{
|
||||
Self(Some(from.0))
|
||||
}
|
||||
}
|
@ -0,0 +1,392 @@
|
||||
//! Chain handler.
|
||||
use super::*;
|
||||
use std::{
|
||||
marker::Send,
|
||||
sync::Weak,
|
||||
num::NonZeroUsize,
|
||||
task::{Poll, Context,},
|
||||
pin::Pin,
|
||||
};
|
||||
use tokio::{
|
||||
sync::{
|
||||
RwLock,
|
||||
RwLockReadGuard,
|
||||
mpsc::{
|
||||
self,
|
||||
error::SendError,
|
||||
},
|
||||
watch,
|
||||
Notify,
|
||||
},
|
||||
task::JoinHandle,
|
||||
time::{
|
||||
self,
|
||||
Duration,
|
||||
},
|
||||
};
|
||||
use futures::StreamExt;
|
||||
|
||||
pub const DEFAULT_TIMEOUT: Duration= Duration::from_secs(5);
|
||||
|
||||
/// Settings for chain handler
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Settings
|
||||
{
|
||||
pub backlog: usize,
|
||||
pub internal_backlog: usize,
|
||||
pub capacity: usize,
|
||||
pub timeout: Duration,
|
||||
pub throttle: Option<Duration>,
|
||||
pub bounds: range::DynRange<usize>,
|
||||
}
|
||||
|
||||
impl Settings
|
||||
{
|
||||
/// Should we keep this string.
|
||||
#[inline] fn matches(&self, _s: &str) -> bool
|
||||
{
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Settings
|
||||
{
|
||||
#[inline]
|
||||
fn default() -> Self
|
||||
{
|
||||
Self {
|
||||
backlog: 32,
|
||||
internal_backlog: 8,
|
||||
capacity: 4,
|
||||
timeout: Duration::from_secs(5),
|
||||
throttle: Some(Duration::from_millis(200)),
|
||||
bounds: feed::DEFAULT_FEED_BOUNDS.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
struct HostInner<T>
|
||||
{
|
||||
input: mpsc::Receiver<Vec<T>>,
|
||||
shutdown: watch::Receiver<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Handle<T: Send+ chain::Chainable>
|
||||
{
|
||||
chain: RwLock<chain::Chain<T>>,
|
||||
input: mpsc::Sender<Vec<T>>,
|
||||
opt: Settings,
|
||||
notify_write: Arc<Notify>,
|
||||
push_now: Arc<Notify>,
|
||||
shutdown: watch::Sender<bool>,
|
||||
|
||||
/// Data used only for the worker task.
|
||||
host: msg::Once<HostInner<T>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ChainHandle<T: Send + chain::Chainable>(Arc<Box<Handle<T>>>);
|
||||
|
||||
impl<T: Send+ chain::Chainable + 'static> ChainHandle<T>
|
||||
{
|
||||
pub fn with_settings(chain: chain::Chain<T>, opt: Settings) -> Self
|
||||
{
|
||||
let (shutdown_tx, shutdown) = watch::channel(false);
|
||||
let (itx, irx) = mpsc::channel(opt.backlog);
|
||||
Self(Arc::new(Box::new(Handle{
|
||||
chain: RwLock::new(chain),
|
||||
input: itx,
|
||||
opt,
|
||||
push_now: Arc::new(Notify::new()),
|
||||
notify_write: Arc::new(Notify::new()),
|
||||
shutdown: shutdown_tx,
|
||||
|
||||
host: msg::Once::new(HostInner{
|
||||
input: irx,
|
||||
shutdown,
|
||||
})
|
||||
})))
|
||||
}
|
||||
|
||||
/// Acquire the chain read lock
|
||||
async fn chain(&self) -> RwLockReadGuard<'_, chain::Chain<T>>
|
||||
{
|
||||
self.0.chain.read().await
|
||||
}
|
||||
|
||||
/// A reference to the chain
|
||||
pub fn chain_ref(&self) -> &RwLock<chain::Chain<T>>
|
||||
{
|
||||
&self.0.chain
|
||||
}
|
||||
|
||||
/// Create a stream that reads generated values forever.
|
||||
pub fn read(&self) -> ChainStream<T>
|
||||
{
|
||||
ChainStream{
|
||||
chain: Arc::downgrade(&self.0),
|
||||
buffer: Vec::with_capacity(self.0.opt.backlog),
|
||||
}
|
||||
}
|
||||
|
||||
/// Send this buffer to the chain
|
||||
pub fn write(&self, buf: Vec<T>) -> impl futures::Future<Output = Result<(), SendError<Vec<T>>>> + 'static
|
||||
{
|
||||
let mut write = self.0.input.clone();
|
||||
async move {
|
||||
write.send(buf).await
|
||||
}
|
||||
}
|
||||
|
||||
/// Send this stream buffer to the chain
|
||||
pub fn write_stream<'a, I: Stream<Item=T>>(&self, buf: I) -> impl futures::Future<Output = Result<(), SendError<Vec<T>>>> + 'a
|
||||
where I: 'a
|
||||
{
|
||||
let mut write = self.0.input.clone();
|
||||
async move {
|
||||
write.send(buf.collect().await).await
|
||||
}
|
||||
}
|
||||
|
||||
/// Send this buffer to the chain
|
||||
pub async fn write_in_place(&self, buf: Vec<T>) -> Result<(), SendError<Vec<T>>>
|
||||
{
|
||||
self.0.input.clone().send(buf).await
|
||||
}
|
||||
|
||||
/// A referencer for the notifier
|
||||
pub fn notify_when(&self) -> &Arc<Notify>
|
||||
{
|
||||
&self.0.notify_write
|
||||
}
|
||||
|
||||
/// Force the pending buffers to be written to the chain now
|
||||
pub fn push_now(&self)
|
||||
{
|
||||
self.0.push_now.notify();
|
||||
}
|
||||
|
||||
/// Hang the worker thread, preventing it from taking any more inputs and also flushing it.
|
||||
///
|
||||
/// # Panics
|
||||
/// If there was no worker thread.
|
||||
pub fn hang(&self)
|
||||
{
|
||||
trace!("Communicating hang request");
|
||||
self.0.shutdown.broadcast(true).expect("Failed to communicate hang");
|
||||
}
|
||||
}
|
||||
|
||||
impl ChainHandle<String>
|
||||
{
|
||||
#[deprecated = "use read() pls"]
|
||||
pub async fn generate_body(&self, state: &state::State, num: Option<usize>, mut output: mpsc::Sender<String>) -> Result<(), SendError<String>>
|
||||
{
|
||||
let chain = self.chain().await;
|
||||
if !chain.is_empty() {
|
||||
let filter = state.outbound_filter();
|
||||
match num {
|
||||
Some(num) if num < state.config().max_gen_size => {
|
||||
//This could DoS writes, potentially.
|
||||
for string in chain.str_iter_for(num) {
|
||||
output.send(filter.filter_owned(string)).await?;
|
||||
}
|
||||
},
|
||||
_ => output.send(filter.filter_owned(chain.generate_str())).await?,
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Host this handle on the current task.
|
||||
///
|
||||
/// # Panics
|
||||
/// If `from` has already been hosted.
|
||||
pub async fn host(from: ChainHandle<String>)
|
||||
{
|
||||
let opt = from.0.opt.clone();
|
||||
let mut data = from.0.host.unwrap().await;
|
||||
|
||||
let (mut tx, mut child) = {
|
||||
// The `real` input channel.
|
||||
let from = from.clone();
|
||||
let opt = opt.clone();
|
||||
let (tx, rx) = mpsc::channel::<Vec<Vec<_>>>(opt.internal_backlog);
|
||||
(tx, tokio::spawn(async move {
|
||||
let mut rx = if let Some(thr) = opt.throttle {
|
||||
time::throttle(thr, rx).boxed()
|
||||
} else {
|
||||
rx.boxed()
|
||||
};
|
||||
trace!("child: Begin waiting on parent");
|
||||
while let Some(item) = rx.next().await {
|
||||
if item.len() > 0 {
|
||||
info!("Write lock acq");
|
||||
let mut lock = from.0.chain.write().await;
|
||||
for item in item.into_iter()
|
||||
{
|
||||
use std::ops::DerefMut;
|
||||
for item in item.into_iter() {
|
||||
feed::feed(lock.deref_mut(), item, &from.0.opt.bounds);
|
||||
}
|
||||
}
|
||||
trace!("Signalling write");
|
||||
from.0.notify_write.notify();
|
||||
}
|
||||
}
|
||||
trace!("child: exiting");
|
||||
}))
|
||||
};
|
||||
|
||||
trace!("Begin polling on child");
|
||||
tokio::select!{
|
||||
v = &mut child => {
|
||||
match v {
|
||||
/*#[cold]*/ Ok(_) => {warn!("Child exited before we have? This should probably never happen.")},//Should never happen.
|
||||
Err(e) => {error!("Child exited abnormally. Aborting: {}", e)}, //Child panic or cancel.
|
||||
}
|
||||
},
|
||||
_ = async move {
|
||||
let mut rx = data.input.chunk(opt.capacity); //we don't even need this tbh, oh well.
|
||||
|
||||
if !data.shutdown.recv().await.unwrap_or(true) { //first shutdown we get for free
|
||||
while Arc::strong_count(&from.0) > 2 {
|
||||
if *data.shutdown.borrow() {
|
||||
break;
|
||||
}
|
||||
|
||||
tokio::select!{
|
||||
Some(true) = data.shutdown.recv() => {
|
||||
debug!("Got shutdown (hang) request. Sending now then breaking");
|
||||
|
||||
let mut rest = {
|
||||
let irx = rx.get_mut();
|
||||
irx.close(); //accept no more inputs
|
||||
let mut output = Vec::with_capacity(opt.capacity);
|
||||
while let Ok(item) = irx.try_recv() {
|
||||
output.push(item);
|
||||
}
|
||||
output
|
||||
};
|
||||
rest.extend(rx.take_now());
|
||||
if rest.len() > 0 {
|
||||
if let Err(err) = tx.send(rest).await {
|
||||
error!("Failed to force send buffer, exiting now: {}", err);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
_ = time::delay_for(opt.timeout) => {
|
||||
trace!("Setting push now");
|
||||
rx.push_now();
|
||||
}
|
||||
_ = from.0.push_now.notified() => {
|
||||
debug!("Got force push signal");
|
||||
let take =rx.take_now();
|
||||
rx.push_now();
|
||||
if take.len() > 0 {
|
||||
if let Err(err) = tx.send(take).await {
|
||||
error!("Failed to force send buffer: {}", err);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(buffer) = rx.next() => {
|
||||
debug!("Sending {} (cap {})", buffer.len(), buffer.capacity());
|
||||
if let Err(err) = tx.send(buffer).await {
|
||||
// Receive closed?
|
||||
//
|
||||
// This probably shouldn't happen, as we `select!` for it up there and child never calls `close()` on `rx`.
|
||||
// In any case, it means we should abort.
|
||||
/*#[cold]*/ error!("Failed to send buffer: {}", err);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let last = rx.into_buffer();
|
||||
if last.len() > 0 {
|
||||
if let Err(err) = tx.send(last).await {
|
||||
error!("Failed to force send last part of buffer: {}", err);
|
||||
} else {
|
||||
trace!("Sent rest of buffer");
|
||||
}
|
||||
}
|
||||
} => {
|
||||
// Normal exit
|
||||
trace!("Normal exit")
|
||||
},
|
||||
}
|
||||
trace!("Waiting on child");
|
||||
// No more handles except child, no more possible inputs.
|
||||
child.await.expect("Child panic");
|
||||
trace!("Returning");
|
||||
}
|
||||
|
||||
/// Spawn a new chain handler for this chain.
|
||||
pub fn spawn(from: chain::Chain<String>, opt: Settings) -> (JoinHandle<()>, ChainHandle<String>)
|
||||
{
|
||||
debug!("Spawning with opt: {:?}", opt);
|
||||
let handle = ChainHandle::with_settings(from, opt);
|
||||
(tokio::spawn(host(handle.clone())), handle)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ChainStream<T: Send + chain::Chainable>
|
||||
{
|
||||
chain: Weak<Box<Handle<T>>>,
|
||||
buffer: Vec<T>,
|
||||
}
|
||||
|
||||
impl ChainStream<String>
|
||||
{
|
||||
async fn try_pull(&mut self, n: usize) -> Option<NonZeroUsize>
|
||||
{
|
||||
if n == 0 {
|
||||
return None;
|
||||
}
|
||||
if let Some(read) = self.chain.upgrade() {
|
||||
let chain = read.chain.read().await;
|
||||
if chain.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let n = if n == 1 {
|
||||
self.buffer.push(chain.generate_str());
|
||||
1
|
||||
} else {
|
||||
self.buffer.extend(chain.str_iter_for(n));
|
||||
n //for now
|
||||
};
|
||||
Some(unsafe{NonZeroUsize::new_unchecked(n)})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for ChainStream<String>
|
||||
{
|
||||
type Item = String;
|
||||
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
use futures::Future;
|
||||
let this = self.get_mut();
|
||||
|
||||
if this.buffer.len() == 0 {
|
||||
let pull = this.try_pull(this.buffer.capacity());
|
||||
tokio::pin!(pull);
|
||||
match pull.poll(cx) {
|
||||
Poll::Ready(Some(_)) => {},
|
||||
Poll::Pending => return Poll::Pending,
|
||||
_ => return Poll::Ready(None),
|
||||
};
|
||||
}
|
||||
debug_assert!(this.buffer.len()>0);
|
||||
Poll::Ready(Some(this.buffer.remove(0)))
|
||||
}
|
||||
}
|
@ -0,0 +1,181 @@
|
||||
//! Filter accepts and denies based on cidr masks.
|
||||
use super::*;
|
||||
use cidr::{
|
||||
Cidr,
|
||||
IpCidr,
|
||||
};
|
||||
use std::{
|
||||
net::{
|
||||
IpAddr,
|
||||
},
|
||||
error,
|
||||
fmt,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct IpFilterDeniedError(IpAddr, Option<IpCidr>);
|
||||
|
||||
impl warp::reject::Reject for IpFilterDeniedError{}
|
||||
impl error::Error for IpFilterDeniedError{}
|
||||
impl fmt::Display for IpFilterDeniedError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
write!(f, "Denied {} due to ", self.0)?;
|
||||
match &self.1 {
|
||||
Some(cidr) => write!(f, "matching rule {}", cidr),
|
||||
None => write!(f, "non-matching accept rule"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub enum Rule
|
||||
{
|
||||
Accept,
|
||||
Deny,
|
||||
}
|
||||
|
||||
impl Default for Rule
|
||||
{
|
||||
#[inline]
|
||||
fn default() -> Self
|
||||
{
|
||||
Self::Deny
|
||||
}
|
||||
}
|
||||
|
||||
impl Rule
|
||||
{
|
||||
fn into_result<'a>(self, net: Option<&'a IpCidr>) -> Result<Option<&'a IpCidr>, Option<IpCidr>>
|
||||
{
|
||||
if let Self::Accept = self {
|
||||
Ok(net)
|
||||
} else {
|
||||
Err(net.cloned())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
|
||||
pub struct IpFilter
|
||||
{
|
||||
/// The default fallback rule
|
||||
pub default: Rule,
|
||||
|
||||
#[serde(default)]
|
||||
accept: Vec<IpCidr>,
|
||||
#[serde(default)]
|
||||
deny: Vec<IpCidr>,
|
||||
}
|
||||
|
||||
#[inline] fn find_in<'a>(needle: &IpAddr, haystack: &'a [IpCidr]) -> Option<&'a IpCidr>
|
||||
{
|
||||
for x in haystack.iter()
|
||||
{
|
||||
if x.contains(needle) {
|
||||
return Some(x);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
impl Default for IpFilter
|
||||
{
|
||||
#[inline]
|
||||
fn default() -> Self
|
||||
{
|
||||
Self {
|
||||
default: Rule::Deny,
|
||||
accept: vec![cidr::Cidr::new_host([127,0,0,1].into())],
|
||||
deny: Vec::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IpFilter
|
||||
{
|
||||
/// Create a new CIDR filter with this fallback rule.
|
||||
///
|
||||
/// Use `default()` to construct one with the default rule.
|
||||
pub fn new(fallback: Rule) -> Self
|
||||
{
|
||||
Self {
|
||||
default: fallback,
|
||||
accept: Vec::new(),
|
||||
deny: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Check the rule for this IP, returning whether it should be accepted.
|
||||
///
|
||||
/// If the acceptance rule is met, returns the CIDR match that caused the acceptance, if applicable.
|
||||
///
|
||||
/// If the acceptance rule is not met, the error contains the CIDR match that caused the denial, if applicable.
|
||||
pub fn check(&self, ip: &IpAddr) -> Result<Option<&'_ IpCidr>, IpFilterDeniedError>
|
||||
{
|
||||
let accept = find_in(ip, &self.accept[..]);
|
||||
let deny = find_in(ip, &self.deny[..]);
|
||||
|
||||
let (rule, cidr) = match (accept, deny) {
|
||||
(None, Some(net)) => (Rule::Deny, Some(net)),
|
||||
(Some(net), None) => (Rule::Accept, Some(net)),
|
||||
(Some(ac), Some(den)) if ac != den => {
|
||||
if ac.network_length() > den.network_length() {
|
||||
(Rule::Accept, Some(ac))
|
||||
} else {
|
||||
(Rule::Deny, Some(den))
|
||||
}
|
||||
},
|
||||
_ => (self.default, None)
|
||||
};
|
||||
rule.into_result(cidr)
|
||||
.map_err(|cidr| IpFilterDeniedError(*ip, cidr))
|
||||
}
|
||||
|
||||
pub fn accept_mask(&self) -> &[IpCidr]
|
||||
{
|
||||
&self.accept[..]
|
||||
}
|
||||
pub fn deny_mask(&self) -> &[IpCidr]
|
||||
{
|
||||
&self.deny[..]
|
||||
}
|
||||
pub fn accept_range(&mut self, items: impl IntoIterator<Item = IpCidr>)
|
||||
{
|
||||
self.accept.extend(items)
|
||||
}
|
||||
pub fn deny_range(&mut self, items: impl IntoIterator<Item = IpCidr>)
|
||||
{
|
||||
self.deny.extend(items)
|
||||
}
|
||||
|
||||
pub fn accept_one(&mut self, item: IpCidr)
|
||||
{
|
||||
self.accept.push(item)
|
||||
}
|
||||
pub fn deny_one(&mut self, items: IpCidr)
|
||||
{
|
||||
self.deny.push(items)
|
||||
}
|
||||
|
||||
/// Can any connection ever be accepted?
|
||||
pub fn possible(&self) -> bool
|
||||
{
|
||||
//TODO: Test this
|
||||
!(self.default == Rule::Deny && self.accept.len() == 0) &&
|
||||
!(self.deny.iter().find(|x| x.network_length() == 0).is_some() && self.accept.len() == 0)
|
||||
}
|
||||
}
|
||||
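// A minimal usage sketch of the `IpFilter` API above; the test module name
// and the addresses are illustrative assumptions, not part of the filter itself.
#[cfg(test)]
mod ipfilter_sketch {
    use super::*;

    #[test]
    fn deny_by_default_except_loopback() {
        // Fallback rule is Deny; only loopback is explicitly accepted.
        let mut filter = IpFilter::new(Rule::Deny);
        filter.accept_one(cidr::Cidr::new_host([127, 0, 0, 1].into()));

        // Loopback matches an accept mask, so `check` returns the matching CIDR.
        assert!(filter.check(&[127, 0, 0, 1].into()).is_ok());
        // Anything else falls through to the Deny default and is rejected.
        assert!(filter.check(&[8, 8, 8, 8].into()).is_err());
        // At least one accept rule exists, so connections are possible.
        assert!(filter.possible());
    }
}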
|
||||
pub async fn recover(err: warp::Rejection) -> Result<impl warp::Reply, warp::Rejection>
|
||||
{
|
||||
if let Some(t) = err.find::<IpFilterDeniedError>() {
|
||||
error!("Denying access to {} because of {:?} (403)", t.0, t.1);
|
||||
Ok(warp::http::Response::builder()
|
||||
.status(status!(403))
|
||||
.body(format!("Access denied: {}", t)))
|
||||
} else {
|
||||
Err(err)
|
||||
}
|
||||
}
|
@ -1,49 +1,340 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
#[macro_use] extern crate log;
|
||||
|
||||
use chain::{
|
||||
Chain,
|
||||
};
|
||||
use warp::{
|
||||
Filter,
|
||||
Buf,
|
||||
reply::Response,
|
||||
};
|
||||
use hyper::Body;
|
||||
use std::{
|
||||
io::{
|
||||
BufRead,
|
||||
self,
|
||||
sync::Arc,
|
||||
fmt,
|
||||
error,
|
||||
net::{
|
||||
SocketAddr,
|
||||
IpAddr,
|
||||
},
|
||||
};
|
||||
use tokio::{
|
||||
sync::{
|
||||
RwLock,
|
||||
mpsc,
|
||||
Notify,
|
||||
},
|
||||
stream::{Stream,StreamExt,},
|
||||
};
|
||||
use serde::{
|
||||
Serialize,
|
||||
Deserialize
|
||||
};
|
||||
use futures::{
|
||||
future::{
|
||||
FutureExt,
|
||||
BoxFuture,
|
||||
join_all,
|
||||
},
|
||||
};
|
||||
use lazy_static::lazy_static;
|
||||
use cfg_if::cfg_if;
|
||||
|
||||
fn buffered_read_all_lines<T: BufRead+?Sized, F: FnMut(&str) -> io::Result<()>>(input: &mut T, mut then: F) -> io::Result<usize>
|
||||
{
|
||||
let mut buffer = String::new();
|
||||
let mut read;
|
||||
let mut total=0;
|
||||
while {read = input.read_line(&mut buffer)?; read!=0} {
|
||||
if buffer.trim().len() > 0 {
|
||||
then(&buffer[..])?;
|
||||
macro_rules! if_debug {
|
||||
($($tt:tt)*) => {
|
||||
cfg_if::cfg_if!{
|
||||
if #[cfg(debug_assertions)] {
|
||||
$($tt)*
|
||||
}
|
||||
}
|
||||
buffer.clear();
|
||||
total += read;
|
||||
}
|
||||
Ok(total)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let stdin = io::stdin();
|
||||
let mut stdin = stdin.lock();
|
||||
let mut chain = Chain::new();
|
||||
macro_rules! status {
|
||||
($code:expr) => {
|
||||
::warp::http::status::StatusCode::from_u16($code).unwrap()
|
||||
};
|
||||
}
|
||||
|
||||
buffered_read_all_lines(&mut stdin, |string| {
|
||||
chain.feed(&string.split_whitespace()
|
||||
.filter(|word| !word.is_empty())
|
||||
.map(|s| s.to_owned()).collect::<Vec<_>>());
|
||||
|
||||
Ok(())
|
||||
}).expect("Failed to read from stdin");
|
||||
|
||||
if !chain.is_empty() {
|
||||
if let Some(num) = std::env::args().skip(1).next() {
|
||||
let sz: usize = num.parse().expect("Cannot parse number of tokens to generate");
|
||||
for string in chain.str_iter_for(sz) {
|
||||
println!("{}", string);
|
||||
mod ext;
|
||||
use ext::*;
|
||||
mod util;
|
||||
mod range;
|
||||
mod sanitise;
|
||||
mod bytes;
|
||||
mod chunking;
|
||||
#[cfg(feature="api")]
|
||||
mod api;
|
||||
#[cfg(target_family="unix")]
|
||||
mod signals;
|
||||
mod config;
|
||||
mod msg;
|
||||
mod state;
|
||||
use state::State;
|
||||
mod save;
|
||||
mod ipfilt;
|
||||
mod forwarded_list;
|
||||
use forwarded_list::XForwardedFor;
|
||||
mod handle;
|
||||
|
||||
mod feed;
|
||||
mod gen;
|
||||
mod sentance;
|
||||
|
||||
const DEFAULT_LOG_LEVEL: &str = "warn";
|
||||
|
||||
fn init_log()
|
||||
{
|
||||
let level = match std::env::var_os("RUST_LOG") {
|
||||
None => {
|
||||
std::env::set_var("RUST_LOG", DEFAULT_LOG_LEVEL);
|
||||
std::borrow::Cow::Borrowed(std::ffi::OsStr::new(DEFAULT_LOG_LEVEL))
|
||||
},
|
||||
Some(w) => std::borrow::Cow::Owned(w),
|
||||
};
|
||||
pretty_env_logger::init();
|
||||
trace!("Initialising `genmarkov` ({}) v{} with log level {:?}.\n\tMade by {} with <3.\n\tLicensed with GPL v3 or later",
|
||||
std::env::args().next().unwrap(),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
level,
|
||||
env!("CARGO_PKG_AUTHORS"));
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
init_log();
|
||||
|
||||
let (config, ccache) = match config::load().await {
|
||||
Some(v) => {
|
||||
let cache = match v.try_gen_cache() {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
error!("Invalid config, cannot continue");
|
||||
error!("{}", e);
|
||||
debug!("{:?}", e);
|
||||
return;
|
||||
},
|
||||
};
|
||||
(v, cache)
|
||||
},
|
||||
_ => {
|
||||
let cfg = config::Config::default();
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
if let Err(err) = cfg.save(config::DEFAULT_FILE_LOCATION).await {
|
||||
error!("Failed to create default config file: {}", err);
|
||||
}
|
||||
}
|
||||
let cache= cfg.try_gen_cache().unwrap();
|
||||
(cfg, cache)
|
||||
},
|
||||
};
|
||||
debug!("Using config {:?}", config);
|
||||
trace!("With config cached: {:?}", ccache);
|
||||
|
||||
let (chain_handle, chain) = handle::spawn(match save::load(&config.file).await {
|
||||
Ok(chain) => {
|
||||
info!("Loaded chain from {:?}", config.file);
|
||||
chain
|
||||
},
|
||||
Err(e) => {
|
||||
warn!("Failed to load chain, creating new");
|
||||
trace!("Error: {}", e);
|
||||
Chain::new()
|
||||
},
|
||||
}, ccache.handler_settings.clone());
|
||||
{
|
||||
let mut tasks = Vec::<BoxFuture<'static, ()>>::new();
|
||||
tasks.push(chain_handle.map(|res| res.expect("Chain handle panicked")).boxed());
|
||||
let (state, chain) = {
|
||||
|
||||
let state = State::new(config,
|
||||
ccache,
|
||||
chain);
|
||||
let state2 = state.clone();
|
||||
let saver = tokio::spawn(save::host(Box::new(state.clone())));
|
||||
let chain = warp::any().map(move || state.clone());
|
||||
|
||||
tasks.push(saver.map(|res| res.expect("Saver panicked")).boxed());
|
||||
(state2, chain)
|
||||
};
|
||||
|
||||
let client_ip = if state.config().trust_x_forwarded_for {
|
||||
warp::header("x-forwarded-for")
|
||||
.map(|ip: XForwardedFor| ip)
|
||||
.and_then(|x: XForwardedFor| async move { x.into_first().ok_or_else(|| warp::reject::not_found()) })
|
||||
.or(warp::filters::addr::remote()
|
||||
.and_then(|x: Option<SocketAddr>| async move { x.map(|x| x.ip()).ok_or_else(|| warp::reject::not_found()) }))
|
||||
.unify().boxed()
|
||||
} else {
|
||||
println!("{}", chain.generate_str());
|
||||
warp::filters::addr::remote().and_then(|x: Option<SocketAddr>| async move {x.map(|x| x.ip()).ok_or_else(|| warp::reject::not_found())}).boxed()
|
||||
};
|
||||
|
||||
let ipfilter = warp::any()
|
||||
.and(chain)
|
||||
.and(client_ip)
|
||||
.and_then(|state: State, host: IpAddr| {
|
||||
async move {
|
||||
state.config().mask.check(&host)
|
||||
.map(|ci| {
|
||||
trace!("Accepting from rule {:?}", ci);
|
||||
host
|
||||
})
|
||||
.map(move |host| (state, host))
|
||||
.map_err(warp::reject::custom)
|
||||
}
|
||||
}).untuple_one();
|
||||
|
||||
let push = warp::put()
|
||||
.and(warp::path("put"))
|
||||
.and(ipfilter.clone())
|
||||
.and(warp::body::content_length_limit(state.config().max_content_length))
|
||||
.and(warp::body::stream())
|
||||
.and_then(|state: State, host: IpAddr, buf| {
|
||||
async move {
|
||||
feed::full(&host, state, buf).await
|
||||
.map(|_| warp::reply::with_status(warp::reply(), status!(201)))
|
||||
.map_err(|_| warp::reject::not_found()) //(warp::reject::custom) //TODO: Recover rejection filter down below for custom error return
|
||||
}
|
||||
})
|
||||
|
||||
.recover(ipfilt::recover)
|
||||
.with(warp::log("markov::put"));
|
||||
|
||||
|
||||
cfg_if!{
|
||||
if #[cfg(feature="api")] {
|
||||
let api = {
|
||||
let single = {
|
||||
let msz = state.config().max_gen_size;
|
||||
warp::post()
|
||||
.and(ipfilter.clone())
|
||||
.and(warp::path("single"))
|
||||
.and(warp::path::param()
|
||||
.map(move |sz: usize| {
|
||||
if sz == 0 || (2..=msz).contains(&sz) {
|
||||
Some(sz)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.or(warp::any().map(|| None))
|
||||
.unify())
|
||||
.and(warp::body::content_length_limit(state.config().max_content_length))
|
||||
.and(warp::body::aggregate())
|
||||
.map(|_, x, y, z| (x,y,z)).untuple_one()
|
||||
.and_then(api::single)
|
||||
.with(warp::log("markov::api::single"))
|
||||
};
|
||||
warp::path("api")
|
||||
.and(single)
|
||||
.recover(ipfilt::recover)
|
||||
.recover(api::error::rejection)
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
let read = warp::get()
|
||||
.and(ipfilter.clone())
|
||||
.and(warp::path::param().map(|opt: usize| Some(opt))
|
||||
.or(warp::path::end().map(|| Option::<usize>::None)).unify())
|
||||
.and_then(|state: State, host: IpAddr, num: Option<usize>| {
|
||||
async move {
|
||||
let (tx, rx) = mpsc::channel(state.config().max_gen_size);
|
||||
tokio::spawn(gen::body(state, num, tx));
|
||||
Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.filter_map(move |mut x| {
|
||||
if x.trim_in_place().len() != 0 {
|
||||
info!("{} <- {:?}", host, x);
|
||||
x.push('\n');
|
||||
Some(Ok::<_, std::convert::Infallible>(x))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}))))
|
||||
}
|
||||
})
|
||||
.recover(ipfilt::recover)
|
||||
.with(warp::log("markov::read"));
|
||||
|
||||
let sentance = warp::get()
|
||||
.and(warp::path("sentance")) //TODO: sanitise::Sentance::new_iter the body line
|
||||
.and(ipfilter.clone())
|
||||
.and(warp::path::param().map(|opt: usize| Some(opt))
|
||||
.or(warp::path::end().map(|| Option::<usize>::None)).unify())
|
||||
.and_then(|state: State, host: IpAddr, num: Option<usize>| {
|
||||
async move {
|
||||
let (tx, rx) = mpsc::channel(state.config().max_gen_size);
|
||||
tokio::spawn(sentance::body(state, num, tx));
|
||||
Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.filter_map(move |mut x| {
|
||||
if x.trim_in_place().len() != 0 {
|
||||
info!("{} (sentance) <- {:?}", host, x);
|
||||
x.push(' ');
|
||||
Some(Ok::<_, std::convert::Infallible>(x))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}))))
|
||||
}
|
||||
})
|
||||
.recover(ipfilt::recover)
|
||||
.with(warp::log("markov::read::sentance"));
|
||||
|
||||
let read = warp::path("get").and(read.or(sentance));
|
||||
#[cfg(feature="api")]
|
||||
let read = read.or(api);
|
||||
|
||||
#[cfg(target_family="unix")]
|
||||
tasks.push(tokio::spawn(signals::handle(state.clone())).map(|res| res.expect("Signal handler panicked")).boxed());
|
||||
|
||||
require_impl!(Send: async {
|
||||
let (server, init) = {
|
||||
let s2 = AssertNotSend::new(state.clone()); //temp clone the Arcs here for shutdown if server fails to bind, assert they cannot remain cloned across an await boundary.
|
||||
match bind::try_serve(warp::serve(push
|
||||
.or(read)),
|
||||
state.config().bindpoint.clone(),
|
||||
async move {
|
||||
tokio::signal::ctrl_c().await.unwrap();
|
||||
state.shutdown();
|
||||
}) {
|
||||
Ok((addr, server)) => {
|
||||
info!("Server bound on {:?}", addr);
|
||||
(server, s2.into_inner().into_initialiser())
|
||||
},
|
||||
Err(err) => {
|
||||
error!("Failed to bind server: {}", err);
|
||||
s2.into_inner().shutdown();
|
||||
return;
|
||||
},
|
||||
}
|
||||
};
|
||||
tokio::join![
|
||||
server,
|
||||
async move {
|
||||
cfg_if! {
|
||||
if #[cfg(feature="instant-init")] {
|
||||
trace!("Setting init");
|
||||
} else {
|
||||
trace!("Setting init in 2 seconds for good measure.");
|
||||
tokio::time::delay_for(tokio::time::Duration::from_secs(2)).await;
|
||||
}
|
||||
}
|
||||
init.set().expect("Failed to initialise saver")
|
||||
},
|
||||
];
|
||||
}).await;
|
||||
|
||||
// Cleanup
|
||||
async move {
|
||||
trace!("Cleanup");
|
||||
debug!("Waiting on {} tasks now", tasks.len());
|
||||
|
||||
join_all(tasks).await;
|
||||
}
|
||||
}.await;
|
||||
info!("Shut down gracefully")
|
||||
}
|
||||
|
||||
mod bind;
|
||||
|
@ -0,0 +1,210 @@
|
||||
//! Message passing things
|
||||
use super::*;
|
||||
use tokio::{
|
||||
sync::{
|
||||
watch,
|
||||
Mutex,
|
||||
},
|
||||
};
|
||||
use std::{
|
||||
task::{Poll, Context},
|
||||
pin::Pin,
|
||||
fmt,
|
||||
error,
|
||||
};
|
||||
use futures::{
|
||||
future::{
|
||||
Future,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct InitError;
|
||||
#[derive(Debug)]
|
||||
pub struct InitWaitError;
|
||||
|
||||
impl error::Error for InitError{}
|
||||
impl fmt::Display for InitError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
write!(f, "failed to set init value")
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for InitWaitError{}
|
||||
impl fmt::Display for InitWaitError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
write!(f, "failed to receive init value")
|
||||
}
|
||||
}
|
||||
|
||||
/// Provides a method of waiting on and setting a single initialisation.
|
||||
///
|
||||
/// In general, it should only be set once, as multiple sets do nothing but hog `Arc`s.
|
||||
/// Dropping the `Initialiser` after waiting or setting should generally be done immediately.
|
||||
/// Choose the `into_wait()` and `set()` variants over the non-consuming ones.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Initialiser
|
||||
{
|
||||
tx: Arc<watch::Sender<bool>>,
|
||||
rx: watch::Receiver<bool>
|
||||
}
|
||||
|
||||
impl Initialiser
|
||||
{
|
||||
/// Create a new, unset initialiser
|
||||
pub fn new() -> Self
|
||||
{
|
||||
let (tx, rx) = watch::channel(false);
|
||||
Self {
|
||||
tx: Arc::new(tx),
|
||||
rx,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a pre-set initialiser. Calls to `wait()` will immediately resolve.
|
||||
pub fn new_set() -> Self
|
||||
{
|
||||
let (tx, rx) = watch::channel(true);
|
||||
Self {
|
||||
tx: Arc::new(tx),
|
||||
rx,
|
||||
}
|
||||
}
|
||||
|
||||
/// Consume into a future that completes when init is set.
|
||||
pub fn into_wait(self) -> impl Future<Output=Result<(), InitWaitError>> + 'static
|
||||
{
|
||||
let mut rx = self.rx;
|
||||
async move {
|
||||
if !*rx.borrow() {
|
||||
while !rx.recv().await.ok_or_else(|| InitWaitError)? {
|
||||
//tokio::task::yield_now().await;
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Clone into a future that completes when init is set.
|
||||
///
|
||||
/// This method does not clone any `Arc`s and is preferred to `self.clone().into_wait()`.
|
||||
/// Use this when the `Initialiser` you want to wait on is behind a shared reference.
|
||||
pub fn clone_into_wait(&self) -> impl Future<Output=Result<(), InitWaitError>> + 'static
|
||||
{
|
||||
let mut rx = self.rx.clone();
|
||||
async move {
|
||||
if !*rx.borrow() {
|
||||
while !rx.recv().await.ok_or_else(|| InitWaitError)? {
|
||||
//tokio::task::yield_now().await;
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Completes when init is set
|
||||
pub async fn wait(&mut self) -> Result<(), InitWaitError>
|
||||
{
|
||||
if !*self.rx.borrow() {
|
||||
while !self.rx.recv().await.ok_or_else(|| InitWaitError)? {
|
||||
//tokio::task::yield_now().await;
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Is init set?
|
||||
pub fn is_set(&self) -> bool
|
||||
{
|
||||
*self.rx.borrow()
|
||||
}
|
||||
|
||||
/// Consume and set init if it's not already set
|
||||
pub fn set(self) -> Result<(), InitError>
|
||||
{
|
||||
if !*self.rx.borrow() {
|
||||
self.tx.broadcast(true).map_err(|_| InitError)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Set init without consuming.
|
||||
///
|
||||
/// # Note
|
||||
/// It is preferred to use `set()`, as this method may make `Arc`s hang around longer than they need to.
|
||||
/// Calling this multiple times is useless.
|
||||
pub fn set_in_place(&self) -> Result<(), InitError>
|
||||
{
|
||||
if !*self.rx.borrow() {
|
||||
self.tx.broadcast(true).map_err(|_| InitError)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Future for Initialiser
|
||||
{
|
||||
type Output = Result<(), InitWaitError>;
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
let uhh = self.wait();
|
||||
tokio::pin!(uhh);
|
||||
uhh.poll(cx)
|
||||
}
|
||||
}
|
||||
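// A minimal sketch of the intended set-once/wait-many flow of `Initialiser`;
// it assumes the tokio 0.2 runtime and `macros` feature configured in Cargo.toml.
#[cfg(test)]
mod init_sketch {
    use super::*;

    #[tokio::test]
    async fn set_releases_waiters() {
        let init = Initialiser::new();
        assert!(!init.is_set());

        // A waiter cloned from the shared handle; it resolves once `set()` runs.
        let waiter = tokio::spawn(init.clone().into_wait());

        // Setting consumes this handle and wakes every waiter.
        init.set().expect("failed to set init");
        waiter.await.expect("waiter panicked").expect("wait failed");
    }
}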
|
||||
/// A value that can be consumed once.
|
||||
#[derive(Debug)]
|
||||
pub struct Once<T>(Mutex<Option<T>>);
|
||||
|
||||
impl<T> Once<T>
|
||||
{
|
||||
/// Create a new instance
|
||||
pub fn new(from: T) -> Self
|
||||
{
|
||||
Self(Mutex::new(Some(from)))
|
||||
}
|
||||
/// Consume the inner value from behind a potentially shared reference.
|
||||
pub async fn consume_shared(self: Arc<Self>) -> Option<T>
|
||||
{
|
||||
match Arc::try_unwrap(self) {
|
||||
Ok(x) => x.0.into_inner(),
|
||||
Err(x) => x.0.lock().await.take(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Consume from a shared reference and panic if the value has already been consumed.
|
||||
pub async fn unwrap_shared(self: Arc<Self>) -> T
|
||||
{
|
||||
self.consume_shared().await.unwrap()
|
||||
}
|
||||
|
||||
/// Consume the inner value.
|
||||
pub async fn consume(&self) -> Option<T>
|
||||
{
|
||||
self.0.lock().await.take()
|
||||
}
|
||||
|
||||
/// Consume and panic if the value has already been consumed.
|
||||
pub async fn unwrap(&self) -> T
|
||||
{
|
||||
self.consume().await.unwrap()
|
||||
}
|
||||
|
||||
/// Consume into the inner value
|
||||
pub fn into_inner(self) -> Option<T>
|
||||
{
|
||||
self.0.into_inner()
|
||||
}
|
||||
}
|
@ -0,0 +1,296 @@
|
||||
//! Workarounds for ridiculously janky `std::ops::Range*` polymorphism
|
||||
use super::*;
|
||||
use std::{
|
||||
ops::{
|
||||
Range,
|
||||
RangeFrom,
|
||||
RangeInclusive,
|
||||
RangeTo,
|
||||
RangeToInclusive,
|
||||
RangeFull,
|
||||
|
||||
Bound,
|
||||
RangeBounds,
|
||||
},
|
||||
str::{
|
||||
FromStr,
|
||||
},
|
||||
fmt,
|
||||
error,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum DynRange<T>
|
||||
{
|
||||
Range(Range<T>),
|
||||
From(RangeFrom<T>),
|
||||
Inclusive(RangeInclusive<T>),
|
||||
To(RangeTo<T>),
|
||||
ToInclusive(RangeToInclusive<T>),
|
||||
Full(RangeFull),
|
||||
}
|
||||
|
||||
#[macro_export] macro_rules! impl_from {
|
||||
(Full, RangeFull) => {
|
||||
impl<T> From<RangeFull> for DynRange<T>
|
||||
{
|
||||
#[inline] fn from(from: RangeFull) -> Self
|
||||
{
|
||||
Self::Full(from)
|
||||
}
|
||||
}
|
||||
};
|
||||
($name:ident, $range:tt) => {
|
||||
|
||||
impl<T> From<$range <T>> for DynRange<T>
|
||||
{
|
||||
#[inline] fn from(from: $range<T>) -> Self
|
||||
{
|
||||
Self::$name(from)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_from!(Range, Range);
|
||||
impl_from!(From, RangeFrom);
|
||||
impl_from!(Inclusive, RangeInclusive);
|
||||
impl_from!(To, RangeTo);
|
||||
impl_from!(ToInclusive, RangeToInclusive);
|
||||
impl_from!(Full, RangeFull);
|
||||
|
||||
macro_rules! bounds {
|
||||
($self:ident, $bound:ident) => {
|
||||
match $self {
|
||||
DynRange::Range(from) => from.$bound(),
|
||||
DynRange::From(from) => from.$bound(),
|
||||
DynRange::Inclusive(i) => i.$bound(),
|
||||
DynRange::To(i) => i.$bound(),
|
||||
DynRange::ToInclusive(i) => i.$bound(),
|
||||
DynRange::Full(_) => (..).$bound(),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl<T> RangeBounds<T> for DynRange<T>
|
||||
{
|
||||
fn start_bound(&self) -> Bound<&T> {
|
||||
bounds!(self, start_bound)
|
||||
}
|
||||
fn end_bound(&self) -> Bound<&T> {
|
||||
bounds!(self, end_bound)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> RangeBounds<T> for &'a DynRange<T>
|
||||
{
|
||||
fn start_bound(&self) -> Bound<&T> {
|
||||
bounds!(self, start_bound)
|
||||
}
|
||||
fn end_bound(&self) -> Bound<&T> {
|
||||
bounds!(self, end_bound)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: fmt::Display> fmt::Display for DynRange<T>
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
match self {
|
||||
Self::Range(from) => write!(f, "{}..{}", from.start, from.end),
|
||||
Self::From(from) => write!(f, "{}..", from.start),
|
||||
Self::Inclusive(from) => write!(f, "{}..={}", from.start(), from.end()),
|
||||
Self::To(from) => write!(f, "..{}", from.end),
|
||||
Self::ToInclusive(from) => write!(f, "..={}", from.end),
|
||||
Self::Full(_) => write!(f, ".."),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
use std::any::{
|
||||
Any,
|
||||
};
|
||||
|
||||
impl<T: 'static> DynRange<T>
|
||||
{
|
||||
#[inline]
|
||||
pub fn into_boxed(self) -> Box<dyn Any /*TODO: + Send + Sync */+ 'static>
|
||||
{
|
||||
self.into_inner()
|
||||
}
|
||||
fn into_inner(self) -> Box<dyn Any + 'static>
|
||||
{
|
||||
match self {
|
||||
Self::Range(from) => Box::new(from),
|
||||
Self::From(from) => Box::new(from),
|
||||
Self::Inclusive(from) => Box::new(from),
|
||||
Self::To(from) => Box::new(from),
|
||||
Self::ToInclusive(from) => Box::new(from),
|
||||
Self::Full(_) => Box::new(..),
|
||||
}
|
||||
}
|
||||
fn inner_mut(&mut self) -> &mut dyn Any
|
||||
{
|
||||
match self {
|
||||
Self::Range(from) => from,
|
||||
Self::From(from) => from,
|
||||
Self::Inclusive(from) => from,
|
||||
Self::To(from) => from,
|
||||
Self::ToInclusive(from) => from,
|
||||
Self::Full(f) => f,
|
||||
}
|
||||
}
|
||||
fn inner_ref(&self) -> &dyn Any
|
||||
{
|
||||
match self {
|
||||
Self::Range(from) => from,
|
||||
Self::From(from) => from,
|
||||
Self::Inclusive(from) => from,
|
||||
Self::To(from) => from,
|
||||
Self::ToInclusive(from) => from,
|
||||
Self::Full(_) => &(..),
|
||||
}
|
||||
}
|
||||
pub fn downcast_ref<R: RangeBounds<T> + 'static>(&self) -> Option<&R>
|
||||
{
|
||||
self.inner_ref().downcast_ref()
|
||||
}
|
||||
pub fn downcast_mut<R: RangeBounds<T> + 'static>(&mut self) -> Option<&mut R>
|
||||
{
|
||||
self.inner_mut().downcast_mut()
|
||||
}
|
||||
pub fn downcast<R: RangeBounds<T> + 'static>(self) -> Result<R, Self>
|
||||
{
|
||||
self.into_inner().downcast::<R>()
|
||||
.map(|x| *x)
|
||||
.map_err(|b| {
|
||||
todo!("make this bullshit properly unboxable ehh...")
|
||||
})
|
||||
//Box::<(dyn std::any::Any + 'static)>::downcast(Box::new(self)).map_ok(|ok| *ok)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ParseError(DynRange<()>, Option<Box<dyn error::Error+'static>>);
|
||||
|
||||
impl ParseError
|
||||
{
|
||||
fn new<R: Into<DynRange<()>>>(which: R, err: impl error::Error + 'static) -> Self
|
||||
{
|
||||
Self(which.into(), Some(Box::new(err)))
|
||||
}
|
||||
fn none(which: impl Into<DynRange<()>>) -> Self
|
||||
{
|
||||
Self(which.into(), None)
|
||||
}
|
||||
fn map<T: Into<DynRange<()>>>(self, to: T) -> Self
|
||||
{
|
||||
Self (to.into(), self.1)
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for ParseError
|
||||
{
|
||||
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
|
||||
if let Some(this) = self.1.as_ref() {
|
||||
Some(this.as_ref())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ParseError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
write!(f, "failed to parse range in format `{:?}`", self.0)?;
|
||||
if let Some(this) = self.1.as_ref() {
|
||||
write!(f, ": {}", this)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<T: FromStr> FromStr for DynRange<T>
|
||||
where T::Err: error::Error + 'static
|
||||
{
|
||||
type Err = ParseError;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
if s== ".." {
|
||||
Ok(Self::Full(..))
|
||||
} else if s.starts_with("..=") {
|
||||
Ok(Self::ToInclusive(..=T::from_str(&s[3..]).map_err(|x| ParseError::new(..=(), x))?))
|
||||
} else if s.starts_with("..") {
|
||||
Ok(Self::To(..(T::from_str(&s[2..])).map_err(|x| ParseError::new(..(), x))?))
|
||||
} else if s.ends_with("..") {
|
||||
Ok(Self::From(T::from_str(&s[..s.len()-2]).map_err(|x| ParseError::new(().., x))?..))
|
||||
} else {
|
||||
fn try_next_incl<'a, T: FromStr>(m: &mut impl Iterator<Item=&'a str>) -> Result<RangeInclusive<T>, ParseError>
|
||||
where T::Err: error::Error + 'static
|
||||
{
|
||||
let (first, second) = if let Some(first) = m.next() {
|
||||
if let Some(second) = m.next() {
|
||||
(first, second)
|
||||
} else {
|
||||
return Err(ParseError::none(()..=()));
|
||||
}
|
||||
} else {
|
||||
return Err(ParseError::none(()..=()));
|
||||
};
|
||||
|
||||
let first: T = first.parse().map_err(|x| ParseError::new(()..=(), x))?;
|
||||
let second: T = second.parse().map_err(|x| ParseError::new(()..=(), x))?;
|
||||
|
||||
Ok(first..=second)
|
||||
}
|
||||
|
||||
fn try_next<'a, T: FromStr>(m: &mut impl Iterator<Item=&'a str>) -> Result<Range<T>, ParseError>
|
||||
where T::Err: error::Error + 'static
|
||||
{
|
||||
let (first, second) = if let Some(first) = m.next() {
|
||||
if let Some(second) = m.next() {
|
||||
(first, second)
|
||||
} else {
|
||||
return Err(ParseError::none(()..()));
|
||||
}
|
||||
} else {
|
||||
return Err(ParseError::none(()..()));
|
||||
};
|
||||
|
||||
let first: T = first.parse().map_err(|x| ParseError::new(()..(), x))?;
|
||||
let second: T = second.parse().map_err(|x| ParseError::new(()..(), x))?;
|
||||
|
||||
Ok(first..second)
|
||||
}
|
||||
|
||||
|
||||
let mut split = s.split("..=").fuse();
|
||||
|
||||
let mut last_err = ParseError::none(()..());
|
||||
match loop {
|
||||
match try_next_incl(&mut split) {
|
||||
Err(ParseError(_, None)) => break Err(last_err), // iter empty
|
||||
Err(other) => last_err = other,
|
||||
Ok(value) => break Ok(Self::Inclusive(value)),
|
||||
}
|
||||
} {
|
||||
Ok(v) => return Ok(v),
|
||||
Err(e) => last_err = e,
|
||||
};
|
||||
|
||||
let mut split = s.split("..").fuse();
|
||||
match loop {
|
||||
match try_next(&mut split) {
|
||||
Err(ParseError(_, None)) => break Err(last_err), // iter empty
|
||||
Err(other) => last_err = other,
|
||||
Ok(value) => break Ok(Self::Range(value)),
|
||||
}
|
||||
} {
|
||||
Ok(v) => Ok(v),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
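// A minimal sketch of the `FromStr` impl above; the literals and the test
// module name are illustrative assumptions.
#[cfg(test)]
mod dynrange_sketch {
    use super::*;

    #[test]
    fn parse_and_query() {
        // Half-open range: falls through to the final `split("..")` branch.
        let half_open: DynRange<usize> = "2..5".parse().expect("parse 2..5");
        assert!(half_open.contains(&4) && !half_open.contains(&5));

        // Inclusive-to range: handled by the `..=` prefix branch.
        let to_incl: DynRange<usize> = "..=10".parse().expect("parse ..=10");
        assert!(to_incl.contains(&10));

        // Full range: the ".." special case.
        let full: DynRange<usize> = "..".parse().expect("parse ..");
        assert!(full.contains(&0));
    }
}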
@ -0,0 +1,277 @@
|
||||
//! Filter out characters and such
|
||||
use smallmap::Map as SmallMap;
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
fmt,
|
||||
iter::{
|
||||
self,
|
||||
FromIterator,
|
||||
},
|
||||
str,
|
||||
};
|
||||
use once_cell::sync::OnceCell;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Filter(SmallMap<char, ()>);
|
||||
|
||||
/*
|
||||
impl<const N: usize> From<[char; N]> for Filter
|
||||
{
|
||||
fn from(from: [char; N]) -> Self
|
||||
{
|
||||
let mut map = SmallMap::with_capacity(1 + (N / 256));
|
||||
for &chr in from.iter()
|
||||
{
|
||||
map.insert(chr, ());
|
||||
}
|
||||
Self(map)
|
||||
}
|
||||
}*/
|
||||
|
||||
impl<'a> From<&'a [char]> for Filter
|
||||
{
|
||||
fn from(from: &'a [char]) -> Self
|
||||
{
|
||||
let mut map = SmallMap::new();
|
||||
for &chr in from.iter()
|
||||
{
|
||||
map.insert(chr, ());
|
||||
}
|
||||
Self(map)
|
||||
}
|
||||
}
|
||||
impl<'a> From<&'a str> for Filter
|
||||
{
|
||||
fn from(from: &'a str) -> Self
|
||||
{
|
||||
let mut output = Self::new();
|
||||
output.insert(from.chars());
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
impl str::FromStr for Filter
|
||||
{
|
||||
type Err = std::convert::Infallible;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(Self::from(s))
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Filter
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
use std::fmt::Write;
|
||||
for chr in self.iter()
|
||||
{
|
||||
f.write_char(chr)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FilterKeyIter<'a>(smallmap::iter::Iter<'a, char, ()>, usize);
|
||||
|
||||
impl<'a> Iterator for FilterKeyIter<'a>
|
||||
{
|
||||
type Item = char;
|
||||
fn next(&mut self) -> Option<Self::Item>
|
||||
{
|
||||
self.0.next().map(|&(x, _)| x)
|
||||
}
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
(self.1, Some(self.1))
|
||||
}
|
||||
}
|
||||
impl<'a> iter::FusedIterator for FilterKeyIter<'a>{}
|
||||
impl<'a> iter::ExactSizeIterator for FilterKeyIter<'a>{}
|
||||
|
||||
impl Filter
|
||||
{
|
||||
pub fn new() -> Self
|
||||
{
|
||||
Self(SmallMap::new())
|
||||
}
|
||||
pub fn insert<I: IntoIterator<Item=char>>(&mut self, from: I)
|
||||
{
|
||||
for from in from.into_iter()
|
||||
{
|
||||
self.0.insert(from, ());
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove<I: IntoIterator<Item=char>>(&mut self, from: I)
|
||||
{
|
||||
for from in from.into_iter()
|
||||
{
|
||||
self.0.remove(&from);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize
|
||||
{
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool
|
||||
{
|
||||
//TODO: impl this in smallmap itself
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> FilterKeyIter<'_> //impl Iterator<Item=char> + '_
|
||||
{
|
||||
//self.0.iter()
|
||||
// .copied()
|
||||
// .map(|(x, _)| x)
|
||||
FilterKeyIter(self.0.iter(), self.0.len())
|
||||
}
|
||||
|
||||
/// Should this character be filtered?
|
||||
#[inline] pub fn check(&self, chr: char) -> bool
|
||||
{
|
||||
self.0.get(&chr).is_some()
|
||||
}
|
||||
|
||||
pub fn filter_owned(&self, input: impl Into<String>) -> String
|
||||
{
|
||||
let mut input = input.into();
|
||||
self.filter(&mut input);
|
||||
input
|
||||
}
|
||||
|
||||
pub fn filter<'a>(&self, output: &'a mut String) -> &'a mut String
|
||||
{
|
||||
if self.is_empty() {
|
||||
return output;
|
||||
}
|
||||
output.retain(|chr| !self.check(chr));
|
||||
output
|
||||
}
|
||||
|
||||
pub fn filter_iter<'a, I: IntoIterator<Item=char>>(&'a self, from_iter: I) -> FilterIter<'a, I::IntoIter>
|
||||
where I::IntoIter: 'a
|
||||
{
|
||||
FilterIter(&self, from_iter.into_iter().fuse())
|
||||
}
|
||||
|
||||
pub fn filter_cow<'a>(&self, string: &'a (impl AsRef<str> + 'a + ?Sized)) -> Cow<'a, str>
|
||||
{
|
||||
let string = string.as_ref();
|
||||
|
||||
if self.is_empty() {
|
||||
return Cow::Borrowed(string);
|
||||
}
|
||||
|
||||
let mut output = Cow::Borrowed(string);
|
||||
let mut i=0;
|
||||
for chr in string.chars()
|
||||
{
|
||||
if self.check(chr) {
|
||||
output.to_mut().remove(i);
|
||||
} else {
|
||||
i+=1;
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
pub fn filter_str<'a, T: AsRef<str>+'a +?Sized>(&'a self, string: &'a T) -> FilterStr<'a>
|
||||
{
|
||||
FilterStr(string.as_ref(), self, OnceCell::new())
|
||||
}
|
||||
}
|
||||
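// A minimal sketch of the `Filter` API above; the characters and strings are
// illustrative assumptions.
#[cfg(test)]
mod filter_sketch {
    use super::*;

    #[test]
    fn strips_listed_characters() {
        let filter = Filter::from("!?");
        assert!(filter.check('!'));
        // Owned filtering removes every listed character.
        assert_eq!(filter.filter_owned("no!! way?"), "no way");
        // Copy-on-write filtering stays borrowed when nothing matches.
        assert!(matches!(filter.filter_cow("clean"), Cow::Borrowed("clean")));
    }
}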
|
||||
impl FromIterator<char> for Filter
|
||||
{
|
||||
fn from_iter<I: IntoIterator<Item=char>>(iter: I) -> Self
|
||||
{
|
||||
let mut output= Self::new();
|
||||
output.insert(iter);
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FilterStr<'a>
|
||||
{
|
||||
pub fn as_str(&self) -> &str
|
||||
{
|
||||
fn fmt(this: &FilterStr<'_>) -> String
|
||||
{
|
||||
let chars = this.0.chars();
|
||||
let mut f: String = crate::util::hint_cap(&chars);
|
||||
for chr in chars {
|
||||
if !this.1.check(chr) {
|
||||
f.push(chr);
|
||||
}
|
||||
}
|
||||
f
|
||||
}
|
||||
&self.2.get_or_init(|| fmt(&self))[..]
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FilterStr<'a>(&'a str, &'a Filter, OnceCell<String>);
|
||||
impl<'a> fmt::Display for FilterStr<'a>
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
write!(f, "{}", self.as_str())
|
||||
}
|
||||
}
|
||||
impl<'a> FilterStr<'a>
|
||||
{
|
||||
pub fn filter(&self) -> &Filter
|
||||
{
|
||||
&self.1
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FilterIter<'a, I>(&'a Filter, iter::Fuse<I>);
|
||||
|
||||
impl<'a, I: Iterator<Item=char>> Iterator for FilterIter<'a, I>
|
||||
{
|
||||
type Item = char;
|
||||
fn next(&mut self) -> Option<Self::Item>
|
||||
{
|
||||
loop {
|
||||
break match self.1.next() {
|
||||
Some(chr) if !self.0.check(chr) => Some(chr),
|
||||
None => None,
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let (_, high) = self.1.size_hint();
|
||||
(0, high)
|
||||
}
|
||||
}
|
||||
impl<'a, I> FilterIter<'a, I>
|
||||
{
|
||||
pub fn filter(&self) -> &Filter
|
||||
{
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, I: Iterator<Item=char>> iter::FusedIterator for FilterIter<'a, I>{}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests
|
||||
{
|
||||
use super::*;
|
||||
#[test]
|
||||
fn filter_cow()
|
||||
{
|
||||
let filter: Filter = " hi".chars().collect();
|
||||
|
||||
let string = "abcdef ghi jk1\nhian";
|
||||
|
||||
assert_eq!(filter.filter_str(&string).to_string(), filter.filter_cow(&string).to_string());
|
||||
assert_eq!(filter.filter_cow(&string).to_string(), filter.filter_iter(string.chars()).collect::<String>());
|
||||
}
|
||||
}
|
@ -0,0 +1,76 @@
|
||||
//! Sanitisers
|
||||
use super::*;
|
||||
use std::{
|
||||
error,
|
||||
fmt,
|
||||
};
|
||||
mod sentance;
|
||||
pub use sentance::*;
|
||||
mod word;
|
||||
pub use word::*;
|
||||
|
||||
pub mod filter;
|
||||
|
||||
/*
|
||||
pub fn take_sentance<T: AsyncBufRead+ ?Sized + Unpin, U: AsyncWrite + ?Sized + Unpin>(from: &mut T, to: &mut U) -> Result<usize, Error>
|
||||
{
|
||||
todo!()
|
||||
}*/
|
||||
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
Word(WordError),
|
||||
Sentance(SentanceError),
|
||||
}
|
||||
|
||||
impl error::Error for Error{}
|
||||
|
||||
impl fmt::Display for Error
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
match self {
|
||||
Self::Word(_) => write!(f, "couldn't extract word"),
|
||||
Self::Sentance(_) => write!(f, "couldn't extract sentance"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<WordError> for Error
|
||||
{
|
||||
#[inline] fn from(from: WordError) -> Self
|
||||
{
|
||||
Self::Word(from)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SentanceError> for Error
|
||||
{
|
||||
#[inline] fn from(from: SentanceError) -> Self
|
||||
{
|
||||
Self::Sentance(from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests
|
||||
{
|
||||
use super::*;
|
||||
#[test]
|
||||
fn sentance()
|
||||
{
|
||||
let string = r#"Hello world.
|
||||
I am a string, that is a string. Strings, I love them!!!
|
||||
|
||||
Owo uwu"#;
|
||||
let sentances = Sentance::new_iter(string);
|
||||
for sentance in sentances {
|
||||
let words = Word::new(sentance);
|
||||
println!("Word in {:?} -> {:?}", sentance, words);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,146 @@
|
||||
//! Sentance splitting
|
||||
use super::*;
|
||||
use std::{
|
||||
borrow::{
|
||||
Borrow,
|
||||
ToOwned,
|
||||
},
|
||||
ops::{
|
||||
Deref,DerefMut,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SentanceError;
|
||||
|
||||
/// A sentence
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[repr(transparent)]
|
||||
pub struct Sentance(str);
|
||||
|
||||
|
||||
macro_rules! new {
|
||||
($str:expr) => {
|
||||
unsafe {Sentance::new_unchecked($str)}
|
||||
};
|
||||
}
|
||||
|
||||
const DEFAULT_BOUNDARIES: &[char] = &['\n', '.', ':', '!', '?', '~'];
|
||||
|
||||
lazy_static! {
|
||||
static ref BOUNDARIES: smallmap::Map<char, ()> = {
|
||||
let mut map = smallmap::Map::new();
|
||||
for &chr in DEFAULT_BOUNDARIES.iter() {
|
||||
map.insert(chr, ());
|
||||
}
|
||||
map
|
||||
};
|
||||
}
|
||||
|
||||
#[inline] pub fn is_sentance_boundary(chr: char) -> bool
|
||||
{
|
||||
BOUNDARIES.contains_key(&chr)
|
||||
}
|
||||
|
||||
impl Sentance
|
||||
{
|
||||
/// Create a new sentance reference without checking for sentance boundaries
|
||||
pub unsafe fn new_unchecked<'a>(from: &'a str) -> &'a Self
|
||||
{
|
||||
std::mem::transmute(from)
|
||||
}
|
||||
|
||||
/// Create a single sentance
|
||||
pub fn single<'a>(from: &'a (impl AsRef<str> + 'a + ?Sized)) -> Result<&'a Self, SentanceError>
|
||||
{
|
||||
let from = from.as_ref();
|
||||
match from.find(is_sentance_boundary) {
|
||||
Some(_) => Err(SentanceError),
|
||||
_ => Ok(new!(from)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new section of sentances from this string
|
||||
#[inline] pub fn new<'a>(from: &'a (impl AsRef<str> + 'a + ?Sized)) -> Vec<&'a Self>
|
||||
{
|
||||
Self::new_iter(from)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Create a new iterator over sentances from this string.
|
||||
pub fn new_iter<'a>(from: &'a (impl AsRef<str> +'a + ?Sized)) -> impl Iterator<Item = &'a Self> + Clone
|
||||
{
|
||||
let from = from.as_ref();
|
||||
from.split_inclusive(is_sentance_boundary)
|
||||
.map(|x| new!(x.trim()))
|
||||
.filter(|x| !x.is_empty())
|
||||
}
|
||||
|
||||
/// Get the words in this sentance
|
||||
#[inline] pub fn words(&self) -> impl Iterator<Item = &'_ Word>
|
||||
{
|
||||
Word::new_iter(self)
|
||||
}
|
||||
}
|
||||
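// A minimal sketch of splitting on the default sentance boundaries above; the
// input text is an illustrative assumption.
#[cfg(test)]
mod sentance_sketch {
    use super::*;

    #[test]
    fn splits_on_boundaries() {
        let parts = Sentance::new("One. Two! Three?");
        assert_eq!(parts.len(), 3);
        assert_eq!(&parts[0][..], "One.");
        // Each sentance can then be split further into words.
        assert!(parts[1].words().count() >= 1);
    }
}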
|
||||
impl<'a> From<&'a str> for &'a Sentance
|
||||
{
|
||||
fn from(from: &'a str) -> Self
|
||||
{
|
||||
new!(from)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<str> for Sentance
|
||||
{
|
||||
fn as_ref(&self) -> &str
|
||||
{
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Sentance> for str
|
||||
{
|
||||
fn as_ref(&self) -> &Sentance
|
||||
{
|
||||
new!(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Borrow<Sentance> for String
|
||||
{
|
||||
fn borrow(&self) -> &Sentance {
|
||||
new!(&self[..])
|
||||
}
|
||||
}
|
||||
|
||||
impl ToOwned for Sentance
|
||||
{
|
||||
type Owned = String;
|
||||
fn to_owned(&self) -> Self::Owned {
|
||||
self.0.to_owned()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Sentance
|
||||
{
|
||||
type Target = str;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for Sentance
|
||||
{
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Sentance> for Sentance
|
||||
{
|
||||
#[inline] fn as_ref(&self) -> &Sentance
|
||||
{
|
||||
self
|
||||
}
|
||||
}
|
@ -0,0 +1,150 @@
|
||||
//! Word splitting
|
||||
use super::*;
|
||||
use std::{
|
||||
borrow::{
|
||||
Borrow,
|
||||
ToOwned,
|
||||
},
|
||||
ops::{
|
||||
Deref,DerefMut,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct WordError;
|
||||
|
||||
/// A word is a string containing no whitespace, representing part of a sentance
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[repr(transparent)]
|
||||
pub struct Word(str);
|
||||
|
||||
|
||||
macro_rules! new {
|
||||
($str:expr) => {
|
||||
unsafe {Word::new_unchecked($str)}
|
||||
};
|
||||
}
|
||||
|
||||
const DEFAULT_BOUNDARIES: &[char] = &['!', '.', ',', '*'];
|
||||
|
||||
lazy_static! {
|
||||
static ref BOUNDARIES: smallmap::Map<char, ()> = {
|
||||
let mut map = smallmap::Map::new();
|
||||
for &chr in DEFAULT_BOUNDARIES.iter() {
|
||||
map.insert(chr, ());
|
||||
}
|
||||
map
|
||||
};
|
||||
}
|
||||
|
||||
#[inline] pub fn is_word_boundary(chr: char) -> bool
|
||||
{
|
||||
chr.is_whitespace() || BOUNDARIES.contains_key(&chr)
|
||||
}
|
||||
|
||||
impl Word
|
||||
{
|
||||
/// Create a new word reference without checking for whitespace
|
||||
pub unsafe fn new_unchecked<'a>(from: &'a str) -> &'a Self
|
||||
{
|
||||
std::mem::transmute(from)
|
||||
}
|
||||
|
||||
/// Create a single word
|
||||
pub fn single<'a>(from: &'a (impl AsRef<Sentance> +?Sized +'a)) -> Result<&'a Self, WordError>
|
||||
{
|
||||
let from = from.as_ref();
|
||||
match from.find(is_word_boundary) {
|
||||
Some(_) => Err(WordError),
|
||||
_ => Ok(new!(from)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new section of words from this sentance
|
||||
pub fn new<'a>(from: &'a (impl AsRef<Sentance> +?Sized+'a)) -> Vec<&'a Self>
|
||||
{
|
||||
Self::new_iter(from)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Create a new iterator over words from this sentance.
|
||||
pub fn new_iter<'a, 'b>(from: &'a (impl AsRef<Sentance> +?Sized+'b)) -> impl Iterator<Item = &'a Self>
|
||||
where 'b: 'a
|
||||
{
|
||||
let from = from.as_ref();
|
||||
from.split_inclusive(is_word_boundary)
|
||||
.map(|x| x.trim())
|
||||
.filter(|x| !x.is_empty())
|
||||
.map(|x| new!(x))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a str> for &'a Word
|
||||
{
|
||||
fn from(from: &'a str) -> Self
|
||||
{
|
||||
new!(from)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<str> for Word
|
||||
{
|
||||
fn as_ref(&self) -> &str
|
||||
{
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Word> for str
|
||||
{
|
||||
fn as_ref(&self) -> &Word
|
||||
{
|
||||
new!(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Borrow<Word> for String
|
||||
{
|
||||
fn borrow(&self) -> &Word {
|
||||
new!(&self[..])
|
||||
}
|
||||
}
|
||||
|
||||
impl ToOwned for Word
|
||||
{
|
||||
type Owned = String;
|
||||
fn to_owned(&self) -> Self::Owned {
|
||||
self.0.to_owned()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Word
|
||||
{
|
||||
type Target = str;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for Word
|
||||
{
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Word> for Word
|
||||
{
|
||||
#[inline] fn as_ref(&self) -> &Word
|
||||
{
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn words(input: &str) -> impl Iterator<Item=&'_ Word>
|
||||
{
|
||||
input.split_inclusive(is_word_boundary)
|
||||
.map(|x| x.trim())
|
||||
.filter(|x| !x.is_empty())
|
||||
.map(|x| new!(x))
|
||||
}
|
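// A minimal sketch of the free `words` helper above, applied to an arbitrary
// whitespace-separated string.
#[cfg(test)]
mod word_sketch {
    use super::*;

    #[test]
    fn splits_on_whitespace() {
        let collected: Vec<&Word> = words("the quick brown fox").collect();
        assert_eq!(collected.len(), 4);
        assert_eq!(&collected[0][..], "the");
    }
}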
@ -0,0 +1,132 @@
|
||||
//! Saving and loading chain
|
||||
use super::*;
|
||||
use std::{
|
||||
sync::Arc,
|
||||
path::{
|
||||
Path,
|
||||
},
|
||||
io,
|
||||
};
|
||||
use tokio::{
|
||||
time::{
|
||||
self,
|
||||
Duration,
|
||||
},
|
||||
fs::{
|
||||
OpenOptions,
|
||||
},
|
||||
prelude::*,
|
||||
};
|
||||
use futures::{
|
||||
future::{
|
||||
OptionFuture,
|
||||
},
|
||||
};
|
||||
#[cfg(feature="compress-chain")]
|
||||
use async_compression::{
|
||||
tokio_02::{
|
||||
write::{
|
||||
BzEncoder,
|
||||
BzDecoder,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const SAVE_INTERVAL: Option<Duration> = Some(Duration::from_secs(2));
|
||||
|
||||
|
||||
#[cfg(feature="compress-chain")]
|
||||
type Compressor<T> = BzEncoder<T>;
|
||||
#[cfg(feature="compress-chain")]
|
||||
type Decompressor<T> = BzDecoder<T>;
|
||||
|
||||
|
||||
pub async fn save_now(state: &State) -> io::Result<()>
|
||||
{
|
||||
let chain = state.chain_ref().read().await;
|
||||
use std::ops::Deref;
|
||||
let to = &state.config().file;
|
||||
save_now_to(chain.deref(),to).await
|
||||
}
|
||||
|
||||
async fn save_now_to(chain: &Chain<String>, to: impl AsRef<Path>) -> io::Result<()>
|
||||
{
|
||||
debug!("Saving chain to {:?}", to.as_ref());
|
||||
let mut file = OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.open(to).await?;
|
||||
let chain = serde_cbor::to_vec(chain).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
|
||||
{
|
||||
#[cfg(feature="compress-chain")]
|
||||
let mut file = Compressor::new(&mut file);
|
||||
file.write_all(&chain[..]).await?;
|
||||
#[cfg(feature="compress-chain")]
|
||||
file.flush().await?;
|
||||
#[cfg(feature="compress-chain")]
|
||||
file.shutdown().await?;
|
||||
}
|
||||
file.flush().await?;
|
||||
file.shutdown().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start the save loop for this chain
|
||||
pub async fn host(mut state: Box<State>)
|
||||
{
|
||||
let to = state.config().file.to_owned();
|
||||
let interval = state.config().save_interval();
|
||||
let when = Arc::clone(state.when_ref());
|
||||
trace!("Setup oke. Waiting on init");
|
||||
if state.on_init().await.is_ok() {
|
||||
debug!("Begin save handler");
|
||||
while Arc::strong_count(&when) > 1 {
|
||||
{
|
||||
let chain = state.chain_ref().read().await;
|
||||
use std::ops::Deref;
|
||||
if let Err(e) = save_now_to(chain.deref(), &to).await {
|
||||
error!("Failed to save chain: {}", e);
|
||||
} else {
|
||||
info!("Saved chain to {:?}", to);
|
||||
}
|
||||
}
|
||||
|
||||
tokio::select!{
|
||||
_ = OptionFuture::from(interval.map(|interval| time::delay_for(interval))) => {},
|
||||
_ = state.on_shutdown() => {
|
||||
break;
|
||||
}
|
||||
}
|
||||
when.notified().await;
|
||||
if state.has_shutdown() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
debug!("Shutdown called before init");
|
||||
}
|
||||
trace!("Saver exiting");
|
||||
}
|
||||
|
||||
/// Try to load a chain from this path
|
||||
pub async fn load(from: impl AsRef<Path>) -> io::Result<Chain<String>>
|
||||
{
|
||||
debug!("Loading chain from {:?}", from.as_ref());
|
||||
#[allow(unused_mut)]
|
||||
let mut file = OpenOptions::new()
|
||||
.read(true)
|
||||
.open(from).await?;
|
||||
#[allow(unused_mut)]
|
||||
let mut whole = Vec::new();
|
||||
#[cfg(feature="compress-chain")]
|
||||
let mut whole = Decompressor::new(whole);
|
||||
tokio::io::copy(&mut file, &mut whole).await?;
|
||||
whole.flush().await?;
|
||||
#[cfg(feature="compress-chain")]
|
||||
whole.shutdown().await?;
|
||||
#[cfg(feature="compress-chain")]
|
||||
let whole = whole.into_inner();
|
||||
serde_cbor::from_slice(&whole[..])
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))
|
||||
}
|
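// A minimal sketch of using `load` with a fallback to an empty chain, mirroring
// what main() does elsewhere in this diff; the helper name is an assumption.
async fn load_or_new(path: &Path) -> Chain<String>
{
    match load(path).await {
        Ok(chain) => chain,
        Err(_) => Chain::new(),
    }
}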
@ -0,0 +1,36 @@
|
||||
//! /sentance/
|
||||
use super::*;
|
||||
use futures::StreamExt;
|
||||
|
||||
pub async fn body(state: State, num: Option<usize>, mut output: mpsc::Sender<String>) -> Result<(), gen::GenBodyError>
|
||||
{
|
||||
let string = {
|
||||
let mut chain = state.chain_read();
|
||||
|
||||
match num {
|
||||
None => chain.next().await.ok_or_else(gen::GenBodyError::default)?,
|
||||
Some(num) if num < state.config().max_gen_size => {//(0..num).map(|_| chain.generate_str()).join("\n"),
|
||||
let chain = chain.take(num);
|
||||
chain.collect::<Vec<_>>().await.join("\n")//TODO: Stream version of JoinStrExt
|
||||
},
|
||||
_ => return Err(Default::default()),
|
||||
}
|
||||
};
|
||||
|
||||
debug!("Taking {:?} from {:?}" ,num, string);
|
||||
let filter = state.outbound_filter();
|
||||
if let Some(num) = num {
|
||||
for sen in sanitise::Sentance::new_iter(&string).take(num)
|
||||
{
|
||||
output.send(filter.filter_owned(sen.to_owned())).await?;
|
||||
}
|
||||
} else {
|
||||
output.send(filter.filter_owned(match sanitise::Sentance::new_iter(&string)
|
||||
.max_by_key(|x| x.len()) {
|
||||
Some(x) => x,
|
||||
/*#[cold]*/ None => return Ok(()),
|
||||
}.to_owned())).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
@ -0,0 +1,71 @@
|
||||
//! Unix signals
|
||||
use super::*;
|
||||
use tokio::{
|
||||
signal::unix::{
|
||||
self,
|
||||
SignalKind,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
|
||||
pub async fn handle(mut state: State)
|
||||
{
|
||||
let mut usr1 = unix::signal(SignalKind::user_defined1()).expect("Failed to hook SIGUSR1");
|
||||
let mut usr2 = unix::signal(SignalKind::user_defined2()).expect("Failed to hook SIGUSR2");
|
||||
let mut quit = unix::signal(SignalKind::quit()).expect("Failed to hook SIGQUIT");
|
||||
let mut io = unix::signal(SignalKind::io()).expect("Failed to hook IO");
|
||||
|
||||
trace!("Setup oke. Waiting on init");
|
||||
if state.on_init().await.is_ok() {
|
||||
debug!("Begin signal handler");
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = state.on_shutdown() => {
|
||||
break;
|
||||
}
|
||||
_ = usr1.recv() => {
|
||||
info!("Got SIGUSR1. Causing chain write.");
|
||||
state.push_now();
|
||||
},
|
||||
_ = usr2.recv() => {
|
||||
info!("Got SIGUSR2. Loading chain immediately.");
|
||||
match save::load(&state.config().file).await {
|
||||
Ok(new) => {
|
||||
{
|
||||
let mut chain = state.chain_ref().write().await;
|
||||
*chain = new;
|
||||
}
|
||||
trace!("Replaced with read chain");
|
||||
},
|
||||
Err(e) => {
|
||||
error!("Failed to load chain from file, keeping current: {}", e);
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
_ = io.recv() => {
|
||||
info!("Got SIGIO. Saving chain immediately.");
|
||||
if let Err(e) = save::save_now(&state).await {
|
||||
error!("Failed to save chain: {}", e);
|
||||
} else{
|
||||
trace!("Saved chain okay");
|
||||
}
|
||||
},
|
||||
_ = quit.recv() => {
|
||||
warn!("Got SIGQUIT. Saving chain then aborting.");
|
||||
if let Err(e) = save::save_now(&state).await {
|
||||
error!("Failed to save chain: {}", e);
|
||||
} else{
|
||||
trace!("Saved chain okay.");
|
||||
}
|
||||
error!("Aborting");
|
||||
std::process::abort()
|
||||
},
|
||||
}
|
||||
}
|
||||
} else {
|
||||
debug!("Shutdown called before init()");
|
||||
}
|
||||
trace!("Exiting");
|
||||
}
|
@ -0,0 +1,161 @@
|
||||
//! State
|
||||
use super::*;
|
||||
use tokio::{
|
||||
sync::{
|
||||
watch,
|
||||
mpsc::error::SendError,
|
||||
},
|
||||
};
|
||||
use config::Config;
|
||||
use msg::Initialiser;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ShutdownError;
|
||||
|
||||
impl error::Error for ShutdownError{}
|
||||
impl fmt::Display for ShutdownError
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
write!(f, "shutdown signal caught")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct State
|
||||
{
|
||||
config: Arc<Box<(Config, config::Cache)>>, //to avoid cloning config
|
||||
chain: handle::ChainHandle<String>,
|
||||
//save: Arc<Notify>,
|
||||
begin: Initialiser,
|
||||
|
||||
shutdown: Arc<watch::Sender<bool>>,
|
||||
shutdown_recv: watch::Receiver<bool>,
|
||||
}
|
||||
|
||||
impl State
|
||||
{
|
||||
/// Consume this `state` into its initialiser
|
||||
pub fn into_initialiser(self) -> Initialiser
|
||||
{
|
||||
self.begin
|
||||
}
|
||||
|
||||
/// Allow the saver task to start work
|
||||
pub fn init(self) -> Result<(), msg::InitError>
|
||||
{
|
||||
self.begin.set()
|
||||
}
|
||||
|
||||
/// Has `init` been called?
|
||||
pub fn is_init(&self) -> bool
|
||||
{
|
||||
self.begin.is_set()
|
||||
}
|
||||
|
||||
/// A future that completes either when `init` is called, or `shutdown`.
|
||||
pub async fn on_init(&mut self) -> Result<(), ShutdownError>
|
||||
{
|
||||
if self.has_shutdown() {
|
||||
return Err(ShutdownError);
|
||||
}
|
||||
tokio::select! {
|
||||
Ok(()) = self.begin.clone_into_wait() => Ok(()),
|
||||
_ = self.on_shutdown() => {
|
||||
debug!("on_init(): shutdown received");
|
||||
Err(ShutdownError)
|
||||
}
|
||||
else => Err(ShutdownError)
|
||||
}
|
||||
}
|
||||

    pub fn inbound_filter(&self) -> &sanitise::filter::Filter
    {
        &self.config_cache().inbound_filter
    }
    pub fn outbound_filter(&self) -> &sanitise::filter::Filter
    {
        &self.config_cache().outbound_filter
    }

    pub fn new(config: Config, cache: config::Cache, chain: handle::ChainHandle<String>) -> Self
    {
        let (shutdown, shutdown_recv) = watch::channel(false);
        Self {
            config: Arc::new(Box::new((config, cache))),
            chain,
            begin: Initialiser::new(),
            shutdown: Arc::new(shutdown),
            shutdown_recv,
        }
    }

    pub fn config(&self) -> &Config
    {
        &self.config.as_ref().0
    }

    pub fn config_cache(&self) -> &config::Cache
    {
        &self.config.as_ref().1
    }

    /*pub fn notify_save(&self)
    {
        self.save.notify();
    }*/

    /*pub fn chain(&self) -> &RwLock<Chain<String>>
    {
        &self.chain.as_ref()
    }*/
    pub fn chain_ref(&self) -> &RwLock<Chain<String>>
    {
        &self.chain.chain_ref()
    }

    pub fn chain_read(&self) -> handle::ChainStream<String>
    {
        self.chain.read()
    }

    /// Write to this chain
    pub async fn chain_write<'a, T: Stream<Item = String>>(&'a self, buffer: T) -> Result<(), SendError<Vec<String>>>
    {
        self.chain.write_stream(buffer).await
    }
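`chain_write` accepts any `Stream` of `String`s and hands it to the chain worker through the handle. A small sketch of feeding a whitespace-split buffer through it (the `body` value and the surrounding error handling are assumptions; `futures::stream::iter` comes from the `futures` crate already in the dependency list):

    let body = String::from("feed me to the markov chain");
    let words: Vec<String> = body.split_whitespace().map(String::from).collect();
    if let Err(e) = state.chain_write(futures::stream::iter(words)).await {
        error!("Failed to feed chain: {}", e);
    }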

    pub fn when_ref(&self) -> &Arc<Notify>
    {
        &self.chain.notify_when()
    }

    /// Force the chain to push through now
    pub fn push_now(&self)
    {
        self.chain.push_now()
    }

    /// Signal shutdown to every clone of this `State` and wake waiting tasks
    pub fn shutdown(self)
    {
        self.shutdown.broadcast(true).expect("Failed to communicate shutdown");
        self.chain.hang();
        self.when_ref().notify();
    }

    /// Has `shutdown` been called?
    pub fn has_shutdown(&self) -> bool
    {
        *self.shutdown_recv.borrow()
    }

    /// A future that completes once `shutdown` has been called (or the channel closes)
    pub async fn on_shutdown(&mut self)
    {
        if !self.has_shutdown() {
            while let Some(false) = self.shutdown_recv.recv().await {

            }
        }
    }
}
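`shutdown()` consumes one clone of the `State`, broadcasts `true` on the `watch` channel, and wakes the chain worker; every other clone observes it either by polling `has_shutdown()` or by awaiting `on_shutdown()`. A rough sketch of both sides of that handshake (the spawned task body is illustrative only; `state` is assumed to be the `State` built at startup):

    let mut watcher = state.clone();
    tokio::spawn(async move {
        watcher.on_shutdown().await;            // resolves once shutdown() fires
        trace!("background task: shutting down");
    });

    state.shutdown(); // flip the watch channel to `true` and wake the chain worker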
@ -0,0 +1,92 @@
//! Utils

/// Types that can be constructed empty or with a pre-reserved capacity
pub trait NewCapacity: Sized
{
    fn new() -> Self;
    fn with_capacity(cap: usize) -> Self;
}

impl NewCapacity for String
{
    fn new() -> Self
    {
        Self::new()
    }

    fn with_capacity(cap: usize) -> Self
    {
        Self::with_capacity(cap)
    }
}

impl<T> NewCapacity for Vec<T>
{
    fn new() -> Self
    {
        Self::new()
    }

    fn with_capacity(cap: usize) -> Self
    {
        Self::with_capacity(cap)
    }
}

/// Construct a `T`, pre-allocating from the iterator's `size_hint()` when it gives one
pub fn hint_cap<T: NewCapacity, I: Iterator>(iter: &I) -> T
{
    match iter.size_hint() {
        (0, Some(0)) | (0, None) => T::new(),
        (_, Some(x)) | (x, _) => T::with_capacity(x)
    }
}
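`hint_cap` turns an iterator's `size_hint()` into a pre-sized buffer: the upper bound when one is known, the lower bound otherwise, and a plain `new()` when the hint is empty. A short usage sketch (the word list is invented for illustration):

    let words = ["such", "markov", "many", "chain"].iter().map(|s| s.to_string());
    let mut out: Vec<String> = hint_cap(&words); // capacity 4, taken from the size hint
    out.extend(words);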

#[macro_export] macro_rules! opaque_error {
    ($msg:literal) => {
        {
            #[derive(Debug)]
            struct OpaqueError;

            impl ::std::error::Error for OpaqueError{}
            impl ::std::fmt::Display for OpaqueError
            {
                fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result
                {
                    write!(f, $msg)
                }
            }
            OpaqueError
        }
    };
    ($msg:literal $($tt:tt)*) => {
        {
            #[derive(Debug)]
            struct OpaqueError(String);

            impl ::std::error::Error for OpaqueError{}
            impl ::std::fmt::Display for OpaqueError
            {
                fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result
                {
                    write!(f, "{}", self.0)
                }
            }
            OpaqueError(format!($msg $($tt)*))
        }
    };
    (yield $msg:literal $($tt:tt)*) => {
        {
            #[derive(Debug)]
            struct OpaqueError<'a>(fmt::Arguments<'a>);

            impl ::std::error::Error for OpaqueError{}
            impl ::std::fmt::Display for OpaqueError
            {
                fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result
                {
                    write!(f, "{}", self.0)
                }
            }
            OpaqueError(format_args!($msg $($tt)*))
        }
    };
}
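`opaque_error!` expands to a one-off error type at the call site, which is convenient for ad-hoc `Box<dyn Error>` returns without defining a named error. A hedged usage sketch exercising the formatted arm (the function and message are invented for illustration):

    fn parse_limit(s: &str) -> Result<usize, Box<dyn std::error::Error>>
    {
        s.trim().parse().map_err(|_| opaque_error!("invalid limit: {:?}", s).into())
    }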