You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
libexopt/include/leven.h

150 lines
4.8 KiB

#ifndef _LEVEN_H
#define _LEVEN_H
#include "exopt.h"
#ifdef __cplusplus
//#include "util.hh"
#include <string_view>
#include <concepts>
#include <vector>
#include <map>
#include <array>
#include <span>
#include <tuple>
extern "C" {
#endif
// C-linkage declaration of the Levenshtein distance routine; this header only declares it.
//
// Takes two character pointers and a `size_t`, and returns a `size_t`.
// Both pointers are declared non-null via `nonnull(1, 2)`.
//
// NOTE(review): the third parameter is unnamed -- presumably a length or bound for the strings; confirm against the implementation.
// NOTE(review): `_EO(...)` is defined in "exopt.h" (not visible here); presumably it supplies the symbol prefix and the `restrict` / `internal` / `readonly` spellings -- confirm there.
size_t _EO(leven_diff)(const char* _EO(restrict), const char* _EO(restrict), size_t)
_EO(internal)
_EO(readonly)
__attribute__((nonnull(1, 2)))
;
//TODO: C interface for `string_ord` and `sim_map`?
#ifdef __cplusplus
}
namespace exopt { namespace util [[gnu::visibility("internal")]] {
/// Levenshtein (edit) distance between `s1` and `s2`.
///
/// Counts the minimum number of single-character insertions, deletions and substitutions that turn one string into the other.
/// Only one row of `s2.size() + 1` costs is kept, updated in place for every character of `s1`.
///
/// Where that row lives:
///  - `N > 0` and `s2.size() <= N`: a stack `std::array` of `N + 1` elements.
///  - otherwise, during constant evaluation: a temporary `std::vector`.
///  - otherwise, at run time: a `thread_local` `std::vector` that is reused between calls and only ever grows.
///
/// `N` is a static upper bound on `s2.size()` (`0` means "unknown"); it is supplied by the character-array overload.
/// Returns the distance; when either string is empty that is the length of the other one.
/// The function is `noexcept`, so a failed allocation while growing the row terminates the program.
template<size_t N =0> // NOTE: Do not change this manually //
constexpr auto leven_diff(std::string_view s1, std::string_view s2) noexcept {
    const size_t m{s1.size()};
    const size_t n{s2.size()};

    // With one side empty the answer is simply the other side's length.
    if(__builtin_expect(!m, false)) return n;
    if(__builtin_expect(!n, false)) return m;

    // Runs the single-row recurrence over any indexable buffer with at least `n + 1` elements.
    auto&& work_on_d = [&](auto&& costs) -> size_t {
        // Row 0: turning the empty prefix of `s1` into the first `k` characters of `s2` costs `k`.
        for(size_t k = 0; k <= n; k++) costs[k] = k;

        size_t i{0};
        for(auto const& c1: s1) {
            costs[0] = i + 1;
            // `corner` is the previous row's value diagonally up-left of the cell being written.
            size_t corner{i};
            size_t j{0};
            for(auto const& c2: s2) {
                // Previous row's value directly above; read it before it is overwritten.
                const size_t upper{costs[j+1]};
                if(c1 == c2) costs[j+1] = corner;
                else {
                    using std::min;
                    // Cheapest of deletion (`upper`), substitution (`corner`) and insertion (`costs[j]`).
                    const size_t t{min(upper, corner)};
                    costs[j+1] = min(costs[j], t) + 1;
                }
                corner = upper;
                j += 1;
            }
            i += 1;
        }
        return costs[n];
    };

    if constexpr(N > 0) {
        // The fixed row has room for `N + 1` costs only. If `s2` is longer than the promised bound,
        // indexing it would write past the array, so fall through to a dynamically sized row instead.
        if(__builtin_expect(n <= N, true)) return work_on_d(std::array<size_t, N+1>{});
    }
    if consteval {
        return work_on_d(std::vector<size_t>(n + 1));
    } else {
        thread_local static std::vector<size_t> d;
        const size_t n1 = n + 1;
        if(__builtin_expect(d.size() < n1, false)) d.resize(n1);
        // We don't need to clear the buffer, it will be reinitialised by `work_on_d()`.
        return work_on_d(std::span<size_t>{d.begin(), n1});
    }
}
template<std::convertible_to<std::string_view> S1, std::convertible_to<std::string_view> S2>
constexpr decltype(auto) leven_diff(const S1& sa, const S2& sb) noexcept {
using str = std::string_view;
return leven_diff(str{sa}, str{sb});
}
//TODO: Add overloads for when one bound is known statically but the other is not. Would this be useful?
/// Overload for character arrays (typically string literals), whose extents are known at compile time.
///
/// The extent `N` of `sb` bounds its length, so it is passed on as the static row size of `leven_diff<N>()`.
/// Each array is read up to its first NUL or, when it contains none, up to its full extent -- never beyond it.
///
/// There is deliberately no shortcut based on `M` and `N` alone: the difference in length is only a lower bound on the distance, just fixing the sizes does not make the edit valid.
template<size_t M, size_t N>
constexpr decltype(auto) leven_diff(const char (&sa)[M], const char (&sb)[N]) noexcept {
    // View of an array's contents that stops at the first NUL but never scans past the extent.
    // (Constructing a `std::string_view` straight from the array measures it like a C string and
    // would run off the end of an array that holds no terminator.)
    constexpr auto bounded_view = [](const char* data, size_t extent) noexcept {
        const std::string_view whole{data, extent};
        return whole.substr(0, whole.find('\0'));
    };
    // Array extents are never 0, so there is no empty-array case to special-case here;
    // empty *strings* (e.g. `""`, extent 1) are handled by the callee.
    return leven_diff<N>(bounded_view(sa, M), bounded_view(sb, N));
}
/// String key with a strict total order designed for Levenshtein-based lookups.
///
/// Used as sorted map key for finding suggestions for possible typos. All valid strings are added as `string_ord`, and the invalid string lookup looks for its closest neighbour(s) via `difference_from()` / `difference_to()`; those are presented to the user in a "did you mean ..." format with the lowest distance first and so on.
///
/// Ordering: keys compare by length first and lexicographically second.
/// A string's length is its Levenshtein distance from the empty string, and the difference in length of two strings is a lower bound on their distance, so keys that can be close to a candidate are stored near each other.
/// The order must not be derived from `leven_diff(a, b)` versus `leven_diff(b, a)`: the distance is symmetric, so that comparison is always "equal" and every key would be equivalent inside a `std::map`.
template<std::convertible_to<std::string_view> S>
struct string_ord {
    using string_type = std::remove_reference_t<S>;

    /// Wraps `str`, moving it into the key (rvalues only).
    constexpr string_ord(string_type&& str) noexcept
    : m_string(std::move(str)) {}

    constexpr string_ord(string_ord const&) = default;
    constexpr string_ord(string_ord &&) = default;
    constexpr string_ord& operator=(string_ord const&) = default;
    constexpr string_ord& operator=(string_ord &&) = default;
    constexpr ~string_ord() = default;

    /// Edit distance computed as `leven_diff(<wrapped string>, str)`.
    constexpr auto difference_from(std::convertible_to<std::string_view> auto const& str) const noexcept {
        return leven_diff(m_string, str);
    }
    /// Edit distance computed as `leven_diff(str, <wrapped string>)`.
    constexpr auto difference_to(std::convertible_to<std::string_view> auto const& str) const noexcept {
        return leven_diff(str, m_string);
    }

    /// Strict total order: shorter strings first, equal lengths ordered lexicographically.
    /// Two keys compare equal exactly when their characters are identical.
    constexpr friend auto operator<=>(const string_ord& a, const string_ord& b) noexcept {
        const std::string_view lhs = a;
        const std::string_view rhs = b;
        if(const auto by_length = lhs.size() <=> rhs.size(); by_length != 0) return by_length;
        return lhs <=> rhs;
    }

    /// Read-only view of the wrapped string.
    constexpr operator std::string_view() const noexcept { return m_string; }

    /// Direct access to the wrapped string.
    constexpr string_type& string() noexcept { return m_string; }
    constexpr string_type const& string() const noexcept { return m_string; }

    /// Dereference yields the wrapped string, preserving the value category and constness of `*this`.
    constexpr string_type& operator*() & noexcept { return m_string; }
    constexpr string_type const& operator*() const& noexcept { return m_string; }
    constexpr string_type&& operator*() && noexcept { return std::move(m_string); }
    constexpr string_type const&& operator*() const&& noexcept { return std::move(m_string); }
private:
    string_type m_string;
};
/// Ordered map from `string_ord<S>` keys to values of type `T`.
///
/// Used to store all valid command names, so when an invalid one is found, the closest matching one(s) can be suggested to the user in a "did you mean ..." format with the *lowest difference* neighbour(s) to the invalid string first.
///
/// `T` is the mapped value type; `S` is the string type wrapped by each key and defaults to `std::string_view`.
/// Key order (and therefore key uniqueness and iteration order) is whatever `string_ord`'s `operator<=>` defines.
/// NOTE(review): with the default `std::string_view` keys the map presumably does not own the characters it refers to -- confirm that callers keep the underlying strings alive.
template<typename T, std::convertible_to<std::string_view> S = std::string_view>
using sim_map = std::map<string_ord<S>, T>;
} // ::util //
}
#endif
#endif /* _LEVEN_H */