You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
144 lines
3.9 KiB
144 lines
3.9 KiB
#ifndef _LEVEN_H
|
|
#define _LEVEN_H
|
|
|
|
#ifdef __cplusplus
|
|
|
|
#include <string_view>
|
|
#include <concepts>
|
|
#include <vector>
|
|
#include <span>
|
|
#include <tuple>
|
|
|
|
extern "C" {
|
|
#endif
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
|
|
namespace exopt { namespace util [[gnu::visibility("internal")]] {
|
|
#if 0
|
|
/// Allocate the backing storage for a two-dimensional array and return it as a `void**`.
/// NOTE(review): the definition is not visible from this header — presumably `elem` is the
/// size of one element in bytes and `d1` / `d2` are the two dimension lengths; confirm which
/// of the two is the outer (first-index) dimension before relying on it.
/// The declaration is annotated `[[gnu::returns_nonnull]]`.
void** allocate_array(size_t elem, size_t d1, size_t d2) [[gnu::returns_nonnull]];
|
|
/// Release an array passed as `void**`; declared `noexcept`.
/// NOTE(review): presumably the counterpart of `allocate_array()` and only valid for pointers
/// obtained from it — the definition is not visible here, confirm against the implementation.
void deallocate_array(void** array) noexcept;
|
|
|
|
/*constexpr auto min(auto const& a, auto const& b) noexcept
|
|
-> std::common_type_t<decltype(a), decltype(b)>
|
|
requires(requires(decltype(a) _a, decltype(b) _b) {
|
|
{ _a < _b } -> std::convertible_to<bool>;
|
|
}) {
|
|
return a < b ? a : b;
|
|
}*/
|
|
|
|
/// Compute the levenshtein distance.
|
|
constexpr auto leven_diff(std::convertible_to<std::string_view> auto const& a, std::convertible_to<std::string_view> auto const& b) noexcept {
|
|
std::string_view s = a,
|
|
t = b;
|
|
const signed long n = long(s.size());
|
|
const signed long m = long(t.size());
|
|
|
|
if(__builtin_expect(!n, 0)) return m;
|
|
if(__builtin_expect(!m, 0)) return n;
|
|
|
|
//constexpr
|
|
auto&& work_on_d = [&](auto& d) {
|
|
for(long i=0; i <= n ; d[i][0] = i) i+=1;
|
|
for(long i=0; i <= m ; d[0][i] = i) i+=1;
|
|
|
|
for(long i=1; i <= n; i++) {
|
|
for(long j=1; j<=m; j++)
|
|
{
|
|
using std::min;
|
|
const auto cost = std::type_identity_t<signed long>(! (t[j-1] == s[i-1]));
|
|
d[i][j] = min(
|
|
min(d[i-1][j] + 1, d[i][j-1] + 1)
|
|
, d[i-1][j-1] + cost);
|
|
}
|
|
}
|
|
return d[n][m];
|
|
};
|
|
//TODO: XXX Multidimensional array not working... fuck THIS
|
|
if consteval {
|
|
using Vec2 = std::vector<std::vector<signed long>>;
|
|
struct vec : public Vec2 ///TODO: XXX: best way to do this? We just want the `d`` ctor overload...
|
|
{
|
|
//TODO: for below ctor of inner (size_t, constT&), since it's ambiguous and chooses the incorrect one no matter what
|
|
using Vec2::Vec2;//(size_t, signed long const&, Vec2::allocator_type const&);
|
|
vec(signed long, signed long) = delete;
|
|
vec(signed long, size_t) = delete;
|
|
vec(size_t) = delete;
|
|
|
|
constexpr ~vec() noexcept = default;
|
|
|
|
|
|
constexpr operator Vec2&&() && noexcept { return std::move(*static_cast<Vec2*>(this)); }
|
|
constexpr operator Vec2&() const&& = delete;
|
|
};
|
|
|
|
const auto inner = std::vector<signed long>{ size_t(m) + 1 };
|
|
std::vector<std::vector<signed long>> d{ static_cast<Vec2&&>(std::move(vec{size_t(n) + 1, inner})) };
|
|
return work_on_d(d);
|
|
} else {
|
|
auto** d = reinterpret_cast<signed long**>(allocate_array(sizeof(signed long), m+1, n+1));
|
|
try {
|
|
return work_on_d(d);
|
|
} catch(...) {
|
|
deallocate_array(reinterpret_cast<void**>(d));
|
|
throw;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
constexpr auto leven_diff(std::string_view s1, std::string_view s2) noexcept {
|
|
const size_t
|
|
m(s1.size()),
|
|
n(s2.size());
|
|
|
|
if(__builtin_expect(!m, false)) return n;
|
|
if(__builtin_expect(!n, false)) return m;
|
|
|
|
auto&& work_on_d = [&](auto&& costs) {
|
|
for(size_t i=0;i <= n; i++) costs[i] = i;
|
|
|
|
size_t i{0};
|
|
for(auto const& c1: s1) {
|
|
costs[0] = i + 1;
|
|
size_t corner { i },
|
|
j { 0 };
|
|
|
|
for(auto const& c2: s2) {
|
|
size_t upper { costs[j+1] };
|
|
if( c1 == c2 ) costs[j+1] = corner;
|
|
else {
|
|
using std::min;
|
|
size_t t{min(upper, corner)};//upper<corner ? upper : corner
|
|
costs[j+1] = min(costs[j], t) + 1;
|
|
|
|
}
|
|
corner = upper;
|
|
j += 1;
|
|
}
|
|
i += 1;
|
|
}
|
|
return costs[n];
|
|
};
|
|
|
|
if consteval {
|
|
return work_on_d(std::vector<size_t>(n + 1));
|
|
} else {
|
|
thread_local static std::vector<size_t> d;
|
|
const size_t n1 = n + 1;
|
|
if(__builtin_expect(d.size() < n1, false)) d.resize(n1);
|
|
// We don't need to clear the buffer, it will be reinitialised by `work_on_d()`.
|
|
return work_on_d(std::span<size_t>{d.begin(), n1});
|
|
}
|
|
}
|
|
template<std::convertible_to<std::string_view> S1, std::convertible_to<std::string_view> S2>
|
|
constexpr decltype(auto) leven_diff(const S1& sa, const S2& sb) noexcept {
|
|
using str = std::string_view;
|
|
return leven_diff(str{sa}, str{sb});
|
|
}
|
|
|
|
} }
|
|
#endif
|
|
|
|
#endif /* _LEVEN_H */
|