exopt::util::leven_diff(): Added working levenshtein distance algorithm.

Fortune for libexopt's current commit: Half blessing − 半吉
boxed_is_boxed_value
Avril 2 years ago
commit 388068bc1d
Signed by: flanchan
GPG Key ID: 284488987C31F630

4
.gitignore vendored

@ -0,0 +1,4 @@
obj/
lib*.so
lib*.a
*.o

@ -0,0 +1,234 @@
# Makefile template, generic for libraries (static + shared)

# Several recipes in this file use bash brace expansion (for example
# `obj/c{,xx}/src{,/rng}` and `lib$(PROJECT).{a,so{,.*}}`). GNU make runs
# recipes with /bin/sh unless SHELL is set, and a POSIX sh such as dash passes
# those braces through literally, so request bash explicitly.
SHELL := /bin/bash

# Library base name: outputs are lib$(PROJECT).a and lib$(PROJECT).so
PROJECT=exopt
# Passed to the compiler as -D_DEFAULT_NS=... when non-empty
DEFAULT_NAMESPACE=exopt
# Dotted version string; split into MAJOR / MINOR / BUGFIX components further down
VERSION=0.0.0
# C and C++ translation units that make up the library
SRC_C = $(wildcard src/*.c)
SRC_CXX = $(wildcard src/*.cpp)
# Header directory: added to the include path and copied by `install`
INCLUDE=include
# Installation prefix; only defaulted when the caller did not provide one
ifeq ($(PREFIX),)
PREFIX := /usr/local
endif
# Default archivers
AR?=ar
RANLIB?=ranlib
# Use gcc-{ar,ranlib} when using gcc
ifeq ($(CXX),g++)
AR=gcc-ar
RANLIB=gcc-ranlib
endif
# Link to these libraries dynamically
SHARED_LIBS=
# Link to these libraries statically
STATIC_LIBS=
override __VERSION_SPLIT:= $(subst ., ,$(VERSION))
override __VERSION_REVISION:=$(word 3,$(__VERSION_SPLIT)) 0
VERSION_MAJOR:= $(word 1,$(__VERSION_SPLIT))
VERSION_MINOR:= $(word 2,$(__VERSION_SPLIT))
VERSION_BUGFIX:= $(word 3,$(__VERSION_SPLIT))
VERSION_REVISION:= $(word 2,$(subst r, ,$(__VERSION_REVISION)))
override __VERSION_SPLIT:= MAJOR:$(word 1,$(__VERSION_SPLIT)) MINOR:$(word 2,$(__VERSION_SPLIT)) BUGFIX:$(word 1,$(subst r, ,$(__VERSION_REVISION))) REVISION:$(word 2,$(subst r, ,$(__VERSION_REVISION))) REVISION_STRING:$(word 3,$(__VERSION_SPLIT))
COMMON_FLAGS?= -W -Wall
COMMON_FLAGS+= -pipe -Wstrict-aliasing -fno-strict-aliasing $(addprefix -I,$(INCLUDE))
COMMON_FLAGS+= $(addprefix -D_VERSION_,$(subst :,=,$(__VERSION_SPLIT))) '-D_VERSION="$(VERSION)"'
ifneq ($(DEFAULT_NAMESPACE),)
COMMON_FLAGS+= '-D_DEFAULT_NS=$(DEFAULT_NAMESPACE)'
endif
## CPU feature flags
CPU_FLAGS+=
DEFINITIONS?=_IMPL
COMMON_FLAGS+=$(addprefix -D,$(DEFINITIONS))
COMMON_FLAGS+=$(addprefix -m,$(CPU_FLAGS))
## For LTO and linking over all TUs in general
BINFLAGS+=
DEBUG_BINFLAGS+=
RELEASE_BINFLAGS+= -fuse-linker-plugin
# Target arch. Set to blank for generic
ARCH?=native
# Enable OpenMP and loop parallelisation? (dyn-links to openmp)
PARALLEL?=yes
OPT_FLAGS?= -fgraphite \
-floop-interchange -ftree-loop-distribution -floop-strip-mine -floop-block \
-fno-stack-check
SHARED_FLAGS+=-fPIC
SHARED_RELEASE_FLAGS+=
SHARED_DEBUG_FLAGS+=
STATIC_FLAGS+=
STATIC_RELEASE_FLAGS+=-ffat-lto-objects
STATIC_DEBUG_FLAGS+=
ifneq ($(ARCH),)
OPT_FLAGS+= $(addprefix -march=,$(ARCH))
endif
ifeq ($(PARALLEL),yes)
SHARED_FLAGS+= -fopenmp
SHARED_RELEASE_FLAGS+= -floop-parallelize-all -ftree-parallelize-loops=4
endif
CXX_OPT_FLAGS?= $(OPT_FLAGS) -felide-constructors
CSTD?=gnu2x
CXXSTD?=gnu++23
CFLAGS += $(COMMON_FLAGS) --std=$(CSTD)
CXXFLAGS += $(COMMON_FLAGS) --std=$(CXXSTD)
LDFLAGS += $(addsuffix .a,$(addprefix -l:lib,$(STATIC_LIBS))) $(addprefix -l,$(SHARED_LIBS))
STRIP=strip
# TODO: XXX: Benchmark to see if `-fno-plt` actually helps...
RELEASE_COMMON_FLAGS+= -fno-plt -fno-bounds-check
DEBUG_COMMON_FLAGS+= -ggdb -gz -ftrapv -fbounds-check
# -fanalyzer
ifneq ($(TARGET_SPEC_FLAGS),no)
RELEASE_CFLAGS?= -O3 -flto $(OPT_FLAGS)
RELEASE_CXXFLAGS?= -O3 -flto $(CXX_OPT_FLAGS)
RELEASE_LDFLAGS?= -Wl,-O3 -Wl,-flto
#SHARED_FLAGS+=$(SHARED_RELEASE_FLAGS)
DEBUG_CFLAGS?= -Og
DEBUG_CXXFLAGS?= -Og
DEBUG_LDFLAGS?=
endif
DEBUG_CFLAGS+=-DDEBUG $(DEBUG_COMMON_FLAGS)
DEBUG_CXXFLAGS+=-DDEBUG $(DEBUG_COMMON_FLAGS) -fasynchronous-unwind-tables
RELEASE_CFLAGS+=-DRELEASE $(RELEASE_COMMON_FLAGS)
RELEASE_CXXFLAGS+=-DRELEASE $(RELEASE_COMMON_FLAGS)
# Objects
OBJ_C = $(addprefix obj/c/,$(SRC_C:.c=.o))
OBJ_CXX = $(addprefix obj/cxx/,$(SRC_CXX:.cpp=.o))
OBJ = $(OBJ_C) $(OBJ_CXX)
# Phonies
.PHONY: release
release: | dirs
$(MAKE) lib$(PROJECT).a
@$(MAKE) clean-rebuild >> /dev/null
@$(MAKE) dirs >> /dev/null
$(MAKE) lib$(PROJECT).so
.PHONY: debug
debug: | dirs
$(MAKE) lib$(PROJECT)-debug.a
@$(MAKE) clean-rebuild >> /dev/null
@$(MAKE) dirs >> /dev/null
$(MAKE) lib$(PROJECT)-debug.so
# Rebuild both release and debug targets from scratch
.PHONY: all
all: | clean
@$(MAKE) release
@$(MAKE) clean-rebuild
@$(MAKE) debug
.PHONY: install
.PHONY: uninstall
.PHONY: test
test:
@rm -f $(PROJECT)-test
@$(MAKE) $(PROJECT)-test
# Targets
dirs:
@mkdir -p obj/c{,xx}/src{,/rng}
obj/c/%.o: %.c
$(CC) -c $< $(CFLAGS) -o $@ $(LDFLAGS)
obj/cxx/%.o: %.cpp
$(CXX) -c $< $(CXXFLAGS) -o $@ $(LDFLAGS)
lib$(PROJECT)-release.a: CFLAGS+= $(RELEASE_CFLAGS) $(STATIC_FLAGS) $(STATIC_RELEASE_FLAGS)
lib$(PROJECT)-release.a: CXXFLAGS += $(RELEASE_CXXFLAGS) $(STATIC_FLAGS) $(STATIC_RELEASE_FLAGS)
lib$(PROJECT)-release.a: LDFLAGS += $(RELEASE_LDFLAGS)
lib$(PROJECT)-release.a: $(OBJ)
$(AR) rcs $@ $^
$(RANLIB) $@
lib$(PROJECT)-debug.a: CFLAGS+= $(DEBUG_CFLAGS) $(STATIC_FLAGS) $(STATIC_DEBUG_FLAGS)
lib$(PROJECT)-debug.a: CXXFLAGS += $(DEBUG_CXXFLAGS) $(STATIC_FLAGS) $(STATIC_DEBUG_FLAGS)
lib$(PROJECT)-debug.a: LDFLAGS += $(DEBUG_LDFLAGS)
lib$(PROJECT)-debug.a: $(OBJ)
$(AR) rcs $@ $^
$(RANLIB) $@
lib$(PROJECT)-release.so: CFLAGS+= $(RELEASE_CFLAGS) $(SHARED_FLAGS) $(SHARED_RELEASE_FLAGS)
lib$(PROJECT)-release.so: CXXFLAGS += $(RELEASE_CXXFLAGS) $(SHARED_FLAGS) $(SHARED_RELEASE_FLAGS)
lib$(PROJECT)-release.so: LDFLAGS += $(RELEASE_LDFLAGS)
lib$(PROJECT)-release.so: BINFLAGS += $(RELEASE_BINFLAGS)
lib$(PROJECT)-release.so: $(OBJ)
$(CXX) -shared $^ $(BINFLAGS) $(CXXFLAGS) -o $@ $(LDFLAGS)
$(STRIP) $@
lib$(PROJECT)-debug.so: CFLAGS+= $(DEBUG_CFLAGS) $(SHARED_FLAGS) $(SHARED_DEBUG_FLAGS)
lib$(PROJECT)-debug.so: CXXFLAGS += $(DEBUG_CXXFLAGS) $(SHARED_FLAGS) $(SHARED_DEBUG_FLAGS)
lib$(PROJECT)-debug.so: LDFLAGS += $(DEBUG_LDFLAGS)
lib$(PROJECT)-debug.so: BINFLAGS += $(DEBUG_BINFLAGS)
lib$(PROJECT)-debug.so: $(OBJ)
$(CXX) -shared $^ $(BINFLAGS) $(CXXFLAGS) -o $@ $(LDFLAGS)
lib$(PROJECT).a: lib$(PROJECT)-release.a
ln -f $< $@
lib$(PROJECT).so: LDFLAGS+= -Wl,-soname,lib$(PROJECT).so.$(VERSION_MAJOR)
lib$(PROJECT).so: lib$(PROJECT)-release.so
ln -f $< $@.$(VERSION)
ln -sf $@.$(VERSION) $@.$(VERSION_MAJOR)
ln -sf $@.$(VERSION_MAJOR) $@
clean-rebuild:
rm -rf obj
clean: clean-rebuild
rm -f lib$(PROJECT){,-{release,debug,pgo}}.{a,so{,.*}}
rm -f $(PROJECT)-test
install:
install -d $(DESTDIR)$(PREFIX)/lib/
install -m 644 lib$(PROJECT).a $(DESTDIR)$(PREFIX)/lib/
install -s -m 755 lib$(PROJECT).so.$(VERSION) $(DESTDIR)$(PREFIX)/lib/
ln -sf lib$(PROJECT).so.$(VERSION) $(DESTDIR)$(PREFIX)/lib/lib$(PROJECT).so.$(VERSION_MAJOR)
ln -sf lib$(PROJECT).so.$(VERSION_MAJOR) $(DESTDIR)$(PREFIX)/lib/lib$(PROJECT).so
install -d $(DESTDIR)$(PREFIX)/include/$(PROJECT)/
install -m 644 $(wildcard $(INCLUDE)/*.*) $(DESTDIR)$(PREFIX)/include/$(PROJECT)/
uninstall:
-rm $(DESTDIR)$(PREFIX)/lib/lib$(PROJECT).{a,so{,.*}}
cd $(INCLUDE) && find . -type f | xargs -I {} rm "$(DESTDIR)$(PREFIX)/include/$(PROJECT)/{}"
-rmdir $(DESTDIR)$(PREFIX)/include/$(PROJECT)
$(PROJECT)-test: LDFLAGS+= -lfmt -lstdc++
$(PROJECT)-test: CFLAGS+= -Og -g
$(PROJECT)-test: lib$(PROJECT)-debug.a
$(CC) $(CFLAGS) src/test/*.c -o $@ -l:$< $(LDFLAGS)
-valgrind ./$@

@ -0,0 +1,143 @@
#ifndef _LEVEN_H
#define _LEVEN_H
#ifdef __cplusplus
#include <algorithm>
#include <concepts>
#include <cstddef>
#include <span>
#include <string_view>
#include <tuple>
#include <vector>
extern "C" {
#endif
#ifdef __cplusplus
}
namespace exopt { namespace util [[gnu::visibility("internal")]] {
#if 0
// Helpers used by the disabled table-based leven_diff(); only the declarations are visible here.
// NOTE(review): presumably allocates a d1 x d2 table of `elem`-byte cells — confirm against the definition.
void** allocate_array(size_t elem, size_t d1, size_t d2) [[gnu::returns_nonnull]];
// NOTE(review): presumably releases a table obtained from allocate_array() — confirm against the definition.
void deallocate_array(void** array) noexcept;
/*constexpr auto min(auto const& a, auto const& b) noexcept
-> std::common_type_t<decltype(a), decltype(b)>
requires(requires(decltype(a) _a, decltype(b) _b) {
{ _a < _b } -> std::convertible_to<bool>;
}) {
return a < b ? a : b;
}*/
/// Compute the Levenshtein distance between `a` and `b` using a full two-dimensional cost table.
///
/// This overload sits inside an `#if 0` region and is therefore not compiled.
/// Both arguments are converted to `std::string_view`; every return path yields a `signed long`.
/// NOTE(review): the issues flagged inline below need resolving before this code is re-enabled.
constexpr auto leven_diff(std::convertible_to<std::string_view> auto const& a, std::convertible_to<std::string_view> auto const& b) noexcept {
std::string_view s = a,
t = b;
const signed long n = long(s.size());
const signed long m = long(t.size());
// If either input is empty the distance is the length of the other one.
if(__builtin_expect(!n, 0)) return m;
if(__builtin_expect(!m, 0)) return n;
//constexpr
// Fills the cost table `d` (indexed d[i][j], i over `s`, j over `t`) and returns d[n][m].
auto&& work_on_d = [&](auto& d) {
// NOTE(review): in the next two loops `i` is incremented in the body *before* the
// iteration expression performs the store, so they write indices 1..n+1 (resp. 1..m+1):
// d[0][0] is never written and the final store is one past index n (resp. m). Confirm and fix.
for(long i=0; i <= n ; d[i][0] = i) i+=1;
for(long i=0; i <= m ; d[0][i] = i) i+=1;
for(long i=1; i <= n; i++) {
for(long j=1; j<=m; j++)
{
using std::min;
// Substitution cost: 0 when the two characters match, 1 otherwise.
const auto cost = std::type_identity_t<signed long>(! (t[j-1] == s[i-1]));
// Cheapest of: step from d[i-1][j], step from d[i][j-1], or diagonal step with `cost`.
d[i][j] = min(
min(d[i-1][j] + 1, d[i][j-1] + 1)
, d[i-1][j-1] + cost);
}
}
return d[n][m];
};
//TODO: XXX: The multidimensional array approach is not working yet.
if consteval {
using Vec2 = std::vector<std::vector<signed long>>;
struct vec : public Vec2 ///TODO: XXX: best way to do this? We just want the `d`` ctor overload...
{
//TODO: for below ctor of inner (size_t, constT&), since it's ambiguous and chooses the incorrect one no matter what
using Vec2::Vec2;//(size_t, signed long const&, Vec2::allocator_type const&);
vec(signed long, signed long) = delete;
vec(signed long, size_t) = delete;
vec(size_t) = delete;
constexpr ~vec() noexcept = default;
constexpr operator Vec2&&() && noexcept { return std::move(*static_cast<Vec2*>(this)); }
constexpr operator Vec2&() const&& = delete;
};
// NOTE(review): brace-initialising a std::vector selects its initializer_list constructor,
// so this does not request `m + 1` cells; `(size_t(m) + 1)` was probably intended — confirm.
const auto inner = std::vector<signed long>{ size_t(m) + 1 };
std::vector<std::vector<signed long>> d{ static_cast<Vec2&&>(std::move(vec{size_t(n) + 1, inner})) };
return work_on_d(d);
} else {
// NOTE(review): the dimensions are passed as (m+1, n+1) while the table is indexed
// d[0..n][0..m]; check this against the meaning of allocate_array's (d1, d2).
auto** d = reinterpret_cast<signed long**>(allocate_array(sizeof(signed long), m+1, n+1));
// NOTE(review): `d` is released only in the catch handler; the normal return path never
// calls deallocate_array(). The enclosing function is also `noexcept`, so the rethrow
// below cannot propagate to a caller.
try {
return work_on_d(d);
} catch(...) {
deallocate_array(reinterpret_cast<void**>(d));
throw;
}
}
}
#endif
constexpr auto leven_diff(std::string_view s1, std::string_view s2) noexcept {
const size_t
m(s1.size()),
n(s2.size());
if(__builtin_expect(!m, false)) return n;
if(__builtin_expect(!n, false)) return m;
auto&& work_on_d = [&](auto&& costs) {
for(size_t i=0;i <= n; i++) costs[i] = i;
size_t i{0};
for(auto const& c1: s1) {
costs[0] = i + 1;
size_t corner { i },
j { 0 };
for(auto const& c2: s2) {
size_t upper { costs[j+1] };
if( c1 == c2 ) costs[j+1] = corner;
else {
using std::min;
size_t t{min(upper, corner)};//upper<corner ? upper : corner
costs[j+1] = min(costs[j], t) + 1;
}
corner = upper;
j += 1;
}
i += 1;
}
return costs[n];
};
if consteval {
return work_on_d(std::vector<size_t>(n + 1));
} else {
thread_local static std::vector<size_t> d;
const size_t n1 = n + 1;
if(__builtin_expect(d.size() < n1, false)) d.resize(n1);
// We don't need to clear the buffer, it will be reinitialised by `work_on_d()`.
return work_on_d(std::span<size_t>{d.begin(), n1});
}
}
template<std::convertible_to<std::string_view> S1, std::convertible_to<std::string_view> S2>
constexpr decltype(auto) leven_diff(const S1& sa, const S2& sb) noexcept {
using str = std::string_view;
return leven_diff(str{sa}, str{sb});
}
} }
#endif
#endif /* _LEVEN_H */

@ -0,0 +1,12 @@
#include <cstdlib>
#include <cstring>
#include <leven.h>
namespace exopt {
    namespace util [[gnu::visibility("internal")]] {
        // Compile-time self-checks: these force leven_diff() through constant
        // evaluation, so a regression in the algorithm fails the build.
        static_assert(leven_diff("hello world", "Hello World") == 2, "Levenshtein distance incorrect for non-matching strings");
        static_assert(leven_diff("hello world", "hello world") == 0, "Levenshtein distance incorrect for matching strings");
        // Empty inputs: the distance is the length of the other string.
        static_assert(leven_diff("", "") == 0, "Levenshtein distance incorrect for two empty strings");
        static_assert(leven_diff("", "abc") == 3, "Levenshtein distance incorrect for empty first string");
        static_assert(leven_diff("abc", "") == 3, "Levenshtein distance incorrect for empty second string");
        // Inputs of different length exercise insertions and deletions, in both argument orders.
        static_assert(leven_diff("kitten", "sitting") == 3, "Levenshtein distance incorrect for strings of different length");
        static_assert(leven_diff("sitting", "kitten") == 3, "Levenshtein distance is not symmetric");
    }
}
Loading…
Cancel
Save