You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
shuffle3/Makefile

173 lines
4.8 KiB

# Source discovery. `:=` expands each wildcard once, when the makefile is read,
# instead of re-globbing the filesystem every time the variable is referenced.
SRC_C   := $(wildcard src/*.c)
SRC_CXX := $(wildcard src/*.cpp)
SRC_CXX += $(wildcard src/rng/*.cpp)
# Header search directories; each word is handed to the compilers as -I<dir>.
INCLUDE = include
# Base name of the produced binaries ($(PROJECT)-release, -debug, -pgo).
PROJECT=shuffle3
# Currently supported:
# _FS_SPILL_BUFFER: Use file backed buffer instead of memory backed one for unshuffling. See `shuffle3.h`.
# Setting the value to `DYN` enables the dynamic buffer; setting it to `MAP` enables the memory-mapped buffer. `MAP` is usually the fastest of the 3 modes.
# DEBUG: Pretend we're building a debug release even though we're not. Will enable additional output messages and may interfere with some optimisations
# Feature switches chosen by the user; each word is turned into a -D<word>
# define in COMMON_FLAGS. Empty unless set on the command line or environment.
FEATURE_FLAGS ?=
# Flags shared by the C and C++ compilers: feature defines, warnings, include
# paths, and strict-aliasing optimisation switched off.
COMMON_FLAGS += $(patsubst %,-D%,$(FEATURE_FLAGS)) -Wall -Wstrict-aliasing $(patsubst %,-I%,$(INCLUDE)) -fno-strict-aliasing
# CPU passed to -march=; set ARCH to the empty string to leave the flag out.
ARCH ?= native
# Instruction-set extensions; each word becomes a -m<name> flag.
INTRINSICS = avx sse3
# Default optimisation flags (loop transformations, OpenMP, auto-parallelising).
# The -m<intrinsic> and -march flags are appended after this default.
OPT_FLAGS ?= -fgraphite \
             -fopenmp \
             -floop-parallelize-all \
             -ftree-parallelize-loops=4 \
             -floop-interchange \
             -ftree-loop-distribution \
             -floop-strip-mine \
             -floop-block \
             -fno-stack-check
OPT_FLAGS += $(patsubst %,-m%,$(INTRINSICS))
ifneq (,$(ARCH))
OPT_FLAGS += $(patsubst %,-march=%,$(ARCH))
endif
# C++ builds use the same optimisation flags plus constructor elision.
CXX_OPT_FLAGS ?= $(OPT_FLAGS) -felide-constructors
# Per-language flags: the shared flags plus the language standard.
CFLAGS   += $(COMMON_FLAGS) --std=gnu11
# C++ is built with exception support turned off (-fno-exceptions).
# XXX: We might need exceptions soon, for OOP usage, because we try multiple approaches from most efficient to least.
CXXFLAGS += $(COMMON_FLAGS) --std=gnu++23 -fno-exceptions
# Nothing is added to the linker flags here; the build targets append their own.
LDFLAGS  +=
# Program used to strip the finished release and PGO binaries.
STRIP = strip
# Release configuration: -DNOTRACE is an overridable default, then -DRELEASE
# is appended to it.
RELEASE_COMMON_FLAGS ?= -DNOTRACE
RELEASE_COMMON_FLAGS += -DRELEASE
# -O3 with link-time optimisation, plus the optimisation and release defines.
RELEASE_CFLAGS   ?= -O3 -flto $(OPT_FLAGS) $(RELEASE_COMMON_FLAGS)
RELEASE_CXXFLAGS ?= -O3 -flto $(CXX_OPT_FLAGS) $(RELEASE_COMMON_FLAGS)
# Options forwarded to the linker through the compiler driver (-Wl,).
RELEASE_LDFLAGS  ?= -Wl,-O3 -Wl,-flto
# Debug configuration: debugger-friendly optimisation, debug info, DEBUG define.
DEBUG_CFLAGS   ?= -Og -g -DDEBUG
# C++ debug flags default to the C ones.
DEBUG_CXXFLAGS ?= $(DEBUG_CFLAGS)
DEBUG_LDFLAGS  ?=
# Objects
# Each source maps to an object with the same relative path, rooted at obj/c/
# for C and obj/cxx/ for C++ (e.g. src/foo.c -> obj/c/src/foo.o).
OBJ_C   = $(addprefix obj/c/,$(patsubst %.c,%.o,$(SRC_C)))
OBJ_CXX = $(addprefix obj/cxx/,$(patsubst %.cpp,%.o,$(SRC_CXX)))
# Everything that gets linked into the release and debug binaries.
OBJ     = $(OBJ_C) $(OBJ_CXX)
# PGO (profile-guided optimisation) settings
# PGO objects mirror the normal object layout under obj/pgo/.
PGO_OBJ_C   = $(addprefix obj/pgo/c/,$(patsubst %.c,%.o,$(SRC_C)))
PGO_OBJ_CXX = $(addprefix obj/pgo/cxx/,$(patsubst %.cpp,%.o,$(SRC_CXX)))
PGO_OBJ     = $(PGO_OBJ_C) $(PGO_OBJ_CXX)
# Number of profiling iterations run by `pgo-profile`.
PGO_ITERATIONS = 5
# Shell brace ranges handed to `rng --of` by `pgo-profile`: the picked values
# become dd's `count=` (PGO_BLOCKS) and `bs=` (PGO_SIZE) for the sample file.
PGO_BLOCKS = {1..4}
PGO_SIZE   = {1024..4096}
# Scratch directory for the sample file and the captured logs.
PGO_SET_LOC ?= /tmp/$(PROJECT)-pgo
# Compiler/linker additions for the instrumented build; `pgo-use` overrides
# both with its -fprofile-use equivalents.
PGO_FLAGS   = -fprofile-generate
PGO_LDFLAGS =
# Phonies
# User-facing entry points; `release` is the first target and therefore the
# default goal. `dirs` is listed ahead of the binary as an order-only
# prerequisite so that, in a serial build, the object directories are set up
# before anything is compiled.
.PHONY: release debug pgo test

release: | dirs $(PROJECT)-release

debug: | dirs $(PROJECT)-debug

pgo: | dirs $(PROJECT)-pgo

# `test` is an alias for `test-all`.
test: test-all
# Targets
# Create the object directory tree mirroring src/ and src/rng/ for the normal
# (obj/c, obj/cxx) and PGO (obj/pgo/c, obj/pgo/cxx) builds.
# The directory list is generated by make itself rather than by shell brace
# expansion: recipes run under /bin/sh by default, and a POSIX sh such as dash
# would create a single directory literally named with the braces.
# Phony so that a file or directory called `dirs` cannot suppress the recipe.
.PHONY: dirs
dirs:
	@mkdir -p $(foreach root,obj/pgo obj,$(foreach lang,c cxx,$(root)/$(lang)/src $(root)/$(lang)/src/rng))
# Compile one C source into obj/c/<source path>.o.
# The output directory is created here so the rule works on its own: under
# `make -j`, or when a binary target is requested directly, `dirs` is not
# guaranteed to have run before the first compile.
obj/c/%.o: %.c
	@mkdir -p $(@D)
	$(CC) -c $< $(CFLAGS) -o $@ $(LDFLAGS)
# Compile one C++ source into obj/cxx/<source path>.o.
# The output directory is created here so the rule works on its own: under
# `make -j`, or when a binary target is requested directly, `dirs` is not
# guaranteed to have run before the first compile.
obj/cxx/%.o: %.cpp
	@mkdir -p $(@D)
	$(CXX) -c $< $(CXXFLAGS) -o $@ $(LDFLAGS)
# Compile one C source into obj/pgo/c/<source path>.o with the current PGO
# flags (profile generation by default, profile use under `pgo-use`).
# The output directory is created here so the rule does not depend on `dirs`
# having run first (not guaranteed under `make -j` or for direct invocations).
obj/pgo/c/%.o: %.c
	@mkdir -p $(@D)
	$(CC) -c $< $(CFLAGS) $(PGO_FLAGS) -o $@ $(LDFLAGS) $(PGO_LDFLAGS)
# Compile one C++ source into obj/pgo/cxx/<source path>.o with the current PGO
# flags (profile generation by default, profile use under `pgo-use`).
# The output directory is created here so the rule does not depend on `dirs`
# having run first (not guaranteed under `make -j` or for direct invocations).
obj/pgo/cxx/%.o: %.cpp
	@mkdir -p $(@D)
	$(CXX) -c $< $(CXXFLAGS) $(PGO_FLAGS) -o $@ $(LDFLAGS) $(PGO_LDFLAGS)
# Optimised, stripped binary.
# The target-specific flag additions below also apply to the object files
# built as prerequisites of this target.
# NOTE: the debug binary is linked from the same $(OBJ) files; objects already
# built for one configuration are considered up to date for the other, so run
# `clean-rebuild` when switching between them.
$(PROJECT)-release: CFLAGS   += $(RELEASE_CFLAGS)
$(PROJECT)-release: CXXFLAGS += $(RELEASE_CXXFLAGS)
$(PROJECT)-release: LDFLAGS  += $(RELEASE_LDFLAGS)
$(PROJECT)-release: $(OBJ)
	$(CXX) $^ $(CXXFLAGS) -o $@ $(LDFLAGS)
	$(STRIP) $@
# Debug binary; left unstripped.
# The target-specific flag additions below also apply to the object files
# built as prerequisites of this target.
# NOTE: the release binary is linked from the same $(OBJ) files; objects
# already built for one configuration are considered up to date for the other,
# so run `clean-rebuild` when switching between them.
$(PROJECT)-debug: CFLAGS   += $(DEBUG_CFLAGS)
$(PROJECT)-debug: CXXFLAGS += $(DEBUG_CXXFLAGS)
$(PROJECT)-debug: LDFLAGS  += $(DEBUG_LDFLAGS)
$(PROJECT)-debug: $(OBJ)
	$(CXX) $^ $(CXXFLAGS) -o $@ $(LDFLAGS)
# Instrumented binary used to collect profile data: release flags plus
# $(PGO_FLAGS) (-fprofile-generate unless overridden), linked from the objects
# under obj/pgo/.
pgo-generate: CFLAGS   += $(RELEASE_CFLAGS)
pgo-generate: CXXFLAGS += $(RELEASE_CXXFLAGS)
pgo-generate: LDFLAGS  += $(RELEASE_LDFLAGS)
pgo-generate: $(PGO_OBJ)
	$(CXX) $^ $(CXXFLAGS) $(PGO_FLAGS) -o $@ $(LDFLAGS) $(PGO_LDFLAGS)
# Delete profile data (*.gcda) left under obj/pgo by earlier instrumented runs.
# Phony so that a stray file named `pgo-reset` cannot suppress the recipe.
# The leading `-` keeps the build going when find fails, e.g. because obj/pgo
# does not exist yet.
.PHONY: pgo-reset
pgo-reset:
	-find ./obj/pgo -name '*.gcda' -exec rm {} +
# Exercise the instrumented `pgo-generate` binary to collect profile data.
# For each of $(PGO_ITERATIONS) iterations:
#   1. pick a random block size and block count with the external `rng` tool,
#   2. fill $(PGO_SET_LOC)/small with that much data from /dev/urandom,
#   3. run pgo-generate with -s, -u and -h, then test.sh with FCNT=1..4.
# Output of the runs is appended to stdout.log / stderr.log in $(PGO_SET_LOC);
# the first failing iteration aborts the recipe with that exit status.
# Afterwards the logs are printed (with `bat` when installed, else `cat`), and
# the scratch directory and the instrumented binary are removed.
#
# Phony: this is an action, not a file.
.PHONY: pgo-profile
# The recipe depends on bash brace expansion (`{1..N}` and the ranges stored in
# PGO_SIZE / PGO_BLOCKS). Make runs recipes with /bin/sh by default, and a
# POSIX sh passes the braces through literally, so bash is requested here.
pgo-profile: SHELL := bash
pgo-profile: | pgo-reset pgo-generate
	mkdir -p $(PGO_SET_LOC)
	for i in {1..$(PGO_ITERATIONS)}; do \
		block=$$(rng --of $(PGO_SIZE)); \
		block_count=$$(rng --of $(PGO_BLOCKS)); \
		dd if=/dev/urandom of=$(PGO_SET_LOC)/small bs=$$block count=$$block_count >> /dev/null 2>&1; \
		printf "Iteration $$i / $(PGO_ITERATIONS) ($$block * $$block_count)\r"; \
		( echo ">> $$i" >&2; \
		  echo ">> $$i"; \
		  ./pgo-generate -s $(PGO_SET_LOC)/small && \
		  ./pgo-generate -u $(PGO_SET_LOC)/small && \
		  ./pgo-generate -h && \
		  FCNT=1 ./test.sh ./pgo-generate && \
		  FCNT=2 ./test.sh ./pgo-generate && \
		  FCNT=3 ./test.sh ./pgo-generate && \
		  FCNT=4 ./test.sh ./pgo-generate && \
		  : \
		) >>$(PGO_SET_LOC)/stdout.log 2>>$(PGO_SET_LOC)/stderr.log || \exit $$?; \
	done
	$(shell command -v bat >/dev/null && echo "bat --pager=none" || echo cat) $(PGO_SET_LOC)/stdout.log; \
	$(shell command -v bat >/dev/null && echo "bat --pager=none" || echo cat) $(PGO_SET_LOC)/stderr.log >&2
	rm -rf $(PGO_SET_LOC)
	rm pgo-generate
# Final PGO binary: the same sources and release flags as `pgo-generate`, but
# with the PGO flags switched from generating a profile to consuming it.
# The overrides also reach the obj/pgo/ object rules built for this target.
pgo-use: PGO_FLAGS   = -fprofile-use
pgo-use: PGO_LDFLAGS = -lgcov -fprofile-use
pgo-use: CFLAGS      += $(RELEASE_CFLAGS)
pgo-use: CXXFLAGS    += $(RELEASE_CXXFLAGS)
pgo-use: LDFLAGS     += $(RELEASE_LDFLAGS)
pgo-use: $(PGO_OBJ)
	$(CXX) $^ $(CXXFLAGS) $(PGO_FLAGS) -o $@ $(LDFLAGS) $(PGO_LDFLAGS)
# Full PGO pipeline: profile, then rebuild with the collected data.
#   1. `pgo-profile` builds and runs the instrumented binary,
#   2. the instrumented objects are deleted so they get recompiled,
#   3. a sub-make builds `pgo-use` (-fprofile-use), which is renamed to the
#      final binary and stripped.
# `pgo-profile` is a normal prerequisite on purpose. It never exists as a file,
# so it always runs; as an order-only prerequisite it would still run every
# time, but an already existing $(PROJECT)-pgo would then be treated as up to
# date and the freshly collected profile (and any source change) ignored.
$(PROJECT)-pgo: pgo-profile
	find ./obj/pgo -name '*.o' -exec rm {} +
	$(MAKE) pgo-use
	mv pgo-use $@
	$(STRIP) $@
# Remove every object file (normal and PGO) but keep the built binaries, so the
# next build recompiles everything from scratch.
# Phony so that a file named `clean-rebuild` cannot suppress the recipe.
.PHONY: clean-rebuild
clean-rebuild:
	rm -rf obj
# Remove the object tree (via clean-rebuild) and the three final binaries.
# The binary names are generated by make instead of shell brace expansion:
# under the default /bin/sh a POSIX shell would look for one file literally
# named `$(PROJECT)-{release,debug,pgo}` and delete nothing.
# Phony so that a file named `clean` cannot suppress the recipe.
.PHONY: clean
clean: clean-rebuild
	rm -f $(addprefix $(PROJECT)-,release debug pgo)
# Run test.sh with every $(PROJECT)-* binary present in the current directory
# (the shell glob is expanded when the recipe runs).
# Phony so that a file named `test-all` cannot suppress the recipe.
.PHONY: test-all
test-all:
	@./test.sh ./$(PROJECT)-*