From 643eece25370447aeacf4b772f0d48af803fcc9a Mon Sep 17 00:00:00 2001 From: Avril Date: Wed, 9 Dec 2020 17:47:51 +0000 Subject: [PATCH] day9: fix alignment day9: pgo --- .gitignore | 3 +++ day9/Makefile | 36 +++++++++++++++++++++++++++++++----- day9/day9.c | 2 -- day9/map.c | 21 ++++++++++++++++++++- 4 files changed, 54 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 7186bb3..37efd27 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ part2* *.lock target/ + +*.o +*.gcda diff --git a/day9/Makefile b/day9/Makefile index d35f96e..6b996ca 100644 --- a/day9/Makefile +++ b/day9/Makefile @@ -5,7 +5,7 @@ OPT_FLAGS?= -march=native -flto \ -floop-interchange -ftree-loop-distribution -floop-strip-mine -floop-block \ -fno-stack-check -fno-strict-aliasing -COMMON_FLAGS?=-pipe -O3 -Wall -pedantic -Wextra -Wstrict-aliasing +COMMON_FLAGS?=-pipe -O3 -Wall -pedantic -Wextra -Wstrict-aliasing -Wcast-align C_OPT_FLAGS=$(OPT_FLAGS) CXX_OPT_FLAGS=$(OPT_FLAGS) -felide-constructors -fno-exceptions @@ -19,9 +19,19 @@ LDFLAGS?=$(LD_OPT_FLAGS) CFLAGS+=-I$(INCLUDE) +STRIP=strip + +# Number of profiling passes run against the instrumented part2 binary +PROF_ITERATIONS?=40 +# Concurrent profiling runs per pass: the *physical* core count from /proc/cpuinfo (dollar sign doubled so awk, not make, expands the field) +PROF_CONCURRENT?=$(shell grep ^cpu\\scores /proc/cpuinfo | uniq | awk '{print $$4}') + .PHONY: all all: part1 part2 + +.PHONY: test +test: part1-test part2-test + inpu%.h: inpu% @rm -f $@ while read line; do \ @@ -30,16 +40,32 @@ inpu%.h: inpu% part1: $(SRC) | input.h $(CC) $(SRC) $(CFLAGS) -o $@ $(LDFLAGS) - strip $@ + $(STRIP) $@ part2: $(SRC) | input.h - $(CC) $(SRC) -DPART2 $(CFLAGS) -o $@ $(LDFLAGS) - strip $@ + rm -f *.gcda + $(CC) $(SRC) -DPART2 $(CFLAGS) -fprofile-generate -o $@ $(LDFLAGS) + for i in {1..$(PROF_ITERATIONS)}; do \ + printf "\rIteration $$i / $(PROF_ITERATIONS)"; \ + for j in {1..$(PROF_CONCURRENT)}; do \ + ./$@ >> /dev/null 2>&1 & \ + done; \ + wait; \ + done + @echo " DONE" + rm $@ + $(CC) $(SRC) -DPART2 $(CFLAGS) -fprofile-use 
-fprofile-correction -o $@ -lgcov $(LDFLAGS) + $(STRIP) $@ part1-test: $(SRC) | input-test.h $(CC) $(SRC) -DTEST $(CFLAGS) -o $@ $(LDFLAGS) - strip $@ + $(STRIP) $@ + +part2-test: $(SRC) | input-test.h + $(CC) $(SRC) -DTEST -DPART2 $(CFLAGS) -o $@ $(LDFLAGS) + $(STRIP) $@ clean: rm -f part{1,2}{,-test} rm -f input{,-test}.h + rm -f *.gcda diff --git a/day9/day9.c b/day9/day9.c index e7239bd..6df48ad 100644 --- a/day9/day9.c +++ b/day9/day9.c @@ -24,8 +24,6 @@ const u64 input[] = { #endif }; -#define box(t) (calloc(sizeof(t), 1)) - struct pair { bool valid; u64 num; diff --git a/day9/map.c b/day9/map.c index 4674858..225380d 100644 --- a/day9/map.c +++ b/day9/map.c @@ -3,8 +3,27 @@ #include #include "map.h" - +/* + * box(t): heap-allocate one zero-initialised object of type `t`; evaluates to NULL when the allocation fails. + * With _EXPR_EXT defined: GNU statement-expression form built on aligned_alloc(); the result is typed `t*`. + * By default: aligned_alloc() followed by _zero_ptr(); the result is `void*`. + * With IGNORE_ALIGNMENT (or the legacy spelling IGNORE_ALIGMENT) defined: plain calloc(). + * In every variant the returned object is released with free(). +*/ +#ifdef _EXPR_EXT +#define box(t) ({ t* restrict p = aligned_alloc(_Alignof(t), sizeof(t)); \ + if (p) *p = (t){0}; \ + p; }) +#elif !defined(IGNORE_ALIGMENT) && !defined(IGNORE_ALIGNMENT) +inline extern void* _zero_ptr(void* ptr, size_t n) +{ + if (ptr) memset(ptr, 0, n); + return ptr; +} +#define box(t) _zero_ptr(aligned_alloc(_Alignof(t), sizeof(t)), sizeof(t)) +#else #define box(t) calloc(sizeof(t), 1) +#endif struct page { struct entry {