initial commit

start: day1p1 Fortune for aoc2021's current commit: Half blessing − 半吉
3 years ago · a8dce41cc4
commit a8dce41cc4
7 changed files with 2275 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
 # Object tree created by each day's Makefile (`dirs' target).
 obj/
 # Linked binaries, named $(PROJECT)-release / $(PROJECT)-debug (e.g. day1-release).
 day*-*
 # Per-day puzzle input; `input' is the default file name read by mkinput.
 day*/input
--- a/day1/Makefile
+++ b/day1/Makefile
@ -0,0 +1,90 @@
 # Build configuration for day1 (C and C++ sources under src/, headers under include/).
 # Contains targets for `release', `debug', and `clean'.
 PROJECT=day1
 AUTHOR=Avril (Flanchan) <flanchan@cumallover.me>
 # Simply-expanded: the source lists are globbed once, not on every reference.
 SRC_C   := $(wildcard src/*.c)
 SRC_CXX := $(wildcard src/*.cpp)
 INCLUDE=include
 COMMON_FLAGS+= -W -Wall -fno-strict-aliasing $(addprefix -I,$(INCLUDE))
 # -mssse3 is needed on top of SSE..SSE3: include/vector.h pulls in <tmmintrin.h>,
 # whose SSSE3 intrinsics can only be inlined when SSSE3 is enabled, and the debug
 # build has no -march=native to enable it implicitly.
 COMMON_FLAGS+=-msse -msse2 -msse3 -mssse3
 # Aggressive loop/parallelisation flags; only used by the release flag sets below.
 OPT_FLAGS?= -march=native -fgraphite -fopenmp -floop-parallelize-all -ftree-parallelize-loops=4 \
 	    -floop-interchange -ftree-loop-distribution -floop-strip-mine -floop-block \
 	    -fno-stack-check
 CXX_OPT_FLAGS?= $(OPT_FLAGS) -felide-constructors
 CFLAGS   += $(COMMON_FLAGS) --std=gnu17
 CXXFLAGS += $(COMMON_FLAGS) --std=gnu++20
 LDFLAGS  += -ltracem
 STRIP=strip
 RELEASE_COMMON_FLAGS+= -Werror
 DEBUG_COMMON_FLAGS+= -fanalyzer
 # Pass TARGET_SPEC_FLAGS=no to skip these defaults and supply your own
 # RELEASE_*/DEBUG_* flags. Indented with spaces, not tabs, so the assignments can
 # never be mistaken for recipe lines.
 ifneq ($(TARGET_SPEC_FLAGS),no)
   RELEASE_CFLAGS?=   -O3 -flto $(OPT_FLAGS)
   RELEASE_CXXFLAGS?= -O3 -flto $(CXX_OPT_FLAGS)
   RELEASE_LDFLAGS?=  -Wl,-O3 -Wl,-flto
   DEBUG_CFLAGS?=	-Og -g
   DEBUG_CXXFLAGS?=-Og -g
   DEBUG_LDFLAGS?=
 endif
 # Always appended, even when the defaults above are suppressed.
 DEBUG_CFLAGS+=-DDEBUG $(DEBUG_COMMON_FLAGS)
 DEBUG_CXXFLAGS+=-DDEBUG $(DEBUG_COMMON_FLAGS)
 RELEASE_CFLAGS+=-DRELEASE $(RELEASE_COMMON_FLAGS)
 RELEASE_CXXFLAGS+=-DRELEASE $(RELEASE_COMMON_FLAGS)
 # Objects: src/foo.c -> obj/c/src/foo.o, src/foo.cpp -> obj/cxx/src/foo.o
 OBJ_C   = $(addprefix obj/c/,$(SRC_C:.c=.o))
 OBJ_CXX = $(addprefix obj/cxx/,$(SRC_CXX:.cpp=.o))
 OBJ = $(OBJ_C) $(OBJ_CXX)
 # Phonies
 # `release' is the first real target and therefore the default goal.
 .PHONY: release
 release: $(PROJECT)-release
 .PHONY: debug
 debug: $(PROJECT)-debug
 .PHONY: dirs clean clean-rebuild
 # Targets
 # Create the object tree. The directories are spelled out rather than written as
 # obj/c{,xx}/src: brace expansion is a bash feature and recipes run under /bin/sh.
 dirs:
 	@mkdir -p obj/c/src obj/cxx/src
 # Every object depends (order-only) on the tree existing, so `make -j' and direct
 # `make $(PROJECT)-release' invocations cannot compile before the mkdir has run.
 # Linker flags are deliberately not passed to these compile-only steps.
 obj/c/%.o: %.c | dirs
 	$(CC) -c $< $(CFLAGS) -o $@
 obj/cxx/%.o: %.cpp | dirs
 	$(CXX) -c $< $(CXXFLAGS) -o $@
 # Target-specific flags also apply to the objects built as prerequisites.
 # NOTE: release and debug share obj/; run `clean-rebuild' when switching between them.
 $(PROJECT)-release: CFLAGS+= $(RELEASE_CFLAGS)
 $(PROJECT)-release: CXXFLAGS += $(RELEASE_CXXFLAGS)
 $(PROJECT)-release: LDFLAGS += $(RELEASE_LDFLAGS)
 $(PROJECT)-release: $(OBJ)
 	$(CXX) $^ $(CXXFLAGS) -o $@ $(LDFLAGS)
 	$(STRIP) $@
 $(PROJECT)-debug: CFLAGS+= $(DEBUG_CFLAGS)
 $(PROJECT)-debug: CXXFLAGS += $(DEBUG_CXXFLAGS)
 $(PROJECT)-debug: LDFLAGS += $(DEBUG_LDFLAGS)
 $(PROJECT)-debug: $(OBJ)
 	$(CXX) $^ $(CXXFLAGS) -o $@ $(LDFLAGS)
 # Drop only the objects, forcing a full recompile on the next build.
 clean-rebuild:
 	rm -rf obj
 # Drop the objects and every linked binary.
 clean: clean-rebuild
 	rm -f $(PROJECT)-release $(PROJECT)-debug $(PROJECT)-pgo
--- a/day1/include/input.h
+++ b/day1/include/input.h
@ -0,0 +1,22 @@
 #ifndef _INPUT_H
 #define _INPUT_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include <stddef.h>
 #include <stdint.h>
 /* Element type of the puzzle input: one unsigned 16-bit value per entry. */
 typedef uint16_t input_t;
 /* Number of input_t elements that fit in 16 bytes (8 while input_t is 16 bits wide). */
 #define INPUT_VECTOR_STEP (16 / sizeof(input_t))
 /* Number of elements in INPUT; the definition is emitted by the mkinput script. */
 extern const size_t  INPUT_SIZE;
 /* The input values themselves; the definition is emitted by the mkinput script. */
 extern const input_t INPUT[];
 #ifdef __cplusplus
 }
 #endif
 #endif /* _INPUT_H */
--- a/day1/include/vector.h
+++ b/day1/include/vector.h
@ -0,0 +1,71 @@
 #ifndef _VECTOR_H
 #define _VECTOR_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include <stdint.h>
 #include <mmintrin.h>
 #include <immintrin.h>
 #include <tmmintrin.h>
 #include <xmmintrin.h>
 #include <emmintrin.h>
 // Size in bytes of every vector type declared below (16 bytes = 128 bits).
 #define _VECTOR_SIZE 16
 // Naming scheme: v<lane bits>_<u|i>128n, u = unsigned lanes, i = signed lanes.
 // Vector128 int 8 (16 lanes)
 typedef uint8_t  v8_u128n __attribute__((vector_size(_VECTOR_SIZE)));
 typedef int8_t   v8_i128n __attribute__((vector_size(_VECTOR_SIZE)));
 // Vector128 int 16 (8 lanes)
 typedef uint16_t v16_u128n __attribute__((vector_size(_VECTOR_SIZE)));
 typedef int16_t  v16_i128n __attribute__((vector_size(_VECTOR_SIZE)));
 // Vector128 int 32 (4 lanes)
 typedef uint32_t v32_u128n __attribute__((vector_size(_VECTOR_SIZE)));
 typedef int32_t  v32_i128n __attribute__((vector_size(_VECTOR_SIZE)));
 // Vector128 int 64 (2 lanes)
 typedef uint64_t v64_u128n __attribute__((vector_size(_VECTOR_SIZE)));
 typedef int64_t  v64_i128n __attribute__((vector_size(_VECTOR_SIZE)));
 // One 128-bit value viewable through several representations. Every member
 // overlays the same 16 bytes, so writing one member and reading another
 // reinterprets the bits (no conversion takes place).
 typedef union v128 {
 	// Intrinsics: the types taken and returned by the _mm_* functions
 	__m128i mm; // integer vector
 	__m128d mmd; // double
 	__m128  mmf; // float //XXX: Should we have the halfs
 	// Vectorised integers: lane-indexable views, e.g. `v.as.u16[3]`
 	union {
 		v8_u128n u8;
 		v8_i128n i8;
 		v16_u128n u16;
 		v16_i128n i16;
 		v32_u128n u32;
 		v32_i128n i32;
 		v64_u128n u64;
 		v64_i128n i64;
 	} as;
 	// Native 128 bit integers
 	unsigned __int128 u128;
 	signed   __int128  i128;
 	// Raw bytes
 	uint8_t bytes[_VECTOR_SIZE];
 } v128_t;
 #ifdef __cplusplus
 }
 static_assert(sizeof(union v128)==_VECTOR_SIZE, "invalid vector size");
 static_assert(alignof(union v128)==alignof(__m128i), "invalid vector alignment");
 #else
 _Static_assert(sizeof(union v128)==_VECTOR_SIZE, "invalid vector size");
 _Static_assert(_Alignof(union v128)==_Alignof(__m128i), "invalid vector alignment");
 #endif
 #endif /* _VECTOR_H */
--- a/day1/mkinput
+++ b/day1/mkinput
@ -0,0 +1,15 @@
 #!/bin/bash
 # Emit, on stdout, C source defining INPUT[] and INPUT_SIZE (declared in
 # input.h) from a text file holding one value per line.
 #
 # Environment:
 #   INPUT  path of the text file to read (default: `input').
 #
 # Exits non-zero without producing any output when the file cannot be read.
 INPUT=${INPUT:-input}
 if [[ ! -r "$INPUT" ]]; then
 	echo "mkinput: cannot read input file: $INPUT" >&2
 	exit 1
 fi
 # Collect the values first so the element count always matches what is emitted.
 values=()
 # `|| [[ -n $line ]]' keeps a final line that lacks a trailing newline.
 while IFS= read -r line || [[ -n "$line" ]]; do
 	line=${line%$'\r'}	# tolerate CRLF line endings
 	# Blank lines would otherwise be emitted as empty initialisers (`, ').
 	[[ "$line" =~ ^[[:space:]]*$ ]] && continue
 	values+=("$line")
 done < "$INPUT"
 unset line
 NUM=${#values[@]}
 echo "#include <input.h>"
 echo ""
 echo "const input_t INPUT[${NUM}] = {"
 for value in "${values[@]}"; do
 	printf '\t%s, \n' "$value"
 done
 unset value
 echo "};"
 echo "const size_t INPUT_SIZE=${NUM};"
--- a/day1/src/input.c
+++ b/day1/src/input.c
--- a/day1/src/vcomp.c
+++ b/day1/src/vcomp.c
@ -0,0 +1,69 @@
 #include <assert.h>
 #include <input.h>
 #include <vector.h>
 #include <tracem/ints.h>
 _Static_assert(sizeof(input_t)*INPUT_VECTOR_STEP == sizeof(v128_t), "invalid vector size mul");
 #define INPUT_VPROC_STEP 15
 __attribute__((pure))
 u64 vproc(usize len, const input_t input[const restrict len])
 {
 	static const v128_t SINGLE = { .as.i16 = { -1, -1, -1, -1, -1, -1, -1, -1 } };
 	static const v128_t ONES   = { .as.i16 = {  1,  1,  1,  1,  1,  1,  1,  1 } };
 #if DEBUG
 	assert(len % INPUT_VPROC_STEP == 0);
 #endif
 #define LAST(j) input[i +  (j)]
 #define NEXT(j) input[i + ((j) * 2)]
 	__m128i end = _mm_setzero_si128();
 	for(usize i = 0;i<len;i+=INPUT_VPROC_STEP)
 	{
 		// load the vectors
 		v128_t prev = { .as.u16 = {
 				input[i+0],
 				input[i+2],
 				input[i+4],
 				input[i+6],
 				input[i+8],
 				input[i+10],
 				input[i+12],
 				input[i+14],
 			}
 		};
 		v128_t next = { .as.u16 = {
 				input[i+1],
 				input[i+3],
 				input[i+5],
 				input[i+7],
 				input[i+9],
 				input[i+11],
 				input[i+13],
 				input[i+15],
 			}
 		};
 		//__m128i res = _mm_subs_epi16(prev.mm, next.mm);
 		__m128i pltn = _mm_cmplt_epi16(prev.mm, next.mm); // prev < next
 		pltn = _mm_sign_epi16(pltn, SINGLE.mm);
 		pltn = _mm_sign_epi16(ONES.mm, pltn);
 		end = _mm_add_epi16(end, pltn);
 		//__m128i pr =  _mm_sign_epi16(FULL_MASK.mm, res);
 	}
 	v16_u128n _ev = ((union v128)end).as.u16;
 	return (u64) (    _ev[0] + _ev[1] 
 			+ _ev[2] + _ev[3]
 			+ _ev[4] + _ev[5]
 			+ _ev[6] + _ev[7] );
 }