You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
70 lines
1.4 KiB
70 lines
1.4 KiB
#include <assert.h>
|
|
|
|
|
|
#include <input.h>
|
|
#include <vector.h>
|
|
|
|
#include <tracem/ints.h>
|
|
|
|
// Compile-time check: INPUT_VECTOR_STEP elements of input_t must occupy exactly one v128_t.
_Static_assert(sizeof(input_t)*INPUT_VECTOR_STEP == sizeof(v128_t), "invalid vector size mul");
|
|
|
|
// Number of input elements vproc() advances by on each loop iteration.
// vproc() asserts (when DEBUG is non-zero) that its `len` is a multiple of this value.
#define INPUT_VPROC_STEP 15
|
|
|
|
__attribute__((pure))
|
|
u64 vproc(usize len, const input_t input[const restrict len])
|
|
{
|
|
static const v128_t SINGLE = { .as.i16 = { -1, -1, -1, -1, -1, -1, -1, -1 } };
|
|
static const v128_t ONES = { .as.i16 = { 1, 1, 1, 1, 1, 1, 1, 1 } };
|
|
#if DEBUG
|
|
assert(len % INPUT_VPROC_STEP == 0);
|
|
#endif
|
|
|
|
#define LAST(j) input[i + (j)]
|
|
#define NEXT(j) input[i + ((j) * 2)]
|
|
__m128i end = _mm_setzero_si128();
|
|
for(usize i = 0;i<len;i+=INPUT_VPROC_STEP)
|
|
{
|
|
// load the vectors
|
|
v128_t prev = { .as.u16 = {
|
|
input[i+0],
|
|
input[i+2],
|
|
|
|
input[i+4],
|
|
input[i+6],
|
|
|
|
input[i+8],
|
|
input[i+10],
|
|
|
|
input[i+12],
|
|
input[i+14],
|
|
}
|
|
};
|
|
v128_t next = { .as.u16 = {
|
|
input[i+1],
|
|
input[i+3],
|
|
|
|
input[i+5],
|
|
input[i+7],
|
|
|
|
input[i+9],
|
|
input[i+11],
|
|
|
|
input[i+13],
|
|
input[i+15],
|
|
}
|
|
};
|
|
//__m128i res = _mm_subs_epi16(prev.mm, next.mm);
|
|
__m128i pltn = _mm_cmplt_epi16(prev.mm, next.mm); // prev < next
|
|
pltn = _mm_sign_epi16(pltn, SINGLE.mm);
|
|
pltn = _mm_sign_epi16(ONES.mm, pltn);
|
|
end = _mm_add_epi16(end, pltn);
|
|
//__m128i pr = _mm_sign_epi16(FULL_MASK.mm, res);
|
|
}
|
|
|
|
v16_u128n _ev = ((union v128)end).as.u16;
|
|
return (u64) ( _ev[0] + _ev[1]
|
|
+ _ev[2] + _ev[3]
|
|
+ _ev[4] + _ev[5]
|
|
+ _ev[6] + _ev[7] );
|
|
}
|