#include #include #include #include /* VSX/Altivec registers (128 bits) */ typedef union _ppc_vsr_t { uint8_t u8[16]; uint16_t u16[8]; uint32_t u32[4]; uint64_t u64[2]; int8_t s8[16]; int16_t s16[8]; int32_t s32[4]; int64_t s64[2]; float f32[4]; double f64[2]; } ppc_vsr_t; typedef ppc_vsr_t ppc_avr_t; #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define IS_LITTLE_ENDIAN (1 == *(unsigned char *)&(const int){1}) #if defined(IS_LITTLE_ENDIAN) #define HI_IDX 1 #define LO_IDX 0 #define VsrB(i) u8[15 - (i)] #else #define HI_IDX 0 #define LO_IDX 1 #define VsrB(i) u8[i] #endif #define ITERATIONS 0x8000000 void benchmark1(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { /* Existing algorithm */ ppc_avr_t result; int i; size_t n_elems = ARRAY_SIZE(r->u8); for (i = 0; i < n_elems / 2; i++) { result.u8[i*2+HI_IDX] = a->u8[i]; result.u8[i*2+LO_IDX] = b->u8[i]; } *r = result; } void benchmark2(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { /* v3 patchset algorithm */ ppc_avr_t result; int i; for (i = 0; i < ARRAY_SIZE(r->u8); i++) { result.VsrB(i) = (i & 1) ? b->VsrB(i >> 1) : a->VsrB(i >> 1); } *r = result; } void benchmark3(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { /* Split loop into 2 halves */ ppc_avr_t result; int i; for (i = 0; i < ARRAY_SIZE(r->u8); i+=2) { result.VsrB(i) = a->VsrB(i >> 1); } for (i = 1; i < ARRAY_SIZE(r->u8); i+=2) { result.VsrB(i) = b->VsrB(i >> 1); } *r = result; } void benchmark4(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { /* Richard's merge high */ ppc_avr_t result; int i; int half = ARRAY_SIZE(r->u8) / 2; int ofs = 0; for (i = 0; i < half; i++) { result.VsrB(i * 2 + 0) = a->VsrB(i + ofs); result.VsrB(i * 2 + 1) = b->VsrB(i + ofs); } *r = result; } int elapsed_time(struct timeval *st, struct timeval *et) { return ((et->tv_sec - st->tv_sec) * 1000000) + (et->tv_usec - st->tv_usec); } int main(int argc, char *argv[]) { ppc_avr_t a, b, r; int i; struct timeval start_time, end_time; printf("Benchmark 1 - existing merge high\n"); gettimeofday(&start_time, NULL); for (i = 0; i < ITERATIONS; i++) { benchmark1(&r, &a, &b); } gettimeofday(&end_time ,NULL); printf("Elapsed time: %d us\n\n", elapsed_time(&start_time, &end_time)); printf("Benchmark 2 - v3 merge high\n"); gettimeofday(&start_time, NULL); for (i = 0; i < ITERATIONS; i++) { benchmark2(&r, &a, &b); } gettimeofday(&end_time ,NULL); printf("Elapsed time: %d us\n\n", elapsed_time(&start_time, &end_time)); printf("Benchmark 3 - 2 loops merge high\n"); gettimeofday(&start_time, NULL); for (i = 0; i < ITERATIONS; i++) { benchmark3(&r, &a, &b); } gettimeofday(&end_time ,NULL); printf("Elapsed time: %d us\n\n", elapsed_time(&start_time, &end_time)); printf("Benchmark 4 - Richard's merge high\n"); gettimeofday(&start_time, NULL); for (i = 0; i < ITERATIONS; i++) { benchmark4(&r, &a, &b); } gettimeofday(&end_time ,NULL); printf("Elapsed time: %d us\n\n", elapsed_time(&start_time, &end_time)); }