/* Native word width for the hardware CRC step: the SSE4.2 crc32 builtins
 * consume 64 bits per instruction on LP64 targets (__builtin_ia32_crc32di)
 * and 32 bits on 32-bit targets (__builtin_ia32_crc32si). */
#ifndef __LP64__
#define CRC_NATIVE uint32_t
#else
#define CRC_NATIVE uint64_t
#endif
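
/* CRCtriplet: advance three independent CRC streams (crc0/crc1/crc2 over
 * buf0/buf1/buf2) by one 64-bit word each. Interleaving three chains hides
 * the latency of the crc32 instruction (typically 3 cycles with single-cycle
 * throughput on Intel cores), so the streams execute in parallel. The 32-bit
 * build covers the same 8 bytes with two crc32si steps per stream. */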
#ifndef __LP64__
#define CRCtriplet(crc, buf, offset) \
crc ## 0 = __builtin_ia32_crc32si(crc ## 0, *((uint32_t*) buf ## 0 + 2 * offset)); \
crc ## 1 = __builtin_ia32_crc32si(crc ## 1, *((uint32_t*) buf ## 1 + 2 * offset)); \
crc ## 2 = __builtin_ia32_crc32si(crc ## 2, *((uint32_t*) buf ## 2 + 2 * offset)); \
crc ## 0 = __builtin_ia32_crc32si(crc ## 0, *((uint32_t*) buf ## 0 + 1 + 2 * offset)); \
crc ## 1 = __builtin_ia32_crc32si(crc ## 1, *((uint32_t*) buf ## 1 + 1 + 2 * offset)); \
crc ## 2 = __builtin_ia32_crc32si(crc ## 2, *((uint32_t*) buf ## 2 + 1 + 2 * offset));
#else
#define CRCtriplet(crc, buf, offset) \
crc ## 0 = __builtin_ia32_crc32di(crc ## 0, *(buf ## 0 + offset)); \
crc ## 1 = __builtin_ia32_crc32di(crc ## 1, *(buf ## 1 + offset)); \
crc ## 2 = __builtin_ia32_crc32di(crc ## 2, *(buf ## 2 + offset));
#endif
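
/* CRCduplet: the same step for streams 0 and 1 only. This handles the final
 * word of those two streams; stream 2's final word is folded in by
 * CombineCRC below (which reads it through next2 - 1). */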
#ifndef __LP64__
#define CRCduplet(crc, buf, offset) \
crc ## 0 = __builtin_ia32_crc32si(crc ## 0, *((uint32_t*) buf ## 0 + 2 * offset)); \
crc ## 1 = __builtin_ia32_crc32si(crc ## 1, *((uint32_t*) buf ## 1 + 2 * offset)); \
crc ## 0 = __builtin_ia32_crc32si(crc ## 0, *((uint32_t*) buf ## 0 + 1 + 2 * offset)); \
crc ## 1 = __builtin_ia32_crc32si(crc ## 1, *((uint32_t*) buf ## 1 + 1 + 2 * offset));
#else
#define CRCduplet(crc, buf, offset) \
crc ## 0 = __builtin_ia32_crc32di(crc ## 0, *(buf ## 0 + offset)); \
crc ## 1 = __builtin_ia32_crc32di(crc ## 1, *(buf ## 1 + offset));
#endif
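
/* CRCsinglet: advance a single CRC stream by 8 bytes (two 4-byte crc32si
 * steps on 32-bit targets), for data outside the three-way blocks. */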
#ifndef __LP64__
#define CRCsinglet(crc, buf, offset) \
crc = __builtin_ia32_crc32si(crc, *(uint32_t*)(buf + offset)); \
crc = __builtin_ia32_crc32si(crc, *(uint32_t*)(buf + offset + sizeof(uint32_t)));
#else
#define CRCsinglet(crc, buf, offset) crc = __builtin_ia32_crc32di(crc, *(uint64_t*)(buf + offset));
#endif
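
/* CombineCRC: merge the three partial CRCs. crc0 and crc1 are carry-less
 * multiplied (PCLMULQDQ) by precomputed folding constants from
 * crc32cIntelC_K, selected by block_size, which advances each of them past
 * the data covered by the streams ahead of it. The products are XORed with
 * the final qword of stream 2 and folded into crc2 with ordinary crc32
 * steps; the merged CRC is left in crc0. Expects crc0/crc1/crc2, next2,
 * block_size and crc32cIntelC_K to be in scope at the expansion site. */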
#ifndef __LP64__
#define CombineCRC()\
asm volatile (\
"movdqu (%3), %%xmm0\n\t"\
"movd %0, %%xmm1\n\t"\
"pclmullqlqdq %%xmm0, %%xmm1\n\t"\
"movd %2, %%xmm2\n\t"\
"pclmullqhqdq %%xmm0, %%xmm2\n\t"\
"pxor %%xmm2, %%xmm1\n\t"\
"movdqu (%4), %%xmm2\n\t"\
"pxor %%xmm2, %%xmm1\n\t"\
"movd %%xmm1, %0\n\t"\
"crc32l %0, %5\n\t"\
"pextrd $1, %%xmm1, %1\n\t"\
"crc32l %1, %5\n\t"\
"movl %5, %0"\
: "=r" (crc0)\
: "0" (crc0), "r" (crc1), "r" (crc32cIntelC_K + block_size - 1),\
  "r" ((uint64_t*) next2 - 1), "r" (crc2)\
: "%xmm0", "%xmm1", "%xmm2"\
);
#else
#define CombineCRC()\
asm volatile (\
"movdqa (%3), %%xmm0\n\t"\
"movq %0, %%xmm1\n\t"\
"pclmullqlqdq %%xmm0, %%xmm1\n\t"\
"movq %2, %%xmm2\n\t"\
"pclmullqhqdq %%xmm0, %%xmm2\n\t"\
"pxor %%xmm2, %%xmm1\n\t"\
"movq %%xmm1, %0"\
: "=r" (crc0)\
: "0" (crc0), "r" (crc1), "r" (crc32cIntelC_K + block_size - 1)\
: "%xmm0", "%xmm1", "%xmm2"\
);\
crc0 = crc0 ^ *((uint64_t*) next2 - 1);\
crc2 = __builtin_ia32_crc32di(crc2, crc0);\
crc0 = crc2;
#endif
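
/*
 * Illustrative sketch, not part of the original source: one way the macros
 * above can be wired into a three-stream block loop. The function name, the
 * loop structure, and the zero-initialization of crc1/crc2 are assumptions
 * made for illustration. crc32cIntelC_K is the external folding-constant
 * table the CombineCRC macro references; it is assumed to be defined
 * elsewhere with an entry for this block_size (and, on the 64-bit path,
 * 16-byte alignment at the indexed entry, since the asm uses movdqa).
 */
#include <stddef.h>
#include <stdint.h>

extern const uint64_t crc32cIntelC_K[]; /* folding constants, defined elsewhere */

static uint32_t crc32c_three_way_block(uint32_t crc, const void* data,
        size_t block_size) /* block_size = 64-bit words per stream, >= 1 */
{
    const uint64_t* next0 = (const uint64_t*) data;
    const uint64_t* next1 = next0 + block_size;
    const uint64_t* next2 = next1 + block_size;
    size_t offset;

    CRC_NATIVE crc0 = crc, crc1 = 0, crc2 = 0;

    /* Run the three streams in lockstep; offset stays a plain variable
     * because the macros above do not parenthesize their arguments. */
    for (offset = 0; offset < block_size - 1; offset++) {
        CRCtriplet(crc, next, offset);
    }

    /* offset == block_size - 1 here: the last word of streams 0 and 1 goes
     * through CRCduplet, stream 2's last word is absorbed by CombineCRC. */
    CRCduplet(crc, next, offset);

    next2 += block_size; /* CombineCRC reads stream 2's last word at next2 - 1 */
    CombineCRC();

    return (uint32_t) crc0; /* both paths leave the merged CRC in crc0 */
}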