From: Bassam Tabbara Date: Thu, 18 May 2017 00:41:16 +0000 (-0700) Subject: cmake: workaround ccache issue with .S assembly files X-Git-Tag: v12.1.0~10^2~84^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F15142%2Fhead;p=ceph.git cmake: workaround ccache issue with .S assembly files When building with -DWITH_CCACHE=ON, recent versions of ccache (for example 3.3.3) seem to mangle object files from the cache for source files that have a .S extension. This seems similar to, but is unrelated to, PR #14633. The workaround here is to rename .S files to .s (lower case). Signed-off-by: Bassam Tabbara --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 387a7d30731..6c5375bfdbd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -90,7 +90,7 @@ if(HAVE_INTEL) endif() execute_process( - COMMAND yasm -f elf64 ${CMAKE_SOURCE_DIR}/src/common/crc32c_intel_fast_asm.S -o /dev/null + COMMAND yasm -f elf64 ${CMAKE_SOURCE_DIR}/src/common/crc32c_intel_fast_asm.s -o /dev/null RESULT_VARIABLE no_yasm OUTPUT_QUIET) if(no_yasm) @@ -536,13 +536,13 @@ if(HAVE_INTEL) common/crc32c_intel_fast.c) if(HAVE_GOOD_YASM_ELF64) list(APPEND libcommon_files - common/crc32c_intel_fast_asm.S - common/crc32c_intel_fast_zero_asm.S) + common/crc32c_intel_fast_asm.s + common/crc32c_intel_fast_zero_asm.s) endif(HAVE_GOOD_YASM_ELF64) elseif(HAVE_POWER8) list(APPEND libcommon_files common/crc32c_ppc.c - common/crc32c_ppc_asm.S) + common/crc32c_ppc_asm.s) endif(HAVE_INTEL) if(LINUX) diff --git a/src/common/crc32c_intel_fast_asm.S b/src/common/crc32c_intel_fast_asm.S deleted file mode 100644 index 2189684b4c7..00000000000 --- a/src/common/crc32c_intel_fast_asm.S +++ /dev/null @@ -1,666 +0,0 @@ -; -; Copyright 2012-2013 Intel Corporation All Rights Reserved. -; All rights reserved. 
-; -; http://opensource.org/licenses/BSD-3-Clause -; -; Redistribution and use in source and binary forms, with or without -; modification, are permitted provided that the following -; conditions are met: -; -; * Redistributions of source code must retain the above copyright -; notice, this list of conditions and the following disclaimer. -; -; * Redistributions in binary form must reproduce the above copyright -; notice, this list of conditions and the following disclaimer in -; the documentation and/or other materials provided with the -; distribution. -; -; * Neither the name of the Intel Corporation nor the names of its -; contributors may be used to endorse or promote products derived -; from this software without specific prior written permission. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -; COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -; OF THE POSSIBILITY OF SUCH DAMAGE. 
-; - -; Function to compute iscsi CRC32 with table-based recombination -; crc done "by 3" with block sizes 1920, 960, 480, 240 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -; crcB3 MACRO to implement crc32 on 3 %%bSize-byte blocks -%macro crcB3 3 -%define %%bSize %1 ; 1/3 of buffer size -%define %%td2 %2 ; table offset for crc0 (2/3 of buffer) -%define %%td1 %3 ; table offset for crc1 (1/3 of buffer) - -%IF %%bSize=640 - sub len, %%bSize*3 - js %%crcB3_end ;; jump to next level if 3*blockSize > len -%ELSE - cmp len, %%bSize*3 - jnae %%crcB3_end ;; jump to next level if 3*blockSize > len -%ENDIF - ;;;;;; Calculate CRC of 3 blocks of the buffer ;;;;;; -%%crcB3_loop: - ;; rax = crc0 = initial crc - xor rbx, rbx ;; rbx = crc1 = 0; - xor r10, r10 ;; r10 = crc2 = 0; - - %assign i 0 - %rep %%bSize/8 - 1 - crc32 rax, [bufptmp+i + 0*%%bSize] ;; update crc0 - crc32 rbx, [bufptmp+i + 1*%%bSize] ;; update crc1 - crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2 - %assign i (i+8) - %endrep - crc32 rax, [bufptmp+i + 0*%%bSize] ;; update crc0 - crc32 rbx, [bufptmp+i + 1*%%bSize] ;; update crc1 -; SKIP ;crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2 - - ; merge in crc0 - movzx bufp_dw, al - mov r9d, [crc_init + bufp*4 + %%td2] - movzx bufp_dw, ah - shr eax, 16 - mov r11d, [crc_init + bufp*4 + %%td2] - shl r11, 8 - xor r9, r11 - - movzx bufp_dw, al - mov r11d, [crc_init + bufp*4 + %%td2] - movzx bufp_dw, ah - shl r11, 16 - xor r9, r11 - mov r11d, [crc_init + bufp*4 + %%td2] - shl r11, 24 - xor r9, r11 - - ; merge in crc1 - - movzx bufp_dw, bl - mov r11d, [crc_init + bufp*4 + %%td1] - movzx bufp_dw, bh - shr ebx, 16 - xor r9, r11 - mov r11d, [crc_init + bufp*4 + %%td1] - shl r11, 8 - xor r9, r11 - - movzx bufp_dw, bl - mov r11d, [crc_init + bufp*4 + %%td1] - movzx bufp_dw, bh - shl r11, 16 - xor r9, r11 - mov r11d, [crc_init + bufp*4 + %%td1] - shl r11, 24 - xor r9, r11 - - 
xor r9, [bufptmp+i + 2*%%bSize] - crc32 r10, r9 - mov rax, r10 - - add bufptmp, %%bSize*3 ;; move to next block - sub len, %%bSize*3 -%IF %%bSize=640 - jns %%crcB3_loop -%ENDIF - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -%%crcB3_end: -%IF %%bSize=640 - add len, %%bSize*3 -%ENDIF - je do_return ;; return if remaining data is zero -%endmacro - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;; ISCSI CRC 32 Implementation with crc32 Instruction - -;;; unsigned int crc32_iscsi_00(unsigned char * buffer, int len, unsigned int crc_init); -;;; -;;; *buf = rcx -;;; len = rdx -;;; crc_init = r8 -;;; - -global crc32_iscsi_00:function -crc32_iscsi_00: - -%ifidn __OUTPUT_FORMAT__, elf64 -%define bufp rdi -%define bufp_dw edi -%define bufp_w di -%define bufp_b dil -%define bufptmp rcx -%define block_0 rcx -%define block_1 r8 -%define block_2 r11 -%define len rsi -%define len_dw esi -%define len_w si -%define len_b sil -%define crc_init rdx -%define crc_init_dw edx -%else -%define bufp rcx -%define bufp_dw ecx -%define bufp_w cx -%define bufp_b cl -%define bufptmp rdi -%define block_0 rdi -%define block_1 rsi -%define block_2 r11 -%define len rdx -%define len_dw edx -%define len_w dx -%define len_b dl -%define crc_init r8 -%define crc_init_dw r8d -%endif - - - push rdi - push rbx - - mov rax, crc_init ;; rax = crc_init; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - mov bufptmp, bufp ;; rdi = *buf - neg bufp - and bufp, 7 ;; calculate the unalignment - ;; amount of the address - je proc_block ;; Skip if aligned - - cmp len, 8 - jb less_than_8 - - ;;;; Calculate CRC of unaligned bytes of the buffer (if 
any) ;;;; - mov rbx, [bufptmp] ;; load a quadword from the buffer - add bufptmp, bufp ;; align buffer pointer for - ;; quadword processing - sub len, bufp ;; update buffer length -align_loop: - crc32 eax, bl ;; compute crc32 of 1-byte - shr rbx, 8 ;; get next byte - dec bufp - jne align_loop - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; 2) BLOCK LEVEL: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -proc_block: - cmp len, 240 - jb bit8 - - lea crc_init, [mul_table_72 wrt rip] ;; load table base address - - crcB3 640, 0x1000, 0x0c00 ; 640*3 = 1920 (Tables 1280, 640) - crcB3 320, 0x0c00, 0x0800 ; 320*3 = 960 (Tables 640, 320) - crcB3 160, 0x0800, 0x0400 ; 160*3 = 480 (Tables 320, 160) - crcB3 80, 0x0400, 0x0000 ; 80*3 = 240 (Tables 160, 80) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;4) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of rdx are full) - -bit8: - shl len_b, 1 ;; shift-out MSB (bit-7) - jnc bit7 ;; jump to bit-6 if bit-7 == 0 - %assign i 0 - %rep 16 - crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data - %assign i (i+8) - %endrep - je do_return ;; return if remaining data is zero - add bufptmp, 128 ;; buf +=64; (next 64 bytes) - -bit7: - shl len_b, 1 ;; shift-out MSB (bit-7) - jnc bit6 ;; jump to bit-6 if bit-7 == 0 - %assign i 0 - %rep 8 - crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data - %assign i (i+8) - %endrep - je do_return ;; return if remaining data is zero - add bufptmp, 64 ;; buf +=64; (next 64 bytes) -bit6: - shl len_b, 1 ;; shift-out MSB (bit-6) - jnc bit5 ;; jump to bit-5 if bit-6 == 0 - %assign i 0 - %rep 4 - crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data - %assign i (i+8) - %endrep - je do_return ;; return if remaining data is zero - add bufptmp, 32 ;; buf +=32; (next 32 bytes) -bit5: - shl len_b, 1 ;; shift-out MSB (bit-5) - jnc bit4 ;; jump to bit-4 if bit-5 == 0 - %assign 
i 0 - %rep 2 - crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data - %assign i (i+8) - %endrep - je do_return ;; return if remaining data is zero - add bufptmp, 16 ;; buf +=16; (next 16 bytes) -bit4: - shl len_b, 1 ;; shift-out MSB (bit-4) - jnc bit3 ;; jump to bit-3 if bit-4 == 0 - crc32 rax, [bufptmp] ;; compute crc32 of 8-byte data - je do_return ;; return if remaining data is zero - add bufptmp, 8 ;; buf +=8; (next 8 bytes) -bit3: - mov rbx, [bufptmp] ;; load a 8-bytes from the buffer: - shl len_b, 1 ;; shift-out MSB (bit-3) - jnc bit2 ;; jump to bit-2 if bit-3 == 0 - crc32 eax, ebx ;; compute crc32 of 4-byte data - je do_return ;; return if remaining data is zero - shr rbx, 32 ;; get next 3 bytes -bit2: - shl len_b, 1 ;; shift-out MSB (bit-2) - jnc bit1 ;; jump to bit-1 if bit-2 == 0 - crc32 eax, bx ;; compute crc32 of 2-byte data - je do_return ;; return if remaining data is zero - shr rbx, 16 ;; next byte -bit1: - test len_b,len_b - je do_return - crc32 eax, bl ;; compute crc32 of 1-byte data -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -do_return: - - pop rbx - pop rdi - ret - -less_than_8: - test len,4 - jz less_than_4 - crc32 eax, dword[bufptmp] - add bufptmp,4 -less_than_4: - test len,2 - jz less_than_2 - crc32 eax, word[bufptmp] - add bufptmp,2 -less_than_2: - test len,1 - jz do_return - crc32 rax, byte[bufptmp] - pop rbx - pop bufptmp - ret - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;; global mul_table_72, mul_table_152, mul_table_312, mul_table_632, mul_table_1272 - -section .data -align 8 -mul_table_72: -DD 0x00000000,0x39d3b296,0x73a7652c,0x4a74d7ba -DD 
0xe74eca58,0xde9d78ce,0x94e9af74,0xad3a1de2 -DD 0xcb71e241,0xf2a250d7,0xb8d6876d,0x810535fb -DD 0x2c3f2819,0x15ec9a8f,0x5f984d35,0x664bffa3 -DD 0x930fb273,0xaadc00e5,0xe0a8d75f,0xd97b65c9 -DD 0x7441782b,0x4d92cabd,0x07e61d07,0x3e35af91 -DD 0x587e5032,0x61ade2a4,0x2bd9351e,0x120a8788 -DD 0xbf309a6a,0x86e328fc,0xcc97ff46,0xf5444dd0 -DD 0x23f31217,0x1a20a081,0x5054773b,0x6987c5ad -DD 0xc4bdd84f,0xfd6e6ad9,0xb71abd63,0x8ec90ff5 -DD 0xe882f056,0xd15142c0,0x9b25957a,0xa2f627ec -DD 0x0fcc3a0e,0x361f8898,0x7c6b5f22,0x45b8edb4 -DD 0xb0fca064,0x892f12f2,0xc35bc548,0xfa8877de -DD 0x57b26a3c,0x6e61d8aa,0x24150f10,0x1dc6bd86 -DD 0x7b8d4225,0x425ef0b3,0x082a2709,0x31f9959f -DD 0x9cc3887d,0xa5103aeb,0xef64ed51,0xd6b75fc7 -DD 0x47e6242e,0x7e3596b8,0x34414102,0x0d92f394 -DD 0xa0a8ee76,0x997b5ce0,0xd30f8b5a,0xeadc39cc -DD 0x8c97c66f,0xb54474f9,0xff30a343,0xc6e311d5 -DD 0x6bd90c37,0x520abea1,0x187e691b,0x21addb8d -DD 0xd4e9965d,0xed3a24cb,0xa74ef371,0x9e9d41e7 -DD 0x33a75c05,0x0a74ee93,0x40003929,0x79d38bbf -DD 0x1f98741c,0x264bc68a,0x6c3f1130,0x55eca3a6 -DD 0xf8d6be44,0xc1050cd2,0x8b71db68,0xb2a269fe -DD 0x64153639,0x5dc684af,0x17b25315,0x2e61e183 -DD 0x835bfc61,0xba884ef7,0xf0fc994d,0xc92f2bdb -DD 0xaf64d478,0x96b766ee,0xdcc3b154,0xe51003c2 -DD 0x482a1e20,0x71f9acb6,0x3b8d7b0c,0x025ec99a -DD 0xf71a844a,0xcec936dc,0x84bde166,0xbd6e53f0 -DD 0x10544e12,0x2987fc84,0x63f32b3e,0x5a2099a8 -DD 0x3c6b660b,0x05b8d49d,0x4fcc0327,0x761fb1b1 -DD 0xdb25ac53,0xe2f61ec5,0xa882c97f,0x91517be9 -DD 0x8fcc485c,0xb61ffaca,0xfc6b2d70,0xc5b89fe6 -DD 0x68828204,0x51513092,0x1b25e728,0x22f655be -DD 0x44bdaa1d,0x7d6e188b,0x371acf31,0x0ec97da7 -DD 0xa3f36045,0x9a20d2d3,0xd0540569,0xe987b7ff -DD 0x1cc3fa2f,0x251048b9,0x6f649f03,0x56b72d95 -DD 0xfb8d3077,0xc25e82e1,0x882a555b,0xb1f9e7cd -DD 0xd7b2186e,0xee61aaf8,0xa4157d42,0x9dc6cfd4 -DD 0x30fcd236,0x092f60a0,0x435bb71a,0x7a88058c -DD 0xac3f5a4b,0x95ece8dd,0xdf983f67,0xe64b8df1 -DD 0x4b719013,0x72a22285,0x38d6f53f,0x010547a9 -DD 
0x674eb80a,0x5e9d0a9c,0x14e9dd26,0x2d3a6fb0 -DD 0x80007252,0xb9d3c0c4,0xf3a7177e,0xca74a5e8 -DD 0x3f30e838,0x06e35aae,0x4c978d14,0x75443f82 -DD 0xd87e2260,0xe1ad90f6,0xabd9474c,0x920af5da -DD 0xf4410a79,0xcd92b8ef,0x87e66f55,0xbe35ddc3 -DD 0x130fc021,0x2adc72b7,0x60a8a50d,0x597b179b -DD 0xc82a6c72,0xf1f9dee4,0xbb8d095e,0x825ebbc8 -DD 0x2f64a62a,0x16b714bc,0x5cc3c306,0x65107190 -DD 0x035b8e33,0x3a883ca5,0x70fceb1f,0x492f5989 -DD 0xe415446b,0xddc6f6fd,0x97b22147,0xae6193d1 -DD 0x5b25de01,0x62f66c97,0x2882bb2d,0x115109bb -DD 0xbc6b1459,0x85b8a6cf,0xcfcc7175,0xf61fc3e3 -DD 0x90543c40,0xa9878ed6,0xe3f3596c,0xda20ebfa -DD 0x771af618,0x4ec9448e,0x04bd9334,0x3d6e21a2 -DD 0xebd97e65,0xd20accf3,0x987e1b49,0xa1ada9df -DD 0x0c97b43d,0x354406ab,0x7f30d111,0x46e36387 -DD 0x20a89c24,0x197b2eb2,0x530ff908,0x6adc4b9e -DD 0xc7e6567c,0xfe35e4ea,0xb4413350,0x8d9281c6 -DD 0x78d6cc16,0x41057e80,0x0b71a93a,0x32a21bac -DD 0x9f98064e,0xa64bb4d8,0xec3f6362,0xd5ecd1f4 -DD 0xb3a72e57,0x8a749cc1,0xc0004b7b,0xf9d3f9ed -DD 0x54e9e40f,0x6d3a5699,0x274e8123,0x1e9d33b5 - -mul_table_152: -DD 0x00000000,0x878a92a7,0x0af953bf,0x8d73c118 -DD 0x15f2a77e,0x927835d9,0x1f0bf4c1,0x98816666 -DD 0x2be54efc,0xac6fdc5b,0x211c1d43,0xa6968fe4 -DD 0x3e17e982,0xb99d7b25,0x34eeba3d,0xb364289a -DD 0x57ca9df8,0xd0400f5f,0x5d33ce47,0xdab95ce0 -DD 0x42383a86,0xc5b2a821,0x48c16939,0xcf4bfb9e -DD 0x7c2fd304,0xfba541a3,0x76d680bb,0xf15c121c -DD 0x69dd747a,0xee57e6dd,0x632427c5,0xe4aeb562 -DD 0xaf953bf0,0x281fa957,0xa56c684f,0x22e6fae8 -DD 0xba679c8e,0x3ded0e29,0xb09ecf31,0x37145d96 -DD 0x8470750c,0x03fae7ab,0x8e8926b3,0x0903b414 -DD 0x9182d272,0x160840d5,0x9b7b81cd,0x1cf1136a -DD 0xf85fa608,0x7fd534af,0xf2a6f5b7,0x752c6710 -DD 0xedad0176,0x6a2793d1,0xe75452c9,0x60dec06e -DD 0xd3bae8f4,0x54307a53,0xd943bb4b,0x5ec929ec -DD 0xc6484f8a,0x41c2dd2d,0xccb11c35,0x4b3b8e92 -DD 0x5ac60111,0xdd4c93b6,0x503f52ae,0xd7b5c009 -DD 0x4f34a66f,0xc8be34c8,0x45cdf5d0,0xc2476777 -DD 0x71234fed,0xf6a9dd4a,0x7bda1c52,0xfc508ef5 -DD 
0x64d1e893,0xe35b7a34,0x6e28bb2c,0xe9a2298b -DD 0x0d0c9ce9,0x8a860e4e,0x07f5cf56,0x807f5df1 -DD 0x18fe3b97,0x9f74a930,0x12076828,0x958dfa8f -DD 0x26e9d215,0xa16340b2,0x2c1081aa,0xab9a130d -DD 0x331b756b,0xb491e7cc,0x39e226d4,0xbe68b473 -DD 0xf5533ae1,0x72d9a846,0xffaa695e,0x7820fbf9 -DD 0xe0a19d9f,0x672b0f38,0xea58ce20,0x6dd25c87 -DD 0xdeb6741d,0x593ce6ba,0xd44f27a2,0x53c5b505 -DD 0xcb44d363,0x4cce41c4,0xc1bd80dc,0x4637127b -DD 0xa299a719,0x251335be,0xa860f4a6,0x2fea6601 -DD 0xb76b0067,0x30e192c0,0xbd9253d8,0x3a18c17f -DD 0x897ce9e5,0x0ef67b42,0x8385ba5a,0x040f28fd -DD 0x9c8e4e9b,0x1b04dc3c,0x96771d24,0x11fd8f83 -DD 0xb58c0222,0x32069085,0xbf75519d,0x38ffc33a -DD 0xa07ea55c,0x27f437fb,0xaa87f6e3,0x2d0d6444 -DD 0x9e694cde,0x19e3de79,0x94901f61,0x131a8dc6 -DD 0x8b9beba0,0x0c117907,0x8162b81f,0x06e82ab8 -DD 0xe2469fda,0x65cc0d7d,0xe8bfcc65,0x6f355ec2 -DD 0xf7b438a4,0x703eaa03,0xfd4d6b1b,0x7ac7f9bc -DD 0xc9a3d126,0x4e294381,0xc35a8299,0x44d0103e -DD 0xdc517658,0x5bdbe4ff,0xd6a825e7,0x5122b740 -DD 0x1a1939d2,0x9d93ab75,0x10e06a6d,0x976af8ca -DD 0x0feb9eac,0x88610c0b,0x0512cd13,0x82985fb4 -DD 0x31fc772e,0xb676e589,0x3b052491,0xbc8fb636 -DD 0x240ed050,0xa38442f7,0x2ef783ef,0xa97d1148 -DD 0x4dd3a42a,0xca59368d,0x472af795,0xc0a06532 -DD 0x58210354,0xdfab91f3,0x52d850eb,0xd552c24c -DD 0x6636ead6,0xe1bc7871,0x6ccfb969,0xeb452bce -DD 0x73c44da8,0xf44edf0f,0x793d1e17,0xfeb78cb0 -DD 0xef4a0333,0x68c09194,0xe5b3508c,0x6239c22b -DD 0xfab8a44d,0x7d3236ea,0xf041f7f2,0x77cb6555 -DD 0xc4af4dcf,0x4325df68,0xce561e70,0x49dc8cd7 -DD 0xd15deab1,0x56d77816,0xdba4b90e,0x5c2e2ba9 -DD 0xb8809ecb,0x3f0a0c6c,0xb279cd74,0x35f35fd3 -DD 0xad7239b5,0x2af8ab12,0xa78b6a0a,0x2001f8ad -DD 0x9365d037,0x14ef4290,0x999c8388,0x1e16112f -DD 0x86977749,0x011de5ee,0x8c6e24f6,0x0be4b651 -DD 0x40df38c3,0xc755aa64,0x4a266b7c,0xcdacf9db -DD 0x552d9fbd,0xd2a70d1a,0x5fd4cc02,0xd85e5ea5 -DD 0x6b3a763f,0xecb0e498,0x61c32580,0xe649b727 -DD 0x7ec8d141,0xf94243e6,0x743182fe,0xf3bb1059 -DD 
0x1715a53b,0x909f379c,0x1decf684,0x9a666423 -DD 0x02e70245,0x856d90e2,0x081e51fa,0x8f94c35d -DD 0x3cf0ebc7,0xbb7a7960,0x3609b878,0xb1832adf -DD 0x29024cb9,0xae88de1e,0x23fb1f06,0xa4718da1 - -mul_table_312: -DD 0x00000000,0xbac2fd7b,0x70698c07,0xcaab717c -DD 0xe0d3180e,0x5a11e575,0x90ba9409,0x2a786972 -DD 0xc44a46ed,0x7e88bb96,0xb423caea,0x0ee13791 -DD 0x24995ee3,0x9e5ba398,0x54f0d2e4,0xee322f9f -DD 0x8d78fb2b,0x37ba0650,0xfd11772c,0x47d38a57 -DD 0x6dabe325,0xd7691e5e,0x1dc26f22,0xa7009259 -DD 0x4932bdc6,0xf3f040bd,0x395b31c1,0x8399ccba -DD 0xa9e1a5c8,0x132358b3,0xd98829cf,0x634ad4b4 -DD 0x1f1d80a7,0xa5df7ddc,0x6f740ca0,0xd5b6f1db -DD 0xffce98a9,0x450c65d2,0x8fa714ae,0x3565e9d5 -DD 0xdb57c64a,0x61953b31,0xab3e4a4d,0x11fcb736 -DD 0x3b84de44,0x8146233f,0x4bed5243,0xf12faf38 -DD 0x92657b8c,0x28a786f7,0xe20cf78b,0x58ce0af0 -DD 0x72b66382,0xc8749ef9,0x02dfef85,0xb81d12fe -DD 0x562f3d61,0xecedc01a,0x2646b166,0x9c844c1d -DD 0xb6fc256f,0x0c3ed814,0xc695a968,0x7c575413 -DD 0x3e3b014e,0x84f9fc35,0x4e528d49,0xf4907032 -DD 0xdee81940,0x642ae43b,0xae819547,0x1443683c -DD 0xfa7147a3,0x40b3bad8,0x8a18cba4,0x30da36df -DD 0x1aa25fad,0xa060a2d6,0x6acbd3aa,0xd0092ed1 -DD 0xb343fa65,0x0981071e,0xc32a7662,0x79e88b19 -DD 0x5390e26b,0xe9521f10,0x23f96e6c,0x993b9317 -DD 0x7709bc88,0xcdcb41f3,0x0760308f,0xbda2cdf4 -DD 0x97daa486,0x2d1859fd,0xe7b32881,0x5d71d5fa -DD 0x212681e9,0x9be47c92,0x514f0dee,0xeb8df095 -DD 0xc1f599e7,0x7b37649c,0xb19c15e0,0x0b5ee89b -DD 0xe56cc704,0x5fae3a7f,0x95054b03,0x2fc7b678 -DD 0x05bfdf0a,0xbf7d2271,0x75d6530d,0xcf14ae76 -DD 0xac5e7ac2,0x169c87b9,0xdc37f6c5,0x66f50bbe -DD 0x4c8d62cc,0xf64f9fb7,0x3ce4eecb,0x862613b0 -DD 0x68143c2f,0xd2d6c154,0x187db028,0xa2bf4d53 -DD 0x88c72421,0x3205d95a,0xf8aea826,0x426c555d -DD 0x7c76029c,0xc6b4ffe7,0x0c1f8e9b,0xb6dd73e0 -DD 0x9ca51a92,0x2667e7e9,0xeccc9695,0x560e6bee -DD 0xb83c4471,0x02feb90a,0xc855c876,0x7297350d -DD 0x58ef5c7f,0xe22da104,0x2886d078,0x92442d03 -DD 0xf10ef9b7,0x4bcc04cc,0x816775b0,0x3ba588cb -DD 
0x11dde1b9,0xab1f1cc2,0x61b46dbe,0xdb7690c5 -DD 0x3544bf5a,0x8f864221,0x452d335d,0xffefce26 -DD 0xd597a754,0x6f555a2f,0xa5fe2b53,0x1f3cd628 -DD 0x636b823b,0xd9a97f40,0x13020e3c,0xa9c0f347 -DD 0x83b89a35,0x397a674e,0xf3d11632,0x4913eb49 -DD 0xa721c4d6,0x1de339ad,0xd74848d1,0x6d8ab5aa -DD 0x47f2dcd8,0xfd3021a3,0x379b50df,0x8d59ada4 -DD 0xee137910,0x54d1846b,0x9e7af517,0x24b8086c -DD 0x0ec0611e,0xb4029c65,0x7ea9ed19,0xc46b1062 -DD 0x2a593ffd,0x909bc286,0x5a30b3fa,0xe0f24e81 -DD 0xca8a27f3,0x7048da88,0xbae3abf4,0x0021568f -DD 0x424d03d2,0xf88ffea9,0x32248fd5,0x88e672ae -DD 0xa29e1bdc,0x185ce6a7,0xd2f797db,0x68356aa0 -DD 0x8607453f,0x3cc5b844,0xf66ec938,0x4cac3443 -DD 0x66d45d31,0xdc16a04a,0x16bdd136,0xac7f2c4d -DD 0xcf35f8f9,0x75f70582,0xbf5c74fe,0x059e8985 -DD 0x2fe6e0f7,0x95241d8c,0x5f8f6cf0,0xe54d918b -DD 0x0b7fbe14,0xb1bd436f,0x7b163213,0xc1d4cf68 -DD 0xebaca61a,0x516e5b61,0x9bc52a1d,0x2107d766 -DD 0x5d508375,0xe7927e0e,0x2d390f72,0x97fbf209 -DD 0xbd839b7b,0x07416600,0xcdea177c,0x7728ea07 -DD 0x991ac598,0x23d838e3,0xe973499f,0x53b1b4e4 -DD 0x79c9dd96,0xc30b20ed,0x09a05191,0xb362acea -DD 0xd028785e,0x6aea8525,0xa041f459,0x1a830922 -DD 0x30fb6050,0x8a399d2b,0x4092ec57,0xfa50112c -DD 0x14623eb3,0xaea0c3c8,0x640bb2b4,0xdec94fcf -DD 0xf4b126bd,0x4e73dbc6,0x84d8aaba,0x3e1a57c1 - -mul_table_632: -DD 0x00000000,0x6b749fb2,0xd6e93f64,0xbd9da0d6 -DD 0xa83e0839,0xc34a978b,0x7ed7375d,0x15a3a8ef -DD 0x55906683,0x3ee4f931,0x837959e7,0xe80dc655 -DD 0xfdae6eba,0x96daf108,0x2b4751de,0x4033ce6c -DD 0xab20cd06,0xc05452b4,0x7dc9f262,0x16bd6dd0 -DD 0x031ec53f,0x686a5a8d,0xd5f7fa5b,0xbe8365e9 -DD 0xfeb0ab85,0x95c43437,0x285994e1,0x432d0b53 -DD 0x568ea3bc,0x3dfa3c0e,0x80679cd8,0xeb13036a -DD 0x53adecfd,0x38d9734f,0x8544d399,0xee304c2b -DD 0xfb93e4c4,0x90e77b76,0x2d7adba0,0x460e4412 -DD 0x063d8a7e,0x6d4915cc,0xd0d4b51a,0xbba02aa8 -DD 0xae038247,0xc5771df5,0x78eabd23,0x139e2291 -DD 0xf88d21fb,0x93f9be49,0x2e641e9f,0x4510812d -DD 0x50b329c2,0x3bc7b670,0x865a16a6,0xed2e8914 -DD 
0xad1d4778,0xc669d8ca,0x7bf4781c,0x1080e7ae -DD 0x05234f41,0x6e57d0f3,0xd3ca7025,0xb8beef97 -DD 0xa75bd9fa,0xcc2f4648,0x71b2e69e,0x1ac6792c -DD 0x0f65d1c3,0x64114e71,0xd98ceea7,0xb2f87115 -DD 0xf2cbbf79,0x99bf20cb,0x2422801d,0x4f561faf -DD 0x5af5b740,0x318128f2,0x8c1c8824,0xe7681796 -DD 0x0c7b14fc,0x670f8b4e,0xda922b98,0xb1e6b42a -DD 0xa4451cc5,0xcf318377,0x72ac23a1,0x19d8bc13 -DD 0x59eb727f,0x329fedcd,0x8f024d1b,0xe476d2a9 -DD 0xf1d57a46,0x9aa1e5f4,0x273c4522,0x4c48da90 -DD 0xf4f63507,0x9f82aab5,0x221f0a63,0x496b95d1 -DD 0x5cc83d3e,0x37bca28c,0x8a21025a,0xe1559de8 -DD 0xa1665384,0xca12cc36,0x778f6ce0,0x1cfbf352 -DD 0x09585bbd,0x622cc40f,0xdfb164d9,0xb4c5fb6b -DD 0x5fd6f801,0x34a267b3,0x893fc765,0xe24b58d7 -DD 0xf7e8f038,0x9c9c6f8a,0x2101cf5c,0x4a7550ee -DD 0x0a469e82,0x61320130,0xdcafa1e6,0xb7db3e54 -DD 0xa27896bb,0xc90c0909,0x7491a9df,0x1fe5366d -DD 0x4b5bc505,0x202f5ab7,0x9db2fa61,0xf6c665d3 -DD 0xe365cd3c,0x8811528e,0x358cf258,0x5ef86dea -DD 0x1ecba386,0x75bf3c34,0xc8229ce2,0xa3560350 -DD 0xb6f5abbf,0xdd81340d,0x601c94db,0x0b680b69 -DD 0xe07b0803,0x8b0f97b1,0x36923767,0x5de6a8d5 -DD 0x4845003a,0x23319f88,0x9eac3f5e,0xf5d8a0ec -DD 0xb5eb6e80,0xde9ff132,0x630251e4,0x0876ce56 -DD 0x1dd566b9,0x76a1f90b,0xcb3c59dd,0xa048c66f -DD 0x18f629f8,0x7382b64a,0xce1f169c,0xa56b892e -DD 0xb0c821c1,0xdbbcbe73,0x66211ea5,0x0d558117 -DD 0x4d664f7b,0x2612d0c9,0x9b8f701f,0xf0fbefad -DD 0xe5584742,0x8e2cd8f0,0x33b17826,0x58c5e794 -DD 0xb3d6e4fe,0xd8a27b4c,0x653fdb9a,0x0e4b4428 -DD 0x1be8ecc7,0x709c7375,0xcd01d3a3,0xa6754c11 -DD 0xe646827d,0x8d321dcf,0x30afbd19,0x5bdb22ab -DD 0x4e788a44,0x250c15f6,0x9891b520,0xf3e52a92 -DD 0xec001cff,0x8774834d,0x3ae9239b,0x519dbc29 -DD 0x443e14c6,0x2f4a8b74,0x92d72ba2,0xf9a3b410 -DD 0xb9907a7c,0xd2e4e5ce,0x6f794518,0x040ddaaa -DD 0x11ae7245,0x7adaedf7,0xc7474d21,0xac33d293 -DD 0x4720d1f9,0x2c544e4b,0x91c9ee9d,0xfabd712f -DD 0xef1ed9c0,0x846a4672,0x39f7e6a4,0x52837916 -DD 0x12b0b77a,0x79c428c8,0xc459881e,0xaf2d17ac -DD 
0xba8ebf43,0xd1fa20f1,0x6c678027,0x07131f95 -DD 0xbfadf002,0xd4d96fb0,0x6944cf66,0x023050d4 -DD 0x1793f83b,0x7ce76789,0xc17ac75f,0xaa0e58ed -DD 0xea3d9681,0x81490933,0x3cd4a9e5,0x57a03657 -DD 0x42039eb8,0x2977010a,0x94eaa1dc,0xff9e3e6e -DD 0x148d3d04,0x7ff9a2b6,0xc2640260,0xa9109dd2 -DD 0xbcb3353d,0xd7c7aa8f,0x6a5a0a59,0x012e95eb -DD 0x411d5b87,0x2a69c435,0x97f464e3,0xfc80fb51 -DD 0xe92353be,0x8257cc0c,0x3fca6cda,0x54bef368 - -mul_table_1272: -DD 0x00000000,0xdd66cbbb,0xbf21e187,0x62472a3c -DD 0x7bafb5ff,0xa6c97e44,0xc48e5478,0x19e89fc3 -DD 0xf75f6bfe,0x2a39a045,0x487e8a79,0x951841c2 -DD 0x8cf0de01,0x519615ba,0x33d13f86,0xeeb7f43d -DD 0xeb52a10d,0x36346ab6,0x5473408a,0x89158b31 -DD 0x90fd14f2,0x4d9bdf49,0x2fdcf575,0xf2ba3ece -DD 0x1c0dcaf3,0xc16b0148,0xa32c2b74,0x7e4ae0cf -DD 0x67a27f0c,0xbac4b4b7,0xd8839e8b,0x05e55530 -DD 0xd34934eb,0x0e2fff50,0x6c68d56c,0xb10e1ed7 -DD 0xa8e68114,0x75804aaf,0x17c76093,0xcaa1ab28 -DD 0x24165f15,0xf97094ae,0x9b37be92,0x46517529 -DD 0x5fb9eaea,0x82df2151,0xe0980b6d,0x3dfec0d6 -DD 0x381b95e6,0xe57d5e5d,0x873a7461,0x5a5cbfda -DD 0x43b42019,0x9ed2eba2,0xfc95c19e,0x21f30a25 -DD 0xcf44fe18,0x122235a3,0x70651f9f,0xad03d424 -DD 0xb4eb4be7,0x698d805c,0x0bcaaa60,0xd6ac61db -DD 0xa37e1f27,0x7e18d49c,0x1c5ffea0,0xc139351b -DD 0xd8d1aad8,0x05b76163,0x67f04b5f,0xba9680e4 -DD 0x542174d9,0x8947bf62,0xeb00955e,0x36665ee5 -DD 0x2f8ec126,0xf2e80a9d,0x90af20a1,0x4dc9eb1a -DD 0x482cbe2a,0x954a7591,0xf70d5fad,0x2a6b9416 -DD 0x33830bd5,0xeee5c06e,0x8ca2ea52,0x51c421e9 -DD 0xbf73d5d4,0x62151e6f,0x00523453,0xdd34ffe8 -DD 0xc4dc602b,0x19baab90,0x7bfd81ac,0xa69b4a17 -DD 0x70372bcc,0xad51e077,0xcf16ca4b,0x127001f0 -DD 0x0b989e33,0xd6fe5588,0xb4b97fb4,0x69dfb40f -DD 0x87684032,0x5a0e8b89,0x3849a1b5,0xe52f6a0e -DD 0xfcc7f5cd,0x21a13e76,0x43e6144a,0x9e80dff1 -DD 0x9b658ac1,0x4603417a,0x24446b46,0xf922a0fd -DD 0xe0ca3f3e,0x3dacf485,0x5febdeb9,0x828d1502 -DD 0x6c3ae13f,0xb15c2a84,0xd31b00b8,0x0e7dcb03 -DD 0x179554c0,0xcaf39f7b,0xa8b4b547,0x75d27efc -DD 
0x431048bf,0x9e768304,0xfc31a938,0x21576283 -DD 0x38bffd40,0xe5d936fb,0x879e1cc7,0x5af8d77c -DD 0xb44f2341,0x6929e8fa,0x0b6ec2c6,0xd608097d -DD 0xcfe096be,0x12865d05,0x70c17739,0xada7bc82 -DD 0xa842e9b2,0x75242209,0x17630835,0xca05c38e -DD 0xd3ed5c4d,0x0e8b97f6,0x6cccbdca,0xb1aa7671 -DD 0x5f1d824c,0x827b49f7,0xe03c63cb,0x3d5aa870 -DD 0x24b237b3,0xf9d4fc08,0x9b93d634,0x46f51d8f -DD 0x90597c54,0x4d3fb7ef,0x2f789dd3,0xf21e5668 -DD 0xebf6c9ab,0x36900210,0x54d7282c,0x89b1e397 -DD 0x670617aa,0xba60dc11,0xd827f62d,0x05413d96 -DD 0x1ca9a255,0xc1cf69ee,0xa38843d2,0x7eee8869 -DD 0x7b0bdd59,0xa66d16e2,0xc42a3cde,0x194cf765 -DD 0x00a468a6,0xddc2a31d,0xbf858921,0x62e3429a -DD 0x8c54b6a7,0x51327d1c,0x33755720,0xee139c9b -DD 0xf7fb0358,0x2a9dc8e3,0x48dae2df,0x95bc2964 -DD 0xe06e5798,0x3d089c23,0x5f4fb61f,0x82297da4 -DD 0x9bc1e267,0x46a729dc,0x24e003e0,0xf986c85b -DD 0x17313c66,0xca57f7dd,0xa810dde1,0x7576165a -DD 0x6c9e8999,0xb1f84222,0xd3bf681e,0x0ed9a3a5 -DD 0x0b3cf695,0xd65a3d2e,0xb41d1712,0x697bdca9 -DD 0x7093436a,0xadf588d1,0xcfb2a2ed,0x12d46956 -DD 0xfc639d6b,0x210556d0,0x43427cec,0x9e24b757 -DD 0x87cc2894,0x5aaae32f,0x38edc913,0xe58b02a8 -DD 0x33276373,0xee41a8c8,0x8c0682f4,0x5160494f -DD 0x4888d68c,0x95ee1d37,0xf7a9370b,0x2acffcb0 -DD 0xc478088d,0x191ec336,0x7b59e90a,0xa63f22b1 -DD 0xbfd7bd72,0x62b176c9,0x00f65cf5,0xdd90974e -DD 0xd875c27e,0x051309c5,0x675423f9,0xba32e842 -DD 0xa3da7781,0x7ebcbc3a,0x1cfb9606,0xc19d5dbd -DD 0x2f2aa980,0xf24c623b,0x900b4807,0x4d6d83bc -DD 0x54851c7f,0x89e3d7c4,0xeba4fdf8,0x36c23643 - -%macro slversion 4 -global %1_slver_%2%3%4 -global %1_slver -%1_slver: -%1_slver_%2%3%4: - dw 0x%4 - db 0x%3, 0x%2 -%endmacro -;;; func core, ver, snum -slversion crc32_iscsi_00, 00, 02, 0014 -; inform linker that this doesn't require executable stack -section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/src/common/crc32c_intel_fast_asm.s b/src/common/crc32c_intel_fast_asm.s new file mode 100644 index 00000000000..2189684b4c7 --- /dev/null +++ 
b/src/common/crc32c_intel_fast_asm.s @@ -0,0 +1,666 @@ +; +; Copyright 2012-2013 Intel Corporation All Rights Reserved. +; All rights reserved. +; +; http://opensource.org/licenses/BSD-3-Clause +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following +; conditions are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; +; * Neither the name of the Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +; COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +; OF THE POSSIBILITY OF SUCH DAMAGE. 
+; + +; Function to compute iscsi CRC32 with table-based recombination +; crc done "by 3" with block sizes 1920, 960, 480, 240 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; crcB3 MACRO to implement crc32 on 3 %%bSize-byte blocks; args: block size, table offset for crc0, table offset for crc1. Uses rbx, r9, r10, r11 and bufp as scratch; the combined crc is left in rax. +%macro crcB3 3 +%define %%bSize %1 ; 1/3 of buffer size +%define %%td2 %2 ; table offset for crc0 (2/3 of buffer) +%define %%td1 %3 ; table offset for crc1 (1/3 of buffer) + +%IF %%bSize=640 + sub len, %%bSize*3 ;; 640 level subtracts up front; the add at %%crcB3_end undoes it + js %%crcB3_end ;; jump to next level if 3*blockSize > len +%ELSE + cmp len, %%bSize*3 + jnae %%crcB3_end ;; jump to next level if 3*blockSize > len +%ENDIF + ;;;;;; Calculate CRC of 3 blocks of the buffer ;;;;;; +%%crcB3_loop: + ;; rax = crc0 = initial crc + xor rbx, rbx ;; rbx = crc1 = 0; + xor r10, r10 ;; r10 = crc2 = 0; + + %assign i 0 + %rep %%bSize/8 - 1 + crc32 rax, [bufptmp+i + 0*%%bSize] ;; update crc0 + crc32 rbx, [bufptmp+i + 1*%%bSize] ;; update crc1 + crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2 + %assign i (i+8) + %endrep + crc32 rax, [bufptmp+i + 0*%%bSize] ;; update crc0 + crc32 rbx, [bufptmp+i + 1*%%bSize] ;; update crc1 +; SKIP ;crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2 (last quadword of block 2 is folded in after the merge below) + + ; merge in crc0: each byte of eax indexes the table at %%td2; entries are shifted by 0/8/16/24 and XORed into r9 + movzx bufp_dw, al + mov r9d, [crc_init + bufp*4 + %%td2] + movzx bufp_dw, ah + shr eax, 16 + mov r11d, [crc_init + bufp*4 + %%td2] + shl r11, 8 + xor r9, r11 + + movzx bufp_dw, al + mov r11d, [crc_init + bufp*4 + %%td2] + movzx bufp_dw, ah + shl r11, 16 + xor r9, r11 + mov r11d, [crc_init + bufp*4 + %%td2] + shl r11, 24 + xor r9, r11 + + ; merge in crc1: same byte-wise lookup on ebx, using the table at %%td1 + + movzx bufp_dw, bl + mov r11d, [crc_init + bufp*4 + %%td1] + movzx bufp_dw, bh + shr ebx, 16 + xor r9, r11 + mov r11d, [crc_init + bufp*4 + %%td1] + shl r11, 8 + xor r9, r11 + + movzx bufp_dw, bl + mov r11d, [crc_init + bufp*4 + %%td1] + movzx bufp_dw, bh + shl r11, 16 + xor r9, r11 + mov r11d, [crc_init + bufp*4 + %%td1] + shl r11, 24 + xor r9, r11 + + 
xor r9, [bufptmp+i + 2*%%bSize] ;; fold merged crc0/crc1 into the skipped last quadword of block 2 + crc32 r10, r9 ;; finish crc2 over that combined quadword + mov rax, r10 ;; rax = combined crc of all 3 blocks + + add bufptmp, %%bSize*3 ;; move to next block + sub len, %%bSize*3 +%IF %%bSize=640 + jns %%crcB3_loop ;; only the 640 level loops while 3*bSize bytes remain +%ENDIF + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%%crcB3_end: +%IF %%bSize=640 + add len, %%bSize*3 ;; restore len after the subtract that went negative +%ENDIF + je do_return ;; return if remaining data is zero +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; ISCSI CRC 32 Implementation with crc32 Instruction + +;;; unsigned int crc32_iscsi_00(unsigned char * buffer, int len, unsigned int crc_init); +;;; +;;; *buf = rcx (rdi when __OUTPUT_FORMAT__ is elf64) +;;; len = rdx (rsi when elf64) +;;; crc_init = r8 (rdx when elf64) +;;; + +global crc32_iscsi_00:function +crc32_iscsi_00: + +%ifidn __OUTPUT_FORMAT__, elf64 +%define bufp rdi +%define bufp_dw edi +%define bufp_w di +%define bufp_b dil +%define bufptmp rcx +%define block_0 rcx +%define block_1 r8 +%define block_2 r11 +%define len rsi +%define len_dw esi +%define len_w si +%define len_b sil +%define crc_init rdx +%define crc_init_dw edx +%else +%define bufp rcx +%define bufp_dw ecx +%define bufp_w cx +%define bufp_b cl +%define bufptmp rdi +%define block_0 rdi +%define block_1 rsi +%define block_2 r11 +%define len rdx +%define len_dw edx +%define len_w dx +%define len_b dl +%define crc_init r8 +%define crc_init_dw r8d +%endif + + + push rdi ;; restored at do_return + push rbx ;; restored at do_return + + mov rax, crc_init ;; rax = crc_init; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + mov bufptmp, bufp ;; bufptmp = *buf (working pointer; bufp becomes scratch) + neg bufp + and bufp, 7 ;; calculate the unalignment + ;; amount of the address + je proc_block ;; Skip if aligned + + cmp len, 8 + jb less_than_8 ;; unaligned and fewer than 8 bytes: handle 4/2/1-byte pieces + + ;;;; Calculate CRC of unaligned bytes of the buffer (if
any) ;;;; + mov rbx, [bufptmp] ;; load a quadword from the buffer + add bufptmp, bufp ;; align buffer pointer for + ;; quadword processing + sub len, bufp ;; update buffer length +align_loop: + crc32 eax, bl ;; compute crc32 of 1-byte + shr rbx, 8 ;; get next byte + dec bufp + jne align_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; 2) BLOCK LEVEL: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +proc_block: + cmp len, 240 + jb bit8 ;; less than the smallest (3*80) block level: go to the tail handling + + lea crc_init, [mul_table_72 wrt rip] ;; load table base address (crc_init was already copied to rax; crcB3 table offsets are relative to this base) + + crcB3 640, 0x1000, 0x0c00 ; 640*3 = 1920 (Tables 1280, 640) + crcB3 320, 0x0c00, 0x0800 ; 320*3 = 960 (Tables 640, 320) + crcB3 160, 0x0800, 0x0400 ; 160*3 = 480 (Tables 320, 160) + crcB3 80, 0x0400, 0x0000 ; 80*3 = 240 (Tables 160, 80) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;3) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full) + +bit8: + shl len_b, 1 ;; shift-out MSB (bit-8) + jnc bit7 ;; jump to bit-7 if bit-8 == 0 + %assign i 0 + %rep 16 + crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + add bufptmp, 128 ;; buf +=128; (next 128 bytes) + +bit7: + shl len_b, 1 ;; shift-out MSB (bit-7) + jnc bit6 ;; jump to bit-6 if bit-7 == 0 + %assign i 0 + %rep 8 + crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + add bufptmp, 64 ;; buf +=64; (next 64 bytes) +bit6: + shl len_b, 1 ;; shift-out MSB (bit-6) + jnc bit5 ;; jump to bit-5 if bit-6 == 0 + %assign i 0 + %rep 4 + crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + add bufptmp, 32 ;; buf +=32; (next 32 bytes) +bit5: + shl len_b, 1 ;; shift-out MSB (bit-5) + jnc bit4 ;; jump to bit-4 if bit-5 == 0 + %assign
i 0 + %rep 2 + crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + add bufptmp, 16 ;; buf +=16; (next 16 bytes) +bit4: + shl len_b, 1 ;; shift-out MSB (bit-4) + jnc bit3 ;; jump to bit-3 if bit-4 == 0 + crc32 rax, [bufptmp] ;; compute crc32 of 8-byte data + je do_return ;; return if remaining data is zero + add bufptmp, 8 ;; buf +=8; (next 8 bytes) +bit3: + mov rbx, [bufptmp] ;; load 8 bytes from the buffer (a full quadword is read even when fewer bytes remain) + shl len_b, 1 ;; shift-out MSB (bit-3) + jnc bit2 ;; jump to bit-2 if bit-3 == 0 + crc32 eax, ebx ;; compute crc32 of 4-byte data + je do_return ;; return if remaining data is zero + shr rbx, 32 ;; get next 3 bytes +bit2: + shl len_b, 1 ;; shift-out MSB (bit-2) + jnc bit1 ;; jump to bit-1 if bit-2 == 0 + crc32 eax, bx ;; compute crc32 of 2-byte data + je do_return ;; return if remaining data is zero + shr rbx, 16 ;; next byte +bit1: + test len_b,len_b + je do_return + crc32 eax, bl ;; compute crc32 of 1-byte data +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +do_return: + + pop rbx + pop rdi + ret + +less_than_8: + test len,4 ;; a 4-byte piece present? + jz less_than_4 + crc32 eax, dword[bufptmp] + add bufptmp,4 +less_than_4: + test len,2 ;; a 2-byte piece present? + jz less_than_2 + crc32 eax, word[bufptmp] + add bufptmp,2 +less_than_2: + test len,1 ;; a final single byte present? + jz do_return + crc32 rax, byte[bufptmp] + pop rbx + pop bufptmp ;; NOTE(review): pairs with the earlier 'push rdi'; bufptmp is rcx under elf64, unlike the 'pop rdi' at do_return - confirm this is intended + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; global mul_table_72, mul_table_152, mul_table_312, mul_table_632, mul_table_1272 + +section .data +align 8 +mul_table_72: +DD 0x00000000,0x39d3b296,0x73a7652c,0x4a74d7ba +DD
0xe74eca58,0xde9d78ce,0x94e9af74,0xad3a1de2 +DD 0xcb71e241,0xf2a250d7,0xb8d6876d,0x810535fb +DD 0x2c3f2819,0x15ec9a8f,0x5f984d35,0x664bffa3 +DD 0x930fb273,0xaadc00e5,0xe0a8d75f,0xd97b65c9 +DD 0x7441782b,0x4d92cabd,0x07e61d07,0x3e35af91 +DD 0x587e5032,0x61ade2a4,0x2bd9351e,0x120a8788 +DD 0xbf309a6a,0x86e328fc,0xcc97ff46,0xf5444dd0 +DD 0x23f31217,0x1a20a081,0x5054773b,0x6987c5ad +DD 0xc4bdd84f,0xfd6e6ad9,0xb71abd63,0x8ec90ff5 +DD 0xe882f056,0xd15142c0,0x9b25957a,0xa2f627ec +DD 0x0fcc3a0e,0x361f8898,0x7c6b5f22,0x45b8edb4 +DD 0xb0fca064,0x892f12f2,0xc35bc548,0xfa8877de +DD 0x57b26a3c,0x6e61d8aa,0x24150f10,0x1dc6bd86 +DD 0x7b8d4225,0x425ef0b3,0x082a2709,0x31f9959f +DD 0x9cc3887d,0xa5103aeb,0xef64ed51,0xd6b75fc7 +DD 0x47e6242e,0x7e3596b8,0x34414102,0x0d92f394 +DD 0xa0a8ee76,0x997b5ce0,0xd30f8b5a,0xeadc39cc +DD 0x8c97c66f,0xb54474f9,0xff30a343,0xc6e311d5 +DD 0x6bd90c37,0x520abea1,0x187e691b,0x21addb8d +DD 0xd4e9965d,0xed3a24cb,0xa74ef371,0x9e9d41e7 +DD 0x33a75c05,0x0a74ee93,0x40003929,0x79d38bbf +DD 0x1f98741c,0x264bc68a,0x6c3f1130,0x55eca3a6 +DD 0xf8d6be44,0xc1050cd2,0x8b71db68,0xb2a269fe +DD 0x64153639,0x5dc684af,0x17b25315,0x2e61e183 +DD 0x835bfc61,0xba884ef7,0xf0fc994d,0xc92f2bdb +DD 0xaf64d478,0x96b766ee,0xdcc3b154,0xe51003c2 +DD 0x482a1e20,0x71f9acb6,0x3b8d7b0c,0x025ec99a +DD 0xf71a844a,0xcec936dc,0x84bde166,0xbd6e53f0 +DD 0x10544e12,0x2987fc84,0x63f32b3e,0x5a2099a8 +DD 0x3c6b660b,0x05b8d49d,0x4fcc0327,0x761fb1b1 +DD 0xdb25ac53,0xe2f61ec5,0xa882c97f,0x91517be9 +DD 0x8fcc485c,0xb61ffaca,0xfc6b2d70,0xc5b89fe6 +DD 0x68828204,0x51513092,0x1b25e728,0x22f655be +DD 0x44bdaa1d,0x7d6e188b,0x371acf31,0x0ec97da7 +DD 0xa3f36045,0x9a20d2d3,0xd0540569,0xe987b7ff +DD 0x1cc3fa2f,0x251048b9,0x6f649f03,0x56b72d95 +DD 0xfb8d3077,0xc25e82e1,0x882a555b,0xb1f9e7cd +DD 0xd7b2186e,0xee61aaf8,0xa4157d42,0x9dc6cfd4 +DD 0x30fcd236,0x092f60a0,0x435bb71a,0x7a88058c +DD 0xac3f5a4b,0x95ece8dd,0xdf983f67,0xe64b8df1 +DD 0x4b719013,0x72a22285,0x38d6f53f,0x010547a9 +DD 
0x674eb80a,0x5e9d0a9c,0x14e9dd26,0x2d3a6fb0 +DD 0x80007252,0xb9d3c0c4,0xf3a7177e,0xca74a5e8 +DD 0x3f30e838,0x06e35aae,0x4c978d14,0x75443f82 +DD 0xd87e2260,0xe1ad90f6,0xabd9474c,0x920af5da +DD 0xf4410a79,0xcd92b8ef,0x87e66f55,0xbe35ddc3 +DD 0x130fc021,0x2adc72b7,0x60a8a50d,0x597b179b +DD 0xc82a6c72,0xf1f9dee4,0xbb8d095e,0x825ebbc8 +DD 0x2f64a62a,0x16b714bc,0x5cc3c306,0x65107190 +DD 0x035b8e33,0x3a883ca5,0x70fceb1f,0x492f5989 +DD 0xe415446b,0xddc6f6fd,0x97b22147,0xae6193d1 +DD 0x5b25de01,0x62f66c97,0x2882bb2d,0x115109bb +DD 0xbc6b1459,0x85b8a6cf,0xcfcc7175,0xf61fc3e3 +DD 0x90543c40,0xa9878ed6,0xe3f3596c,0xda20ebfa +DD 0x771af618,0x4ec9448e,0x04bd9334,0x3d6e21a2 +DD 0xebd97e65,0xd20accf3,0x987e1b49,0xa1ada9df +DD 0x0c97b43d,0x354406ab,0x7f30d111,0x46e36387 +DD 0x20a89c24,0x197b2eb2,0x530ff908,0x6adc4b9e +DD 0xc7e6567c,0xfe35e4ea,0xb4413350,0x8d9281c6 +DD 0x78d6cc16,0x41057e80,0x0b71a93a,0x32a21bac +DD 0x9f98064e,0xa64bb4d8,0xec3f6362,0xd5ecd1f4 +DD 0xb3a72e57,0x8a749cc1,0xc0004b7b,0xf9d3f9ed +DD 0x54e9e40f,0x6d3a5699,0x274e8123,0x1e9d33b5 + +mul_table_152: +DD 0x00000000,0x878a92a7,0x0af953bf,0x8d73c118 +DD 0x15f2a77e,0x927835d9,0x1f0bf4c1,0x98816666 +DD 0x2be54efc,0xac6fdc5b,0x211c1d43,0xa6968fe4 +DD 0x3e17e982,0xb99d7b25,0x34eeba3d,0xb364289a +DD 0x57ca9df8,0xd0400f5f,0x5d33ce47,0xdab95ce0 +DD 0x42383a86,0xc5b2a821,0x48c16939,0xcf4bfb9e +DD 0x7c2fd304,0xfba541a3,0x76d680bb,0xf15c121c +DD 0x69dd747a,0xee57e6dd,0x632427c5,0xe4aeb562 +DD 0xaf953bf0,0x281fa957,0xa56c684f,0x22e6fae8 +DD 0xba679c8e,0x3ded0e29,0xb09ecf31,0x37145d96 +DD 0x8470750c,0x03fae7ab,0x8e8926b3,0x0903b414 +DD 0x9182d272,0x160840d5,0x9b7b81cd,0x1cf1136a +DD 0xf85fa608,0x7fd534af,0xf2a6f5b7,0x752c6710 +DD 0xedad0176,0x6a2793d1,0xe75452c9,0x60dec06e +DD 0xd3bae8f4,0x54307a53,0xd943bb4b,0x5ec929ec +DD 0xc6484f8a,0x41c2dd2d,0xccb11c35,0x4b3b8e92 +DD 0x5ac60111,0xdd4c93b6,0x503f52ae,0xd7b5c009 +DD 0x4f34a66f,0xc8be34c8,0x45cdf5d0,0xc2476777 +DD 0x71234fed,0xf6a9dd4a,0x7bda1c52,0xfc508ef5 +DD 
0x64d1e893,0xe35b7a34,0x6e28bb2c,0xe9a2298b +DD 0x0d0c9ce9,0x8a860e4e,0x07f5cf56,0x807f5df1 +DD 0x18fe3b97,0x9f74a930,0x12076828,0x958dfa8f +DD 0x26e9d215,0xa16340b2,0x2c1081aa,0xab9a130d +DD 0x331b756b,0xb491e7cc,0x39e226d4,0xbe68b473 +DD 0xf5533ae1,0x72d9a846,0xffaa695e,0x7820fbf9 +DD 0xe0a19d9f,0x672b0f38,0xea58ce20,0x6dd25c87 +DD 0xdeb6741d,0x593ce6ba,0xd44f27a2,0x53c5b505 +DD 0xcb44d363,0x4cce41c4,0xc1bd80dc,0x4637127b +DD 0xa299a719,0x251335be,0xa860f4a6,0x2fea6601 +DD 0xb76b0067,0x30e192c0,0xbd9253d8,0x3a18c17f +DD 0x897ce9e5,0x0ef67b42,0x8385ba5a,0x040f28fd +DD 0x9c8e4e9b,0x1b04dc3c,0x96771d24,0x11fd8f83 +DD 0xb58c0222,0x32069085,0xbf75519d,0x38ffc33a +DD 0xa07ea55c,0x27f437fb,0xaa87f6e3,0x2d0d6444 +DD 0x9e694cde,0x19e3de79,0x94901f61,0x131a8dc6 +DD 0x8b9beba0,0x0c117907,0x8162b81f,0x06e82ab8 +DD 0xe2469fda,0x65cc0d7d,0xe8bfcc65,0x6f355ec2 +DD 0xf7b438a4,0x703eaa03,0xfd4d6b1b,0x7ac7f9bc +DD 0xc9a3d126,0x4e294381,0xc35a8299,0x44d0103e +DD 0xdc517658,0x5bdbe4ff,0xd6a825e7,0x5122b740 +DD 0x1a1939d2,0x9d93ab75,0x10e06a6d,0x976af8ca +DD 0x0feb9eac,0x88610c0b,0x0512cd13,0x82985fb4 +DD 0x31fc772e,0xb676e589,0x3b052491,0xbc8fb636 +DD 0x240ed050,0xa38442f7,0x2ef783ef,0xa97d1148 +DD 0x4dd3a42a,0xca59368d,0x472af795,0xc0a06532 +DD 0x58210354,0xdfab91f3,0x52d850eb,0xd552c24c +DD 0x6636ead6,0xe1bc7871,0x6ccfb969,0xeb452bce +DD 0x73c44da8,0xf44edf0f,0x793d1e17,0xfeb78cb0 +DD 0xef4a0333,0x68c09194,0xe5b3508c,0x6239c22b +DD 0xfab8a44d,0x7d3236ea,0xf041f7f2,0x77cb6555 +DD 0xc4af4dcf,0x4325df68,0xce561e70,0x49dc8cd7 +DD 0xd15deab1,0x56d77816,0xdba4b90e,0x5c2e2ba9 +DD 0xb8809ecb,0x3f0a0c6c,0xb279cd74,0x35f35fd3 +DD 0xad7239b5,0x2af8ab12,0xa78b6a0a,0x2001f8ad +DD 0x9365d037,0x14ef4290,0x999c8388,0x1e16112f +DD 0x86977749,0x011de5ee,0x8c6e24f6,0x0be4b651 +DD 0x40df38c3,0xc755aa64,0x4a266b7c,0xcdacf9db +DD 0x552d9fbd,0xd2a70d1a,0x5fd4cc02,0xd85e5ea5 +DD 0x6b3a763f,0xecb0e498,0x61c32580,0xe649b727 +DD 0x7ec8d141,0xf94243e6,0x743182fe,0xf3bb1059 +DD 
0x1715a53b,0x909f379c,0x1decf684,0x9a666423 +DD 0x02e70245,0x856d90e2,0x081e51fa,0x8f94c35d +DD 0x3cf0ebc7,0xbb7a7960,0x3609b878,0xb1832adf +DD 0x29024cb9,0xae88de1e,0x23fb1f06,0xa4718da1 + +mul_table_312: +DD 0x00000000,0xbac2fd7b,0x70698c07,0xcaab717c +DD 0xe0d3180e,0x5a11e575,0x90ba9409,0x2a786972 +DD 0xc44a46ed,0x7e88bb96,0xb423caea,0x0ee13791 +DD 0x24995ee3,0x9e5ba398,0x54f0d2e4,0xee322f9f +DD 0x8d78fb2b,0x37ba0650,0xfd11772c,0x47d38a57 +DD 0x6dabe325,0xd7691e5e,0x1dc26f22,0xa7009259 +DD 0x4932bdc6,0xf3f040bd,0x395b31c1,0x8399ccba +DD 0xa9e1a5c8,0x132358b3,0xd98829cf,0x634ad4b4 +DD 0x1f1d80a7,0xa5df7ddc,0x6f740ca0,0xd5b6f1db +DD 0xffce98a9,0x450c65d2,0x8fa714ae,0x3565e9d5 +DD 0xdb57c64a,0x61953b31,0xab3e4a4d,0x11fcb736 +DD 0x3b84de44,0x8146233f,0x4bed5243,0xf12faf38 +DD 0x92657b8c,0x28a786f7,0xe20cf78b,0x58ce0af0 +DD 0x72b66382,0xc8749ef9,0x02dfef85,0xb81d12fe +DD 0x562f3d61,0xecedc01a,0x2646b166,0x9c844c1d +DD 0xb6fc256f,0x0c3ed814,0xc695a968,0x7c575413 +DD 0x3e3b014e,0x84f9fc35,0x4e528d49,0xf4907032 +DD 0xdee81940,0x642ae43b,0xae819547,0x1443683c +DD 0xfa7147a3,0x40b3bad8,0x8a18cba4,0x30da36df +DD 0x1aa25fad,0xa060a2d6,0x6acbd3aa,0xd0092ed1 +DD 0xb343fa65,0x0981071e,0xc32a7662,0x79e88b19 +DD 0x5390e26b,0xe9521f10,0x23f96e6c,0x993b9317 +DD 0x7709bc88,0xcdcb41f3,0x0760308f,0xbda2cdf4 +DD 0x97daa486,0x2d1859fd,0xe7b32881,0x5d71d5fa +DD 0x212681e9,0x9be47c92,0x514f0dee,0xeb8df095 +DD 0xc1f599e7,0x7b37649c,0xb19c15e0,0x0b5ee89b +DD 0xe56cc704,0x5fae3a7f,0x95054b03,0x2fc7b678 +DD 0x05bfdf0a,0xbf7d2271,0x75d6530d,0xcf14ae76 +DD 0xac5e7ac2,0x169c87b9,0xdc37f6c5,0x66f50bbe +DD 0x4c8d62cc,0xf64f9fb7,0x3ce4eecb,0x862613b0 +DD 0x68143c2f,0xd2d6c154,0x187db028,0xa2bf4d53 +DD 0x88c72421,0x3205d95a,0xf8aea826,0x426c555d +DD 0x7c76029c,0xc6b4ffe7,0x0c1f8e9b,0xb6dd73e0 +DD 0x9ca51a92,0x2667e7e9,0xeccc9695,0x560e6bee +DD 0xb83c4471,0x02feb90a,0xc855c876,0x7297350d +DD 0x58ef5c7f,0xe22da104,0x2886d078,0x92442d03 +DD 0xf10ef9b7,0x4bcc04cc,0x816775b0,0x3ba588cb +DD 
0x11dde1b9,0xab1f1cc2,0x61b46dbe,0xdb7690c5 +DD 0x3544bf5a,0x8f864221,0x452d335d,0xffefce26 +DD 0xd597a754,0x6f555a2f,0xa5fe2b53,0x1f3cd628 +DD 0x636b823b,0xd9a97f40,0x13020e3c,0xa9c0f347 +DD 0x83b89a35,0x397a674e,0xf3d11632,0x4913eb49 +DD 0xa721c4d6,0x1de339ad,0xd74848d1,0x6d8ab5aa +DD 0x47f2dcd8,0xfd3021a3,0x379b50df,0x8d59ada4 +DD 0xee137910,0x54d1846b,0x9e7af517,0x24b8086c +DD 0x0ec0611e,0xb4029c65,0x7ea9ed19,0xc46b1062 +DD 0x2a593ffd,0x909bc286,0x5a30b3fa,0xe0f24e81 +DD 0xca8a27f3,0x7048da88,0xbae3abf4,0x0021568f +DD 0x424d03d2,0xf88ffea9,0x32248fd5,0x88e672ae +DD 0xa29e1bdc,0x185ce6a7,0xd2f797db,0x68356aa0 +DD 0x8607453f,0x3cc5b844,0xf66ec938,0x4cac3443 +DD 0x66d45d31,0xdc16a04a,0x16bdd136,0xac7f2c4d +DD 0xcf35f8f9,0x75f70582,0xbf5c74fe,0x059e8985 +DD 0x2fe6e0f7,0x95241d8c,0x5f8f6cf0,0xe54d918b +DD 0x0b7fbe14,0xb1bd436f,0x7b163213,0xc1d4cf68 +DD 0xebaca61a,0x516e5b61,0x9bc52a1d,0x2107d766 +DD 0x5d508375,0xe7927e0e,0x2d390f72,0x97fbf209 +DD 0xbd839b7b,0x07416600,0xcdea177c,0x7728ea07 +DD 0x991ac598,0x23d838e3,0xe973499f,0x53b1b4e4 +DD 0x79c9dd96,0xc30b20ed,0x09a05191,0xb362acea +DD 0xd028785e,0x6aea8525,0xa041f459,0x1a830922 +DD 0x30fb6050,0x8a399d2b,0x4092ec57,0xfa50112c +DD 0x14623eb3,0xaea0c3c8,0x640bb2b4,0xdec94fcf +DD 0xf4b126bd,0x4e73dbc6,0x84d8aaba,0x3e1a57c1 + +mul_table_632: +DD 0x00000000,0x6b749fb2,0xd6e93f64,0xbd9da0d6 +DD 0xa83e0839,0xc34a978b,0x7ed7375d,0x15a3a8ef +DD 0x55906683,0x3ee4f931,0x837959e7,0xe80dc655 +DD 0xfdae6eba,0x96daf108,0x2b4751de,0x4033ce6c +DD 0xab20cd06,0xc05452b4,0x7dc9f262,0x16bd6dd0 +DD 0x031ec53f,0x686a5a8d,0xd5f7fa5b,0xbe8365e9 +DD 0xfeb0ab85,0x95c43437,0x285994e1,0x432d0b53 +DD 0x568ea3bc,0x3dfa3c0e,0x80679cd8,0xeb13036a +DD 0x53adecfd,0x38d9734f,0x8544d399,0xee304c2b +DD 0xfb93e4c4,0x90e77b76,0x2d7adba0,0x460e4412 +DD 0x063d8a7e,0x6d4915cc,0xd0d4b51a,0xbba02aa8 +DD 0xae038247,0xc5771df5,0x78eabd23,0x139e2291 +DD 0xf88d21fb,0x93f9be49,0x2e641e9f,0x4510812d +DD 0x50b329c2,0x3bc7b670,0x865a16a6,0xed2e8914 +DD 
0xad1d4778,0xc669d8ca,0x7bf4781c,0x1080e7ae +DD 0x05234f41,0x6e57d0f3,0xd3ca7025,0xb8beef97 +DD 0xa75bd9fa,0xcc2f4648,0x71b2e69e,0x1ac6792c +DD 0x0f65d1c3,0x64114e71,0xd98ceea7,0xb2f87115 +DD 0xf2cbbf79,0x99bf20cb,0x2422801d,0x4f561faf +DD 0x5af5b740,0x318128f2,0x8c1c8824,0xe7681796 +DD 0x0c7b14fc,0x670f8b4e,0xda922b98,0xb1e6b42a +DD 0xa4451cc5,0xcf318377,0x72ac23a1,0x19d8bc13 +DD 0x59eb727f,0x329fedcd,0x8f024d1b,0xe476d2a9 +DD 0xf1d57a46,0x9aa1e5f4,0x273c4522,0x4c48da90 +DD 0xf4f63507,0x9f82aab5,0x221f0a63,0x496b95d1 +DD 0x5cc83d3e,0x37bca28c,0x8a21025a,0xe1559de8 +DD 0xa1665384,0xca12cc36,0x778f6ce0,0x1cfbf352 +DD 0x09585bbd,0x622cc40f,0xdfb164d9,0xb4c5fb6b +DD 0x5fd6f801,0x34a267b3,0x893fc765,0xe24b58d7 +DD 0xf7e8f038,0x9c9c6f8a,0x2101cf5c,0x4a7550ee +DD 0x0a469e82,0x61320130,0xdcafa1e6,0xb7db3e54 +DD 0xa27896bb,0xc90c0909,0x7491a9df,0x1fe5366d +DD 0x4b5bc505,0x202f5ab7,0x9db2fa61,0xf6c665d3 +DD 0xe365cd3c,0x8811528e,0x358cf258,0x5ef86dea +DD 0x1ecba386,0x75bf3c34,0xc8229ce2,0xa3560350 +DD 0xb6f5abbf,0xdd81340d,0x601c94db,0x0b680b69 +DD 0xe07b0803,0x8b0f97b1,0x36923767,0x5de6a8d5 +DD 0x4845003a,0x23319f88,0x9eac3f5e,0xf5d8a0ec +DD 0xb5eb6e80,0xde9ff132,0x630251e4,0x0876ce56 +DD 0x1dd566b9,0x76a1f90b,0xcb3c59dd,0xa048c66f +DD 0x18f629f8,0x7382b64a,0xce1f169c,0xa56b892e +DD 0xb0c821c1,0xdbbcbe73,0x66211ea5,0x0d558117 +DD 0x4d664f7b,0x2612d0c9,0x9b8f701f,0xf0fbefad +DD 0xe5584742,0x8e2cd8f0,0x33b17826,0x58c5e794 +DD 0xb3d6e4fe,0xd8a27b4c,0x653fdb9a,0x0e4b4428 +DD 0x1be8ecc7,0x709c7375,0xcd01d3a3,0xa6754c11 +DD 0xe646827d,0x8d321dcf,0x30afbd19,0x5bdb22ab +DD 0x4e788a44,0x250c15f6,0x9891b520,0xf3e52a92 +DD 0xec001cff,0x8774834d,0x3ae9239b,0x519dbc29 +DD 0x443e14c6,0x2f4a8b74,0x92d72ba2,0xf9a3b410 +DD 0xb9907a7c,0xd2e4e5ce,0x6f794518,0x040ddaaa +DD 0x11ae7245,0x7adaedf7,0xc7474d21,0xac33d293 +DD 0x4720d1f9,0x2c544e4b,0x91c9ee9d,0xfabd712f +DD 0xef1ed9c0,0x846a4672,0x39f7e6a4,0x52837916 +DD 0x12b0b77a,0x79c428c8,0xc459881e,0xaf2d17ac +DD 
0xba8ebf43,0xd1fa20f1,0x6c678027,0x07131f95 +DD 0xbfadf002,0xd4d96fb0,0x6944cf66,0x023050d4 +DD 0x1793f83b,0x7ce76789,0xc17ac75f,0xaa0e58ed +DD 0xea3d9681,0x81490933,0x3cd4a9e5,0x57a03657 +DD 0x42039eb8,0x2977010a,0x94eaa1dc,0xff9e3e6e +DD 0x148d3d04,0x7ff9a2b6,0xc2640260,0xa9109dd2 +DD 0xbcb3353d,0xd7c7aa8f,0x6a5a0a59,0x012e95eb +DD 0x411d5b87,0x2a69c435,0x97f464e3,0xfc80fb51 +DD 0xe92353be,0x8257cc0c,0x3fca6cda,0x54bef368 + +mul_table_1272: +DD 0x00000000,0xdd66cbbb,0xbf21e187,0x62472a3c +DD 0x7bafb5ff,0xa6c97e44,0xc48e5478,0x19e89fc3 +DD 0xf75f6bfe,0x2a39a045,0x487e8a79,0x951841c2 +DD 0x8cf0de01,0x519615ba,0x33d13f86,0xeeb7f43d +DD 0xeb52a10d,0x36346ab6,0x5473408a,0x89158b31 +DD 0x90fd14f2,0x4d9bdf49,0x2fdcf575,0xf2ba3ece +DD 0x1c0dcaf3,0xc16b0148,0xa32c2b74,0x7e4ae0cf +DD 0x67a27f0c,0xbac4b4b7,0xd8839e8b,0x05e55530 +DD 0xd34934eb,0x0e2fff50,0x6c68d56c,0xb10e1ed7 +DD 0xa8e68114,0x75804aaf,0x17c76093,0xcaa1ab28 +DD 0x24165f15,0xf97094ae,0x9b37be92,0x46517529 +DD 0x5fb9eaea,0x82df2151,0xe0980b6d,0x3dfec0d6 +DD 0x381b95e6,0xe57d5e5d,0x873a7461,0x5a5cbfda +DD 0x43b42019,0x9ed2eba2,0xfc95c19e,0x21f30a25 +DD 0xcf44fe18,0x122235a3,0x70651f9f,0xad03d424 +DD 0xb4eb4be7,0x698d805c,0x0bcaaa60,0xd6ac61db +DD 0xa37e1f27,0x7e18d49c,0x1c5ffea0,0xc139351b +DD 0xd8d1aad8,0x05b76163,0x67f04b5f,0xba9680e4 +DD 0x542174d9,0x8947bf62,0xeb00955e,0x36665ee5 +DD 0x2f8ec126,0xf2e80a9d,0x90af20a1,0x4dc9eb1a +DD 0x482cbe2a,0x954a7591,0xf70d5fad,0x2a6b9416 +DD 0x33830bd5,0xeee5c06e,0x8ca2ea52,0x51c421e9 +DD 0xbf73d5d4,0x62151e6f,0x00523453,0xdd34ffe8 +DD 0xc4dc602b,0x19baab90,0x7bfd81ac,0xa69b4a17 +DD 0x70372bcc,0xad51e077,0xcf16ca4b,0x127001f0 +DD 0x0b989e33,0xd6fe5588,0xb4b97fb4,0x69dfb40f +DD 0x87684032,0x5a0e8b89,0x3849a1b5,0xe52f6a0e +DD 0xfcc7f5cd,0x21a13e76,0x43e6144a,0x9e80dff1 +DD 0x9b658ac1,0x4603417a,0x24446b46,0xf922a0fd +DD 0xe0ca3f3e,0x3dacf485,0x5febdeb9,0x828d1502 +DD 0x6c3ae13f,0xb15c2a84,0xd31b00b8,0x0e7dcb03 +DD 0x179554c0,0xcaf39f7b,0xa8b4b547,0x75d27efc +DD 
0x431048bf,0x9e768304,0xfc31a938,0x21576283 +DD 0x38bffd40,0xe5d936fb,0x879e1cc7,0x5af8d77c +DD 0xb44f2341,0x6929e8fa,0x0b6ec2c6,0xd608097d +DD 0xcfe096be,0x12865d05,0x70c17739,0xada7bc82 +DD 0xa842e9b2,0x75242209,0x17630835,0xca05c38e +DD 0xd3ed5c4d,0x0e8b97f6,0x6cccbdca,0xb1aa7671 +DD 0x5f1d824c,0x827b49f7,0xe03c63cb,0x3d5aa870 +DD 0x24b237b3,0xf9d4fc08,0x9b93d634,0x46f51d8f +DD 0x90597c54,0x4d3fb7ef,0x2f789dd3,0xf21e5668 +DD 0xebf6c9ab,0x36900210,0x54d7282c,0x89b1e397 +DD 0x670617aa,0xba60dc11,0xd827f62d,0x05413d96 +DD 0x1ca9a255,0xc1cf69ee,0xa38843d2,0x7eee8869 +DD 0x7b0bdd59,0xa66d16e2,0xc42a3cde,0x194cf765 +DD 0x00a468a6,0xddc2a31d,0xbf858921,0x62e3429a +DD 0x8c54b6a7,0x51327d1c,0x33755720,0xee139c9b +DD 0xf7fb0358,0x2a9dc8e3,0x48dae2df,0x95bc2964 +DD 0xe06e5798,0x3d089c23,0x5f4fb61f,0x82297da4 +DD 0x9bc1e267,0x46a729dc,0x24e003e0,0xf986c85b +DD 0x17313c66,0xca57f7dd,0xa810dde1,0x7576165a +DD 0x6c9e8999,0xb1f84222,0xd3bf681e,0x0ed9a3a5 +DD 0x0b3cf695,0xd65a3d2e,0xb41d1712,0x697bdca9 +DD 0x7093436a,0xadf588d1,0xcfb2a2ed,0x12d46956 +DD 0xfc639d6b,0x210556d0,0x43427cec,0x9e24b757 +DD 0x87cc2894,0x5aaae32f,0x38edc913,0xe58b02a8 +DD 0x33276373,0xee41a8c8,0x8c0682f4,0x5160494f +DD 0x4888d68c,0x95ee1d37,0xf7a9370b,0x2acffcb0 +DD 0xc478088d,0x191ec336,0x7b59e90a,0xa63f22b1 +DD 0xbfd7bd72,0x62b176c9,0x00f65cf5,0xdd90974e +DD 0xd875c27e,0x051309c5,0x675423f9,0xba32e842 +DD 0xa3da7781,0x7ebcbc3a,0x1cfb9606,0xc19d5dbd +DD 0x2f2aa980,0xf24c623b,0x900b4807,0x4d6d83bc +DD 0x54851c7f,0x89e3d7c4,0xeba4fdf8,0x36c23643 + +%macro slversion 4 +global %1_slver_%2%3%4 +global %1_slver +%1_slver: +%1_slver_%2%3%4: + dw 0x%4 + db 0x%3, 0x%2 +%endmacro +;;; func core, ver, snum +slversion crc32_iscsi_00, 00, 02, 0014 +; inform linker that this doesn't require executable stack +section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/src/common/crc32c_intel_fast_zero_asm.S b/src/common/crc32c_intel_fast_zero_asm.S deleted file mode 100644 index 34b7f489016..00000000000 --- 
a/src/common/crc32c_intel_fast_zero_asm.S +++ /dev/null @@ -1,648 +0,0 @@ -; -; Copyright 2012-2013 Intel Corporation All Rights Reserved. -; All rights reserved. -; -; http://opensource.org/licenses/BSD-3-Clause -; -; Redistribution and use in source and binary forms, with or without -; modification, are permitted provided that the following -; conditions are met: -; -; * Redistributions of source code must retain the above copyright -; notice, this list of conditions and the following disclaimer. -; -; * Redistributions in binary form must reproduce the above copyright -; notice, this list of conditions and the following disclaimer in -; the documentation and/or other materials provided with the -; distribution. -; -; * Neither the name of the Intel Corporation nor the names of its -; contributors may be used to endorse or promote products derived -; from this software without specific prior written permission. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -; COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -; OF THE POSSIBILITY OF SUCH DAMAGE. 
-; - -; Function to compute iscsi CRC32 with table-based recombination -; crc done "by 3" with block sizes 1920, 960, 480, 240 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -; crcB3 MACRO to implement crc32 on 3 %%bSize-byte blocks -%macro crcB3 3 -%define %%bSize %1 ; 1/3 of buffer size -%define %%td2 %2 ; table offset for crc0 (2/3 of buffer) -%define %%td1 %3 ; table offset for crc1 (1/3 of buffer) - -%IF %%bSize=640 - sub len, %%bSize*3 - js %%crcB3_end ;; jump to next level if 3*blockSize > len -%ELSE - cmp len, %%bSize*3 - jnae %%crcB3_end ;; jump to next level if 3*blockSize > len -%ENDIF - ;;;;;; Calculate CRC of 3 blocks of the buffer ;;;;;; -%%crcB3_loop: - ;; rax = crc0 = initial crc - xor rbx, rbx ;; rbx = crc1 = 0; - xor r10, r10 ;; r10 = crc2 = 0; - - %assign i 0 - %rep %%bSize/8 - 1 - crc32 rax, bufptmp ;; update crc0 - crc32 rbx, bufptmp ;; update crc1 - crc32 r10, bufptmp ;; update crc2 - %assign i (i+8) - %endrep - crc32 rax, bufptmp ;; update crc0 - crc32 rbx, bufptmp ;; update crc1 -; SKIP ;crc32 r10, bufptmp ;; update crc2 - - ; merge in crc0 - movzx bufp_dw, al - mov r9d, [crc_init + bufp*4 + %%td2] - movzx bufp_dw, ah - shr eax, 16 - mov r11d, [crc_init + bufp*4 + %%td2] - shl r11, 8 - xor r9, r11 - - movzx bufp_dw, al - mov r11d, [crc_init + bufp*4 + %%td2] - movzx bufp_dw, ah - shl r11, 16 - xor r9, r11 - mov r11d, [crc_init + bufp*4 + %%td2] - shl r11, 24 - xor r9, r11 - - ; merge in crc1 - - movzx bufp_dw, bl - mov r11d, [crc_init + bufp*4 + %%td1] - movzx bufp_dw, bh - shr ebx, 16 - xor r9, r11 - mov r11d, [crc_init + bufp*4 + %%td1] - shl r11, 8 - xor r9, r11 - - movzx bufp_dw, bl - mov r11d, [crc_init + bufp*4 + %%td1] - movzx bufp_dw, bh - shl r11, 16 - xor r9, r11 - mov r11d, [crc_init + bufp*4 + %%td1] - shl r11, 24 - xor r9, r11 - - ; xor r9, [bufptmp+i + 2*%%bSize] - crc32 r10, r9 - mov rax, r10 - - ; add bufptmp, %%bSize*3 ;; 
move to next block - sub len, %%bSize*3 -%IF %%bSize=640 - jns %%crcB3_loop -%ENDIF - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -%%crcB3_end: -%IF %%bSize=640 - add len, %%bSize*3 -%ENDIF - je do_return ;; return if remaining data is zero -%endmacro - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;; ISCSI CRC 32 Implementation with crc32 Instruction - -;;; unsigned int crc32_iscsi_00(unsigned char * buffer, int len, unsigned int crc_init); -;;; -;;; *buf = rcx -;;; len = rdx -;;; crc_init = r8 -;;; - -global crc32_iscsi_zero_00:function -crc32_iscsi_zero_00: - -%ifidn __OUTPUT_FORMAT__, elf64 -%define bufp rdi -%define bufp_dw edi -%define bufp_w di -%define bufp_b dil -%define bufptmp rcx -%define block_0 rcx -%define block_1 r8 -%define block_2 r11 -%define len rsi -%define len_dw esi -%define len_w si -%define len_b sil -%define crc_init rdx -%define crc_init_dw edx -%else -%define bufp rcx -%define bufp_dw ecx -%define bufp_w cx -%define bufp_b cl -%define bufptmp rdi -%define block_0 rdi -%define block_1 rsi -%define block_2 r11 -%define len rdx -%define len_dw edx -%define len_w dx -%define len_b dl -%define crc_init r8 -%define crc_init_dw r8d -%endif - - - push rdi - push rbx - - mov rax, crc_init ;; rax = crc_init; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -; no need for alignment - xor bufptmp, bufptmp - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; 2) BLOCK LEVEL: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -proc_block: - cmp len, 240 - jb bit8 - - lea crc_init, [mul_table_72 wrt rip] ;; load table base address - - crcB3 640, 
0x1000, 0x0c00 ; 640*3 = 1920 (Tables 1280, 640) - crcB3 320, 0x0c00, 0x0800 ; 320*3 = 960 (Tables 640, 320) - crcB3 160, 0x0800, 0x0400 ; 160*3 = 480 (Tables 320, 160) - crcB3 80, 0x0400, 0x0000 ; 80*3 = 240 (Tables 160, 80) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;4) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of rdx are full) - -bit8: - shl len_b, 1 ;; shift-out MSB (bit-7) - jnc bit7 ;; jump to bit-6 if bit-7 == 0 - %assign i 0 - %rep 16 - crc32 rax, bufptmp ;; compute crc32 of 8-byte data - %assign i (i+8) - %endrep - je do_return ;; return if remaining data is zero - -bit7: - shl len_b, 1 ;; shift-out MSB (bit-7) - jnc bit6 ;; jump to bit-6 if bit-7 == 0 - %assign i 0 - %rep 8 - crc32 rax, bufptmp ;; compute crc32 of 8-byte data - %assign i (i+8) - %endrep - je do_return ;; return if remaining data is zero - ; add bufptmp, 64 ;; buf +=64; (next 64 bytes) -bit6: - shl len_b, 1 ;; shift-out MSB (bit-6) - jnc bit5 ;; jump to bit-5 if bit-6 == 0 - %assign i 0 - %rep 4 - crc32 rax, bufptmp ;; compute crc32 of 8-byte data - %assign i (i+8) - %endrep - je do_return ;; return if remaining data is zero - ; add bufptmp, 32 ;; buf +=32; (next 32 bytes) -bit5: - shl len_b, 1 ;; shift-out MSB (bit-5) - jnc bit4 ;; jump to bit-4 if bit-5 == 0 - %assign i 0 - %rep 2 - crc32 rax, bufptmp ;; compute crc32 of 8-byte data - %assign i (i+8) - %endrep - je do_return ;; return if remaining data is zero - ; add bufptmp, 16 ;; buf +=16; (next 16 bytes) -bit4: - shl len_b, 1 ;; shift-out MSB (bit-4) - jnc bit3 ;; jump to bit-3 if bit-4 == 0 - crc32 rax, bufptmp ;; compute crc32 of 8-byte data - je do_return ;; return if remaining data is zero - ; add bufptmp, 8 ;; buf +=8; (next 8 bytes) -bit3: - mov rbx, bufptmp ;; load a 8-bytes from the buffer: - shl len_b, 1 ;; shift-out MSB (bit-3) - jnc bit2 ;; jump to bit-2 if bit-3 == 0 - crc32 eax, ebx ;; compute crc32 of 4-byte data - je do_return ;; return if remaining data is zero - shr rbx, 32 
;; get next 3 bytes -bit2: - shl len_b, 1 ;; shift-out MSB (bit-2) - jnc bit1 ;; jump to bit-1 if bit-2 == 0 - crc32 eax, bx ;; compute crc32 of 2-byte data - je do_return ;; return if remaining data is zero - shr rbx, 16 ;; next byte -bit1: - test len_b,len_b - je do_return - crc32 eax, bl ;; compute crc32 of 1-byte data -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -do_return: - - pop rbx - pop rdi - ret - -less_than_8: - xor bufp, bufp - test len,4 - jz less_than_4 - crc32 eax, bufp_dw - add bufptmp,4 -less_than_4: - test len,2 - jz less_than_2 - crc32 eax, bufp_w - add bufptmp,2 -less_than_2: - test len,1 - jz do_return - crc32 rax, bufp_b - pop rbx - pop bufptmp - ret - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;; global mul_table_72, mul_table_152, mul_table_312, mul_table_632, mul_table_1272 - -section .data -align 8 -mul_table_72: -DD 0x00000000,0x39d3b296,0x73a7652c,0x4a74d7ba -DD 0xe74eca58,0xde9d78ce,0x94e9af74,0xad3a1de2 -DD 0xcb71e241,0xf2a250d7,0xb8d6876d,0x810535fb -DD 0x2c3f2819,0x15ec9a8f,0x5f984d35,0x664bffa3 -DD 0x930fb273,0xaadc00e5,0xe0a8d75f,0xd97b65c9 -DD 0x7441782b,0x4d92cabd,0x07e61d07,0x3e35af91 -DD 0x587e5032,0x61ade2a4,0x2bd9351e,0x120a8788 -DD 0xbf309a6a,0x86e328fc,0xcc97ff46,0xf5444dd0 -DD 0x23f31217,0x1a20a081,0x5054773b,0x6987c5ad -DD 0xc4bdd84f,0xfd6e6ad9,0xb71abd63,0x8ec90ff5 -DD 0xe882f056,0xd15142c0,0x9b25957a,0xa2f627ec -DD 0x0fcc3a0e,0x361f8898,0x7c6b5f22,0x45b8edb4 -DD 0xb0fca064,0x892f12f2,0xc35bc548,0xfa8877de -DD 0x57b26a3c,0x6e61d8aa,0x24150f10,0x1dc6bd86 -DD 0x7b8d4225,0x425ef0b3,0x082a2709,0x31f9959f -DD 0x9cc3887d,0xa5103aeb,0xef64ed51,0xd6b75fc7 -DD 
0x47e6242e,0x7e3596b8,0x34414102,0x0d92f394 -DD 0xa0a8ee76,0x997b5ce0,0xd30f8b5a,0xeadc39cc -DD 0x8c97c66f,0xb54474f9,0xff30a343,0xc6e311d5 -DD 0x6bd90c37,0x520abea1,0x187e691b,0x21addb8d -DD 0xd4e9965d,0xed3a24cb,0xa74ef371,0x9e9d41e7 -DD 0x33a75c05,0x0a74ee93,0x40003929,0x79d38bbf -DD 0x1f98741c,0x264bc68a,0x6c3f1130,0x55eca3a6 -DD 0xf8d6be44,0xc1050cd2,0x8b71db68,0xb2a269fe -DD 0x64153639,0x5dc684af,0x17b25315,0x2e61e183 -DD 0x835bfc61,0xba884ef7,0xf0fc994d,0xc92f2bdb -DD 0xaf64d478,0x96b766ee,0xdcc3b154,0xe51003c2 -DD 0x482a1e20,0x71f9acb6,0x3b8d7b0c,0x025ec99a -DD 0xf71a844a,0xcec936dc,0x84bde166,0xbd6e53f0 -DD 0x10544e12,0x2987fc84,0x63f32b3e,0x5a2099a8 -DD 0x3c6b660b,0x05b8d49d,0x4fcc0327,0x761fb1b1 -DD 0xdb25ac53,0xe2f61ec5,0xa882c97f,0x91517be9 -DD 0x8fcc485c,0xb61ffaca,0xfc6b2d70,0xc5b89fe6 -DD 0x68828204,0x51513092,0x1b25e728,0x22f655be -DD 0x44bdaa1d,0x7d6e188b,0x371acf31,0x0ec97da7 -DD 0xa3f36045,0x9a20d2d3,0xd0540569,0xe987b7ff -DD 0x1cc3fa2f,0x251048b9,0x6f649f03,0x56b72d95 -DD 0xfb8d3077,0xc25e82e1,0x882a555b,0xb1f9e7cd -DD 0xd7b2186e,0xee61aaf8,0xa4157d42,0x9dc6cfd4 -DD 0x30fcd236,0x092f60a0,0x435bb71a,0x7a88058c -DD 0xac3f5a4b,0x95ece8dd,0xdf983f67,0xe64b8df1 -DD 0x4b719013,0x72a22285,0x38d6f53f,0x010547a9 -DD 0x674eb80a,0x5e9d0a9c,0x14e9dd26,0x2d3a6fb0 -DD 0x80007252,0xb9d3c0c4,0xf3a7177e,0xca74a5e8 -DD 0x3f30e838,0x06e35aae,0x4c978d14,0x75443f82 -DD 0xd87e2260,0xe1ad90f6,0xabd9474c,0x920af5da -DD 0xf4410a79,0xcd92b8ef,0x87e66f55,0xbe35ddc3 -DD 0x130fc021,0x2adc72b7,0x60a8a50d,0x597b179b -DD 0xc82a6c72,0xf1f9dee4,0xbb8d095e,0x825ebbc8 -DD 0x2f64a62a,0x16b714bc,0x5cc3c306,0x65107190 -DD 0x035b8e33,0x3a883ca5,0x70fceb1f,0x492f5989 -DD 0xe415446b,0xddc6f6fd,0x97b22147,0xae6193d1 -DD 0x5b25de01,0x62f66c97,0x2882bb2d,0x115109bb -DD 0xbc6b1459,0x85b8a6cf,0xcfcc7175,0xf61fc3e3 -DD 0x90543c40,0xa9878ed6,0xe3f3596c,0xda20ebfa -DD 0x771af618,0x4ec9448e,0x04bd9334,0x3d6e21a2 -DD 0xebd97e65,0xd20accf3,0x987e1b49,0xa1ada9df -DD 
0x0c97b43d,0x354406ab,0x7f30d111,0x46e36387 -DD 0x20a89c24,0x197b2eb2,0x530ff908,0x6adc4b9e -DD 0xc7e6567c,0xfe35e4ea,0xb4413350,0x8d9281c6 -DD 0x78d6cc16,0x41057e80,0x0b71a93a,0x32a21bac -DD 0x9f98064e,0xa64bb4d8,0xec3f6362,0xd5ecd1f4 -DD 0xb3a72e57,0x8a749cc1,0xc0004b7b,0xf9d3f9ed -DD 0x54e9e40f,0x6d3a5699,0x274e8123,0x1e9d33b5 - -mul_table_152: -DD 0x00000000,0x878a92a7,0x0af953bf,0x8d73c118 -DD 0x15f2a77e,0x927835d9,0x1f0bf4c1,0x98816666 -DD 0x2be54efc,0xac6fdc5b,0x211c1d43,0xa6968fe4 -DD 0x3e17e982,0xb99d7b25,0x34eeba3d,0xb364289a -DD 0x57ca9df8,0xd0400f5f,0x5d33ce47,0xdab95ce0 -DD 0x42383a86,0xc5b2a821,0x48c16939,0xcf4bfb9e -DD 0x7c2fd304,0xfba541a3,0x76d680bb,0xf15c121c -DD 0x69dd747a,0xee57e6dd,0x632427c5,0xe4aeb562 -DD 0xaf953bf0,0x281fa957,0xa56c684f,0x22e6fae8 -DD 0xba679c8e,0x3ded0e29,0xb09ecf31,0x37145d96 -DD 0x8470750c,0x03fae7ab,0x8e8926b3,0x0903b414 -DD 0x9182d272,0x160840d5,0x9b7b81cd,0x1cf1136a -DD 0xf85fa608,0x7fd534af,0xf2a6f5b7,0x752c6710 -DD 0xedad0176,0x6a2793d1,0xe75452c9,0x60dec06e -DD 0xd3bae8f4,0x54307a53,0xd943bb4b,0x5ec929ec -DD 0xc6484f8a,0x41c2dd2d,0xccb11c35,0x4b3b8e92 -DD 0x5ac60111,0xdd4c93b6,0x503f52ae,0xd7b5c009 -DD 0x4f34a66f,0xc8be34c8,0x45cdf5d0,0xc2476777 -DD 0x71234fed,0xf6a9dd4a,0x7bda1c52,0xfc508ef5 -DD 0x64d1e893,0xe35b7a34,0x6e28bb2c,0xe9a2298b -DD 0x0d0c9ce9,0x8a860e4e,0x07f5cf56,0x807f5df1 -DD 0x18fe3b97,0x9f74a930,0x12076828,0x958dfa8f -DD 0x26e9d215,0xa16340b2,0x2c1081aa,0xab9a130d -DD 0x331b756b,0xb491e7cc,0x39e226d4,0xbe68b473 -DD 0xf5533ae1,0x72d9a846,0xffaa695e,0x7820fbf9 -DD 0xe0a19d9f,0x672b0f38,0xea58ce20,0x6dd25c87 -DD 0xdeb6741d,0x593ce6ba,0xd44f27a2,0x53c5b505 -DD 0xcb44d363,0x4cce41c4,0xc1bd80dc,0x4637127b -DD 0xa299a719,0x251335be,0xa860f4a6,0x2fea6601 -DD 0xb76b0067,0x30e192c0,0xbd9253d8,0x3a18c17f -DD 0x897ce9e5,0x0ef67b42,0x8385ba5a,0x040f28fd -DD 0x9c8e4e9b,0x1b04dc3c,0x96771d24,0x11fd8f83 -DD 0xb58c0222,0x32069085,0xbf75519d,0x38ffc33a -DD 0xa07ea55c,0x27f437fb,0xaa87f6e3,0x2d0d6444 -DD 
0x9e694cde,0x19e3de79,0x94901f61,0x131a8dc6 -DD 0x8b9beba0,0x0c117907,0x8162b81f,0x06e82ab8 -DD 0xe2469fda,0x65cc0d7d,0xe8bfcc65,0x6f355ec2 -DD 0xf7b438a4,0x703eaa03,0xfd4d6b1b,0x7ac7f9bc -DD 0xc9a3d126,0x4e294381,0xc35a8299,0x44d0103e -DD 0xdc517658,0x5bdbe4ff,0xd6a825e7,0x5122b740 -DD 0x1a1939d2,0x9d93ab75,0x10e06a6d,0x976af8ca -DD 0x0feb9eac,0x88610c0b,0x0512cd13,0x82985fb4 -DD 0x31fc772e,0xb676e589,0x3b052491,0xbc8fb636 -DD 0x240ed050,0xa38442f7,0x2ef783ef,0xa97d1148 -DD 0x4dd3a42a,0xca59368d,0x472af795,0xc0a06532 -DD 0x58210354,0xdfab91f3,0x52d850eb,0xd552c24c -DD 0x6636ead6,0xe1bc7871,0x6ccfb969,0xeb452bce -DD 0x73c44da8,0xf44edf0f,0x793d1e17,0xfeb78cb0 -DD 0xef4a0333,0x68c09194,0xe5b3508c,0x6239c22b -DD 0xfab8a44d,0x7d3236ea,0xf041f7f2,0x77cb6555 -DD 0xc4af4dcf,0x4325df68,0xce561e70,0x49dc8cd7 -DD 0xd15deab1,0x56d77816,0xdba4b90e,0x5c2e2ba9 -DD 0xb8809ecb,0x3f0a0c6c,0xb279cd74,0x35f35fd3 -DD 0xad7239b5,0x2af8ab12,0xa78b6a0a,0x2001f8ad -DD 0x9365d037,0x14ef4290,0x999c8388,0x1e16112f -DD 0x86977749,0x011de5ee,0x8c6e24f6,0x0be4b651 -DD 0x40df38c3,0xc755aa64,0x4a266b7c,0xcdacf9db -DD 0x552d9fbd,0xd2a70d1a,0x5fd4cc02,0xd85e5ea5 -DD 0x6b3a763f,0xecb0e498,0x61c32580,0xe649b727 -DD 0x7ec8d141,0xf94243e6,0x743182fe,0xf3bb1059 -DD 0x1715a53b,0x909f379c,0x1decf684,0x9a666423 -DD 0x02e70245,0x856d90e2,0x081e51fa,0x8f94c35d -DD 0x3cf0ebc7,0xbb7a7960,0x3609b878,0xb1832adf -DD 0x29024cb9,0xae88de1e,0x23fb1f06,0xa4718da1 - -mul_table_312: -DD 0x00000000,0xbac2fd7b,0x70698c07,0xcaab717c -DD 0xe0d3180e,0x5a11e575,0x90ba9409,0x2a786972 -DD 0xc44a46ed,0x7e88bb96,0xb423caea,0x0ee13791 -DD 0x24995ee3,0x9e5ba398,0x54f0d2e4,0xee322f9f -DD 0x8d78fb2b,0x37ba0650,0xfd11772c,0x47d38a57 -DD 0x6dabe325,0xd7691e5e,0x1dc26f22,0xa7009259 -DD 0x4932bdc6,0xf3f040bd,0x395b31c1,0x8399ccba -DD 0xa9e1a5c8,0x132358b3,0xd98829cf,0x634ad4b4 -DD 0x1f1d80a7,0xa5df7ddc,0x6f740ca0,0xd5b6f1db -DD 0xffce98a9,0x450c65d2,0x8fa714ae,0x3565e9d5 -DD 0xdb57c64a,0x61953b31,0xab3e4a4d,0x11fcb736 -DD 
0x3b84de44,0x8146233f,0x4bed5243,0xf12faf38 -DD 0x92657b8c,0x28a786f7,0xe20cf78b,0x58ce0af0 -DD 0x72b66382,0xc8749ef9,0x02dfef85,0xb81d12fe -DD 0x562f3d61,0xecedc01a,0x2646b166,0x9c844c1d -DD 0xb6fc256f,0x0c3ed814,0xc695a968,0x7c575413 -DD 0x3e3b014e,0x84f9fc35,0x4e528d49,0xf4907032 -DD 0xdee81940,0x642ae43b,0xae819547,0x1443683c -DD 0xfa7147a3,0x40b3bad8,0x8a18cba4,0x30da36df -DD 0x1aa25fad,0xa060a2d6,0x6acbd3aa,0xd0092ed1 -DD 0xb343fa65,0x0981071e,0xc32a7662,0x79e88b19 -DD 0x5390e26b,0xe9521f10,0x23f96e6c,0x993b9317 -DD 0x7709bc88,0xcdcb41f3,0x0760308f,0xbda2cdf4 -DD 0x97daa486,0x2d1859fd,0xe7b32881,0x5d71d5fa -DD 0x212681e9,0x9be47c92,0x514f0dee,0xeb8df095 -DD 0xc1f599e7,0x7b37649c,0xb19c15e0,0x0b5ee89b -DD 0xe56cc704,0x5fae3a7f,0x95054b03,0x2fc7b678 -DD 0x05bfdf0a,0xbf7d2271,0x75d6530d,0xcf14ae76 -DD 0xac5e7ac2,0x169c87b9,0xdc37f6c5,0x66f50bbe -DD 0x4c8d62cc,0xf64f9fb7,0x3ce4eecb,0x862613b0 -DD 0x68143c2f,0xd2d6c154,0x187db028,0xa2bf4d53 -DD 0x88c72421,0x3205d95a,0xf8aea826,0x426c555d -DD 0x7c76029c,0xc6b4ffe7,0x0c1f8e9b,0xb6dd73e0 -DD 0x9ca51a92,0x2667e7e9,0xeccc9695,0x560e6bee -DD 0xb83c4471,0x02feb90a,0xc855c876,0x7297350d -DD 0x58ef5c7f,0xe22da104,0x2886d078,0x92442d03 -DD 0xf10ef9b7,0x4bcc04cc,0x816775b0,0x3ba588cb -DD 0x11dde1b9,0xab1f1cc2,0x61b46dbe,0xdb7690c5 -DD 0x3544bf5a,0x8f864221,0x452d335d,0xffefce26 -DD 0xd597a754,0x6f555a2f,0xa5fe2b53,0x1f3cd628 -DD 0x636b823b,0xd9a97f40,0x13020e3c,0xa9c0f347 -DD 0x83b89a35,0x397a674e,0xf3d11632,0x4913eb49 -DD 0xa721c4d6,0x1de339ad,0xd74848d1,0x6d8ab5aa -DD 0x47f2dcd8,0xfd3021a3,0x379b50df,0x8d59ada4 -DD 0xee137910,0x54d1846b,0x9e7af517,0x24b8086c -DD 0x0ec0611e,0xb4029c65,0x7ea9ed19,0xc46b1062 -DD 0x2a593ffd,0x909bc286,0x5a30b3fa,0xe0f24e81 -DD 0xca8a27f3,0x7048da88,0xbae3abf4,0x0021568f -DD 0x424d03d2,0xf88ffea9,0x32248fd5,0x88e672ae -DD 0xa29e1bdc,0x185ce6a7,0xd2f797db,0x68356aa0 -DD 0x8607453f,0x3cc5b844,0xf66ec938,0x4cac3443 -DD 0x66d45d31,0xdc16a04a,0x16bdd136,0xac7f2c4d -DD 
0xcf35f8f9,0x75f70582,0xbf5c74fe,0x059e8985 -DD 0x2fe6e0f7,0x95241d8c,0x5f8f6cf0,0xe54d918b -DD 0x0b7fbe14,0xb1bd436f,0x7b163213,0xc1d4cf68 -DD 0xebaca61a,0x516e5b61,0x9bc52a1d,0x2107d766 -DD 0x5d508375,0xe7927e0e,0x2d390f72,0x97fbf209 -DD 0xbd839b7b,0x07416600,0xcdea177c,0x7728ea07 -DD 0x991ac598,0x23d838e3,0xe973499f,0x53b1b4e4 -DD 0x79c9dd96,0xc30b20ed,0x09a05191,0xb362acea -DD 0xd028785e,0x6aea8525,0xa041f459,0x1a830922 -DD 0x30fb6050,0x8a399d2b,0x4092ec57,0xfa50112c -DD 0x14623eb3,0xaea0c3c8,0x640bb2b4,0xdec94fcf -DD 0xf4b126bd,0x4e73dbc6,0x84d8aaba,0x3e1a57c1 - -mul_table_632: -DD 0x00000000,0x6b749fb2,0xd6e93f64,0xbd9da0d6 -DD 0xa83e0839,0xc34a978b,0x7ed7375d,0x15a3a8ef -DD 0x55906683,0x3ee4f931,0x837959e7,0xe80dc655 -DD 0xfdae6eba,0x96daf108,0x2b4751de,0x4033ce6c -DD 0xab20cd06,0xc05452b4,0x7dc9f262,0x16bd6dd0 -DD 0x031ec53f,0x686a5a8d,0xd5f7fa5b,0xbe8365e9 -DD 0xfeb0ab85,0x95c43437,0x285994e1,0x432d0b53 -DD 0x568ea3bc,0x3dfa3c0e,0x80679cd8,0xeb13036a -DD 0x53adecfd,0x38d9734f,0x8544d399,0xee304c2b -DD 0xfb93e4c4,0x90e77b76,0x2d7adba0,0x460e4412 -DD 0x063d8a7e,0x6d4915cc,0xd0d4b51a,0xbba02aa8 -DD 0xae038247,0xc5771df5,0x78eabd23,0x139e2291 -DD 0xf88d21fb,0x93f9be49,0x2e641e9f,0x4510812d -DD 0x50b329c2,0x3bc7b670,0x865a16a6,0xed2e8914 -DD 0xad1d4778,0xc669d8ca,0x7bf4781c,0x1080e7ae -DD 0x05234f41,0x6e57d0f3,0xd3ca7025,0xb8beef97 -DD 0xa75bd9fa,0xcc2f4648,0x71b2e69e,0x1ac6792c -DD 0x0f65d1c3,0x64114e71,0xd98ceea7,0xb2f87115 -DD 0xf2cbbf79,0x99bf20cb,0x2422801d,0x4f561faf -DD 0x5af5b740,0x318128f2,0x8c1c8824,0xe7681796 -DD 0x0c7b14fc,0x670f8b4e,0xda922b98,0xb1e6b42a -DD 0xa4451cc5,0xcf318377,0x72ac23a1,0x19d8bc13 -DD 0x59eb727f,0x329fedcd,0x8f024d1b,0xe476d2a9 -DD 0xf1d57a46,0x9aa1e5f4,0x273c4522,0x4c48da90 -DD 0xf4f63507,0x9f82aab5,0x221f0a63,0x496b95d1 -DD 0x5cc83d3e,0x37bca28c,0x8a21025a,0xe1559de8 -DD 0xa1665384,0xca12cc36,0x778f6ce0,0x1cfbf352 -DD 0x09585bbd,0x622cc40f,0xdfb164d9,0xb4c5fb6b -DD 0x5fd6f801,0x34a267b3,0x893fc765,0xe24b58d7 -DD 
0xf7e8f038,0x9c9c6f8a,0x2101cf5c,0x4a7550ee -DD 0x0a469e82,0x61320130,0xdcafa1e6,0xb7db3e54 -DD 0xa27896bb,0xc90c0909,0x7491a9df,0x1fe5366d -DD 0x4b5bc505,0x202f5ab7,0x9db2fa61,0xf6c665d3 -DD 0xe365cd3c,0x8811528e,0x358cf258,0x5ef86dea -DD 0x1ecba386,0x75bf3c34,0xc8229ce2,0xa3560350 -DD 0xb6f5abbf,0xdd81340d,0x601c94db,0x0b680b69 -DD 0xe07b0803,0x8b0f97b1,0x36923767,0x5de6a8d5 -DD 0x4845003a,0x23319f88,0x9eac3f5e,0xf5d8a0ec -DD 0xb5eb6e80,0xde9ff132,0x630251e4,0x0876ce56 -DD 0x1dd566b9,0x76a1f90b,0xcb3c59dd,0xa048c66f -DD 0x18f629f8,0x7382b64a,0xce1f169c,0xa56b892e -DD 0xb0c821c1,0xdbbcbe73,0x66211ea5,0x0d558117 -DD 0x4d664f7b,0x2612d0c9,0x9b8f701f,0xf0fbefad -DD 0xe5584742,0x8e2cd8f0,0x33b17826,0x58c5e794 -DD 0xb3d6e4fe,0xd8a27b4c,0x653fdb9a,0x0e4b4428 -DD 0x1be8ecc7,0x709c7375,0xcd01d3a3,0xa6754c11 -DD 0xe646827d,0x8d321dcf,0x30afbd19,0x5bdb22ab -DD 0x4e788a44,0x250c15f6,0x9891b520,0xf3e52a92 -DD 0xec001cff,0x8774834d,0x3ae9239b,0x519dbc29 -DD 0x443e14c6,0x2f4a8b74,0x92d72ba2,0xf9a3b410 -DD 0xb9907a7c,0xd2e4e5ce,0x6f794518,0x040ddaaa -DD 0x11ae7245,0x7adaedf7,0xc7474d21,0xac33d293 -DD 0x4720d1f9,0x2c544e4b,0x91c9ee9d,0xfabd712f -DD 0xef1ed9c0,0x846a4672,0x39f7e6a4,0x52837916 -DD 0x12b0b77a,0x79c428c8,0xc459881e,0xaf2d17ac -DD 0xba8ebf43,0xd1fa20f1,0x6c678027,0x07131f95 -DD 0xbfadf002,0xd4d96fb0,0x6944cf66,0x023050d4 -DD 0x1793f83b,0x7ce76789,0xc17ac75f,0xaa0e58ed -DD 0xea3d9681,0x81490933,0x3cd4a9e5,0x57a03657 -DD 0x42039eb8,0x2977010a,0x94eaa1dc,0xff9e3e6e -DD 0x148d3d04,0x7ff9a2b6,0xc2640260,0xa9109dd2 -DD 0xbcb3353d,0xd7c7aa8f,0x6a5a0a59,0x012e95eb -DD 0x411d5b87,0x2a69c435,0x97f464e3,0xfc80fb51 -DD 0xe92353be,0x8257cc0c,0x3fca6cda,0x54bef368 - -mul_table_1272: -DD 0x00000000,0xdd66cbbb,0xbf21e187,0x62472a3c -DD 0x7bafb5ff,0xa6c97e44,0xc48e5478,0x19e89fc3 -DD 0xf75f6bfe,0x2a39a045,0x487e8a79,0x951841c2 -DD 0x8cf0de01,0x519615ba,0x33d13f86,0xeeb7f43d -DD 0xeb52a10d,0x36346ab6,0x5473408a,0x89158b31 -DD 0x90fd14f2,0x4d9bdf49,0x2fdcf575,0xf2ba3ece -DD 
0x1c0dcaf3,0xc16b0148,0xa32c2b74,0x7e4ae0cf -DD 0x67a27f0c,0xbac4b4b7,0xd8839e8b,0x05e55530 -DD 0xd34934eb,0x0e2fff50,0x6c68d56c,0xb10e1ed7 -DD 0xa8e68114,0x75804aaf,0x17c76093,0xcaa1ab28 -DD 0x24165f15,0xf97094ae,0x9b37be92,0x46517529 -DD 0x5fb9eaea,0x82df2151,0xe0980b6d,0x3dfec0d6 -DD 0x381b95e6,0xe57d5e5d,0x873a7461,0x5a5cbfda -DD 0x43b42019,0x9ed2eba2,0xfc95c19e,0x21f30a25 -DD 0xcf44fe18,0x122235a3,0x70651f9f,0xad03d424 -DD 0xb4eb4be7,0x698d805c,0x0bcaaa60,0xd6ac61db -DD 0xa37e1f27,0x7e18d49c,0x1c5ffea0,0xc139351b -DD 0xd8d1aad8,0x05b76163,0x67f04b5f,0xba9680e4 -DD 0x542174d9,0x8947bf62,0xeb00955e,0x36665ee5 -DD 0x2f8ec126,0xf2e80a9d,0x90af20a1,0x4dc9eb1a -DD 0x482cbe2a,0x954a7591,0xf70d5fad,0x2a6b9416 -DD 0x33830bd5,0xeee5c06e,0x8ca2ea52,0x51c421e9 -DD 0xbf73d5d4,0x62151e6f,0x00523453,0xdd34ffe8 -DD 0xc4dc602b,0x19baab90,0x7bfd81ac,0xa69b4a17 -DD 0x70372bcc,0xad51e077,0xcf16ca4b,0x127001f0 -DD 0x0b989e33,0xd6fe5588,0xb4b97fb4,0x69dfb40f -DD 0x87684032,0x5a0e8b89,0x3849a1b5,0xe52f6a0e -DD 0xfcc7f5cd,0x21a13e76,0x43e6144a,0x9e80dff1 -DD 0x9b658ac1,0x4603417a,0x24446b46,0xf922a0fd -DD 0xe0ca3f3e,0x3dacf485,0x5febdeb9,0x828d1502 -DD 0x6c3ae13f,0xb15c2a84,0xd31b00b8,0x0e7dcb03 -DD 0x179554c0,0xcaf39f7b,0xa8b4b547,0x75d27efc -DD 0x431048bf,0x9e768304,0xfc31a938,0x21576283 -DD 0x38bffd40,0xe5d936fb,0x879e1cc7,0x5af8d77c -DD 0xb44f2341,0x6929e8fa,0x0b6ec2c6,0xd608097d -DD 0xcfe096be,0x12865d05,0x70c17739,0xada7bc82 -DD 0xa842e9b2,0x75242209,0x17630835,0xca05c38e -DD 0xd3ed5c4d,0x0e8b97f6,0x6cccbdca,0xb1aa7671 -DD 0x5f1d824c,0x827b49f7,0xe03c63cb,0x3d5aa870 -DD 0x24b237b3,0xf9d4fc08,0x9b93d634,0x46f51d8f -DD 0x90597c54,0x4d3fb7ef,0x2f789dd3,0xf21e5668 -DD 0xebf6c9ab,0x36900210,0x54d7282c,0x89b1e397 -DD 0x670617aa,0xba60dc11,0xd827f62d,0x05413d96 -DD 0x1ca9a255,0xc1cf69ee,0xa38843d2,0x7eee8869 -DD 0x7b0bdd59,0xa66d16e2,0xc42a3cde,0x194cf765 -DD 0x00a468a6,0xddc2a31d,0xbf858921,0x62e3429a -DD 0x8c54b6a7,0x51327d1c,0x33755720,0xee139c9b -DD 
0xf7fb0358,0x2a9dc8e3,0x48dae2df,0x95bc2964 -DD 0xe06e5798,0x3d089c23,0x5f4fb61f,0x82297da4 -DD 0x9bc1e267,0x46a729dc,0x24e003e0,0xf986c85b -DD 0x17313c66,0xca57f7dd,0xa810dde1,0x7576165a -DD 0x6c9e8999,0xb1f84222,0xd3bf681e,0x0ed9a3a5 -DD 0x0b3cf695,0xd65a3d2e,0xb41d1712,0x697bdca9 -DD 0x7093436a,0xadf588d1,0xcfb2a2ed,0x12d46956 -DD 0xfc639d6b,0x210556d0,0x43427cec,0x9e24b757 -DD 0x87cc2894,0x5aaae32f,0x38edc913,0xe58b02a8 -DD 0x33276373,0xee41a8c8,0x8c0682f4,0x5160494f -DD 0x4888d68c,0x95ee1d37,0xf7a9370b,0x2acffcb0 -DD 0xc478088d,0x191ec336,0x7b59e90a,0xa63f22b1 -DD 0xbfd7bd72,0x62b176c9,0x00f65cf5,0xdd90974e -DD 0xd875c27e,0x051309c5,0x675423f9,0xba32e842 -DD 0xa3da7781,0x7ebcbc3a,0x1cfb9606,0xc19d5dbd -DD 0x2f2aa980,0xf24c623b,0x900b4807,0x4d6d83bc -DD 0x54851c7f,0x89e3d7c4,0xeba4fdf8,0x36c23643 - -%macro slversion 4 -global %1_slver_%2%3%4 -global %1_slver -%1_slver: -%1_slver_%2%3%4: - dw 0x%4 - db 0x%3, 0x%2 -%endmacro -;;; func core, ver, snum -slversion crc32_iscsi_zero_00, 00, 02, 0014 -; inform linker that this doesn't require executable stack -section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/src/common/crc32c_intel_fast_zero_asm.s b/src/common/crc32c_intel_fast_zero_asm.s new file mode 100644 index 00000000000..34b7f489016 --- /dev/null +++ b/src/common/crc32c_intel_fast_zero_asm.s @@ -0,0 +1,648 @@ +; +; Copyright 2012-2013 Intel Corporation All Rights Reserved. +; All rights reserved. +; +; http://opensource.org/licenses/BSD-3-Clause +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following +; conditions are met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; +; * Neither the name of the Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +; COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +; OF THE POSSIBILITY OF SUCH DAMAGE. 
+; + +; Function to compute iscsi CRC32 with table-based recombination +; crc done "by 3" with block sizes 1920, 960, 480, 240 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; crcB3 MACRO to implement crc32 on 3 %%bSize-byte blocks +%macro crcB3 3 +%define %%bSize %1 ; 1/3 of buffer size +%define %%td2 %2 ; table offset for crc0 (2/3 of buffer) +%define %%td1 %3 ; table offset for crc1 (1/3 of buffer) + +%IF %%bSize=640 + sub len, %%bSize*3 + js %%crcB3_end ;; jump to next level if 3*blockSize > len +%ELSE + cmp len, %%bSize*3 + jnae %%crcB3_end ;; jump to next level if 3*blockSize > len +%ENDIF + ;;;;;; Calculate CRC of 3 blocks of the buffer ;;;;;; +%%crcB3_loop: + ;; rax = crc0 = initial crc + xor rbx, rbx ;; rbx = crc1 = 0; + xor r10, r10 ;; r10 = crc2 = 0; + + %assign i 0 + %rep %%bSize/8 - 1 + crc32 rax, bufptmp ;; update crc0 + crc32 rbx, bufptmp ;; update crc1 + crc32 r10, bufptmp ;; update crc2 + %assign i (i+8) + %endrep + crc32 rax, bufptmp ;; update crc0 + crc32 rbx, bufptmp ;; update crc1 +; SKIP ;crc32 r10, bufptmp ;; update crc2 + + ; merge in crc0 + movzx bufp_dw, al + mov r9d, [crc_init + bufp*4 + %%td2] + movzx bufp_dw, ah + shr eax, 16 + mov r11d, [crc_init + bufp*4 + %%td2] + shl r11, 8 + xor r9, r11 + + movzx bufp_dw, al + mov r11d, [crc_init + bufp*4 + %%td2] + movzx bufp_dw, ah + shl r11, 16 + xor r9, r11 + mov r11d, [crc_init + bufp*4 + %%td2] + shl r11, 24 + xor r9, r11 + + ; merge in crc1 + + movzx bufp_dw, bl + mov r11d, [crc_init + bufp*4 + %%td1] + movzx bufp_dw, bh + shr ebx, 16 + xor r9, r11 + mov r11d, [crc_init + bufp*4 + %%td1] + shl r11, 8 + xor r9, r11 + + movzx bufp_dw, bl + mov r11d, [crc_init + bufp*4 + %%td1] + movzx bufp_dw, bh + shl r11, 16 + xor r9, r11 + mov r11d, [crc_init + bufp*4 + %%td1] + shl r11, 24 + xor r9, r11 + + ; xor r9, [bufptmp+i + 2*%%bSize] + crc32 r10, r9 + mov rax, r10 + + ; add bufptmp, %%bSize*3 ;; 
move to next block + sub len, %%bSize*3 +%IF %%bSize=640 + jns %%crcB3_loop +%ENDIF + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%%crcB3_end: +%IF %%bSize=640 + add len, %%bSize*3 +%ENDIF + je do_return ;; return if remaining data is zero +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; ISCSI CRC 32 Implementation with crc32 Instruction + +;;; unsigned int crc32_iscsi_00(unsigned char * buffer, int len, unsigned int crc_init); +;;; +;;; *buf = rcx +;;; len = rdx +;;; crc_init = r8 +;;; + +global crc32_iscsi_zero_00:function +crc32_iscsi_zero_00: + +%ifidn __OUTPUT_FORMAT__, elf64 +%define bufp rdi +%define bufp_dw edi +%define bufp_w di +%define bufp_b dil +%define bufptmp rcx +%define block_0 rcx +%define block_1 r8 +%define block_2 r11 +%define len rsi +%define len_dw esi +%define len_w si +%define len_b sil +%define crc_init rdx +%define crc_init_dw edx +%else +%define bufp rcx +%define bufp_dw ecx +%define bufp_w cx +%define bufp_b cl +%define bufptmp rdi +%define block_0 rdi +%define block_1 rsi +%define block_2 r11 +%define len rdx +%define len_dw edx +%define len_w dx +%define len_b dl +%define crc_init r8 +%define crc_init_dw r8d +%endif + + + push rdi + push rbx + + mov rax, crc_init ;; rax = crc_init; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; no need for alignment + xor bufptmp, bufptmp + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; 2) BLOCK LEVEL: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +proc_block: + cmp len, 240 + jb bit8 + + lea crc_init, [mul_table_72 wrt rip] ;; load table base address + + crcB3 640, 
0x1000, 0x0c00 ; 640*3 = 1920 (Tables 1280, 640) + crcB3 320, 0x0c00, 0x0800 ; 320*3 = 960 (Tables 640, 320) + crcB3 160, 0x0800, 0x0400 ; 160*3 = 480 (Tables 320, 160) + crcB3 80, 0x0400, 0x0000 ; 80*3 = 240 (Tables 160, 80) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;4) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of rdx are full) + +bit8: + shl len_b, 1 ;; shift-out MSB (bit-7) + jnc bit7 ;; jump to bit-6 if bit-7 == 0 + %assign i 0 + %rep 16 + crc32 rax, bufptmp ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + +bit7: + shl len_b, 1 ;; shift-out MSB (bit-7) + jnc bit6 ;; jump to bit-6 if bit-7 == 0 + %assign i 0 + %rep 8 + crc32 rax, bufptmp ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + ; add bufptmp, 64 ;; buf +=64; (next 64 bytes) +bit6: + shl len_b, 1 ;; shift-out MSB (bit-6) + jnc bit5 ;; jump to bit-5 if bit-6 == 0 + %assign i 0 + %rep 4 + crc32 rax, bufptmp ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + ; add bufptmp, 32 ;; buf +=32; (next 32 bytes) +bit5: + shl len_b, 1 ;; shift-out MSB (bit-5) + jnc bit4 ;; jump to bit-4 if bit-5 == 0 + %assign i 0 + %rep 2 + crc32 rax, bufptmp ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + ; add bufptmp, 16 ;; buf +=16; (next 16 bytes) +bit4: + shl len_b, 1 ;; shift-out MSB (bit-4) + jnc bit3 ;; jump to bit-3 if bit-4 == 0 + crc32 rax, bufptmp ;; compute crc32 of 8-byte data + je do_return ;; return if remaining data is zero + ; add bufptmp, 8 ;; buf +=8; (next 8 bytes) +bit3: + mov rbx, bufptmp ;; load a 8-bytes from the buffer: + shl len_b, 1 ;; shift-out MSB (bit-3) + jnc bit2 ;; jump to bit-2 if bit-3 == 0 + crc32 eax, ebx ;; compute crc32 of 4-byte data + je do_return ;; return if remaining data is zero + shr rbx, 32 
;; get next 3 bytes +bit2: + shl len_b, 1 ;; shift-out MSB (bit-2) + jnc bit1 ;; jump to bit-1 if bit-2 == 0 + crc32 eax, bx ;; compute crc32 of 2-byte data + je do_return ;; return if remaining data is zero + shr rbx, 16 ;; next byte +bit1: + test len_b,len_b + je do_return + crc32 eax, bl ;; compute crc32 of 1-byte data +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +do_return: + + pop rbx + pop rdi + ret + +less_than_8: + xor bufp, bufp + test len,4 + jz less_than_4 + crc32 eax, bufp_dw + add bufptmp,4 +less_than_4: + test len,2 + jz less_than_2 + crc32 eax, bufp_w + add bufptmp,2 +less_than_2: + test len,1 + jz do_return + crc32 rax, bufp_b + pop rbx + pop bufptmp + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; global mul_table_72, mul_table_152, mul_table_312, mul_table_632, mul_table_1272 + +section .data +align 8 +mul_table_72: +DD 0x00000000,0x39d3b296,0x73a7652c,0x4a74d7ba +DD 0xe74eca58,0xde9d78ce,0x94e9af74,0xad3a1de2 +DD 0xcb71e241,0xf2a250d7,0xb8d6876d,0x810535fb +DD 0x2c3f2819,0x15ec9a8f,0x5f984d35,0x664bffa3 +DD 0x930fb273,0xaadc00e5,0xe0a8d75f,0xd97b65c9 +DD 0x7441782b,0x4d92cabd,0x07e61d07,0x3e35af91 +DD 0x587e5032,0x61ade2a4,0x2bd9351e,0x120a8788 +DD 0xbf309a6a,0x86e328fc,0xcc97ff46,0xf5444dd0 +DD 0x23f31217,0x1a20a081,0x5054773b,0x6987c5ad +DD 0xc4bdd84f,0xfd6e6ad9,0xb71abd63,0x8ec90ff5 +DD 0xe882f056,0xd15142c0,0x9b25957a,0xa2f627ec +DD 0x0fcc3a0e,0x361f8898,0x7c6b5f22,0x45b8edb4 +DD 0xb0fca064,0x892f12f2,0xc35bc548,0xfa8877de +DD 0x57b26a3c,0x6e61d8aa,0x24150f10,0x1dc6bd86 +DD 0x7b8d4225,0x425ef0b3,0x082a2709,0x31f9959f +DD 0x9cc3887d,0xa5103aeb,0xef64ed51,0xd6b75fc7 +DD 
0x47e6242e,0x7e3596b8,0x34414102,0x0d92f394 +DD 0xa0a8ee76,0x997b5ce0,0xd30f8b5a,0xeadc39cc +DD 0x8c97c66f,0xb54474f9,0xff30a343,0xc6e311d5 +DD 0x6bd90c37,0x520abea1,0x187e691b,0x21addb8d +DD 0xd4e9965d,0xed3a24cb,0xa74ef371,0x9e9d41e7 +DD 0x33a75c05,0x0a74ee93,0x40003929,0x79d38bbf +DD 0x1f98741c,0x264bc68a,0x6c3f1130,0x55eca3a6 +DD 0xf8d6be44,0xc1050cd2,0x8b71db68,0xb2a269fe +DD 0x64153639,0x5dc684af,0x17b25315,0x2e61e183 +DD 0x835bfc61,0xba884ef7,0xf0fc994d,0xc92f2bdb +DD 0xaf64d478,0x96b766ee,0xdcc3b154,0xe51003c2 +DD 0x482a1e20,0x71f9acb6,0x3b8d7b0c,0x025ec99a +DD 0xf71a844a,0xcec936dc,0x84bde166,0xbd6e53f0 +DD 0x10544e12,0x2987fc84,0x63f32b3e,0x5a2099a8 +DD 0x3c6b660b,0x05b8d49d,0x4fcc0327,0x761fb1b1 +DD 0xdb25ac53,0xe2f61ec5,0xa882c97f,0x91517be9 +DD 0x8fcc485c,0xb61ffaca,0xfc6b2d70,0xc5b89fe6 +DD 0x68828204,0x51513092,0x1b25e728,0x22f655be +DD 0x44bdaa1d,0x7d6e188b,0x371acf31,0x0ec97da7 +DD 0xa3f36045,0x9a20d2d3,0xd0540569,0xe987b7ff +DD 0x1cc3fa2f,0x251048b9,0x6f649f03,0x56b72d95 +DD 0xfb8d3077,0xc25e82e1,0x882a555b,0xb1f9e7cd +DD 0xd7b2186e,0xee61aaf8,0xa4157d42,0x9dc6cfd4 +DD 0x30fcd236,0x092f60a0,0x435bb71a,0x7a88058c +DD 0xac3f5a4b,0x95ece8dd,0xdf983f67,0xe64b8df1 +DD 0x4b719013,0x72a22285,0x38d6f53f,0x010547a9 +DD 0x674eb80a,0x5e9d0a9c,0x14e9dd26,0x2d3a6fb0 +DD 0x80007252,0xb9d3c0c4,0xf3a7177e,0xca74a5e8 +DD 0x3f30e838,0x06e35aae,0x4c978d14,0x75443f82 +DD 0xd87e2260,0xe1ad90f6,0xabd9474c,0x920af5da +DD 0xf4410a79,0xcd92b8ef,0x87e66f55,0xbe35ddc3 +DD 0x130fc021,0x2adc72b7,0x60a8a50d,0x597b179b +DD 0xc82a6c72,0xf1f9dee4,0xbb8d095e,0x825ebbc8 +DD 0x2f64a62a,0x16b714bc,0x5cc3c306,0x65107190 +DD 0x035b8e33,0x3a883ca5,0x70fceb1f,0x492f5989 +DD 0xe415446b,0xddc6f6fd,0x97b22147,0xae6193d1 +DD 0x5b25de01,0x62f66c97,0x2882bb2d,0x115109bb +DD 0xbc6b1459,0x85b8a6cf,0xcfcc7175,0xf61fc3e3 +DD 0x90543c40,0xa9878ed6,0xe3f3596c,0xda20ebfa +DD 0x771af618,0x4ec9448e,0x04bd9334,0x3d6e21a2 +DD 0xebd97e65,0xd20accf3,0x987e1b49,0xa1ada9df +DD 
0x0c97b43d,0x354406ab,0x7f30d111,0x46e36387 +DD 0x20a89c24,0x197b2eb2,0x530ff908,0x6adc4b9e +DD 0xc7e6567c,0xfe35e4ea,0xb4413350,0x8d9281c6 +DD 0x78d6cc16,0x41057e80,0x0b71a93a,0x32a21bac +DD 0x9f98064e,0xa64bb4d8,0xec3f6362,0xd5ecd1f4 +DD 0xb3a72e57,0x8a749cc1,0xc0004b7b,0xf9d3f9ed +DD 0x54e9e40f,0x6d3a5699,0x274e8123,0x1e9d33b5 + +mul_table_152: +DD 0x00000000,0x878a92a7,0x0af953bf,0x8d73c118 +DD 0x15f2a77e,0x927835d9,0x1f0bf4c1,0x98816666 +DD 0x2be54efc,0xac6fdc5b,0x211c1d43,0xa6968fe4 +DD 0x3e17e982,0xb99d7b25,0x34eeba3d,0xb364289a +DD 0x57ca9df8,0xd0400f5f,0x5d33ce47,0xdab95ce0 +DD 0x42383a86,0xc5b2a821,0x48c16939,0xcf4bfb9e +DD 0x7c2fd304,0xfba541a3,0x76d680bb,0xf15c121c +DD 0x69dd747a,0xee57e6dd,0x632427c5,0xe4aeb562 +DD 0xaf953bf0,0x281fa957,0xa56c684f,0x22e6fae8 +DD 0xba679c8e,0x3ded0e29,0xb09ecf31,0x37145d96 +DD 0x8470750c,0x03fae7ab,0x8e8926b3,0x0903b414 +DD 0x9182d272,0x160840d5,0x9b7b81cd,0x1cf1136a +DD 0xf85fa608,0x7fd534af,0xf2a6f5b7,0x752c6710 +DD 0xedad0176,0x6a2793d1,0xe75452c9,0x60dec06e +DD 0xd3bae8f4,0x54307a53,0xd943bb4b,0x5ec929ec +DD 0xc6484f8a,0x41c2dd2d,0xccb11c35,0x4b3b8e92 +DD 0x5ac60111,0xdd4c93b6,0x503f52ae,0xd7b5c009 +DD 0x4f34a66f,0xc8be34c8,0x45cdf5d0,0xc2476777 +DD 0x71234fed,0xf6a9dd4a,0x7bda1c52,0xfc508ef5 +DD 0x64d1e893,0xe35b7a34,0x6e28bb2c,0xe9a2298b +DD 0x0d0c9ce9,0x8a860e4e,0x07f5cf56,0x807f5df1 +DD 0x18fe3b97,0x9f74a930,0x12076828,0x958dfa8f +DD 0x26e9d215,0xa16340b2,0x2c1081aa,0xab9a130d +DD 0x331b756b,0xb491e7cc,0x39e226d4,0xbe68b473 +DD 0xf5533ae1,0x72d9a846,0xffaa695e,0x7820fbf9 +DD 0xe0a19d9f,0x672b0f38,0xea58ce20,0x6dd25c87 +DD 0xdeb6741d,0x593ce6ba,0xd44f27a2,0x53c5b505 +DD 0xcb44d363,0x4cce41c4,0xc1bd80dc,0x4637127b +DD 0xa299a719,0x251335be,0xa860f4a6,0x2fea6601 +DD 0xb76b0067,0x30e192c0,0xbd9253d8,0x3a18c17f +DD 0x897ce9e5,0x0ef67b42,0x8385ba5a,0x040f28fd +DD 0x9c8e4e9b,0x1b04dc3c,0x96771d24,0x11fd8f83 +DD 0xb58c0222,0x32069085,0xbf75519d,0x38ffc33a +DD 0xa07ea55c,0x27f437fb,0xaa87f6e3,0x2d0d6444 +DD 
0x9e694cde,0x19e3de79,0x94901f61,0x131a8dc6 +DD 0x8b9beba0,0x0c117907,0x8162b81f,0x06e82ab8 +DD 0xe2469fda,0x65cc0d7d,0xe8bfcc65,0x6f355ec2 +DD 0xf7b438a4,0x703eaa03,0xfd4d6b1b,0x7ac7f9bc +DD 0xc9a3d126,0x4e294381,0xc35a8299,0x44d0103e +DD 0xdc517658,0x5bdbe4ff,0xd6a825e7,0x5122b740 +DD 0x1a1939d2,0x9d93ab75,0x10e06a6d,0x976af8ca +DD 0x0feb9eac,0x88610c0b,0x0512cd13,0x82985fb4 +DD 0x31fc772e,0xb676e589,0x3b052491,0xbc8fb636 +DD 0x240ed050,0xa38442f7,0x2ef783ef,0xa97d1148 +DD 0x4dd3a42a,0xca59368d,0x472af795,0xc0a06532 +DD 0x58210354,0xdfab91f3,0x52d850eb,0xd552c24c +DD 0x6636ead6,0xe1bc7871,0x6ccfb969,0xeb452bce +DD 0x73c44da8,0xf44edf0f,0x793d1e17,0xfeb78cb0 +DD 0xef4a0333,0x68c09194,0xe5b3508c,0x6239c22b +DD 0xfab8a44d,0x7d3236ea,0xf041f7f2,0x77cb6555 +DD 0xc4af4dcf,0x4325df68,0xce561e70,0x49dc8cd7 +DD 0xd15deab1,0x56d77816,0xdba4b90e,0x5c2e2ba9 +DD 0xb8809ecb,0x3f0a0c6c,0xb279cd74,0x35f35fd3 +DD 0xad7239b5,0x2af8ab12,0xa78b6a0a,0x2001f8ad +DD 0x9365d037,0x14ef4290,0x999c8388,0x1e16112f +DD 0x86977749,0x011de5ee,0x8c6e24f6,0x0be4b651 +DD 0x40df38c3,0xc755aa64,0x4a266b7c,0xcdacf9db +DD 0x552d9fbd,0xd2a70d1a,0x5fd4cc02,0xd85e5ea5 +DD 0x6b3a763f,0xecb0e498,0x61c32580,0xe649b727 +DD 0x7ec8d141,0xf94243e6,0x743182fe,0xf3bb1059 +DD 0x1715a53b,0x909f379c,0x1decf684,0x9a666423 +DD 0x02e70245,0x856d90e2,0x081e51fa,0x8f94c35d +DD 0x3cf0ebc7,0xbb7a7960,0x3609b878,0xb1832adf +DD 0x29024cb9,0xae88de1e,0x23fb1f06,0xa4718da1 + +mul_table_312: +DD 0x00000000,0xbac2fd7b,0x70698c07,0xcaab717c +DD 0xe0d3180e,0x5a11e575,0x90ba9409,0x2a786972 +DD 0xc44a46ed,0x7e88bb96,0xb423caea,0x0ee13791 +DD 0x24995ee3,0x9e5ba398,0x54f0d2e4,0xee322f9f +DD 0x8d78fb2b,0x37ba0650,0xfd11772c,0x47d38a57 +DD 0x6dabe325,0xd7691e5e,0x1dc26f22,0xa7009259 +DD 0x4932bdc6,0xf3f040bd,0x395b31c1,0x8399ccba +DD 0xa9e1a5c8,0x132358b3,0xd98829cf,0x634ad4b4 +DD 0x1f1d80a7,0xa5df7ddc,0x6f740ca0,0xd5b6f1db +DD 0xffce98a9,0x450c65d2,0x8fa714ae,0x3565e9d5 +DD 0xdb57c64a,0x61953b31,0xab3e4a4d,0x11fcb736 +DD 
0x3b84de44,0x8146233f,0x4bed5243,0xf12faf38 +DD 0x92657b8c,0x28a786f7,0xe20cf78b,0x58ce0af0 +DD 0x72b66382,0xc8749ef9,0x02dfef85,0xb81d12fe +DD 0x562f3d61,0xecedc01a,0x2646b166,0x9c844c1d +DD 0xb6fc256f,0x0c3ed814,0xc695a968,0x7c575413 +DD 0x3e3b014e,0x84f9fc35,0x4e528d49,0xf4907032 +DD 0xdee81940,0x642ae43b,0xae819547,0x1443683c +DD 0xfa7147a3,0x40b3bad8,0x8a18cba4,0x30da36df +DD 0x1aa25fad,0xa060a2d6,0x6acbd3aa,0xd0092ed1 +DD 0xb343fa65,0x0981071e,0xc32a7662,0x79e88b19 +DD 0x5390e26b,0xe9521f10,0x23f96e6c,0x993b9317 +DD 0x7709bc88,0xcdcb41f3,0x0760308f,0xbda2cdf4 +DD 0x97daa486,0x2d1859fd,0xe7b32881,0x5d71d5fa +DD 0x212681e9,0x9be47c92,0x514f0dee,0xeb8df095 +DD 0xc1f599e7,0x7b37649c,0xb19c15e0,0x0b5ee89b +DD 0xe56cc704,0x5fae3a7f,0x95054b03,0x2fc7b678 +DD 0x05bfdf0a,0xbf7d2271,0x75d6530d,0xcf14ae76 +DD 0xac5e7ac2,0x169c87b9,0xdc37f6c5,0x66f50bbe +DD 0x4c8d62cc,0xf64f9fb7,0x3ce4eecb,0x862613b0 +DD 0x68143c2f,0xd2d6c154,0x187db028,0xa2bf4d53 +DD 0x88c72421,0x3205d95a,0xf8aea826,0x426c555d +DD 0x7c76029c,0xc6b4ffe7,0x0c1f8e9b,0xb6dd73e0 +DD 0x9ca51a92,0x2667e7e9,0xeccc9695,0x560e6bee +DD 0xb83c4471,0x02feb90a,0xc855c876,0x7297350d +DD 0x58ef5c7f,0xe22da104,0x2886d078,0x92442d03 +DD 0xf10ef9b7,0x4bcc04cc,0x816775b0,0x3ba588cb +DD 0x11dde1b9,0xab1f1cc2,0x61b46dbe,0xdb7690c5 +DD 0x3544bf5a,0x8f864221,0x452d335d,0xffefce26 +DD 0xd597a754,0x6f555a2f,0xa5fe2b53,0x1f3cd628 +DD 0x636b823b,0xd9a97f40,0x13020e3c,0xa9c0f347 +DD 0x83b89a35,0x397a674e,0xf3d11632,0x4913eb49 +DD 0xa721c4d6,0x1de339ad,0xd74848d1,0x6d8ab5aa +DD 0x47f2dcd8,0xfd3021a3,0x379b50df,0x8d59ada4 +DD 0xee137910,0x54d1846b,0x9e7af517,0x24b8086c +DD 0x0ec0611e,0xb4029c65,0x7ea9ed19,0xc46b1062 +DD 0x2a593ffd,0x909bc286,0x5a30b3fa,0xe0f24e81 +DD 0xca8a27f3,0x7048da88,0xbae3abf4,0x0021568f +DD 0x424d03d2,0xf88ffea9,0x32248fd5,0x88e672ae +DD 0xa29e1bdc,0x185ce6a7,0xd2f797db,0x68356aa0 +DD 0x8607453f,0x3cc5b844,0xf66ec938,0x4cac3443 +DD 0x66d45d31,0xdc16a04a,0x16bdd136,0xac7f2c4d +DD 
0xcf35f8f9,0x75f70582,0xbf5c74fe,0x059e8985 +DD 0x2fe6e0f7,0x95241d8c,0x5f8f6cf0,0xe54d918b +DD 0x0b7fbe14,0xb1bd436f,0x7b163213,0xc1d4cf68 +DD 0xebaca61a,0x516e5b61,0x9bc52a1d,0x2107d766 +DD 0x5d508375,0xe7927e0e,0x2d390f72,0x97fbf209 +DD 0xbd839b7b,0x07416600,0xcdea177c,0x7728ea07 +DD 0x991ac598,0x23d838e3,0xe973499f,0x53b1b4e4 +DD 0x79c9dd96,0xc30b20ed,0x09a05191,0xb362acea +DD 0xd028785e,0x6aea8525,0xa041f459,0x1a830922 +DD 0x30fb6050,0x8a399d2b,0x4092ec57,0xfa50112c +DD 0x14623eb3,0xaea0c3c8,0x640bb2b4,0xdec94fcf +DD 0xf4b126bd,0x4e73dbc6,0x84d8aaba,0x3e1a57c1 + +mul_table_632: +DD 0x00000000,0x6b749fb2,0xd6e93f64,0xbd9da0d6 +DD 0xa83e0839,0xc34a978b,0x7ed7375d,0x15a3a8ef +DD 0x55906683,0x3ee4f931,0x837959e7,0xe80dc655 +DD 0xfdae6eba,0x96daf108,0x2b4751de,0x4033ce6c +DD 0xab20cd06,0xc05452b4,0x7dc9f262,0x16bd6dd0 +DD 0x031ec53f,0x686a5a8d,0xd5f7fa5b,0xbe8365e9 +DD 0xfeb0ab85,0x95c43437,0x285994e1,0x432d0b53 +DD 0x568ea3bc,0x3dfa3c0e,0x80679cd8,0xeb13036a +DD 0x53adecfd,0x38d9734f,0x8544d399,0xee304c2b +DD 0xfb93e4c4,0x90e77b76,0x2d7adba0,0x460e4412 +DD 0x063d8a7e,0x6d4915cc,0xd0d4b51a,0xbba02aa8 +DD 0xae038247,0xc5771df5,0x78eabd23,0x139e2291 +DD 0xf88d21fb,0x93f9be49,0x2e641e9f,0x4510812d +DD 0x50b329c2,0x3bc7b670,0x865a16a6,0xed2e8914 +DD 0xad1d4778,0xc669d8ca,0x7bf4781c,0x1080e7ae +DD 0x05234f41,0x6e57d0f3,0xd3ca7025,0xb8beef97 +DD 0xa75bd9fa,0xcc2f4648,0x71b2e69e,0x1ac6792c +DD 0x0f65d1c3,0x64114e71,0xd98ceea7,0xb2f87115 +DD 0xf2cbbf79,0x99bf20cb,0x2422801d,0x4f561faf +DD 0x5af5b740,0x318128f2,0x8c1c8824,0xe7681796 +DD 0x0c7b14fc,0x670f8b4e,0xda922b98,0xb1e6b42a +DD 0xa4451cc5,0xcf318377,0x72ac23a1,0x19d8bc13 +DD 0x59eb727f,0x329fedcd,0x8f024d1b,0xe476d2a9 +DD 0xf1d57a46,0x9aa1e5f4,0x273c4522,0x4c48da90 +DD 0xf4f63507,0x9f82aab5,0x221f0a63,0x496b95d1 +DD 0x5cc83d3e,0x37bca28c,0x8a21025a,0xe1559de8 +DD 0xa1665384,0xca12cc36,0x778f6ce0,0x1cfbf352 +DD 0x09585bbd,0x622cc40f,0xdfb164d9,0xb4c5fb6b +DD 0x5fd6f801,0x34a267b3,0x893fc765,0xe24b58d7 +DD 
0xf7e8f038,0x9c9c6f8a,0x2101cf5c,0x4a7550ee +DD 0x0a469e82,0x61320130,0xdcafa1e6,0xb7db3e54 +DD 0xa27896bb,0xc90c0909,0x7491a9df,0x1fe5366d +DD 0x4b5bc505,0x202f5ab7,0x9db2fa61,0xf6c665d3 +DD 0xe365cd3c,0x8811528e,0x358cf258,0x5ef86dea +DD 0x1ecba386,0x75bf3c34,0xc8229ce2,0xa3560350 +DD 0xb6f5abbf,0xdd81340d,0x601c94db,0x0b680b69 +DD 0xe07b0803,0x8b0f97b1,0x36923767,0x5de6a8d5 +DD 0x4845003a,0x23319f88,0x9eac3f5e,0xf5d8a0ec +DD 0xb5eb6e80,0xde9ff132,0x630251e4,0x0876ce56 +DD 0x1dd566b9,0x76a1f90b,0xcb3c59dd,0xa048c66f +DD 0x18f629f8,0x7382b64a,0xce1f169c,0xa56b892e +DD 0xb0c821c1,0xdbbcbe73,0x66211ea5,0x0d558117 +DD 0x4d664f7b,0x2612d0c9,0x9b8f701f,0xf0fbefad +DD 0xe5584742,0x8e2cd8f0,0x33b17826,0x58c5e794 +DD 0xb3d6e4fe,0xd8a27b4c,0x653fdb9a,0x0e4b4428 +DD 0x1be8ecc7,0x709c7375,0xcd01d3a3,0xa6754c11 +DD 0xe646827d,0x8d321dcf,0x30afbd19,0x5bdb22ab +DD 0x4e788a44,0x250c15f6,0x9891b520,0xf3e52a92 +DD 0xec001cff,0x8774834d,0x3ae9239b,0x519dbc29 +DD 0x443e14c6,0x2f4a8b74,0x92d72ba2,0xf9a3b410 +DD 0xb9907a7c,0xd2e4e5ce,0x6f794518,0x040ddaaa +DD 0x11ae7245,0x7adaedf7,0xc7474d21,0xac33d293 +DD 0x4720d1f9,0x2c544e4b,0x91c9ee9d,0xfabd712f +DD 0xef1ed9c0,0x846a4672,0x39f7e6a4,0x52837916 +DD 0x12b0b77a,0x79c428c8,0xc459881e,0xaf2d17ac +DD 0xba8ebf43,0xd1fa20f1,0x6c678027,0x07131f95 +DD 0xbfadf002,0xd4d96fb0,0x6944cf66,0x023050d4 +DD 0x1793f83b,0x7ce76789,0xc17ac75f,0xaa0e58ed +DD 0xea3d9681,0x81490933,0x3cd4a9e5,0x57a03657 +DD 0x42039eb8,0x2977010a,0x94eaa1dc,0xff9e3e6e +DD 0x148d3d04,0x7ff9a2b6,0xc2640260,0xa9109dd2 +DD 0xbcb3353d,0xd7c7aa8f,0x6a5a0a59,0x012e95eb +DD 0x411d5b87,0x2a69c435,0x97f464e3,0xfc80fb51 +DD 0xe92353be,0x8257cc0c,0x3fca6cda,0x54bef368 + +mul_table_1272: +DD 0x00000000,0xdd66cbbb,0xbf21e187,0x62472a3c +DD 0x7bafb5ff,0xa6c97e44,0xc48e5478,0x19e89fc3 +DD 0xf75f6bfe,0x2a39a045,0x487e8a79,0x951841c2 +DD 0x8cf0de01,0x519615ba,0x33d13f86,0xeeb7f43d +DD 0xeb52a10d,0x36346ab6,0x5473408a,0x89158b31 +DD 0x90fd14f2,0x4d9bdf49,0x2fdcf575,0xf2ba3ece +DD 
0x1c0dcaf3,0xc16b0148,0xa32c2b74,0x7e4ae0cf +DD 0x67a27f0c,0xbac4b4b7,0xd8839e8b,0x05e55530 +DD 0xd34934eb,0x0e2fff50,0x6c68d56c,0xb10e1ed7 +DD 0xa8e68114,0x75804aaf,0x17c76093,0xcaa1ab28 +DD 0x24165f15,0xf97094ae,0x9b37be92,0x46517529 +DD 0x5fb9eaea,0x82df2151,0xe0980b6d,0x3dfec0d6 +DD 0x381b95e6,0xe57d5e5d,0x873a7461,0x5a5cbfda +DD 0x43b42019,0x9ed2eba2,0xfc95c19e,0x21f30a25 +DD 0xcf44fe18,0x122235a3,0x70651f9f,0xad03d424 +DD 0xb4eb4be7,0x698d805c,0x0bcaaa60,0xd6ac61db +DD 0xa37e1f27,0x7e18d49c,0x1c5ffea0,0xc139351b +DD 0xd8d1aad8,0x05b76163,0x67f04b5f,0xba9680e4 +DD 0x542174d9,0x8947bf62,0xeb00955e,0x36665ee5 +DD 0x2f8ec126,0xf2e80a9d,0x90af20a1,0x4dc9eb1a +DD 0x482cbe2a,0x954a7591,0xf70d5fad,0x2a6b9416 +DD 0x33830bd5,0xeee5c06e,0x8ca2ea52,0x51c421e9 +DD 0xbf73d5d4,0x62151e6f,0x00523453,0xdd34ffe8 +DD 0xc4dc602b,0x19baab90,0x7bfd81ac,0xa69b4a17 +DD 0x70372bcc,0xad51e077,0xcf16ca4b,0x127001f0 +DD 0x0b989e33,0xd6fe5588,0xb4b97fb4,0x69dfb40f +DD 0x87684032,0x5a0e8b89,0x3849a1b5,0xe52f6a0e +DD 0xfcc7f5cd,0x21a13e76,0x43e6144a,0x9e80dff1 +DD 0x9b658ac1,0x4603417a,0x24446b46,0xf922a0fd +DD 0xe0ca3f3e,0x3dacf485,0x5febdeb9,0x828d1502 +DD 0x6c3ae13f,0xb15c2a84,0xd31b00b8,0x0e7dcb03 +DD 0x179554c0,0xcaf39f7b,0xa8b4b547,0x75d27efc +DD 0x431048bf,0x9e768304,0xfc31a938,0x21576283 +DD 0x38bffd40,0xe5d936fb,0x879e1cc7,0x5af8d77c +DD 0xb44f2341,0x6929e8fa,0x0b6ec2c6,0xd608097d +DD 0xcfe096be,0x12865d05,0x70c17739,0xada7bc82 +DD 0xa842e9b2,0x75242209,0x17630835,0xca05c38e +DD 0xd3ed5c4d,0x0e8b97f6,0x6cccbdca,0xb1aa7671 +DD 0x5f1d824c,0x827b49f7,0xe03c63cb,0x3d5aa870 +DD 0x24b237b3,0xf9d4fc08,0x9b93d634,0x46f51d8f +DD 0x90597c54,0x4d3fb7ef,0x2f789dd3,0xf21e5668 +DD 0xebf6c9ab,0x36900210,0x54d7282c,0x89b1e397 +DD 0x670617aa,0xba60dc11,0xd827f62d,0x05413d96 +DD 0x1ca9a255,0xc1cf69ee,0xa38843d2,0x7eee8869 +DD 0x7b0bdd59,0xa66d16e2,0xc42a3cde,0x194cf765 +DD 0x00a468a6,0xddc2a31d,0xbf858921,0x62e3429a +DD 0x8c54b6a7,0x51327d1c,0x33755720,0xee139c9b +DD 
0xf7fb0358,0x2a9dc8e3,0x48dae2df,0x95bc2964 +DD 0xe06e5798,0x3d089c23,0x5f4fb61f,0x82297da4 +DD 0x9bc1e267,0x46a729dc,0x24e003e0,0xf986c85b +DD 0x17313c66,0xca57f7dd,0xa810dde1,0x7576165a +DD 0x6c9e8999,0xb1f84222,0xd3bf681e,0x0ed9a3a5 +DD 0x0b3cf695,0xd65a3d2e,0xb41d1712,0x697bdca9 +DD 0x7093436a,0xadf588d1,0xcfb2a2ed,0x12d46956 +DD 0xfc639d6b,0x210556d0,0x43427cec,0x9e24b757 +DD 0x87cc2894,0x5aaae32f,0x38edc913,0xe58b02a8 +DD 0x33276373,0xee41a8c8,0x8c0682f4,0x5160494f +DD 0x4888d68c,0x95ee1d37,0xf7a9370b,0x2acffcb0 +DD 0xc478088d,0x191ec336,0x7b59e90a,0xa63f22b1 +DD 0xbfd7bd72,0x62b176c9,0x00f65cf5,0xdd90974e +DD 0xd875c27e,0x051309c5,0x675423f9,0xba32e842 +DD 0xa3da7781,0x7ebcbc3a,0x1cfb9606,0xc19d5dbd +DD 0x2f2aa980,0xf24c623b,0x900b4807,0x4d6d83bc +DD 0x54851c7f,0x89e3d7c4,0xeba4fdf8,0x36c23643 + +%macro slversion 4 +global %1_slver_%2%3%4 +global %1_slver +%1_slver: +%1_slver_%2%3%4: + dw 0x%4 + db 0x%3, 0x%2 +%endmacro +;;; func core, ver, snum +slversion crc32_iscsi_zero_00, 00, 02, 0014 +; inform linker that this doesn't require executable stack +section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/src/common/crc32c_ppc_asm.S b/src/common/crc32c_ppc_asm.S deleted file mode 100644 index 1dc6dd1cf31..00000000000 --- a/src/common/crc32c_ppc_asm.S +++ /dev/null @@ -1,771 +0,0 @@ -/* - * Calculate the checksum of data that is 16 byte aligned and a multiple of - * 16 bytes. - * - * The first step is to reduce it to 1024 bits. We do this in 8 parallel - * chunks in order to mask the latency of the vpmsum instructions. If we - * have more than 32 kB of data to checksum we repeat this step multiple - * times, passing in the previous 1024 bits. - * - * The next step is to reduce the 1024 bits to 64 bits. This step adds - * 32 bits of 0s to the end - this matches what a CRC does. We just - * calculate constants that land the data in this 32 bits. 
- * - * We then use fixed point Barrett reduction to compute a mod n over GF(2) - * for n = CRC using POWER8 instructions. We use x = 32. - * - * http://en.wikipedia.org/wiki/Barrett_reduction - * - * Copyright (C) 2015 Anton Blanchard , IBM - * Copyright (C) 2017 International Business Machines Corp. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include "common/ppc-opcode.h" - -#undef toc - -#ifndef r1 -#define r1 1 -#endif - -#ifndef r2 -#define r2 2 -#endif - - .section .rodata -.balign 16 - -.byteswap_constant: - /* byte reverse permute constant */ - .octa 0x0F0E0D0C0B0A09080706050403020100 - -#define __ASSEMBLY__ -#include "crc32c_ppc_constants.h" - - .text - -#if defined(__BIG_ENDIAN__) && defined(REFLECT) -#define BYTESWAP_DATA -#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) -#define BYTESWAP_DATA -#else -#undef BYTESWAP_DATA -#endif - -#define off16 r25 -#define off32 r26 -#define off48 r27 -#define off64 r28 -#define off80 r29 -#define off96 r30 -#define off112 r31 - -#define const1 v24 -#define const2 v25 - -#define byteswap v26 -#define mask_32bit v27 -#define mask_64bit v28 -#define zeroes v29 - -#ifdef BYTESWAP_DATA -#define VPERM(A, B, C, D) vperm A, B, C, D -#else -#define VPERM(A, B, C, D) -#endif - -/* unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len) */ -FUNC_START(__crc32_vpmsum) - std r31,-8(r1) - std r30,-16(r1) - std r29,-24(r1) - std r28,-32(r1) - std r27,-40(r1) - std r26,-48(r1) - std r25,-56(r1) - - li off16,16 - li off32,32 - li off48,48 - li off64,64 - li off80,80 - li off96,96 - li off112,112 - li r0,0 - - /* Enough room for saving 10 non volatile VMX registers */ - subi r6,r1,56+10*16 - subi r7,r1,56+2*16 - - stvx v20,0,r6 - stvx v21,off16,r6 - stvx 
v22,off32,r6 - stvx v23,off48,r6 - stvx v24,off64,r6 - stvx v25,off80,r6 - stvx v26,off96,r6 - stvx v27,off112,r6 - stvx v28,0,r7 - stvx v29,off16,r7 - - mr r10,r3 - - vxor zeroes,zeroes,zeroes - vspltisw v0,-1 - - vsldoi mask_32bit,zeroes,v0,4 - vsldoi mask_64bit,zeroes,v0,8 - - /* Get the initial value into v8 */ - vxor v8,v8,v8 - MTVRD(v8, r3) -#ifdef REFLECT - vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ -#else - vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */ -#endif - -#ifdef BYTESWAP_DATA - addis r3,r2,.byteswap_constant@toc@ha - addi r3,r3,.byteswap_constant@toc@l - - lvx byteswap,0,r3 - addi r3,r3,16 -#endif - - cmpdi r5,256 - blt .Lshort - - rldicr r6,r5,0,56 - - /* Checksum in blocks of MAX_SIZE */ -1: lis r7,MAX_SIZE@h - ori r7,r7,MAX_SIZE@l - mr r9,r7 - cmpd r6,r7 - bgt 2f - mr r7,r6 -2: subf r6,r7,r6 - - /* our main loop does 128 bytes at a time */ - srdi r7,r7,7 - - /* - * Work out the offset into the constants table to start at. Each - * constant is 16 bytes, and it is used against 128 bytes of input - * data - 128 / 16 = 8 - */ - sldi r8,r7,4 - srdi r9,r9,3 - subf r8,r8,r9 - - /* We reduce our final 128 bytes in a separate step */ - addi r7,r7,-1 - mtctr r7 - - addis r3,r2,.constants@toc@ha - addi r3,r3,.constants@toc@l - - /* Find the start of our constants */ - add r3,r3,r8 - - /* zero v0-v7 which will contain our checksums */ - vxor v0,v0,v0 - vxor v1,v1,v1 - vxor v2,v2,v2 - vxor v3,v3,v3 - vxor v4,v4,v4 - vxor v5,v5,v5 - vxor v6,v6,v6 - vxor v7,v7,v7 - - lvx const1,0,r3 - - /* - * If we are looping back to consume more data we use the values - * already in v16-v23. 
- */ - cmpdi r0,1 - beq 2f - - /* First warm up pass */ - lvx v16,0,r4 - lvx v17,off16,r4 - VPERM(v16,v16,v16,byteswap) - VPERM(v17,v17,v17,byteswap) - lvx v18,off32,r4 - lvx v19,off48,r4 - VPERM(v18,v18,v18,byteswap) - VPERM(v19,v19,v19,byteswap) - lvx v20,off64,r4 - lvx v21,off80,r4 - VPERM(v20,v20,v20,byteswap) - VPERM(v21,v21,v21,byteswap) - lvx v22,off96,r4 - lvx v23,off112,r4 - VPERM(v22,v22,v22,byteswap) - VPERM(v23,v23,v23,byteswap) - addi r4,r4,8*16 - - /* xor in initial value */ - vxor v16,v16,v8 - -2: bdz .Lfirst_warm_up_done - - addi r3,r3,16 - lvx const2,0,r3 - - /* Second warm up pass */ - VPMSUMD(v8,v16,const1) - lvx v16,0,r4 - VPERM(v16,v16,v16,byteswap) - ori r2,r2,0 - - VPMSUMD(v9,v17,const1) - lvx v17,off16,r4 - VPERM(v17,v17,v17,byteswap) - ori r2,r2,0 - - VPMSUMD(v10,v18,const1) - lvx v18,off32,r4 - VPERM(v18,v18,v18,byteswap) - ori r2,r2,0 - - VPMSUMD(v11,v19,const1) - lvx v19,off48,r4 - VPERM(v19,v19,v19,byteswap) - ori r2,r2,0 - - VPMSUMD(v12,v20,const1) - lvx v20,off64,r4 - VPERM(v20,v20,v20,byteswap) - ori r2,r2,0 - - VPMSUMD(v13,v21,const1) - lvx v21,off80,r4 - VPERM(v21,v21,v21,byteswap) - ori r2,r2,0 - - VPMSUMD(v14,v22,const1) - lvx v22,off96,r4 - VPERM(v22,v22,v22,byteswap) - ori r2,r2,0 - - VPMSUMD(v15,v23,const1) - lvx v23,off112,r4 - VPERM(v23,v23,v23,byteswap) - - addi r4,r4,8*16 - - bdz .Lfirst_cool_down - - /* - * main loop. We modulo schedule it such that it takes three iterations - * to complete - first iteration load, second iteration vpmsum, third - * iteration xor. 
- */ - .balign 16 -4: lvx const1,0,r3 - addi r3,r3,16 - ori r2,r2,0 - - vxor v0,v0,v8 - VPMSUMD(v8,v16,const2) - lvx v16,0,r4 - VPERM(v16,v16,v16,byteswap) - ori r2,r2,0 - - vxor v1,v1,v9 - VPMSUMD(v9,v17,const2) - lvx v17,off16,r4 - VPERM(v17,v17,v17,byteswap) - ori r2,r2,0 - - vxor v2,v2,v10 - VPMSUMD(v10,v18,const2) - lvx v18,off32,r4 - VPERM(v18,v18,v18,byteswap) - ori r2,r2,0 - - vxor v3,v3,v11 - VPMSUMD(v11,v19,const2) - lvx v19,off48,r4 - VPERM(v19,v19,v19,byteswap) - lvx const2,0,r3 - ori r2,r2,0 - - vxor v4,v4,v12 - VPMSUMD(v12,v20,const1) - lvx v20,off64,r4 - VPERM(v20,v20,v20,byteswap) - ori r2,r2,0 - - vxor v5,v5,v13 - VPMSUMD(v13,v21,const1) - lvx v21,off80,r4 - VPERM(v21,v21,v21,byteswap) - ori r2,r2,0 - - vxor v6,v6,v14 - VPMSUMD(v14,v22,const1) - lvx v22,off96,r4 - VPERM(v22,v22,v22,byteswap) - ori r2,r2,0 - - vxor v7,v7,v15 - VPMSUMD(v15,v23,const1) - lvx v23,off112,r4 - VPERM(v23,v23,v23,byteswap) - - addi r4,r4,8*16 - - bdnz 4b - -.Lfirst_cool_down: - /* First cool down pass */ - lvx const1,0,r3 - addi r3,r3,16 - - vxor v0,v0,v8 - VPMSUMD(v8,v16,const1) - ori r2,r2,0 - - vxor v1,v1,v9 - VPMSUMD(v9,v17,const1) - ori r2,r2,0 - - vxor v2,v2,v10 - VPMSUMD(v10,v18,const1) - ori r2,r2,0 - - vxor v3,v3,v11 - VPMSUMD(v11,v19,const1) - ori r2,r2,0 - - vxor v4,v4,v12 - VPMSUMD(v12,v20,const1) - ori r2,r2,0 - - vxor v5,v5,v13 - VPMSUMD(v13,v21,const1) - ori r2,r2,0 - - vxor v6,v6,v14 - VPMSUMD(v14,v22,const1) - ori r2,r2,0 - - vxor v7,v7,v15 - VPMSUMD(v15,v23,const1) - ori r2,r2,0 - -.Lsecond_cool_down: - /* Second cool down pass */ - vxor v0,v0,v8 - vxor v1,v1,v9 - vxor v2,v2,v10 - vxor v3,v3,v11 - vxor v4,v4,v12 - vxor v5,v5,v13 - vxor v6,v6,v14 - vxor v7,v7,v15 - -#ifdef REFLECT - /* - * vpmsumd produces a 96 bit result in the least significant bits - * of the register. Since we are bit reflected we have to shift it - * left 32 bits so it occupies the least significant bits in the - * bit reflected domain. 
- */ - vsldoi v0,v0,zeroes,4 - vsldoi v1,v1,zeroes,4 - vsldoi v2,v2,zeroes,4 - vsldoi v3,v3,zeroes,4 - vsldoi v4,v4,zeroes,4 - vsldoi v5,v5,zeroes,4 - vsldoi v6,v6,zeroes,4 - vsldoi v7,v7,zeroes,4 -#endif - - /* xor with last 1024 bits */ - lvx v8,0,r4 - lvx v9,off16,r4 - VPERM(v8,v8,v8,byteswap) - VPERM(v9,v9,v9,byteswap) - lvx v10,off32,r4 - lvx v11,off48,r4 - VPERM(v10,v10,v10,byteswap) - VPERM(v11,v11,v11,byteswap) - lvx v12,off64,r4 - lvx v13,off80,r4 - VPERM(v12,v12,v12,byteswap) - VPERM(v13,v13,v13,byteswap) - lvx v14,off96,r4 - lvx v15,off112,r4 - VPERM(v14,v14,v14,byteswap) - VPERM(v15,v15,v15,byteswap) - - addi r4,r4,8*16 - - vxor v16,v0,v8 - vxor v17,v1,v9 - vxor v18,v2,v10 - vxor v19,v3,v11 - vxor v20,v4,v12 - vxor v21,v5,v13 - vxor v22,v6,v14 - vxor v23,v7,v15 - - li r0,1 - cmpdi r6,0 - addi r6,r6,128 - bne 1b - - /* Work out how many bytes we have left */ - andi. r5,r5,127 - - /* Calculate where in the constant table we need to start */ - subfic r6,r5,128 - add r3,r3,r6 - - /* How many 16 byte chunks are in the tail */ - srdi r7,r5,4 - mtctr r7 - - /* - * Reduce the previously calculated 1024 bits to 64 bits, shifting - * 32 bits to include the trailing 32 bits of zeros - */ - lvx v0,0,r3 - lvx v1,off16,r3 - lvx v2,off32,r3 - lvx v3,off48,r3 - lvx v4,off64,r3 - lvx v5,off80,r3 - lvx v6,off96,r3 - lvx v7,off112,r3 - addi r3,r3,8*16 - - VPMSUMW(v0,v16,v0) - VPMSUMW(v1,v17,v1) - VPMSUMW(v2,v18,v2) - VPMSUMW(v3,v19,v3) - VPMSUMW(v4,v20,v4) - VPMSUMW(v5,v21,v5) - VPMSUMW(v6,v22,v6) - VPMSUMW(v7,v23,v7) - - /* Now reduce the tail (0 - 112 bytes) */ - cmpdi r7,0 - beq 1f - - lvx v16,0,r4 - lvx v17,0,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off16,r4 - lvx v17,off16,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off32,r4 - lvx v17,off32,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off48,r4 - lvx v17,off48,r3 - 
VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off64,r4 - lvx v17,off64,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off80,r4 - lvx v17,off80,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off96,r4 - lvx v17,off96,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - - /* Now xor all the parallel chunks together */ -1: vxor v0,v0,v1 - vxor v2,v2,v3 - vxor v4,v4,v5 - vxor v6,v6,v7 - - vxor v0,v0,v2 - vxor v4,v4,v6 - - vxor v0,v0,v4 - -.Lbarrett_reduction: - /* Barrett constants */ - addis r3,r2,.barrett_constants@toc@ha - addi r3,r3,.barrett_constants@toc@l - - lvx const1,0,r3 - lvx const2,off16,r3 - - vsldoi v1,v0,v0,8 - vxor v0,v0,v1 /* xor two 64 bit results together */ - -#ifdef REFLECT - /* shift left one bit */ - vspltisb v1,1 - vsl v0,v0,v1 -#endif - - vand v0,v0,mask_64bit - -#ifndef REFLECT - /* - * Now for the Barrett reduction algorithm. The idea is to calculate q, - * the multiple of our polynomial that we need to subtract. By - * doing the computation 2x bits higher (ie 64 bits) and shifting the - * result back down 2x bits, we round down to the nearest multiple. - */ - VPMSUMD(v1,v0,const1) /* ma */ - vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */ - VPMSUMD(v1,v1,const2) /* qn */ - vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ - - /* - * Get the result into r3. We need to shift it left 8 bytes: - * V0 [ 0 1 2 X ] - * V0 [ 0 X 2 3 ] - */ - vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */ -#else - /* - * The reflected version of Barrett reduction. Instead of bit - * reflecting our data (which is expensive to do), we bit reflect our - * constants and our algorithm, which means the intermediate data in - * our vector registers goes from 0-63 instead of 63-0. We can reflect - * the algorithm because we don't carry in mod 2 arithmetic. 
- */ - vand v1,v0,mask_32bit /* bottom 32 bits of a */ - VPMSUMD(v1,v1,const1) /* ma */ - vand v1,v1,mask_32bit /* bottom 32bits of ma */ - VPMSUMD(v1,v1,const2) /* qn */ - vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ - - /* - * Since we are bit reflected, the result (ie the low 32 bits) is in - * the high 32 bits. We just need to shift it left 4 bytes - * V0 [ 0 1 X 3 ] - * V0 [ 0 X 2 3 ] - */ - vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ -#endif - - /* Get it into r3 */ - MFVRD(r3, v0) - -.Lout: - subi r6,r1,56+10*16 - subi r7,r1,56+2*16 - - lvx v20,0,r6 - lvx v21,off16,r6 - lvx v22,off32,r6 - lvx v23,off48,r6 - lvx v24,off64,r6 - lvx v25,off80,r6 - lvx v26,off96,r6 - lvx v27,off112,r6 - lvx v28,0,r7 - lvx v29,off16,r7 - - ld r31,-8(r1) - ld r30,-16(r1) - ld r29,-24(r1) - ld r28,-32(r1) - ld r27,-40(r1) - ld r26,-48(r1) - ld r25,-56(r1) - - blr - -.Lfirst_warm_up_done: - lvx const1,0,r3 - addi r3,r3,16 - - VPMSUMD(v8,v16,const1) - VPMSUMD(v9,v17,const1) - VPMSUMD(v10,v18,const1) - VPMSUMD(v11,v19,const1) - VPMSUMD(v12,v20,const1) - VPMSUMD(v13,v21,const1) - VPMSUMD(v14,v22,const1) - VPMSUMD(v15,v23,const1) - - b .Lsecond_cool_down - -.Lshort: - cmpdi r5,0 - beq .Lzero - - addis r3,r2,.short_constants@toc@ha - addi r3,r3,.short_constants@toc@l - - /* Calculate where in the constant table we need to start */ - subfic r6,r5,256 - add r3,r3,r6 - - /* How many 16 byte chunks? 
*/ - srdi r7,r5,4 - mtctr r7 - - vxor v19,v19,v19 - vxor v20,v20,v20 - - lvx v0,0,r4 - lvx v16,0,r3 - VPERM(v0,v0,v16,byteswap) - vxor v0,v0,v8 /* xor in initial value */ - VPMSUMW(v0,v0,v16) - bdz .Lv0 - - lvx v1,off16,r4 - lvx v17,off16,r3 - VPERM(v1,v1,v17,byteswap) - VPMSUMW(v1,v1,v17) - bdz .Lv1 - - lvx v2,off32,r4 - lvx v16,off32,r3 - VPERM(v2,v2,v16,byteswap) - VPMSUMW(v2,v2,v16) - bdz .Lv2 - - lvx v3,off48,r4 - lvx v17,off48,r3 - VPERM(v3,v3,v17,byteswap) - VPMSUMW(v3,v3,v17) - bdz .Lv3 - - lvx v4,off64,r4 - lvx v16,off64,r3 - VPERM(v4,v4,v16,byteswap) - VPMSUMW(v4,v4,v16) - bdz .Lv4 - - lvx v5,off80,r4 - lvx v17,off80,r3 - VPERM(v5,v5,v17,byteswap) - VPMSUMW(v5,v5,v17) - bdz .Lv5 - - lvx v6,off96,r4 - lvx v16,off96,r3 - VPERM(v6,v6,v16,byteswap) - VPMSUMW(v6,v6,v16) - bdz .Lv6 - - lvx v7,off112,r4 - lvx v17,off112,r3 - VPERM(v7,v7,v17,byteswap) - VPMSUMW(v7,v7,v17) - bdz .Lv7 - - addi r3,r3,128 - addi r4,r4,128 - - lvx v8,0,r4 - lvx v16,0,r3 - VPERM(v8,v8,v16,byteswap) - VPMSUMW(v8,v8,v16) - bdz .Lv8 - - lvx v9,off16,r4 - lvx v17,off16,r3 - VPERM(v9,v9,v17,byteswap) - VPMSUMW(v9,v9,v17) - bdz .Lv9 - - lvx v10,off32,r4 - lvx v16,off32,r3 - VPERM(v10,v10,v16,byteswap) - VPMSUMW(v10,v10,v16) - bdz .Lv10 - - lvx v11,off48,r4 - lvx v17,off48,r3 - VPERM(v11,v11,v17,byteswap) - VPMSUMW(v11,v11,v17) - bdz .Lv11 - - lvx v12,off64,r4 - lvx v16,off64,r3 - VPERM(v12,v12,v16,byteswap) - VPMSUMW(v12,v12,v16) - bdz .Lv12 - - lvx v13,off80,r4 - lvx v17,off80,r3 - VPERM(v13,v13,v17,byteswap) - VPMSUMW(v13,v13,v17) - bdz .Lv13 - - lvx v14,off96,r4 - lvx v16,off96,r3 - VPERM(v14,v14,v16,byteswap) - VPMSUMW(v14,v14,v16) - bdz .Lv14 - - lvx v15,off112,r4 - lvx v17,off112,r3 - VPERM(v15,v15,v17,byteswap) - VPMSUMW(v15,v15,v17) - -.Lv15: vxor v19,v19,v15 -.Lv14: vxor v20,v20,v14 -.Lv13: vxor v19,v19,v13 -.Lv12: vxor v20,v20,v12 -.Lv11: vxor v19,v19,v11 -.Lv10: vxor v20,v20,v10 -.Lv9: vxor v19,v19,v9 -.Lv8: vxor v20,v20,v8 -.Lv7: vxor v19,v19,v7 -.Lv6: vxor v20,v20,v6 -.Lv5: vxor 
v19,v19,v5 -.Lv4: vxor v20,v20,v4 -.Lv3: vxor v19,v19,v3 -.Lv2: vxor v20,v20,v2 -.Lv1: vxor v19,v19,v1 -.Lv0: vxor v20,v20,v0 - - vxor v0,v19,v20 - - b .Lbarrett_reduction - -.Lzero: - mr r3,r10 - b .Lout - -FUNC_END(__crc32_vpmsum) diff --git a/src/common/crc32c_ppc_asm.s b/src/common/crc32c_ppc_asm.s new file mode 100644 index 00000000000..1dc6dd1cf31 --- /dev/null +++ b/src/common/crc32c_ppc_asm.s @@ -0,0 +1,771 @@ +/* + * Calculate the checksum of data that is 16 byte aligned and a multiple of + * 16 bytes. + * + * The first step is to reduce it to 1024 bits. We do this in 8 parallel + * chunks in order to mask the latency of the vpmsum instructions. If we + * have more than 32 kB of data to checksum we repeat this step multiple + * times, passing in the previous 1024 bits. + * + * The next step is to reduce the 1024 bits to 64 bits. This step adds + * 32 bits of 0s to the end - this matches what a CRC does. We just + * calculate constants that land the data in this 32 bits. + * + * We then use fixed point Barrett reduction to compute a mod n over GF(2) + * for n = CRC using POWER8 instructions. We use x = 32. + * + * http://en.wikipedia.org/wiki/Barrett_reduction + * + * Copyright (C) 2015 Anton Blanchard , IBM + * Copyright (C) 2017 International Business Machines Corp. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include "common/ppc-opcode.h" + +#undef toc + +#ifndef r1 +#define r1 1 +#endif + +#ifndef r2 +#define r2 2 +#endif + + .section .rodata +.balign 16 + +.byteswap_constant: + /* byte reverse permute constant */ + .octa 0x0F0E0D0C0B0A09080706050403020100 + +#define __ASSEMBLY__ +#include "crc32c_ppc_constants.h" + + .text + +#if defined(__BIG_ENDIAN__) && defined(REFLECT) +#define BYTESWAP_DATA +#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) +#define BYTESWAP_DATA +#else +#undef BYTESWAP_DATA +#endif + +#define off16 r25 +#define off32 r26 +#define off48 r27 +#define off64 r28 +#define off80 r29 +#define off96 r30 +#define off112 r31 + +#define const1 v24 +#define const2 v25 + +#define byteswap v26 +#define mask_32bit v27 +#define mask_64bit v28 +#define zeroes v29 + +#ifdef BYTESWAP_DATA +#define VPERM(A, B, C, D) vperm A, B, C, D +#else +#define VPERM(A, B, C, D) +#endif + +/* unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len) */ +FUNC_START(__crc32_vpmsum) + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + li off16,16 + li off32,32 + li off48,48 + li off64,64 + li off80,80 + li off96,96 + li off112,112 + li r0,0 + + /* Enough room for saving 10 non volatile VMX registers */ + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + stvx v20,0,r6 + stvx v21,off16,r6 + stvx v22,off32,r6 + stvx v23,off48,r6 + stvx v24,off64,r6 + stvx v25,off80,r6 + stvx v26,off96,r6 + stvx v27,off112,r6 + stvx v28,0,r7 + stvx v29,off16,r7 + + mr r10,r3 + + vxor zeroes,zeroes,zeroes + vspltisw v0,-1 + + vsldoi mask_32bit,zeroes,v0,4 + vsldoi mask_64bit,zeroes,v0,8 + + /* Get the initial value into v8 */ + vxor v8,v8,v8 + MTVRD(v8, r3) +#ifdef REFLECT + vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ +#else + vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */ +#endif + +#ifdef BYTESWAP_DATA + addis r3,r2,.byteswap_constant@toc@ha + addi r3,r3,.byteswap_constant@toc@l + 
+ lvx byteswap,0,r3 + addi r3,r3,16 +#endif + + cmpdi r5,256 + blt .Lshort + + rldicr r6,r5,0,56 + + /* Checksum in blocks of MAX_SIZE */ +1: lis r7,MAX_SIZE@h + ori r7,r7,MAX_SIZE@l + mr r9,r7 + cmpd r6,r7 + bgt 2f + mr r7,r6 +2: subf r6,r7,r6 + + /* our main loop does 128 bytes at a time */ + srdi r7,r7,7 + + /* + * Work out the offset into the constants table to start at. Each + * constant is 16 bytes, and it is used against 128 bytes of input + * data - 128 / 16 = 8 + */ + sldi r8,r7,4 + srdi r9,r9,3 + subf r8,r8,r9 + + /* We reduce our final 128 bytes in a separate step */ + addi r7,r7,-1 + mtctr r7 + + addis r3,r2,.constants@toc@ha + addi r3,r3,.constants@toc@l + + /* Find the start of our constants */ + add r3,r3,r8 + + /* zero v0-v7 which will contain our checksums */ + vxor v0,v0,v0 + vxor v1,v1,v1 + vxor v2,v2,v2 + vxor v3,v3,v3 + vxor v4,v4,v4 + vxor v5,v5,v5 + vxor v6,v6,v6 + vxor v7,v7,v7 + + lvx const1,0,r3 + + /* + * If we are looping back to consume more data we use the values + * already in v16-v23. 
+ */ + cmpdi r0,1 + beq 2f + + /* First warm up pass */ + lvx v16,0,r4 + lvx v17,off16,r4 + VPERM(v16,v16,v16,byteswap) + VPERM(v17,v17,v17,byteswap) + lvx v18,off32,r4 + lvx v19,off48,r4 + VPERM(v18,v18,v18,byteswap) + VPERM(v19,v19,v19,byteswap) + lvx v20,off64,r4 + lvx v21,off80,r4 + VPERM(v20,v20,v20,byteswap) + VPERM(v21,v21,v21,byteswap) + lvx v22,off96,r4 + lvx v23,off112,r4 + VPERM(v22,v22,v22,byteswap) + VPERM(v23,v23,v23,byteswap) + addi r4,r4,8*16 + + /* xor in initial value */ + vxor v16,v16,v8 + +2: bdz .Lfirst_warm_up_done + + addi r3,r3,16 + lvx const2,0,r3 + + /* Second warm up pass */ + VPMSUMD(v8,v16,const1) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + VPMSUMD(v9,v17,const1) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + VPMSUMD(v10,v18,const1) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + VPMSUMD(v11,v19,const1) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + ori r2,r2,0 + + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdz .Lfirst_cool_down + + /* + * main loop. We modulo schedule it such that it takes three iterations + * to complete - first iteration load, second iteration vpmsum, third + * iteration xor. 
+ */ + .balign 16 +4: lvx const1,0,r3 + addi r3,r3,16 + ori r2,r2,0 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const2) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const2) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const2) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const2) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + lvx const2,0,r3 + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdnz 4b + +.Lfirst_cool_down: + /* First cool down pass */ + lvx const1,0,r3 + addi r3,r3,16 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const1) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const1) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const1) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const1) + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + ori r2,r2,0 + +.Lsecond_cool_down: + /* Second cool down pass */ + vxor v0,v0,v8 + vxor v1,v1,v9 + vxor v2,v2,v10 + vxor v3,v3,v11 + vxor v4,v4,v12 + vxor v5,v5,v13 + vxor v6,v6,v14 + vxor v7,v7,v15 + +#ifdef REFLECT + /* + * vpmsumd produces a 96 bit result in the least significant bits + * of the register. Since we are bit reflected we have to shift it + * left 32 bits so it occupies the least significant bits in the + * bit reflected domain. 
+ */ + vsldoi v0,v0,zeroes,4 + vsldoi v1,v1,zeroes,4 + vsldoi v2,v2,zeroes,4 + vsldoi v3,v3,zeroes,4 + vsldoi v4,v4,zeroes,4 + vsldoi v5,v5,zeroes,4 + vsldoi v6,v6,zeroes,4 + vsldoi v7,v7,zeroes,4 +#endif + + /* xor with last 1024 bits */ + lvx v8,0,r4 + lvx v9,off16,r4 + VPERM(v8,v8,v8,byteswap) + VPERM(v9,v9,v9,byteswap) + lvx v10,off32,r4 + lvx v11,off48,r4 + VPERM(v10,v10,v10,byteswap) + VPERM(v11,v11,v11,byteswap) + lvx v12,off64,r4 + lvx v13,off80,r4 + VPERM(v12,v12,v12,byteswap) + VPERM(v13,v13,v13,byteswap) + lvx v14,off96,r4 + lvx v15,off112,r4 + VPERM(v14,v14,v14,byteswap) + VPERM(v15,v15,v15,byteswap) + + addi r4,r4,8*16 + + vxor v16,v0,v8 + vxor v17,v1,v9 + vxor v18,v2,v10 + vxor v19,v3,v11 + vxor v20,v4,v12 + vxor v21,v5,v13 + vxor v22,v6,v14 + vxor v23,v7,v15 + + li r0,1 + cmpdi r6,0 + addi r6,r6,128 + bne 1b + + /* Work out how many bytes we have left */ + andi. r5,r5,127 + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,128 + add r3,r3,r6 + + /* How many 16 byte chunks are in the tail */ + srdi r7,r5,4 + mtctr r7 + + /* + * Reduce the previously calculated 1024 bits to 64 bits, shifting + * 32 bits to include the trailing 32 bits of zeros + */ + lvx v0,0,r3 + lvx v1,off16,r3 + lvx v2,off32,r3 + lvx v3,off48,r3 + lvx v4,off64,r3 + lvx v5,off80,r3 + lvx v6,off96,r3 + lvx v7,off112,r3 + addi r3,r3,8*16 + + VPMSUMW(v0,v16,v0) + VPMSUMW(v1,v17,v1) + VPMSUMW(v2,v18,v2) + VPMSUMW(v3,v19,v3) + VPMSUMW(v4,v20,v4) + VPMSUMW(v5,v21,v5) + VPMSUMW(v6,v22,v6) + VPMSUMW(v7,v23,v7) + + /* Now reduce the tail (0 - 112 bytes) */ + cmpdi r7,0 + beq 1f + + lvx v16,0,r4 + lvx v17,0,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off16,r4 + lvx v17,off16,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off32,r4 + lvx v17,off32,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off48,r4 + lvx v17,off48,r3 + 
VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off64,r4 + lvx v17,off64,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off80,r4 + lvx v17,off80,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off96,r4 + lvx v17,off96,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + + /* Now xor all the parallel chunks together */ +1: vxor v0,v0,v1 + vxor v2,v2,v3 + vxor v4,v4,v5 + vxor v6,v6,v7 + + vxor v0,v0,v2 + vxor v4,v4,v6 + + vxor v0,v0,v4 + +.Lbarrett_reduction: + /* Barrett constants */ + addis r3,r2,.barrett_constants@toc@ha + addi r3,r3,.barrett_constants@toc@l + + lvx const1,0,r3 + lvx const2,off16,r3 + + vsldoi v1,v0,v0,8 + vxor v0,v0,v1 /* xor two 64 bit results together */ + +#ifdef REFLECT + /* shift left one bit */ + vspltisb v1,1 + vsl v0,v0,v1 +#endif + + vand v0,v0,mask_64bit + +#ifndef REFLECT + /* + * Now for the Barrett reduction algorithm. The idea is to calculate q, + * the multiple of our polynomial that we need to subtract. By + * doing the computation 2x bits higher (ie 64 bits) and shifting the + * result back down 2x bits, we round down to the nearest multiple. + */ + VPMSUMD(v1,v0,const1) /* ma */ + vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Get the result into r3. We need to shift it left 8 bytes: + * V0 [ 0 1 2 X ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */ +#else + /* + * The reflected version of Barrett reduction. Instead of bit + * reflecting our data (which is expensive to do), we bit reflect our + * constants and our algorithm, which means the intermediate data in + * our vector registers goes from 0-63 instead of 63-0. We can reflect + * the algorithm because we don't carry in mod 2 arithmetic. 
+ */ + vand v1,v0,mask_32bit /* bottom 32 bits of a */ + VPMSUMD(v1,v1,const1) /* ma */ + vand v1,v1,mask_32bit /* bottom 32bits of ma */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in + * the high 32 bits. We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ +#endif + + /* Get it into r3 */ + MFVRD(r3, v0) + +.Lout: + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + lvx v20,0,r6 + lvx v21,off16,r6 + lvx v22,off32,r6 + lvx v23,off48,r6 + lvx v24,off64,r6 + lvx v25,off80,r6 + lvx v26,off96,r6 + lvx v27,off112,r6 + lvx v28,0,r7 + lvx v29,off16,r7 + + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + ld r26,-48(r1) + ld r25,-56(r1) + + blr + +.Lfirst_warm_up_done: + lvx const1,0,r3 + addi r3,r3,16 + + VPMSUMD(v8,v16,const1) + VPMSUMD(v9,v17,const1) + VPMSUMD(v10,v18,const1) + VPMSUMD(v11,v19,const1) + VPMSUMD(v12,v20,const1) + VPMSUMD(v13,v21,const1) + VPMSUMD(v14,v22,const1) + VPMSUMD(v15,v23,const1) + + b .Lsecond_cool_down + +.Lshort: + cmpdi r5,0 + beq .Lzero + + addis r3,r2,.short_constants@toc@ha + addi r3,r3,.short_constants@toc@l + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,256 + add r3,r3,r6 + + /* How many 16 byte chunks? 
*/ + srdi r7,r5,4 + mtctr r7 + + vxor v19,v19,v19 + vxor v20,v20,v20 + + lvx v0,0,r4 + lvx v16,0,r3 + VPERM(v0,v0,v16,byteswap) + vxor v0,v0,v8 /* xor in initial value */ + VPMSUMW(v0,v0,v16) + bdz .Lv0 + + lvx v1,off16,r4 + lvx v17,off16,r3 + VPERM(v1,v1,v17,byteswap) + VPMSUMW(v1,v1,v17) + bdz .Lv1 + + lvx v2,off32,r4 + lvx v16,off32,r3 + VPERM(v2,v2,v16,byteswap) + VPMSUMW(v2,v2,v16) + bdz .Lv2 + + lvx v3,off48,r4 + lvx v17,off48,r3 + VPERM(v3,v3,v17,byteswap) + VPMSUMW(v3,v3,v17) + bdz .Lv3 + + lvx v4,off64,r4 + lvx v16,off64,r3 + VPERM(v4,v4,v16,byteswap) + VPMSUMW(v4,v4,v16) + bdz .Lv4 + + lvx v5,off80,r4 + lvx v17,off80,r3 + VPERM(v5,v5,v17,byteswap) + VPMSUMW(v5,v5,v17) + bdz .Lv5 + + lvx v6,off96,r4 + lvx v16,off96,r3 + VPERM(v6,v6,v16,byteswap) + VPMSUMW(v6,v6,v16) + bdz .Lv6 + + lvx v7,off112,r4 + lvx v17,off112,r3 + VPERM(v7,v7,v17,byteswap) + VPMSUMW(v7,v7,v17) + bdz .Lv7 + + addi r3,r3,128 + addi r4,r4,128 + + lvx v8,0,r4 + lvx v16,0,r3 + VPERM(v8,v8,v16,byteswap) + VPMSUMW(v8,v8,v16) + bdz .Lv8 + + lvx v9,off16,r4 + lvx v17,off16,r3 + VPERM(v9,v9,v17,byteswap) + VPMSUMW(v9,v9,v17) + bdz .Lv9 + + lvx v10,off32,r4 + lvx v16,off32,r3 + VPERM(v10,v10,v16,byteswap) + VPMSUMW(v10,v10,v16) + bdz .Lv10 + + lvx v11,off48,r4 + lvx v17,off48,r3 + VPERM(v11,v11,v17,byteswap) + VPMSUMW(v11,v11,v17) + bdz .Lv11 + + lvx v12,off64,r4 + lvx v16,off64,r3 + VPERM(v12,v12,v16,byteswap) + VPMSUMW(v12,v12,v16) + bdz .Lv12 + + lvx v13,off80,r4 + lvx v17,off80,r3 + VPERM(v13,v13,v17,byteswap) + VPMSUMW(v13,v13,v17) + bdz .Lv13 + + lvx v14,off96,r4 + lvx v16,off96,r3 + VPERM(v14,v14,v16,byteswap) + VPMSUMW(v14,v14,v16) + bdz .Lv14 + + lvx v15,off112,r4 + lvx v17,off112,r3 + VPERM(v15,v15,v17,byteswap) + VPMSUMW(v15,v15,v17) + +.Lv15: vxor v19,v19,v15 +.Lv14: vxor v20,v20,v14 +.Lv13: vxor v19,v19,v13 +.Lv12: vxor v20,v20,v12 +.Lv11: vxor v19,v19,v11 +.Lv10: vxor v20,v20,v10 +.Lv9: vxor v19,v19,v9 +.Lv8: vxor v20,v20,v8 +.Lv7: vxor v19,v19,v7 +.Lv6: vxor v20,v20,v6 +.Lv5: vxor 
v19,v19,v5 +.Lv4: vxor v20,v20,v4 +.Lv3: vxor v19,v19,v3 +.Lv2: vxor v20,v20,v2 +.Lv1: vxor v19,v19,v1 +.Lv0: vxor v20,v20,v0 + + vxor v0,v19,v20 + + b .Lbarrett_reduction + +.Lzero: + mr r3,r10 + b .Lout + +FUNC_END(__crc32_vpmsum)