CRC
In the past I have been using a standard algorithm for error checking communication of data between devices. The cyclic redundancy check algorithm, CRC-16 was implemented with a 256 byte lookup table, or more complex and slow calculations. Recently I was reading about a shorter, quicker algorithm based on two 16 word tables.
The C version is for x86 and ARM:
/* crc16 - crc16 routine * Thanks to R.K. Irvine, William James Hunt, Rex, & Binstock * This routine returns the crc16 for buf. * crc16 is given by: x^16 + x^15 + x^2 + 1 */ static uint16_t crc16l[] = { 0x0000, 0xc0c1, 0xc181, 0x0140, 0xc301, 0x03c0, 0x0280, 0xc241, 0xc601, 0x06c0, 0x0780, 0xc741, 0x0500, 0xc5c1, 0xc481, 0x0440 }; static uint16_t crc16h[] = { 0x0000, 0xcc01, 0xd801, 0x1400, 0xf001, 0x3c00, 0x2800, 0xe401, 0xa001, 0x6c00, 0x7800, 0xb401, 0x5000, 0x9c01, 0x8801, 0x4400, }; uint16_t crc16(char *buf, int bnobs, uint16_t crc) { uint8_t n; while(bnobs-- > 0) { n = *buf++ ^ crc; crc = (crc >> 8) ^ crc16l[n & 0x0f] ^ crc16h[(n >> 4) & 0x0f]; } return(crc); }
x86 assembler version:
; =============== S U B R O U T I N E ======================================= ; Call with: DS:SI = buffer to CRC check ; AX = char ; DX = start CRC ; ; =========================================================================== i33crc: ;crc16() push bx mov bl, al xor bl, dl ; n ^= crc push bx and bl, 0fh xor bh, bh shl bx, 1 ; word index mov ax, [cs:crc16l+bx] ; crc16l[n&0fh] xchg dl, dh ; crc >> 8 xor dh,dh xor dx, ax ; crc = (crc >> 8) ^ crc16l[n&0fh] pop bx shr bl, 1 shr bl, 1 shr bl, 1 shr bl, 1 and bl, 0fh xor bh, bh shl bx, 1 mov ax, [cs:crc16h+bx] ; crc16h[(n>>8) & 0fh] xor dx, ax ; ^ (crc >>8) mov ax, dx pop bx retn i33crcend: ; ; =============== S U B R O U T I N E ======================================= ; Call with: DS:SI = buffer to CRC check ; CX = length of buffer ; ; =========================================================================== i33crct: ;crc16t() push bx push dx cld ; auto inc xor ax, ax ; n =0; xor dx, dx ; crc = 0 .whrxn: ; while rxnobs > 0 lodsb ; n = *bfr++ call i33crc loop .whrxn .ewhrxn: pop dx pop bx retn i33crctend: ; crc16l: dw 00000h, 0c0c1h, 0c181h, 00140h, 0c301h, 003c0h, 00280h, 0c241h dw 0c601h, 006c0h, 00780h, 0c741h, 00500h, 0c5c1h, 0c481h, 00440h crc16h: dw 00000h, 0cc01h, 0d801h, 01400h, 0f001h, 03c00h, 02800h, 0e401h dw 0a001h, 06c00h, 07800h, 0b401h, 05000h, 09c01h, 08801h, 04400h
Z80 assembler version
Z80: ;***************************************************************************************** ; CRC16t ; crc16 => x^16 + x^15 + x^2 + 1 ;***************************************************************************************** ; crctl: defw 0000h, 0C0C1h, 0C181h, 0140h, 0C301h, 03C0h, 0280h, 0C241h, defw 0C601h, 06C0h, 0780h, 0C741h, 0500h, 0C5C1h, 0C481h, 0440h ; crcth: defw 0000h, 0CC01h, 0D801h, 1400h, 0F001h, 3C00h, 2800h, 0E401h defw 0A001h, 6C00h, 7800h, 0B401h, 5000h, 9C01h, 8801h, 4400h ; crc16t: ld bc, 400h ;count = 1024 ld de, 0 ;crc = 0 ld hl, (crcaddr) ;hl -> buf repeatcrc: push bc ;repeat ld ix, crctl ; ix -> crc table low ld iy, crcth ; iy -> crc table high ld c, (hl) ; n = *buf++ inc hl ld a, e ; n ^= crc xor c ld c, a push bc ; crc16l[n & 0fh] ld b,0 ld a, 0fh and c rla ;//times 2 16 bit pointer ld c,a add ix, bc pop bc push bc ; crc16h[(n >> 4) & 0fh] ld b,0 ld a, c rra rra rra rra and 0fh rla ;//times 2 16 bit pointer ld c,a add iy, bc pop bc xor a ; crc (low byte) = xor (ix+0) ; crc16l[n & 0fh] ^ xor (iy+0) ; crc16h[(n >> 4) & 0fh] ^ xor d ; crc >> 8 ld e,a xor a ; crc (high byte) = xor (ix+1) ; crc16l[n & 0fh] ^ xor (iy+1) ; crc16h[(n >> 4) & 0fh] ^ ld d,a pop bc dec bc ld a, b or c jp nz, repeatcrc ;until block count = 0 ld (crccalcd), de ret
The cost of 64 bytes (ROM) for the tables is worth the code simplicity and speed.