In the past I have been using a standard algorithm for error checking communication of data between devices. The cyclic redundancy check algorithm, CRC-16 was implemented with a 256 byte lookup table, or more complex and slow calculations. Recently I was reading about a shorter, quicker algorithm based on two 16 word tables.

The C version is for x86 and ARM:

    /*  crc16 - crc16 routine
     *  Thanks to R.K. Irvine, William James Hunt, Rex, & Binstock
     *  This routine returns the crc16 for buf.
     *  crc16 is given by:  x^16 + x^15 + x^2 + 1
     */
    static uint16_t crc16l[] = {
        0x0000, 0xc0c1, 0xc181, 0x0140, 0xc301, 0x03c0, 0x0280, 0xc241,
        0xc601, 0x06c0, 0x0780, 0xc741, 0x0500, 0xc5c1, 0xc481, 0x0440
    };
    
    static uint16_t crc16h[] = {
        0x0000, 0xcc01, 0xd801, 0x1400, 0xf001, 0x3c00, 0x2800, 0xe401,
        0xa001, 0x6c00, 0x7800, 0xb401, 0x5000, 0x9c01, 0x8801, 0x4400,
    };
    
    uint16_t crc16(char *buf, int bnobs, uint16_t crc)
    {
        uint8_t n;
    
        while(bnobs-- > 0)
        {
            n = *buf++ ^ crc;
            crc = (crc >> 8) ^ crc16l[n & 0x0f] ^ crc16h[(n >> 4) & 0x0f];
        }
        return(crc);
    }

x86 assembler version:

    ; =============== S U B R O U T I N E =======================================
    ; Call with:    DS:SI = buffer to CRC check
    ;               AX = char
    ;               DX = start CRC
    ;
    ; ===========================================================================
    i33crc:                                 ;crc16()
            push bx
            mov bl, al
            xor bl, dl                      ;  n ^= crc
            push bx
            and bl, 0fh
            xor bh, bh
            shl bx, 1                       ;  word index
            mov ax, [cs:crc16l+bx]          ;  crc16l[n&0fh]
            xchg dl, dh                     ;  crc >> 8
            xor dh,dh
            xor dx, ax                      ;  crc = (crc >> 8) ^ crc16l[n&0fh]
            pop bx
            shr bl, 1
            shr bl, 1
            shr bl, 1
            shr bl, 1
            and bl, 0fh
            xor bh, bh
            shl bx, 1
            mov ax, [cs:crc16h+bx]          ;  crc16h[(n>>8) & 0fh]
            xor dx, ax                      ;  ^ (crc >>8)
            mov ax, dx
            pop bx
            retn
    i33crcend:
    ;
    ; =============== S U B R O U T I N E =======================================
    ; Call with:    DS:SI = buffer to CRC check
    ;               CX = length of buffer
    ;
    ; ===========================================================================
    i33crct:                                ;crc16t()
            push bx
            push dx
            cld                             ; auto inc
            xor ax, ax                      ; n =0;
            xor dx, dx                      ; crc = 0
    .whrxn:                                 ; while rxnobs > 0
            lodsb                           ;  n = *bfr++
            call i33crc
            loop .whrxn
    .ewhrxn:
            pop dx
            pop bx
            retn
    i33crctend:
    ;
    crc16l:
            dw 00000h, 0c0c1h, 0c181h, 00140h, 0c301h, 003c0h, 00280h, 0c241h
            dw 0c601h, 006c0h, 00780h, 0c741h, 00500h, 0c5c1h, 0c481h, 00440h
    crc16h:
            dw 00000h, 0cc01h, 0d801h, 01400h, 0f001h, 03c00h, 02800h, 0e401h
            dw 0a001h, 06c00h, 07800h, 0b401h, 05000h, 09c01h, 08801h, 04400h

Z80 assembler version

    Z80:
    ;*****************************************************************************************
    ;                                   CRC16t
    ; crc16 => x^16 + x^15 + x^2 + 1
    ;*****************************************************************************************
    ;
    crctl:      defw 0000h, 0C0C1h, 0C181h, 0140h, 0C301h, 03C0h, 0280h, 0C241h,
                defw 0C601h, 06C0h, 0780h, 0C741h, 0500h, 0C5C1h, 0C481h, 0440h
    ;
    crcth:      defw 0000h, 0CC01h, 0D801h, 1400h, 0F001h, 3C00h, 2800h, 0E401h
                defw 0A001h, 6C00h, 7800h, 0B401h, 5000h, 9C01h, 8801h, 4400h
    ;
    crc16t:     ld bc, 400h                         ;count = 1024
                ld de, 0                            ;crc = 0
                ld hl, (crcaddr)                    ;hl -> buf
    repeatcrc:  push bc                             ;repeat
                ld ix, crctl                        ; ix -> crc table low
                ld iy, crcth                        ; iy -> crc table high
    
                ld c, (hl)                          ; n = *buf++
                inc hl
    
                ld a, e                             ; n ^= crc
                xor c
                ld c, a
    
                push bc                             ; crc16l[n & 0fh]
                ld b,0
                ld a, 0fh
                and c
                rla                                 ;//times 2 16 bit pointer
                ld c,a
                add ix, bc
                pop bc
    
                push bc                             ; crc16h[(n >> 4) & 0fh]
                ld b,0
                ld a, c
                rra
                rra
                rra
                rra
                and 0fh
                rla                                 ;//times 2 16 bit pointer
                ld c,a
                add iy, bc
                pop bc
    
                xor a                               ; crc  (low byte) =
                xor (ix+0)                          ;        crc16l[n & 0fh] ^
                xor (iy+0)                          ;        crc16h[(n >> 4) & 0fh] ^
                xor d                               ;        crc >> 8
                ld e,a
                xor a                               ; crc  (high byte) =
                xor (ix+1)                          ;        crc16l[n & 0fh] ^
                xor (iy+1)                          ;        crc16h[(n >> 4) & 0fh] ^
                ld d,a
    
                pop bc
                dec bc
                ld a, b
                or c
                jp nz, repeatcrc                    ;until block count = 0
                ld (crccalcd), de
                ret

The cost of 64 bytes (ROM) for the tables is worth the code simplicity and speed.