CRC32原理及其实现

Feb 13, 2010

最近看nginx的URL hash部分的代码,由于该模块采用的求URL的crc循环校验余码然后对后台服务器的数量求模的方法来进行负载分配的,所以对crc算法最近稍稍看了看,因为时间仓促,理解的也不是很深入,还是那句话,如果你是高手,请无视这篇文章。

CRC全称是循环冗余校验,由于把一段信息隐射成一段固定位数的验证码,常用于验证数据传输的完整性。 CRC的本质是模-2除法的余数,采用的除数不同,CRC的类型也就不一样。

CRC的除法有别于正常的除法,如下:

            1100001010
       _______________
10011 ) 11010110110000
        10011,,.,,....
        -----,,.,,....
         10011,.,,....
         10011,.,,....
         -----,.,,....
          00001.,,....
          00000.,,....
          -----.,,....
           00010,,....
           00000,,....
           -----,,....
            00101,....
            00000,....
            -----,....
             01011....
             00000....
             -----....
              10110...
              10011...
              -----...
               01010..
               00000..
               -----..
                10100.
                10011.
                -----.
                 01110
                 00000
                 -----
                  1110 = Remainder
如上,正常除法中的减法被异或取代(你可以把异或看成是不借位的减法),而且被除数后面要补【除数位数-1】个0,从而保证被除数的所有位对结果都有影响。而且如果一次运算的结果地最高位是0,那么要向左移一位。(具体的网上资料很多)我在这块主要介绍算法的优化以及实现。

CRC算法中的除数被称为生成多项式,对应要求生成检验码的位数不同,对应得生成多项式有所不同,如下:

CRC-1 x + 1 (用途:硬件,也称为奇偶校验位) 0x1 or 0x1 (0x1)
CRC-5-CCITT x5 + x3 + x + 1 (ITU G.704 标准) 0x15 (0x??)
CRC-5-USB x5 + x2 + 1 (用途:USB 信令包) 0x05 or 0x14 (0x9)
CRC-7 x7 + x3 + 1 (用途:通信系统) 0x09 or 0x48 (0x11)
CRC-8-ATM x8 + x2 + x + 1 (用途:ATM HEC) 0x07 or 0xE0 (0xC1)
CRC-8-CCITT x8 + x7 + x3 + x2 + 1 (用途:1-Wire 总线)
CRC-8-Dallas/Maxim x8 + x5 + x4 + 1 (用途:1-Wire bus) 0x31 or 0x8C
CRC-8 x8 + x7 + x6 + x4 + x2 + 1 0xEA(0x??)
CRC-10 x10 + x9 + x5 + x4 + x + 1 0x233 (0x????)
CRC-12 x12 + x11 + x3 + x2 + x + 1 (用 途:通信系统) 0x80F or 0xF01 (0xE03)
CRC-16-Fletcher 参见 Fletcher's checksum 用于 Adler-32 A & B CRC
CRC-16-CCITT x16 + x12 + x5 + 1 (X25, V.41, Bluetooth, PPP, IrDA) 0x1021 or 0x8408 (0x0811)
CRC-16-IBM x16 +x15 + x2 + 1 0x8005 or 0xA001 (0x4003)
CRC-16-BBS x16 + x15 + x10 + x3 (用途:XMODEM 协议) 0x8408 (0x????)
CRC-32-Adler See Adler-32 参见 Adler-32
CRC-32-MPEG2 See IEEE 802.3 参见 IEEE 802.3
CRC-32-IEEE 802.3 x32 + x26 + x23 + x22 + x16 + x12 + x11 + x10 + x8 + x7 + x5 + x4 + x2 + x + 1 0x04C11DB7 or 0xEDB88320 (0xDB710641)
CRC-32C (Castagnoli)[1] x32 + x28 + x27 + x26 + x25 + x23 + x22 + x20 + x19 + x18 + x14 + x13 + x11 + x10 + x9 + x8 + x6 + 1 0x1EDC6F41 or 0x82F63B78 (0x05EC76F1)
CRC-64-ISO x64 + x4 + x3 + x + 1 (use: ISO 3309) 0x000000000000001B or 0xD800000000000000 (0xB000000000000001)
CRC-64-ECMA-182 x64 + x62 + x57 + x55 + x54 + x53 + x52 + x47 + x46 + x45 + x40 + x39 + x38 + x37 + x35 + x33 + x32 + x31 + x29 + x27 + x24 + x23 + x22 + x21 + x19 + x17 + x13 + x12 + x10 + x9 + x7 + x4 + x + 1 (as described in ECMA-182 p.63) 0x42F0E1EBA9EA3693 or 0xC96C5795D7870F42 (0x92D8AF2BAF0E1E85)
CRC-128 IEEE-ITU 标准。被 MD5 & SHA-1 取代
CRC-160 IEEE-ITU 标准。被 MD5 & SHA-1 取代

下面是逐位求模的算法实现,也就是从crc算法的定义出发,不经过任何优化的算法实现【下面的代码中,PLOY是生成多项式,data是被除数,即信息码】:

/*
   Load the register with zero bits.
   Augment the message by appending W zero bits to the end of it.
   While (more message bits)
      Begin
      Shift the register left by one bit, reading the next bit of the
         augmented message into register bit position 0.
      If (a 1 bit popped out of the register during step 3)
         Register = Register XOR Poly.
      End
   The register now contains the remainder.
*/

#include <stdio .h>

#define POLY 0x13

int 
main(){
    // the data
    unsigned short data = 0x035b;
    // load the register with zero bits
    unsigned short regi = 0x0000;
    // augment the data by appending W(4) zero bits to the end of it.
    data < <= 4;

    // we do it bit after bit
    for ( int cur_bit = 15; cur_bit >= 0; -- cur_bit ){
       // test the highest bit which will be poped later.
        // in fact, the 5th bit from right is the hightest bit here
        if ( ( ( regi >> 4 ) & 0x0001 ) == 0x1 ){
            regi = regi ^ POLY;
        }
        // shift the register
        regi < <= 1;
        // reading the next bit of the augmented data
        unsigned short tmp = ( data >> cur_bit ) & 0x0001;
        regi |= tmp;

    }

    /**//// and now, register contains the remainder which is also called CRC value.

    return 0;
}

由于nginx采用的是CRC32,所以我们以CRC32为例进行crc的算法优化。
根据异或运算满足交换率,即:(A xor B) xor C= A xor (B xor C)。我们得到DATA xor (POLY 或着 0x0) xor (PLOY 或着 0x0) xor (PLOY 或着 0x0) xor ... = DATA xor [ (POLY 或着 0x0) xor (POLY 或着 0x0) xor (POLY 或着 0x0) ...],所以“在保证前一个字节的信息正确的情况下(由它决定是POLY 还是 0x0)”,我们可以以字节为单位进行运算,而不是像上面的那样逐位进行运算。具体的实现如下:

#include <stdio .h>
#include <stdlib .h>
#include <memory .h>
#define POLY 0x04C11DB7L

int main()
{
    /*/// the data*/
    unsigned long data = 0x1011035b;
    /*/// load the register with the data*/
    unsigned long regi = 0;
    /*/// allocate memory to contain the AUGMENTED data (added some zeros)*/
    unsigned char p[8];
    /*/// copy data*/
    memset( p, 0, 8 );
    memcpy( p, &data, 4 );
    int i, j;
    for(i = 0; i < 8; ++i){
        printf("p[%d]:%x\n", i, p[i]);
    }
    printf("PLOY: 0x%x\n", POLY);
    /*/// because data contains 4 bytes*/
    for (i = 0; i < 8; ++ i )
    {
        printf("++++++++++++++++++++++++++++++++++\n");
        /*/// get the top byte of the register*/
        unsigned char top_byte = (unsigned char)( ( regi >> 24 ) & 0xff );
        printf("top_byte:0x%x\n", top_byte);
        /*/// sum all the polys at various offsets*/
        unsigned long sum_poly = top_byte < < 24;
        printf("sum_poly:0x%x\n", sum_poly);
        for (j = 0; j < 8; ++ j )
        {
            /*/// check the top bit*/
            if ( ( sum_poly >> 31 ) != 0 )
            {
                /*/// TODO : understand why '< <' first*/
                sum_poly = ( sum_poly << 1 ) ^ POLY;
                printf("sum_ploy:0x%x\n", sum_poly);
            }
            else
            {
                sum_poly <<= 1;
                printf("sum_ploy:0x%x\n", sum_poly);
            }
        }
        /*/// shift the register left by on byte, reading a new*/
        regi = ( ( regi << 8 ) | p[i] );
        printf("regi:0x %x\n", regi);
        /*/// xor the summed polys to the register*/
        regi ^= sum_poly;
        printf("regi:0x %x\n", regi);
    }
     getchar();
    /* and now, register contains the remainder which is also called CRC value.*/

    return 0;
}

【printf主要用于下面方便说明,你完全可以忽略它。】对于如上代码中的sum_poly,我们完全可以用事先生成一张表,然后用查表的方法得到。如下:

#include <stdio .h>
#include <stdlib .h>
#include <memory .h>

#define POLY 0x04C11DB7L

unsigned long 
get_sum_poly( unsigned char top_byte ){
    /**//// sum all the polys at various offsets
    unsigned long sum_poly = top_byte < < 24;
    for ( int j = 0; j < 8; ++ j ){
        /**//// check the top bit
        if ( ( sum_poly >> 31 ) != 0 ){
            /**//// TODO : understand why '< <' first
            sum_poly = ( sum_poly << 1 ) ^ POLY;
        }
        else{
            sum_poly <<= 1;
        }
    }

    return sum_poly;
}

void 
create_table( unsigned long *table ){
    for ( int i = 0; i < 256; ++ i ){
        table[i] = get_sum_poly( (unsigned char) i );
    }
}

int
main(){

    unsigned long data = 0x1011035b;
    unsigned long regi = 0;
    unsigned char p[8];
    memset( p, 0, 8 );
    memcpy( p, &data, 4 );

    unsigned long table[256];
    create_table( table );

    for ( int i = 0; i < 8; ++ i ){
        unsigned char top_byte = (unsigned char)( ( regi >> 24 ) & 0xff );
        regi = ( ( regi < < 8 ) | p[i] );
        regi ^= table[top_byte];
        printf("%x\n", regi);
    }

    return 0;
}

上代码的精髓部分:

regi=0; 
while (len--)
      regi = ((regi < < 8) | *p++) ^ table[(regi >> 24) & 0xFF];

如上的代码中都是相当于在data后面添加了strlen(&POLY) * 8 位0,如果实际操作中的data数据过大,不方便拷贝到&p中怎么办?我们怎么?很简单:在不加0的情况下执行上面的操作,然后再执行

for (i=0; i< W/4; i++) 
       regi = (regi << 8) ^ table[(regi >> 24) & 0xFF] 
//【W=strlen(&POLY)】

即可。

为了说明上述代码的缺陷,我现在列出代码本文第二段C代码打印出的信息:

p[0]:5b
p[1]:3
p[2]:11
p[3]:10
p[4]:0
p[5]:0
p[6]:0
p[7]:0
PLOY: 0x4c11db7
+++++++++++++++++++++++++++++++++
top_byte:0x0
sum_poly:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
regi:0x 5b
regi:0x 5b
+++++++++++++++++++++++++++++++++
top_byte:0x0
sum_poly:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
regi:0x 5b03
regi:0x 5b03
+++++++++++++++++++++++++++++++++
top_byte:0x0
sum_poly:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
regi:0x 5b0311
regi:0x 5b0311
+++++++++++++++++++++++++++++++++
top_byte:0x0
sum_poly:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
sum_ploy:0x0
regi:0x 5b031110
regi:0x 5b031110
+++++++++++++++++++++++++++++++++
top_byte:0x5b
sum_poly:0x5b000000
sum_ploy:0xb6000000
sum_ploy:0x68c11db7
sum_ploy:0xd1823b6e
sum_ploy:0xa7c56b6b
sum_ploy:0x4b4bcb61
sum_ploy:0x969796c2
sum_ploy:0x29ee3033
sum_ploy:0x53dc6066
regi:0x 3111000
regi:0x 50cd7066
+++++++++++++++++++++++++++++++++
top_byte:0x50
sum_poly:0x50000000
sum_ploy:0xa0000000
sum_ploy:0x44c11db7
sum_ploy:0x89823b6e
sum_ploy:0x17c56b6b
sum_ploy:0x2f8ad6d6
sum_ploy:0x5f15adac
sum_ploy:0xbe2b5b58
sum_ploy:0x7897ab07
regi:0x cd706600
regi:0x b5e7cd07
+++++++++++++++++++++++++++++++++
top_byte:0xb5
sum_poly:0xb5000000
sum_ploy:0x6ec11db7
sum_ploy:0xdd823b6e
sum_ploy:0xbfc56b6b
sum_ploy:0x7b4bcb61
sum_ploy:0xf69796c2
sum_ploy:0xe9ee3033
sum_ploy:0xd71d7dd1
sum_ploy:0xaafbe615
regi:0x e7cd0700
regi:0x 4d36e115
+++++++++++++++++++++++++++++++++
top_byte:0x4d
sum_poly:0x4d000000
sum_ploy:0x9a000000
sum_ploy:0x30c11db7
sum_ploy:0x61823b6e
sum_ploy:0xc30476dc
sum_ploy:0x82c9f00f
sum_ploy:0x152fda9
sum_ploy:0x2a5fb52
sum_ploy:0x54bf6a4
regi:0x 36e11500
regi:0x 33aae3a4

我们看到前4次迭代中,由于regi首位始终是0,所以sum_poly的始终是0,显然一个数和0异或还是它本身,所以前几次迭代对整个运算的影响尽限于把data拷贝到regi中去,这样做无疑是浪费资源;而且我们还可以发现从第五次迭代开始regi后两个字节都是0,这些0对于regi的求值没有任何影响,这同样也是一种浪费。同样是根据异或运算满足交换律的原理,对上面的

regi=0; 
while (len--) 
     regi = ((regi < < 8) | *p++) ^ table[(regi >> 24) & 0xFF];

进行优化:

regi=0; 
while (len--) 
     regi = (regi < < 8) ^ table[(regi >> 24) ^ *p++];

(同样也是因为(A xor B) xor C= A xor (B xor C),在查表之前 r >> 24 和 *p异或,得到的t[]就不会是0,这样就不用白白循环那么多次了),具体实现如下:

#include <stdio .h>
#include <stdlib .h>
#include <memory .h>

#define POLY 0x04C11DB7L

unsigned long get_sum_poly( unsigned char top_byte )
{
    /**//// sum all the polys at various offsets
    unsigned long sum_poly = top_byte < < 24;
    for ( int j = 0; j < 8; ++ j )
    {
        /**//// check the top bit
        if ( ( sum_poly >> 31 ) != 0 )
        {
            /**//// TODO : understand why '< <' first
            sum_poly = ( sum_poly << 1 ) ^ POLY;
        }
        else
        {
            sum_poly <<= 1;
        }
    }

    return sum_poly;
}

void create_table( unsigned long *table )
{
    for ( int i = 0; i < 256; ++ i )
    {
        table[i] = get_sum_poly( (unsigned char) i );
    }
}


int main()
{
    /**//// the data
    unsigned long data = 0x1011035b;
    /**//// load the register with the data
    unsigned long regi = 0;
    /**//// allocate memory to contain the data
    unsigned char p[4];
    /**//// copy data
    memcpy( p, &data, 4 );

    /**//// the table
    unsigned long table[256];
    /**//// create the table
    create_table( table );

    /**//// because data contains 4 bytes
    for ( int i = 0; i < 4; ++ i )
    {
        printf("++++++++++++++++++++++++++++++\n");
        printf("table[(regi >> 24) ^ p[i]]:%x\n", table[(regi >> 24) ^ p[i]]);
        printf("regi < < 8 : %x\n", (regi << 8));
        regi = ( regi << 8 ) ^ table[ ( regi >> 24 ) ^ p[i] ];
        printf("regi: %x\n", regi);
    }

    /**//// and now, register contains the remainder which is also called CRC value.

    return 0;
}

同样下面列出优化后程序打印出的信息:

++++++++++++++++++++++++++++++
table[(regi >> 24) ^ p[i]]:53dc6066
regi < < 8 : 0
regi: 53dc6066
++++++++++++++++++++++++++++++
table[(regi >> 24) ^ p[i]]:7897ab07
regi < < 8 : dc606600
regi: a4f7cd07
++++++++++++++++++++++++++++++
table[(regi >> 24) ^ p[i]]:aafbe615
regi < < 8 : f7cd0700
regi: 5d36e115
++++++++++++++++++++++++++++++
table[(regi >> 24) ^ p[i]]:54bf6a4
regi < < 8 : 36e11500
regi: 33aae3a4

对比发现每次table的值和优化前程序的第5到8次迭代的table的值相等。

好了,对于crc的算法就介绍这么多,本文的参考资料见:http://www.ross.net/crc/download/crc_v3.txt写的很全,很详细,不过是洋文,看起来计较费劲。 本文代码来源处:http://blog.csdn.net/zhaodm/archive/2009/01/05/3711034.aspx


突然想起来学汇编的时候史先俊老师布置过一个关于生成crc验证码的作业,具体的题目我忘了,代码先贴出来,可能对师弟们有点帮助

;关于打印的宏
      SECOND  MACRO
              LOCAL     ONE,TWO
              CMP       DL,9
              JBE       DIGIT
              ADD       DL,37H
              JMP       DPC
        ONE:  ADD       DL,30H
        TWO:  MOV       AH,02H
              INT       21H
              ENDM

       FIRST  MACRO
              ROR       AL,4
              MOV       DL,AL
              AND       DL,0FH
              PUSH      AX
              SECOND
              POP       AX
              ROR       AL,4
              MOV       DL,AL
              AND       DL,0FH
              SECOND
              ENDM
.MODEL        SMALL
.STACK
.DATA
      STRING  DB        200
       COUNT  DB        0H
         BUF  DB        200 DUP(0)
       KINV1  DB        'Please input your string:',0DH,0AH,'$'
        SHOU  DB        ?
.CODE
      START:
              MOV       AX,@DATA
              MOV       DS,AX
              MOV       AH,09H
              MOV       DX,OFFSET KINV1
              INT       21H
              MOV       AH,0AH
              MOV       DX,OFFSET STRING
              INT       21H
              CALL      GENCR
              MOV       AL,BUF
              PRINT
              MOV       AL,BUF+1
              PRINT

              MOV       AX,4C00H
              INT       21H
;生成CRC子程序
       GENCR  PROC      NEAR
              MOV       BX,0
              MOV       BL,2
              ADD       BL,COUNT
              MOV       COUNT,BL
              MOV       BYTE PTR[BX],BH
              INC       BL
              MOV       BYTE PTR[BX],BH
              MOV       AL,COUNT
              MOV       BL,8
              MUL       BX
              MOV       AH,0
              MOV       CX,AX
       MORE:  DEC       CX
              MOV       AL,BUF
              CBW
              MOV       BL,80H
              DIV       BX
              MOV       SHOU, AL
              CALL      LSHIFT
              CMP       SHOU,1
              JB        A
              MOV       AL,BUF
              XOR       AL,80H
              MOV       BUF,AL
              MOV       AL,BUF+1
              XOR       AL,05H
              MOV       BUF+1,AL
          A:
              CMP       CX,16
              JA        MORE
              RET
       GENCR  ENDP

      LSHIFT  PROC      NEAR
              MOV       BL,COUNT
              MOV       BH,0
              MOV       DI,BX
              DEC       DI
              MOV       BL,BUF
              SHL       BL,1
              PUSHF
          B:
              CMP       DI,0
              JL        C
              MOV       BL,BUF+DI
              POPF
              RCL       BL,1
              PUSHF
              MOV       BUF+DI,BL
              DEC       DI
              JMP
          C:
              POPF
              JNC       OVER
              MOV       BL,COUNT
              MOV       AH,0
              MOV       DI,BX
              DEC       DI
              MOV       BL,BUF+DI
              XOR       BL,1
              MOV       BUF+DI,BL
       OVER:
              RET
      LSHIFT  ENDP

              END       START

Get In Touch With Us ...