-
-
Save bitbegin/971a12b2012d076f4bfd632f60bb823d to your computer and use it in GitHub Desktop.
Full deflate/inflate implementation in ~250 LoC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* =============================================================== | |
| * SDEFL | |
| * =============================================================== | |
| * public domain - no warranty implied; use at your own risk | |
| * References: | |
| https://bitbucket.org/rmitton/tigr/src/be3832bee7fb2f274fe5823e38f8ec7fa94e0ce9/src/tigr_inflate.c?at=default&fileviewer=file-view-default | |
| https://github.com/github/putty/blob/49fb598b0e78d09d6a2a42679ee0649df482090e/sshzlib.c | |
| https://www.ietf.org/rfc/rfc1951.txt | |
| */ | |
| #include <stdlib.h> | |
| #include <assert.h> | |
| #include <string.h> | |
| #define SDEFL_DICT_BITS 12 | |
| #define SDEFL_DICT_SIZE (1 << SDEFL_DICT_BITS) | |
| #define SDEFL_MAX_OFF (1 << 15) | |
| #define SDEFL_MIN_MATCH 3 | |
| #define SDEFL_MAX_MATCH 258 | |
| typedef unsigned char uchar; | |
| struct sdefl {int bits, off;}; | |
| #define sinfl_rev16(n) ((sdefl_mirror[(n)&0xff] << 8) | sdefl_mirror[((n)>>8)&0xff]) | |
| static const uchar sdefl_mirror[256] = { | |
| #define R2(n) n, n + 128, n + 64, n + 192 | |
| #define R4(n) R2(n), R2(n + 32), R2(n + 16), R2(n + 48) | |
| #define R6(n) R4(n), R4(n + 8), R4(n + 4), R4(n + 12) | |
| R6(0), R6(2), R6(1), R6(3), | |
| }; | |
| static int | |
| sdefl_nxpow2i(int n) | |
| { | |
| n--; | |
| n |= n >> 1; | |
| n |= n >> 2; | |
| n |= n >> 4; | |
| n |= n >> 8; | |
| n |= n >> 16; | |
| return ++n; | |
| } | |
| static int | |
| sdefl_ilog2(int n) | |
| { | |
| #define lt(n) n,n,n,n, n,n,n,n, n,n,n,n ,n,n,n,n | |
| static const char tbl[256] = {-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,lt(4),lt(5), | |
| lt(5),lt(6),lt(6),lt(6),lt(6),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7) | |
| }; int tt, t; | |
| if ((tt = (n >> 16))) | |
| return (t = (tt >> 8)) ? 24+tbl[t]: 16+tbl[tt]; | |
| else return (t = (n >> 8)) ? 8+tbl[t]: tbl[n]; | |
| #undef lt | |
| } | |
| static int | |
| sdefl_hash(const uchar *s) | |
| { | |
| int a = s[0], b = s[1], c = s[2]; | |
| int v = (a << 16) | (b << 8) | c; | |
| return ((v >> (3*8 - SDEFL_DICT_BITS)) - v) & (SDEFL_DICT_SIZE-1); | |
| } | |
| static uchar* | |
| sdefl_write(uchar *dst, struct sdefl *s, int code, int bitcnt) | |
| { | |
| s->bits |= (code << s->off); | |
| s->off += bitcnt; | |
| while (s->off >= 8) { | |
| *dst++ = (uchar)(s->bits & 0xFF); | |
| s->bits >>= 8; | |
| s->off -= 8; | |
| } return dst; | |
| } | |
| static uchar* | |
| sdefl_match(uchar *dst, struct sdefl *s, int dist, int len) | |
| { | |
| static const short lxmin[] = {0,11,19,35,67,131}; | |
| static const short dxmax[] = {0,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576}; | |
| static const short lmin[] = {11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227}; | |
| static const short dmin[] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257, | |
| 385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577}; | |
| /* length encoding */ | |
| int lc = len; | |
| int lx = sdefl_ilog2(len - 3) - 2; | |
| if (!(lx = (lx < 0) ? 0: lx)) lc += 254; | |
| else if (len >= 258) lx = 0, lc = 285; | |
| else lc = ((lx-1) << 2) + 265 + ((len - lxmin[lx]) >> lx); | |
| if (lc <= 279) | |
| dst = sdefl_write(dst, s, sdefl_mirror[(lc - 256) << 1], 7); | |
| else dst = sdefl_write(dst, s, sdefl_mirror[0xc0 - 280 + lc], 8); | |
| if (lx) dst = sdefl_write(dst, s, len - lmin[lc - 265], lx); | |
| /* distance encoding */ | |
| {int dc = dist - 1; | |
| int dx = sdefl_ilog2(sdefl_nxpow2i(dist) >> 2); | |
| if ((dx = (dx < 0) ? 0: dx)) | |
| dc = ((dx + 1) << 1) + (dist > dxmax[dx]); | |
| dst = sdefl_write(dst, s, sdefl_mirror[dc << 3], 5); | |
| if (dx) dst = sdefl_write(dst, s, dist - dmin[dc], dx);} | |
| return dst; | |
| } | |
| static uchar* | |
| sdefl_lit(uchar *dst, struct sdefl *s, int c) | |
| { | |
| if (c <= 143) | |
| return sdefl_write(dst, s, sdefl_mirror[0x30+c], 8); | |
| else return sdefl_write(dst, s, 1 + 2 * sdefl_mirror[0x90 - 144 + c], 9); | |
| } | |
| static int | |
| sdeflate(uchar *out, const uchar *in, int size) | |
| { | |
| struct sdefl st; | |
| uchar *dst = out; | |
| const uchar *e = in + size; | |
| const uchar *dict[SDEFL_DICT_SIZE]; | |
| memset(dict,0,sizeof(dict)); | |
| memset(&st,0,sizeof(st)); | |
| dst = sdefl_write(dst,&st,0x01,1); /* block */ | |
| dst = sdefl_write(dst,&st,0x01,2); /* static huffman */ | |
| while (in < e) { | |
| const int h = sdefl_hash(in); | |
| const uchar **ent = &dict[h & (SDEFL_DICT_SIZE-1)]; | |
| const uchar *sub = *ent; *ent = in; | |
| if (sub && (in > sub) && ((in-sub) < SDEFL_MAX_OFF) && | |
| !memcmp(in,sub,SDEFL_MIN_MATCH)) { | |
| /* match */ | |
| const uchar *s = sub + SDEFL_MIN_MATCH; | |
| int len, dist = (int)(in - sub); | |
| in += (len = SDEFL_MIN_MATCH); | |
| while ((*s == *in) && (len < SDEFL_MAX_MATCH)) | |
| len++, s++, in++; | |
| dst = sdefl_match(dst, &st, dist, len); | |
| } else dst = sdefl_lit(dst, &st, *in++); | |
| } | |
| /* zlib partial flush */ | |
| dst = sdefl_write(dst, &st, 0, 7); | |
| dst = sdefl_write(dst, &st, 2, 10); | |
| dst = sdefl_write(dst, &st, 2, 3); | |
| return (int)(dst - out); | |
| } | |
| struct sinfl { | |
| int bits, bitoff; | |
| unsigned lits[288]; | |
| unsigned dsts[32]; | |
| unsigned lens[19]; | |
| int tlit, tdist, tlen; | |
| }; | |
| static int | |
| sinfl_read(const uchar **src, const uchar *end, | |
| struct sinfl *s, int n) | |
| { | |
| const uchar *in = *src; | |
| int v = s->bits & ((1 << n)-1); | |
| s->bits >>= n; | |
| s->bitoff = s->bitoff - n; | |
| s->bitoff = s->bitoff < 0 ? 0 : s->bitoff; | |
| while (s->bitoff < 16 && in < end) { | |
| s->bits |= (*in++) << s->bitoff; | |
| s->bitoff += 8; | |
| } *src = in; | |
| return v; | |
| } | |
| static int | |
| sinfl_build(unsigned *tree, uchar *lens, int symcnt) | |
| { | |
| int n, cnt[16], first[16], codes[16]; | |
| memset(cnt, 0, sizeof(cnt)); | |
| cnt[0] = first[0] = codes[0] = 0; | |
| for (n = 0; n < symcnt; ++n) cnt[lens[n]]++; | |
| for (n = 1; n <= 15; n++) { | |
| codes[n] = (codes[n-1] + cnt[n-1]) << 1; | |
| first[n] = first[n-1] + cnt[n-1]; | |
| } | |
| for (n = 0; n < symcnt; n++) { | |
| int slot, code, len = lens[n]; | |
| if (!len) continue; | |
| code = codes[len]++; | |
| slot = first[len]++; | |
| tree[slot] = (unsigned)((code << (32-len)) | (n << 4) | len); | |
| } return first[15]; | |
| } | |
| static int | |
| sinfl_decode(const uchar **in, const uchar *end, | |
| struct sinfl *s, unsigned *tree, int max) | |
| { | |
| /* bsearch next prefix code */ | |
| unsigned key, lo = 0, hi = (unsigned)max; | |
| unsigned search = (unsigned)(sinfl_rev16(s->bits) << 16) | 0xffff; | |
| while (lo < hi) { | |
| unsigned guess = (lo + hi) / 2; | |
| if (search < tree[guess]) hi = guess; | |
| else lo = guess + 1; | |
| } | |
| /* pull out and check key */ | |
| key = tree[lo-1]; | |
| assert(((search^key) >> (32-(key&0xf))) == 0); | |
| sinfl_read(in, end, s, key & 0x0f); | |
| return (key >> 4) & 0x0fff; | |
| } | |
| static int | |
| sinflate(uchar *out, const uchar *in, int size) | |
| { | |
| static const char order[] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; | |
| static const short dbase[30+2] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, | |
| 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577}; | |
| static const uchar dbits[30+2] = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9, | |
| 10,10,11,11,12,12,13,13,0,0}; | |
| static const short lbase[29+2] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35, | |
| 43,51,59,67,83,99,115,131,163,195,227,258,0,0}; | |
| static const uchar lbits[29+2] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4, | |
| 4,4,4,5,5,5,5,0,0,0}; | |
| enum sinfl_states {hdr,stored,fixed,dyn,blk}; | |
| enum sinfl_states state = hdr; | |
| const uchar *e = in + size, *o = out; | |
| struct sinfl s; | |
| int last = 0; | |
| memset(&s, 0, sizeof(s)); | |
| sinfl_read(&in,e,&s,0); /* buffer input */ | |
| while (in < e || s.bitoff) { | |
| switch (state) { | |
| case hdr: { | |
| int type = 0; /* block header */ | |
| last = sinfl_read(&in,e,&s,1); | |
| type = sinfl_read(&in,e,&s,2); | |
| switch (type) {default: return (int)(out-o); | |
| case 0x00: state = stored; break; | |
| case 0x01: state = fixed; break; | |
| case 0x02: state = dyn; break;} | |
| } break; | |
| case stored: { | |
| int len, nlen; /* uncompressed block */ | |
| sinfl_read(&in,e,&s,s.bitoff & 7); | |
| len = sinfl_read(&in,e,&s,16); | |
| nlen = sinfl_read(&in,e,&s,16); | |
| if ((unsigned)len != ~((unsigned)nlen)) | |
| return (int)(out-o); | |
| if (len >= (e-in)) return (int)(out-o); | |
| memcpy(out, in, (size_t)len); | |
| in += len, out += len; | |
| sinfl_read(&in,e,&s,16); | |
| state = hdr; | |
| } break; | |
| case fixed: { | |
| /* fixed huffman codes */ | |
| int n; uchar lens[288+32]; | |
| for (n = 0; n <= 143; n++) lens[n] = 8; | |
| for (n = 144; n <= 255; n++) lens[n] = 9; | |
| for (n = 256; n <= 279; n++) lens[n] = 7; | |
| for (n = 280; n <= 287; n++) lens[n] = 8; | |
| for (n = 0; n < 32; n++) lens[288+n] = 5; | |
| /* build trees */ | |
| s.tlit = sinfl_build(s.lits, lens, 288); | |
| s.tdist = sinfl_build(s.dsts, lens + 288, 32); | |
| state = blk; | |
| } break; | |
| case dyn: { | |
| /* dynamic huffman codes */ | |
| int n, i, nlit, ndist, nlen; | |
| uchar nlens[19] = {0}, lens[288+32]; | |
| nlit = 257 + sinfl_read(&in,e,&s,5); | |
| ndist = 1 + sinfl_read(&in,e,&s,5); | |
| nlen = 4 + sinfl_read(&in,e,&s,4); | |
| for (n = 0; n < nlen; n++) | |
| nlens[order[n]] = (uchar)sinfl_read(&in,e,&s,3); | |
| s.tlen = sinfl_build(s.lens, nlens, 19); | |
| /* decode code lengths */ | |
| for (n = 0; n < nlit + ndist;) { | |
| int sym = sinfl_decode(&in, e, &s, s.lens, s.tlen); | |
| switch (sym) {default: lens[n++] = (uchar)sym; break; | |
| case 16: for (i=3+sinfl_read(&in,e,&s,2);i;i--,n++) lens[n]=lens[n-1]; break; | |
| case 17: for (i=3+sinfl_read(&in,e,&s,3);i;i--,n++) lens[n]=0; break; | |
| case 18: for (i=11+sinfl_read(&in,e,&s,7);i;i--,n++) lens[n]=0; break;} | |
| } | |
| /* build lit/dist trees */ | |
| s.tlit = sinfl_build(s.lits, lens, nlit); | |
| s.tdist = sinfl_build(s.dsts, lens+nlit, ndist); | |
| state = blk; | |
| } break; | |
| case blk: { | |
| /* decompress block */ | |
| int sym = sinfl_decode(&in, e, &s, s.lits, s.tlit); | |
| if (sym > 256) {sym -= 257; /* match symbol */ | |
| {int len = sinfl_read(&in, e, &s, lbits[sym]) + lbase[sym]; | |
| int dsym = sinfl_decode(&in, e, &s, s.dsts, s.tdist); | |
| int offs = sinfl_read(&in, e, &s, dbits[dsym]) + dbase[dsym]; | |
| if (offs > (int)(out-o)) return (int)(out-o); | |
| while (len--) *out = *(out-offs), out++;} | |
| } else if (sym == 256) { | |
| if (last) return (int)(out-o); | |
| state = hdr; | |
| } else *out++ = (uchar)sym; | |
| } break;} | |
| } return (int)(out-o); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment