Skip to content

Instantly share code, notes, and snippets.

@bitbegin
Forked from vurtun/defl.c
Created November 17, 2019 07:10
Show Gist options
  • Save bitbegin/971a12b2012d076f4bfd632f60bb823d to your computer and use it in GitHub Desktop.
Save bitbegin/971a12b2012d076f4bfd632f60bb823d to your computer and use it in GitHub Desktop.
Full deflate/inflate implementation in ~250 LoC
/* ===============================================================
* SDEFL
* ===============================================================
* public domain - no warranty implied; use at your own risk
* References:
https://bitbucket.org/rmitton/tigr/src/be3832bee7fb2f274fe5823e38f8ec7fa94e0ce9/src/tigr_inflate.c?at=default&fileviewer=file-view-default
https://github.com/github/putty/blob/49fb598b0e78d09d6a2a42679ee0649df482090e/sshzlib.c
https://www.ietf.org/rfc/rfc1951.txt
*/
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#define SDEFL_DICT_BITS 10
#define SDEFL_DICT_SIZE (1 << SDEFL_DICT_BITS)
#define SDEFL_CACHE_SIZE 4
#define SDEFL_MAX_OFF (1 << 15)
#define SDEFL_MIN_MATCH 3
#define SDEFL_MAX_MATCH 258
struct sdefl {int bits, cnt;};
#define sinfl_rev16(n) ((sdefl_mirror[(n)&0xff] << 8) | sdefl_mirror[((n)>>8)&0xff])
static const unsigned char sdefl_mirror[256] = {
#define R2(n) n, n + 128, n + 64, n + 192
#define R4(n) R2(n), R2(n + 32), R2(n + 16), R2(n + 48)
#define R6(n) R4(n), R4(n + 8), R4(n + 4), R4(n + 12)
R6(0), R6(2), R6(1), R6(3),
};
static int
sdefl_npow2(unsigned n)
{
n--;
n |= n >> 1;
n |= n >> 2;
n |= n >> 4;
n |= n >> 8;
n |= n >> 16;
return ++n;
}
static int
sdefl_ilog2(unsigned n)
{
#define lt(n) n,n,n,n, n,n,n,n, n,n,n,n ,n,n,n,n
static const char tbl[256] = {-1,0,1,1,2,2,2,2,3,3,3,3,
3,3,3,3,lt(4),lt(5),lt(5),lt(6),lt(6),lt(6),lt(6),
lt(7),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7)
}; int tt, t;
if ((tt = (n >> 16)))
return (t = (tt >> 8)) ? 24+tbl[t]: 16+tbl[tt];
else return (t = (n >> 8)) ? 8+tbl[t]: tbl[n];
#undef lt
}
static int
sdefl_hash(const unsigned char *s)
{
unsigned a = s[0], b = s[1], c = s[2];
unsigned x = (a << 16) | (b << 8) | c;
x ^= x >> 16; x *= 0x7feb352d;
x ^= x >> 15; x *= 0x846ca68b;
x ^= x >> 16;
return x & (SDEFL_DICT_SIZE-1);
}
static int
sdefl_hash2(const unsigned char *s)
{
unsigned a = s[0], b = s[1], c = s[2];
unsigned v = (a << 16) | (b << 8) | c;
return ((v >> (3*8 - SDEFL_DICT_BITS)) - v) & (SDEFL_CACHE_SIZE-1);
}
static unsigned char*
sdefl_write(unsigned char *dst, struct sdefl *s, int code, int bitcnt)
{
s->bits |= (code << s->cnt);
s->cnt += bitcnt;
while (s->cnt >= 8) {
*dst++ = (unsigned char)(s->bits & 0xFF);
s->bits >>= 8;
s->cnt -= 8;
} return dst;
}
static unsigned char*
sdefl_match(unsigned char *dst, struct sdefl *s, int dist, int len)
{
static const short lxmin[] = {0,11,19,35,67,131};
static const short dxmax[] = {0,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576};
static const short lmin[] = {11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227};
static const short dmin[] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,
385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
/* length encoding */
int lc = len;
int lx = sdefl_ilog2(len - 3) - 2;
if (!(lx = (lx < 0) ? 0: lx)) lc += 254;
else if (len >= 258) lx = 0, lc = 285;
else lc = ((lx-1) << 2) + 265 + ((len - lxmin[lx]) >> lx);
if (lc <= 279)
dst = sdefl_write(dst, s, sdefl_mirror[(lc - 256) << 1], 7);
else dst = sdefl_write(dst, s, sdefl_mirror[0xc0 - 280 + lc], 8);
if (lx) dst = sdefl_write(dst, s, len - lmin[lc - 265], lx);
/* distance encoding */
{int dc = dist - 1;
int dx = sdefl_ilog2(sdefl_npow2(dist) >> 2);
if ((dx = (dx < 0) ? 0: dx))
dc = ((dx + 1) << 1) + (dist > dxmax[dx]);
dst = sdefl_write(dst, s, sdefl_mirror[dc << 3], 5);
if (dx) dst = sdefl_write(dst, s, dist - dmin[dc], dx);}
return dst;
}
static unsigned char*
sdefl_lit(unsigned char *dst, struct sdefl *s, int c)
{
if (c <= 143)
return sdefl_write(dst, s, sdefl_mirror[0x30+c], 8);
else return sdefl_write(dst, s, 1 + 2 * sdefl_mirror[0x90 - 144 + c], 9);
}
static int
sdeflate(unsigned char *out, const unsigned char *in, int size)
{
int i = 0;
struct sdefl st = {0};
unsigned char *dst = out;
const unsigned char *e = in + size;
const unsigned char *dict[SDEFL_DICT_SIZE][SDEFL_CACHE_SIZE] = {{0}};
dst = sdefl_write(dst,&st,0x01,1); /* block */
dst = sdefl_write(dst,&st,0x01,2); /* static huffman */
while (in < e) {
const unsigned char *ptr = in;
const int h = sdefl_hash(in);
const unsigned char **ents = dict[h & (SDEFL_DICT_SIZE-1)];
for (i = 0; i < SDEFL_CACHE_SIZE; ++i) {
const unsigned char *sub = ents[i];
if (sub && (in > sub) && ((in-sub) < SDEFL_MAX_OFF) &&
!memcmp(in,sub,SDEFL_MIN_MATCH)) {
/* match */
const unsigned char *s = sub + SDEFL_MIN_MATCH;
int len, dist = (int)(in - sub);
in += (len = SDEFL_MIN_MATCH);
while ((*s == *in) && (len < SDEFL_MAX_MATCH))
len++, s++, in++;
dst = sdefl_match(dst, &st, dist, len);
break;
}
}
if (i == SDEFL_CACHE_SIZE)
dst = sdefl_lit(dst, &st, *in++);
const int c = sdefl_hash2(in);
ents[c & SDEFL_CACHE_SIZE-1] = ptr;
}
/* zlib partial flush */
dst = sdefl_write(dst, &st, 0, 7);
dst = sdefl_write(dst, &st, 2, 10);
dst = sdefl_write(dst, &st, 2, 3);
return (int)(dst - out);
}
/* ===============================================================
* SINFL
* ===============================================================*/
struct sinfl {
int bits, bitcnt;
unsigned lits[288];
unsigned dsts[32];
unsigned lens[19];
int tlit, tdist, tlen;
};
static int
sinfl_read(const unsigned char **src, const unsigned char *end,
struct sinfl *s, int n)
{
const unsigned char *in = *src;
int v = s->bits & ((1 << n)-1);
s->bits >>= n;
s->bitcnt = s->bitcnt - n;
s->bitcnt = s->bitcnt < 0 ? 0 : s->bitcnt;
while (s->bitcnt < 16 && in < end) {
s->bits |= (*in++) << s->bitcnt;
s->bitcnt += 8;
} *src = in;
return v;
}
static int
sinfl_build(unsigned *tree, unsigned char *lens, int symcnt)
{
int n, cnt[16], first[16], codes[16];
memset(cnt, 0, sizeof(cnt));
cnt[0] = first[0] = codes[0] = 0;
for (n = 0; n < symcnt; ++n) cnt[lens[n]]++;
for (n = 1; n <= 15; n++) {
codes[n] = (codes[n-1] + cnt[n-1]) << 1;
first[n] = first[n-1] + cnt[n-1];
}
for (n = 0; n < symcnt; n++) {
int slot, code, len = lens[n];
if (!len) continue;
code = codes[len]++;
slot = first[len]++;
tree[slot] = (unsigned)((code << (32-len)) | (n << 4) | len);
} return first[15];
}
static int
sinfl_decode(const unsigned char **in, const unsigned char *end,
struct sinfl *s, unsigned *tree, int max)
{
/* bsearch next prefix code */
unsigned key, lo = 0, hi = (unsigned)max;
unsigned search = (unsigned)(sinfl_rev16(s->bits) << 16) | 0xffff;
while (lo < hi) {
unsigned guess = (lo + hi) / 2;
if (search < tree[guess]) hi = guess;
else lo = guess + 1;
}
/* pull out and check key */
key = tree[lo-1];
assert(((search^key) >> (32-(key&0xf))) == 0);
sinfl_read(in, end, s, key & 0x0f);
return (key >> 4) & 0x0fff;
}
static int
sinflate(unsigned char *out, const unsigned char *in, int size)
{
static const char order[] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
static const short dbase[30+2] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
static const unsigned char dbits[30+2] = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,
10,10,11,11,12,12,13,13,0,0};
static const short lbase[29+2] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,
43,51,59,67,83,99,115,131,163,195,227,258,0,0};
static const unsigned char lbits[29+2] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,
4,4,4,5,5,5,5,0,0,0};
const unsigned char *e = in + size, *o = out;
enum sinfl_states {hdr,stored,fixed,dyn,blk};
enum sinfl_states state = hdr;
struct sinfl s;
int last = 0;
memset(&s, 0, sizeof(s));
sinfl_read(&in,e,&s,0); /* buffer input */
while (in < e || s.bitcnt) {
switch (state) {
case hdr: {
int type = 0; /* block header */
last = sinfl_read(&in,e,&s,1);
type = sinfl_read(&in,e,&s,2);
switch (type) {default: return (int)(out-o);
case 0x00: state = stored; break;
case 0x01: state = fixed; break;
case 0x02: state = dyn; break;}
} break;
case stored: {
int len, nlen; /* uncompressed block */
sinfl_read(&in,e,&s,s.bitcnt & 7);
len = sinfl_read(&in,e,&s,16);
nlen = sinfl_read(&in,e,&s,16);
in -= 2; s.bitcnt = 0;
if (len > (e-in) || !len) return (int)(out-o);
memcpy(out, in, (size_t)len);
in += len, out += len;
state = hdr;
} break;
case fixed: {
/* fixed huffman codes */
int n; unsigned char lens[288+32];
for (n = 0; n <= 143; n++) lens[n] = 8;
for (n = 144; n <= 255; n++) lens[n] = 9;
for (n = 256; n <= 279; n++) lens[n] = 7;
for (n = 280; n <= 287; n++) lens[n] = 8;
for (n = 0; n < 32; n++) lens[288+n] = 5;
/* build trees */
s.tlit = sinfl_build(s.lits, lens, 288);
s.tdist = sinfl_build(s.dsts, lens + 288, 32);
state = blk;
} break;
case dyn: {
/* dynamic huffman codes */
int n, i, nlit, ndist, nlen;
unsigned char nlens[19] = {0}, lens[288+32];
nlit = 257 + sinfl_read(&in,e,&s,5);
ndist = 1 + sinfl_read(&in,e,&s,5);
nlen = 4 + sinfl_read(&in,e,&s,4);
for (n = 0; n < nlen; n++)
nlens[order[n]] = (unsigned char)sinfl_read(&in,e,&s,3);
s.tlen = sinfl_build(s.lens, nlens, 19);
/* decode code lengths */
for (n = 0; n < nlit + ndist;) {
int sym = sinfl_decode(&in, e, &s, s.lens, s.tlen);
switch (sym) {default: lens[n++] = (unsigned char)sym; break;
case 16: for (i=3+sinfl_read(&in,e,&s,2);i;i--,n++) lens[n]=lens[n-1]; break;
case 17: for (i=3+sinfl_read(&in,e,&s,3);i;i--,n++) lens[n]=0; break;
case 18: for (i=11+sinfl_read(&in,e,&s,7);i;i--,n++) lens[n]=0; break;}
}
/* build lit/dist trees */
s.tlit = sinfl_build(s.lits, lens, nlit);
s.tdist = sinfl_build(s.dsts, lens+nlit, ndist);
state = blk;
} break;
case blk: {
/* decompress block */
int sym = sinfl_decode(&in, e, &s, s.lits, s.tlit);
if (sym > 256) {sym -= 257; /* match symbol */
{int len = sinfl_read(&in, e, &s, lbits[sym]) + lbase[sym];
int dsym = sinfl_decode(&in, e, &s, s.dsts, s.tdist);
int offs = sinfl_read(&in, e, &s, dbits[dsym]) + dbase[dsym];
if (offs > (int)(out-o)) return (int)(out-o);
while (len--) *out = *(out-offs), out++;}
} else if (sym == 256) {
if (last) return (int)(out-o);
state = hdr;
} else *out++ = (unsigned char)sym;
} break;}
} return (int)(out-o);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment