huffman.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. #pragma GCC optimize ("O3")
  2. #define printf printk
  3. static uint8_t buffer[7096];
  4. static int verbose = 0;
  5. int hhh = 1023;
  6. /* 8-bit alphabet plus an escape code for emitting symbols not represented in
  7. * the Huffman tree, and an end-of-stream code to exit the decoder. */
  8. #define NR_SYMBOLS 258
  9. #define SYM_ESC 256
  10. #define SYM_EOS 257
  11. /* LUT: 8-bit code prefix -> Huffman tree node or leaf. */
  12. #define lent_codelen(e) ((e)>>16)
  13. #define lent_node(e) ((uint16_t)(e))
  14. #define mk_lent(node, codelen) (((codelen)<<16)|(node))
  15. typedef uint32_t lent_t;
  16. typedef lent_t *lut_t;
  17. /* Dict: input symbol -> { code, codelen }. */
  18. #define dent_codelen(e) ((e)>>16)
  19. #define dent_code(e) ((uint16_t)(e))
  20. #define mk_dent(code, codelen) (((codelen)<<16)|(code))
  21. typedef uint32_t dent_t;
  22. typedef dent_t *dict_t;
  23. /* A node can be a leaf or internal. Only internal nodes have child links. */
  24. #define NODE_INTERNAL 0x8000
  25. #define node_is_internal(n) ((n) & NODE_INTERNAL)
  26. #define node_is_leaf(n) !node_is_internal(n)
  27. #define node_idx(n) ((n) & 0x7fff)
  28. /* Internal Huffman tree node. */
  29. #define node_left(e) ((e)>>16)
  30. #define node_right(e) ((uint16_t)(e))
  31. #define mk_node(l,r) (((l)<<16)|(r))
  32. typedef uint32_t node_t;
  33. /* Heap: Min-heap used for constructing the Huffman tree. */
  34. #define hent_count(e) ((uint16_t)(e))
  35. #define hent_node(e) ((e)>>16)
  36. #define mk_hent(node, count) (((node)<<16)|(count))
  37. typedef uint32_t hent_t;
  38. typedef hent_t *heap_t; /* [0] -> nr */
  39. struct huffman_state {
  40. node_t nodes[NR_SYMBOLS]; /* 258 * 4 bytes */
  41. union {
  42. hent_t heap[NR_SYMBOLS+1]; /* 259 * 4 bytes */
  43. lent_t lut[256]; /* 256 * 4 bytes */
  44. dent_t dict[NR_SYMBOLS]; /* 258 * 4 bytes */
  45. } u; /* 259 * 4 bytes */
  46. }; /* 517 * 4 = 2068 bytes */
  47. static struct huffman_state huffman_state;
  48. /* Percolate item @i downwards to correct position among subheaps. */
  49. static void heap_percolate_down(heap_t heap, unsigned int i)
  50. {
  51. unsigned int nr = heap[0];
  52. uint32_t x = heap[i];
  53. for (;;) {
  54. unsigned int l = 2*i, r = 2*i+1, smallest = i;
  55. uint32_t s = x;
  56. /* Find the smallest of three nodes. */
  57. if (likely(l <= nr) && (hent_count(heap[l]) < hent_count(s))) {
  58. smallest = l;
  59. s = heap[l];
  60. }
  61. if (likely(r <= nr) && (hent_count(heap[r]) < hent_count(s))) {
  62. smallest = r;
  63. s = heap[r];
  64. }
  65. if (smallest == i)
  66. break;
  67. /* Swap with smallest subtree root. */
  68. heap[i] = s;
  69. heap[smallest] = x;
  70. /* Iterate into the subtree we swapped with. */
  71. i = smallest;
  72. }
  73. }
  74. static void build_heap(heap_t heap, unsigned int nr)
  75. {
  76. unsigned int i, j;
  77. for (i = j = 1; i <= nr; i++) {
  78. uint32_t he = heap[i];
  79. if (hent_count(he) != 0)
  80. heap[j++] = he;
  81. }
  82. heap[0] = --j;
  83. for (i = j/2; i > 0; i--)
  84. heap_percolate_down(heap, i);
  85. }
  86. static uint16_t build_huffman_tree(heap_t heap, node_t *nodes)
  87. {
  88. unsigned int nr = heap[0];
  89. uint32_t x, y;
  90. for (;;) {
  91. /* heap_get_min #1 */
  92. x = heap[1];
  93. heap[1] = heap[nr];
  94. if (unlikely((heap[0] = --nr) == 0))
  95. break;
  96. heap_percolate_down(heap, 1);
  97. /* heap_get_min #2 */
  98. y = heap[1];
  99. nodes[nr] = mk_node(hent_node(x), hent_node(y));
  100. heap[1] = mk_hent(nr|NODE_INTERNAL, hent_count(x) + hent_count(y));
  101. heap_percolate_down(heap, 1);
  102. }
  103. return hent_node(x);
  104. }
  105. static uint16_t build_huffman_heap_and_tree(
  106. unsigned char *model_p, unsigned int model_nr, heap_t heap, node_t *nodes,
  107. time_t *t)
  108. {
  109. uint32_t *h = &heap[1];
  110. unsigned int i;
  111. t[0] = time_now();
  112. for (i = 0; i < 256; i++)
  113. h[i] = mk_hent(i, 0);
  114. h[SYM_ESC] = mk_hent(SYM_ESC, 1);
  115. h[SYM_EOS] = mk_hent(SYM_EOS, 1);
  116. t[1] = time_now();
  117. for (i = 0; i < model_nr; i++)
  118. h[(unsigned int)model_p[i]]++;
  119. t[2] = time_now();
  120. if (verbose) {
  121. printf("Frequencies:\n");
  122. for (i = 0; i < 256; i++)
  123. if (hent_count(h[i]))
  124. printf("%03x: %d\n", i, hent_count(h[i]));
  125. printf("\n");
  126. }
  127. build_heap(heap, NR_SYMBOLS);
  128. t[3] = time_now();
  129. return build_huffman_tree(heap, nodes);
  130. }
  131. static char *prefix_str(uint32_t prefix, unsigned int prefix_len)
  132. {
  133. static char s[33];
  134. int i;
  135. s[prefix_len] = '\0';
  136. for (i = prefix_len; i > 0; i--) {
  137. s[i-1] = '0' + (prefix&1);
  138. prefix >>= 1;
  139. }
  140. return s;
  141. }
  142. static void build_huffman_dict(uint16_t root, node_t *nodes, dict_t dict)
  143. {
  144. uint16_t stack[32], node = root;
  145. unsigned int sp = 0, prefix_len = 0;
  146. uint32_t prefix = 0;
  147. memset(dict, 0, NR_SYMBOLS * sizeof(*dict));
  148. for (;;) {
  149. if (node_is_leaf(node)) {
  150. /* Visit leaf */
  151. dict[node] = mk_dent(prefix, prefix_len);
  152. if (verbose)
  153. printf("%03x: %d %s\n", node,
  154. prefix_len, prefix_str(prefix, prefix_len));
  155. /* Visit ancestors until we follow a left-side link. */
  156. do {
  157. if (sp == 0) {
  158. /* Returned to root via right-side link. We're done. */
  159. return;
  160. }
  161. node = stack[--sp];
  162. prefix >>= 1;
  163. prefix_len--;
  164. } while (node == 0);
  165. /* Walk right-side link. Dummy on stack for tracking prefix. */
  166. stack[sp++] = 0;
  167. node = node_right(nodes[node_idx(node)]);
  168. prefix = (prefix<<1)|1;
  169. } else {
  170. /* Walk left-side link. */
  171. stack[sp++] = node;
  172. node = node_left(nodes[node_idx(node)]);
  173. prefix <<= 1;
  174. }
  175. prefix_len++;
  176. }
  177. }
  178. static void build_huffman_lut(uint16_t root, node_t *nodes, lut_t lut)
  179. {
  180. uint16_t stack[32], node = root;
  181. unsigned int sp = 0, prefix_len = 0;
  182. uint32_t prefix = 0;
  183. for (;;) {
  184. if (node_is_leaf(node)) {
  185. /* Visit leaf */
  186. int idx = prefix << (8-prefix_len);
  187. int nr = 1 << (8-prefix_len);
  188. while (nr--)
  189. lut[idx+nr] = mk_lent(node, prefix_len);
  190. up:
  191. /* Visit ancestors until we follow a left-side link. */
  192. do {
  193. if (sp == 0) {
  194. /* Returned to root via right-side link. We're done. */
  195. return;
  196. }
  197. node = stack[--sp];
  198. prefix >>= 1;
  199. prefix_len--;
  200. } while (node == 0);
  201. /* Walk right-side link. Dummy on stack for tracking prefix. */
  202. stack[sp++] = 0;
  203. node = node_right(nodes[node_idx(node)]);
  204. prefix = (prefix<<1)|1;
  205. } else if (prefix_len == 8) {
  206. /* Reached max depth for LUT. */
  207. lut[prefix] = mk_lent(node, prefix_len);
  208. goto up;
  209. } else {
  210. /* Walk left-side link. */
  211. stack[sp++] = node;
  212. node = node_left(nodes[node_idx(node)]);
  213. prefix <<= 1;
  214. }
  215. prefix_len++;
  216. }
  217. }
  218. static int huffman_compress(
  219. struct huffman_state *state,
  220. unsigned char *model_p, unsigned int model_nr,
  221. unsigned char *msg_p, unsigned int msg_nr,
  222. unsigned char *out_p)
  223. {
  224. dent_t dent;
  225. dict_t dict = state->u.dict;
  226. unsigned char *p, *q;
  227. unsigned int i, tot, root, bits;
  228. uint32_t x;
  229. time_t t[10];
  230. /* Verbatim please */
  231. if (model_p == NULL)
  232. goto verbatim;
  233. root = build_huffman_heap_and_tree(
  234. model_p, model_nr, state->u.heap, state->nodes, t);
  235. t[4] = time_now();
  236. build_huffman_dict(root, state->nodes, dict);
  237. t[5] = time_now();
  238. x = bits = 0;
  239. p = out_p + 2;
  240. q = p + msg_nr;
  241. for (i = 0; i < msg_nr; i++) {
  242. unsigned int symbol = msg_p[i];
  243. if (unlikely((dent = dict[symbol]) == 0)) {
  244. dent = dict[SYM_ESC];
  245. x <<= dent_codelen(dent) + 8;
  246. x |= ((uint32_t)dent_code(dent) << 8) | symbol;
  247. bits += dent_codelen(dent) + 8;
  248. } else {
  249. x <<= dent_codelen(dent);
  250. x |= dent_code(dent);
  251. bits += dent_codelen(dent);
  252. }
  253. while (bits >= 8) {
  254. bits -= 8;
  255. *p++ = x >> bits;
  256. }
  257. if (unlikely(p >= q)) goto verbatim;
  258. }
  259. dent = dict[SYM_EOS];
  260. x <<= dent_codelen(dent);
  261. x |= dent_code(dent);
  262. bits += dent_codelen(dent);
  263. while (bits >= 8) {
  264. bits -= 8;
  265. *p++ = x >> bits;
  266. }
  267. if (bits)
  268. *p++ = x << (8 - bits);
  269. tot = p - out_p;
  270. out_p[0] = tot >> 8;
  271. out_p[1] = tot;
  272. if (tot > msg_nr+2) {
  273. verbatim:
  274. tot = msg_nr + 2;
  275. out_p[0] = (tot >> 8) | 0x80;
  276. out_p[1] = tot;
  277. memcpy(&out_p[2], msg_p, msg_nr);
  278. }
  279. t[6] = time_now();
  280. printf("init:%d count:%d heap:%d tree:%d tab:%d codec:%d\n",
  281. (t[1]-t[0])/TIME_MHZ,
  282. (t[2]-t[1])/TIME_MHZ,
  283. (t[3]-t[2])/TIME_MHZ,
  284. (t[4]-t[3])/TIME_MHZ,
  285. (t[5]-t[4])/TIME_MHZ,
  286. (t[6]-t[5])/TIME_MHZ);
  287. return tot;
  288. }
  289. static int huffman_decompress(
  290. struct huffman_state *state,
  291. unsigned char *model_p, unsigned int model_nr,
  292. unsigned char *msg_p, unsigned int msg_nr,
  293. unsigned char *out_p)
  294. {
  295. lut_t lut = state->u.lut;
  296. node_t *nodes = state->nodes;
  297. unsigned char *p = msg_p, *q = out_p;
  298. unsigned int root, bits, node;
  299. uint32_t x;
  300. time_t t[10];
  301. unsigned int j = 0;
  302. root = build_huffman_heap_and_tree(
  303. model_p, model_nr, state->u.heap, nodes, t);
  304. t[4] = time_now();
  305. build_huffman_lut(root, nodes, lut);
  306. t[5] = time_now();
  307. x = bits = 0;
  308. for (;;) {
  309. uint32_t entry;
  310. unsigned int codelen;
  311. while (bits < 24) {
  312. x |= (uint32_t)(*p++) << (24 - bits);
  313. bits += 8;
  314. }
  315. entry = lut[x >> 24];
  316. node = lent_node(entry);
  317. codelen = lent_codelen(entry);
  318. x <<= codelen; bits -= codelen;
  319. if (likely(node < 256))
  320. goto fast_path;
  321. while (likely(node_is_internal(node))) {
  322. entry = nodes[node_idx(node)];
  323. node = (int32_t)x < 0 ? node_right(entry) : node_left(entry);
  324. x <<= 1; bits--;
  325. }
  326. if (likely(node < 256)) {
  327. fast_path:
  328. q[j++&hhh] = node;
  329. continue;
  330. }
  331. switch (node) {
  332. case SYM_EOS:
  333. goto out;
  334. case SYM_ESC:
  335. q[j++&hhh] = x >> 24;
  336. x <<= 8; bits -= 8;
  337. break;
  338. }
  339. }
  340. out:
  341. t[6] = time_now();
  342. printf("init:%d count:%d heap:%d tree:%d tab:%d codec:%d %d\n",
  343. (t[1]-t[0])/TIME_MHZ,
  344. (t[2]-t[1])/TIME_MHZ,
  345. (t[3]-t[2])/TIME_MHZ,
  346. (t[4]-t[3])/TIME_MHZ,
  347. (t[5]-t[4])/TIME_MHZ,
  348. (t[6]-t[5])/TIME_MHZ);
  349. return q - out_p;
  350. }
  351. void test_huffman(void)
  352. {
  353. #define NR 4000
  354. unsigned char *p;
  355. int header, nr;
  356. time_t t;
  357. int i,j;
  358. unsigned char *q = &buffer[4000];
  359. for (i = 0; i < 256; i++)
  360. *q++ = i;
  361. for (j = 0; j < 2048; j++)
  362. *q++ = _stext[j+1024];
  363. q = &buffer[4000];
  364. /* COMPRESS */
  365. t = time_now();
  366. nr = huffman_compress(&huffman_state,
  367. q, NR, (unsigned char *)_stext+1204, NR,
  368. &buffer[0]);
  369. t = time_now()-t;
  370. printf("FINAL: %d bytes "
  371. "Original = %d bytes %d cy, %d us\n",
  372. nr, NR, t, t/TIME_MHZ);
  373. /* DECOMPRESS */
  374. t = time_now();
  375. p = buffer;
  376. header = (p[0] << 8) | p[1];
  377. if (header & (1u<<15)) {
  378. /* verbatim */
  379. header &= 0x7fff;
  380. nr = header - 2;
  381. printk("Verbatim %d\n", nr);
  382. } else {
  383. /* compressed */
  384. nr = huffman_decompress(&huffman_state,
  385. q, NR,
  386. p+2, header-2, &buffer[header]);
  387. }
  388. t = time_now()-t;
  389. printf("%d cy, %d us\n", t, t/TIME_MHZ);
  390. }
  391. /*
  392. * Local variables:
  393. * mode: C
  394. * c-file-style: "Linux"
  395. * c-basic-offset: 4
  396. * tab-width: 4
  397. * indent-tabs-mode: nil
  398. * End:
  399. */