{ "cells": [ { "cell_type": "markdown", "id": "589fb75f-e6c1-4e1e-94f5-eb0e03e2c537", "metadata": {}, "source": [ "# Defining CRC32" ] }, { "cell_type": "code", "execution_count": 1, "id": "d2d16187-054c-4f5d-a5d9-81df84cff1a7", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:56:13.705489Z", "iopub.status.busy": "2024-05-07T11:56:13.704415Z", "iopub.status.idle": "2024-05-07T11:56:13.719273Z", "shell.execute_reply": "2024-05-07T11:56:13.716939Z", "shell.execute_reply.started": "2024-05-07T11:56:13.705354Z" } }, "outputs": [], "source": [ "POLY = 0xEDB8_8320\n", "# ONE = 0x8000_0000 # 1\n", "# TWO = 0x4000_0000 # x\n", "# THREE = 0xC000_0000 # x + 1\n", "# THREE_INV = 0x492f023f # power(0xc000_0000, 2**32-2)\n", "MAGIC = 0xB6D0_FDC0 # mul(POLY, THREE_INV) = x^32 / (x+1)\n", "\n", "def mul_x(a):\n", " # LSB\n", " if a & 1:\n", " a >>= 1\n", " a ^= POLY\n", " else:\n", " a >>= 1\n", " return a\n", "\n", "\n", "def mul(a, b):\n", " res = 0\n", " while b:\n", " if b & 0x8000_0000:\n", " res ^= a\n", " b ^= 0x8000_0000\n", " a = mul_x(a)\n", " b <<= 1\n", " return res\n", "\n", "\n", "def power(a, e):\n", " # square and multiply\n", " res = 0x8000_0000 # one\n", " while e:\n", " if e & 1:\n", " res = mul(res, a)\n", " e >>= 1\n", " a = mul(a, a)\n", " return res \n", " \n", "\n", "def mycrc32(msg):\n", " s = 0xffff_ffff\n", " for byte in msg:\n", " for bit in f\"{byte:08b}\"[::-1]:\n", " # bit plays the role of x^31 since it's in LSB\n", " s = mul_x(s ^ int(bit))\n", " s ^= 0xffff_ffff\n", " return s\n", "\n", "\n", "def extend_zeros(crc, n): # n in bits\n", " crc ^= 0xffff_ffff\n", " crc = mul(crc, power(0x4000_0000, n))\n", " crc ^= 0xffff_ffff\n", " return crc\n", "\n", "\n", "def extend_ones(crc, n): # n in bits\n", " \"\"\"\n", " We are repeating the step\n", "\n", " > c = (c + x^31) * x = c*x + x^32\n", "\n", " `n` times, getting\n", "\n", " c = c0 * x^n + x^32 * (x^(n-1) + x^(n-2) + ... + x^2 + x + 1)\n", " = c0 * x^n + x^32 * (x^n - 1) / (x - 1)\n", "\n", " Letting\n", " \n", " MAGIC = x^32 / (x-1) = POLY / (x-1) = POLY * (x-1)^(2^32-2)\n", "\n", " We get\n", "\n", " c = c0 * x^n + MAGIC * (x^n-1)\n", " = x^n * (c0 + MAGIC) - MAGIC\n", "\n", " This also means that adding MAGIC before and after digesting some text\n", " corresponds to flipping all the bits in the text.\n", " \"\"\"\n", " crc ^= 0xffff_ffff\n", " crc = mul(crc ^ MAGIC, power(0x4000_0000, n)) ^ MAGIC\n", " crc ^= 0xffff_ffff\n", " return crc" ] }, { "cell_type": "code", "execution_count": 2, "id": "c4b3acee-feac-4299-8b90-3b1d7d926968", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:56:13.730593Z", "iopub.status.busy": "2024-05-07T11:56:13.727921Z", "iopub.status.idle": "2024-05-07T11:56:13.749154Z", "shell.execute_reply": "2024-05-07T11:56:13.747708Z", "shell.execute_reply.started": "2024-05-07T11:56:13.730496Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " zlib my | msg\n", "00000000 00000000 | b''\n", "d202ef8d d202ef8d | b'\\x00'\n", "a505df1b a505df1b | b'\\x01'\n", "2144df1c 2144df1c | b'\\x00\\x00\\x00\\x00'\n", "ed82cd11 ed82cd11 | b'abcd'\n", "8587d865 8587d865 | b'abcde'\n" ] } ], "source": [ "import zlib\n", "\n", "print(\" zlib my | msg\")\n", "for m in [b\"\", b\"\\x00\", b\"\\x01\", b\"\\x00\"*4, b\"abcd\", b\"abcde\"]:\n", " print(\"%08x %08x\" % (zlib.crc32(m), mycrc32(m)), \"|\", m)\n", " assert zlib.crc32(m) == mycrc32(m)" ] }, { "cell_type": "markdown", "id": "7935cfe6-721e-4623-b6ac-61ce26fdb77f", "metadata": {}, "source": [ "# Test extend with zeros" ] }, { "cell_type": "code", "execution_count": 3, "id": "efe1b9b7-71de-46ee-ac60-778d5a6992c3", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:56:13.752903Z", "iopub.status.busy": "2024-05-07T11:56:13.751733Z", "iopub.status.idle": "2024-05-07T11:56:13.781665Z", "shell.execute_reply": "2024-05-07T11:56:13.771365Z", "shell.execute_reply.started": "2024-05-07T11:56:13.752340Z" } }, "outputs": [], "source": [ "import random, os\n", "for _ in range(100):\n", " a = random.randrange(1000)\n", " b = random.randrange(1000)\n", " s1 = os.urandom(a)\n", " c = zlib.crc32(s1)\n", " c1 = zlib.crc32(s1 + b\"\\x00\" * b)\n", " c2 = extend_zeros(c, b*8)\n", " assert c1 == c2" ] }, { "cell_type": "markdown", "id": "f464e1d2-a140-45ff-a436-b2681f609f65", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:12:36.359239Z", "iopub.status.busy": "2024-05-07T11:12:36.357896Z", "iopub.status.idle": "2024-05-07T11:12:36.369277Z", "shell.execute_reply": "2024-05-07T11:12:36.366900Z", "shell.execute_reply.started": "2024-05-07T11:12:36.359096Z" } }, "source": [ "# Test extend with ones" ] }, { "cell_type": "code", "execution_count": 4, "id": "33469630-2d7f-4f3e-b867-97cdd250fe6a", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:56:13.787432Z", "iopub.status.busy": "2024-05-07T11:56:13.786199Z", "iopub.status.idle": "2024-05-07T11:56:13.824654Z", "shell.execute_reply": "2024-05-07T11:56:13.822975Z", "shell.execute_reply.started": "2024-05-07T11:56:13.787261Z" } }, "outputs": [], "source": [ "import random, os\n", "for _ in range(100):\n", " a = random.randrange(1000)\n", " b = random.randrange(1000)\n", " s1 = os.urandom(a)\n", " c = zlib.crc32(s1)\n", " c1 = zlib.crc32(s1 + b\"\\xff\" * b)\n", " c2 = extend_ones(c, b*8)\n", " assert c1 == c2 " ] }, { "cell_type": "markdown", "id": "7350084c-6ea1-40e6-aae0-d6f92664a7d2", "metadata": {}, "source": [ "# Test flip" ] }, { "cell_type": "code", "execution_count": 5, "id": "8f2c5a10-934b-4e1d-bc06-2097b8069ab5", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:56:13.830373Z", "iopub.status.busy": "2024-05-07T11:56:13.828268Z", "iopub.status.idle": "2024-05-07T11:56:13.854759Z", "shell.execute_reply": "2024-05-07T11:56:13.852262Z", "shell.execute_reply.started": "2024-05-07T11:56:13.830142Z" } }, "outputs": [], "source": [ "import random, os\n", "for _ in range(100):\n", " a = random.randrange(1000)\n", " s1 = os.urandom(a)\n", " c = zlib.crc32(s1)\n", " c1 = zlib.crc32(bytes(0xff^byte for byte in s1))\n", " c2 = c ^ mul(MAGIC, power(0x4000_0000, 8*a) ^ 0x8000_0000) # MAGIC * (x^n + 1)\n", " assert c1 == c2 " ] }, { "cell_type": "markdown", "id": "ccf597cc-ec3f-4f7d-a37d-546dd4cf5884", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:51:10.326870Z", "iopub.status.busy": "2024-05-07T11:51:10.326168Z", "iopub.status.idle": "2024-05-07T11:51:10.334345Z", "shell.execute_reply": "2024-05-07T11:51:10.332169Z", "shell.execute_reply.started": "2024-05-07T11:51:10.326792Z" } }, "source": [ "# Timings" ] }, { "cell_type": "code", "execution_count": 12, "id": "07141352-c856-4a2e-afb3-0fe0ba5e7d40", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:56:40.302266Z", "iopub.status.busy": "2024-05-07T11:56:40.299987Z", "iopub.status.idle": "2024-05-07T11:56:40.320996Z", "shell.execute_reply": "2024-05-07T11:56:40.318714Z", "shell.execute_reply.started": "2024-05-07T11:56:40.301942Z" } }, "outputs": [ { "data": { "text/plain": [ "'3.10.13 (f1607341da97ff5a1e93430b6e8c4af0ad1aa019, Sep 28 2023, 05:41:26)\\n[PyPy 7.3.13 with GCC 10.2.1 20210130 (Red Hat 10.2.1-11)]'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sys\n", "sys.version" ] }, { "cell_type": "code", "execution_count": 6, "id": "b525bd2b-fbb1-4ca1-89cd-4d0b50361419", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:56:13.860035Z", "iopub.status.busy": "2024-05-07T11:56:13.858262Z", "iopub.status.idle": "2024-05-07T11:56:16.169806Z", "shell.execute_reply": "2024-05-07T11:56:16.167463Z", "shell.execute_reply.started": "2024-05-07T11:56:13.859233Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.81 µs ± 62.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" ] } ], "source": [ "# 0.5 GB = 2^29 bytes\n", "%timeit extend_zeros(0xabcd1234, 2**32) " ] }, { "cell_type": "code", "execution_count": 7, "id": "e2ac7d50-f0c6-484a-8467-4366af970bb5", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:56:16.176654Z", "iopub.status.busy": "2024-05-07T11:56:16.175935Z", "iopub.status.idle": "2024-05-07T11:56:18.996801Z", "shell.execute_reply": "2024-05-07T11:56:18.995091Z", "shell.execute_reply.started": "2024-05-07T11:56:16.176554Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.52 µs ± 603 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" ] } ], "source": [ "# 0.5 GB = 2^29 bytes\n", "%timeit extend_ones(0xabcd1234, 2**32)" ] }, { "cell_type": "code", "execution_count": 8, "id": "56dd220c-7689-4b6f-be61-5c07b9c336f9", "metadata": { "execution": { "iopub.execute_input": "2024-05-07T11:56:19.001380Z", "iopub.status.busy": "2024-05-07T11:56:18.999161Z", "iopub.status.idle": "2024-05-07T11:56:24.205812Z", "shell.execute_reply": "2024-05-07T11:56:24.204084Z", "shell.execute_reply.started": "2024-05-07T11:56:19.001045Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "63.2 µs ± 3.29 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" ] } ], "source": [ "%timeit extend_zeros(0xabcd1234, 2**512)" ] } ], "metadata": { "kernelspec": { "display_name": "PyPy3", "language": "python", "name": "pypy3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }