Curiouspaul1 · June 29, 2025 02:46 · Jun 29, 2025 · Jun 29, 2025 · Jun 29, 2025 · Jun 29, 2025
diff --git a/test.txt b/test.txt
@@ -0,0 +1 @@
+Lorem ipsum dolor sit amet consectetur adipisicing elit. Fugiat excepturi qui aperiam esse laboriosam laborum quidem rem delectus optio debitis suscipit laudantium, maxime nostrum? Enim blanditiis corporis, eligendi repudiandae iusto eius magni veniam ut expedita nobis doloremque recusandae cum eaque eveniet autem consequuntur neque eum, repellat, ipsam asperiores. Exercitationem, sed cum ipsum ad eveniet perferendis, fuga dolorem eligendi, excepturi similique architecto. Alias, totam repellat temporibus officia aperiam iusto quibusdam harum, soluta sed vero maiores. Corporis optio hic libero molestias iure sed, incidunt architecto deleniti, dignissimos aliquid, totam molestiae animi placeat rem. Esse perspiciatis eligendi adipisci laboriosam enim ab sapiente consequuntur!
diff --git a/sha_256.py b/sha_256.py
@@ -252,6 +252,7 @@ def compression(chunk_idx):
         working_variables[i] = mod_add(W_VARS[i], working_variables[i])
 
 
+# adjust as need (determined by how many shards you've got)
 generate_expansions(13)  # called for however many shards you have, my test file had 13 shards
 
 # run through all message expansions for each shard,

diff --git a/sha_256.py b/sha_256.py
@@ -66,6 +66,8 @@ def shr(num, shift_amount, bit_len=4):
 def split_file(shard_size):
     # splits input file into chunks/shards of
     # specified 'shard_size' for SHA-256 its 64B
+    # PS: only needs to be called once separately
+    # once you have your shards you're good.
     global last_shard
     global chunk_num
     global INPUT_FILE_SIZE
@@ -250,14 +252,18 @@ def compression(chunk_idx):
         working_variables[i] = mod_add(W_VARS[i], working_variables[i])
 
 
-generate_expansions(13)
+generate_expansions(13)  # called for however many shards you have, my test file had 13 shards
 
+# run through all message expansions for each shard,
+# and pass each one through the 64 cycles of compression step
 for idx, chunk in EXPANSIONS.items():
     compression(idx)
 
+# compute your final hash
 final_hash = b''
 for b in working_variables:
     final_hash += b.to_bytes(4)
 final_hash = final_hash.hex()
 
+# show results!
 print(final_hash)
diff --git a/gistfile1.txt → sha_256.py b/gistfile1.txt → sha_256.py
@@ -64,6 +64,8 @@ def shr(num, shift_amount, bit_len=4):
 
 
 def split_file(shard_size):
+    # splits input file into chunks/shards of
+    # specified 'shard_size' for SHA-256 its 64B
     global last_shard
     global chunk_num
     global INPUT_FILE_SIZE

diff --git a/gistfile1.txt b/gistfile1.txt
@@ -1,11 +1,11 @@
+import os
 import math
 import pprint as pp
 
 chunk_num = 1
 last_shard = None
 EXPANSIONS = {}
-
-## WORKING VARIABLES ##
+INPUT_FILE_SIZE = 0
 
 working_variables = [
     0x6a09e667,
@@ -18,23 +18,6 @@ working_variables = [
     0x5be0cd19
 ]
 
-# H0 (a)
-IV_H0 = working_variables[0]
-# H1 (b)
-IV_H1 = working_variables[1]
-# H2 (c)
-IV_H2 = working_variables[2]
-# H3 (d)
-IV_H3 = working_variables[3]
-# H4 (e)
-IV_H4 = working_variables[4]
-# H5 (f)
-IV_H5 = working_variables[5]
-# H6 (g)
-IV_H6 = working_variables[6]
-# H7 (h)
-IV_H7 = working_variables[7]
-
 
 def prime_sieve(x, y):
     state = [True for _ in range(y+1)]
@@ -83,8 +66,10 @@ def shr(num, shift_amount, bit_len=4):
 def split_file(shard_size):
     global last_shard
     global chunk_num
+    global INPUT_FILE_SIZE
 
     with open('test.txt', 'rb') as file:
+        INPUT_FILE_SIZE = os.path.getsize(file)
         while True:
             chunk = file.read(shard_size)
             if not chunk: break
@@ -112,9 +97,9 @@ def pad_shard(shard):
     current_word += zeros_to_add
     print(current_word)
 
-    # append original shard length to reminaing 64-bit slot
+    # append original input message length to reminaing 64-bit slot
     # big-Endian order
-    shard_len_in_byte = len(shard).to_bytes(8, 'big')
+    shard_len_in_byte = INPUT_FILE_SIZE.to_bytes(8, 'big')
     current_word += shard_len_in_byte
     print(current_word)
 
@@ -180,6 +165,11 @@ def message_expand(idx, message_expansions):
 
 
 def generate_expansions(no_of_chunks):
+    """
+    expands each chunk into 64 32-bit words, and stores the byte
+    data in the EXPANSIONS map with key matching idx of the current
+    chunk.
+    """
     global EXPANSIONS
     for n in range(1, no_of_chunks+1):
         _exp = split_block_into_init_expansions(n)
@@ -193,17 +183,38 @@ def mod_add(x, y, mod=32):
     return (x + y) % MOD
 
 
-def update_variables(var, slots):
-    for idx, val in slots:
-        var[idx] = val
-
-    for i in range(len(var)-1, -1, -1):
-        var[i] = var[i-1]
+def update_variables(var, T1, T2):
+    prev = var[0]
+    for i in range(1, len(var)):
+        tmp = var[i]
+        var[i] = prev
+        prev = tmp
+    var[0] = mod_add(T1, T2)
+    var[4] = mod_add(var[4], T1)
 
 
 def compression(chunk_idx):
     _expanse = EXPANSIONS[chunk_idx]
     W_VARS = working_variables.copy()
+
+    ## WORKING VARIABLES ##
+    # H0 (a)
+    IV_H0 = W_VARS[0]
+    # H1 (b)
+    IV_H1 = W_VARS[1]
+    # H2 (c)
+    IV_H2 = W_VARS[2]
+    # H3 (d)
+    IV_H3 = W_VARS[3]
+    # H4 (e)
+    IV_H4 = W_VARS[4]
+    # H5 (f)
+    IV_H5 = W_VARS[5]
+    # H6 (g)
+    IV_H6 = W_VARS[6]
+    # H7 (h)
+    IV_H7 = W_VARS[7]
+
     kt_gen = round_kt(64)
     MOD = 2**32
     for idx, wt in _expanse.items():
@@ -219,34 +230,32 @@ def compression(chunk_idx):
         T_2 = mod_add(_sum_0, _maj)
 
         # update variables
-        update_variables(W_VARS, [(4, T_1), (0, mod_add(T_1, T_2))])
+        # print("before update", [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7])
+        IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7 = \
+            (T_1 + T_2) % MOD, \
+            IV_H0, \
+            IV_H1, \
+            IV_H2, \
+            (IV_H3 + T_1) % MOD, \
+            IV_H4, \
+            IV_H5, \
+            IV_H6
+        # print("after update", [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7])
 
     # update current state for next chunk compression
+    W_VARS = [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7]
     for i in range(len(W_VARS)):
         working_variables[i] = mod_add(W_VARS[i], working_variables[i])
 
 
+generate_expansions(13)
+
 for idx, chunk in EXPANSIONS.items():
     compression(idx)
 
-# generate_expansions(13)
-# print("======BEFORE======")
-# print(working_variables)
-# compression(1)
-# print("======AFTER======")
-# print(working_variables)
 final_hash = b''
 for b in working_variables:
     final_hash += b.to_bytes(4)
 final_hash = final_hash.hex()
 
 print(final_hash)
-# def rotate(num, no_of_rotations, bit_len=4):
-#     x = num
-#     for _ in range(no_of_rotations):
-#         rmb = x & 1
-#         x >>= 1
-#         if rmb:
-#             print(x, 2**(bit_len-1))
-#             x += 2**(bit_len-1)
-#     return x
diff --git a/gistfile1.txt b/gistfile1.txt
@@ -7,32 +7,33 @@ EXPANSIONS = {}
 
 ## WORKING VARIABLES ##
 
+working_variables = [
+    0x6a09e667,
+    0xbb67ae85,
+    0x3c6ef372,
+    0xa54ff53a,
+    0x510e527f,
+    0x9b05688c,
+    0x1f83d9ab,
+    0x5be0cd19
+]
+
 # H0 (a)
-IV_H0 = 0x6a09e667
+IV_H0 = working_variables[0]
 # H1 (b)
-IV_H1 = 0xbb67ae85
+IV_H1 = working_variables[1]
 # H2 (c)
-IV_H2 = 0x3c6ef372
+IV_H2 = working_variables[2]
 # H3 (d)
-IV_H3 = 0xa54ff53a
+IV_H3 = working_variables[3]
 # H4 (e)
-IV_H4 = 0x510e527f
+IV_H4 = working_variables[4]
 # H5 (f)
-IV_H5 = 0x9b05688c
+IV_H5 = working_variables[5]
 # H6 (g)
-IV_H6 = 0x1f83d9ab
+IV_H6 = working_variables[6]
 # H7 (h)
-IV_H7 = 0x5be0cd19
-working_variables = [
-    IV_H0,
-    IV_H1,
-    IV_H2,
-    IV_H3,
-    IV_H4,
-    IV_H5,
-    IV_H6,
-    IV_H7
-]
+IV_H7 = working_variables[7]
 
 
 def prime_sieve(x, y):
@@ -58,6 +59,7 @@ def round_kt(x):
         cube_root = math.cbrt(PRIMES[pos])
         frac = (cube_root % 1) * MULTIP
         val = math.floor(frac)
+        pos += 1
         yield val
 
 
@@ -204,12 +206,12 @@ def compression(chunk_idx):
     W_VARS = working_variables.copy()
     kt_gen = round_kt(64)
     MOD = 2**32
-    for wt in _expanse:
+    for idx, wt in _expanse.items():
         # avalanche mixing
         _sum_1 = e_1(IV_H4)
         _sum_0 = e_0(IV_H0)
         _ch = ch(
-            (IV_H4, IV_H5, IV_H6)
+            IV_H4, IV_H5, IV_H6
         )
         _kt = next(kt_gen)
         _maj = maj(IV_H0, IV_H1, IV_H2)
@@ -218,20 +220,27 @@ def compression(chunk_idx):
 
         # update variables
         update_variables(W_VARS, [(4, T_1), (0, mod_add(T_1, T_2))])
-    pass
 
+    # update current state for next chunk compression
+    for i in range(len(W_VARS)):
+        working_variables[i] = mod_add(W_VARS[i], working_variables[i])
 
-# expand_map = split_block_into_init_expansions(1)
-# print(expand_map)
 
-# print("EXPANSION FOR 16th word:: ")
-# message_expand(16, expand_map)
-# print(expand_map)
+for idx, chunk in EXPANSIONS.items():
+    compression(idx)
 
 # generate_expansions(13)
-# pp.pprint(EXPANSIONS)
-
-
+# print("======BEFORE======")
+# print(working_variables)
+# compression(1)
+# print("======AFTER======")
+# print(working_variables)
+final_hash = b''
+for b in working_variables:
+    final_hash += b.to_bytes(4)
+final_hash = final_hash.hex()
+
+print(final_hash)
 # def rotate(num, no_of_rotations, bit_len=4):
 #     x = num
 #     for _ in range(no_of_rotations):
@@ -240,4 +249,4 @@ def compression(chunk_idx):
 #         if rmb:
 #             print(x, 2**(bit_len-1))
 #             x += 2**(bit_len-1)
-#     return x
+#     return x
diff --git a/gistfile1.txt b/gistfile1.txt
@@ -0,0 +1,243 @@
+import math
+import pprint as pp
+
+chunk_num = 1
+last_shard = None
+EXPANSIONS = {}
+
+## WORKING VARIABLES ##
+
+# H0 (a)
+IV_H0 = 0x6a09e667
+# H1 (b)
+IV_H1 = 0xbb67ae85
+# H2 (c)
+IV_H2 = 0x3c6ef372
+# H3 (d)
+IV_H3 = 0xa54ff53a
+# H4 (e)
+IV_H4 = 0x510e527f
+# H5 (f)
+IV_H5 = 0x9b05688c
+# H6 (g)
+IV_H6 = 0x1f83d9ab
+# H7 (h)
+IV_H7 = 0x5be0cd19
+working_variables = [
+    IV_H0,
+    IV_H1,
+    IV_H2,
+    IV_H3,
+    IV_H4,
+    IV_H5,
+    IV_H6,
+    IV_H7
+]
+
+
+def prime_sieve(x, y):
+    state = [True for _ in range(y+1)]
+    state[2] = True
+
+    for i in range(2, len(state)):
+        if state[i]:
+            for j in range(i+i, len(state), i):
+                state[j] = False
+
+    return [p for p in range(1, len(state)) if state[p] and p > 1]
+
+
+PRIMES = prime_sieve(1, 312)  # first 64 lie in this range
+
+
+def round_kt(x):
+    # use seive of erastothenes
+    pos = 0
+    MULTIP = 2**32
+    while pos < x:
+        cube_root = math.cbrt(PRIMES[pos])
+        frac = (cube_root % 1) * MULTIP
+        val = math.floor(frac)
+        yield val
+
+
+def rotr(num, no_of_rotations, bit_len=4):
+    shift_amnt = no_of_rotations % bit_len
+    mask = (1 << shift_amnt) - 1
+    right_bits = num & mask
+    right_bits_on_left = right_bits << (bit_len - shift_amnt)
+    shift_rht = num >> shift_amnt
+    return right_bits_on_left | shift_rht
+
+
+def shr(num, shift_amount, bit_len=4):
+    if bit_len < shift_amount:
+        res = num >> bit_len
+    else:
+        res = num >> shift_amount
+    return res
+
+
+def split_file(shard_size):
+    global last_shard
+    global chunk_num
+
+    with open('test.txt', 'rb') as file:
+        while True:
+            chunk = file.read(shard_size)
+            if not chunk: break
+
+            print(f"Chunk size is {len(chunk)}")
+
+            chunk_op_path = f"chunk_{chunk_num}"
+            with open(chunk_op_path, 'wb') as chunk_file:
+                chunk_file.write(chunk)
+            chunk_num += 1
+            last_shard = chunk
+
+
+def pad_shard(shard):
+    # append '1' to end of the last shard
+    # '1' is appended as 1 byte since that's the smallest
+    #  addressible unit in modern computers
+    current_word = shard + b'\x80'
+    current_word_len = len(current_word) * 8
+    # pad with zeros up until you have 64bit slots left in a 512-bit block
+    len_to_add = (448 - (current_word_len % 512)) % 512
+    len_to_add //= 8  # convert to byte length
+
+    zeros_to_add = b'\x00' * len_to_add
+    current_word += zeros_to_add
+    print(current_word)
+
+    # append original shard length to reminaing 64-bit slot
+    # big-Endian order
+    shard_len_in_byte = len(shard).to_bytes(8, 'big')
+    current_word += shard_len_in_byte
+    print(current_word)
+
+    chunk_op_path = f"chunk_{chunk_num-1}"
+    with open(chunk_op_path, 'wb') as chunk_file:
+        chunk_file.write(current_word)
+
+    return current_word
+
+
+def sigma_1(x):
+    res = rotr(x, 17, bit_len=32) ^ rotr(x, 19, bit_len=32) ^ shr(x, 10, bit_len=32)
+    return res
+
+
+def sigma_0(x):
+    res = rotr(x, 7, bit_len=32) ^ rotr(x, 18, bit_len=32) ^ shr(x, 3, bit_len=32)
+    return res
+
+
+def maj(x, y, z):
+    return (x & y) ^ (x & z) ^ (y & z)
+
+
+def ch(x, y, z):
+    return (x & y) ^ (~x & z)
+
+
+def e_0(x):
+    res = rotr(x, 2, bit_len=32) ^ rotr(x, 13, bit_len=32) ^ rotr(x, 22, bit_len=32)
+    return res
+
+
+def e_1(x):
+    res = rotr(x, 6, bit_len=32) ^ rotr(x, 11, bit_len=32) ^ rotr(x, 25, bit_len=32)
+    return res
+
+
+def split_block_into_init_expansions(shard_idx):
+    _expansions = {}
+    pos = 0
+    with open(f"chunk_{shard_idx}", 'rb') as _file:
+        while True:
+            _chunk = _file.read(4) # get 32-bit (or 4 byte chunk)
+            if not _chunk: break
+
+            _expansions[pos] = _chunk
+            pos += 1
+    return _expansions
+
+
+def message_expand(idx, message_expansions):
+    MOD = 2**32
+    w_7 = int.from_bytes(message_expansions[idx-7])
+    w_2 = int.from_bytes(message_expansions[idx-2])
+    w_15 = int.from_bytes(message_expansions[idx-15])
+    w_16 = int.from_bytes(message_expansions[idx-16])
+
+    res: int = (sigma_1(w_2) + w_7) % MOD
+    res += (sigma_0(w_15) + w_16) % MOD
+    res %= MOD
+    message_expansions[idx] = res.to_bytes(length=4)
+
+
+def generate_expansions(no_of_chunks):
+    global EXPANSIONS
+    for n in range(1, no_of_chunks+1):
+        _exp = split_block_into_init_expansions(n)
+        for i in range(16, 64):
+            message_expand(i, _exp)
+        EXPANSIONS[n] = _exp
+
+
+def mod_add(x, y, mod=32):
+    MOD = 2**mod
+    return (x + y) % MOD
+
+
+def update_variables(var, slots):
+    for idx, val in slots:
+        var[idx] = val
+
+    for i in range(len(var)-1, -1, -1):
+        var[i] = var[i-1]
+
+
+def compression(chunk_idx):
+    _expanse = EXPANSIONS[chunk_idx]
+    W_VARS = working_variables.copy()
+    kt_gen = round_kt(64)
+    MOD = 2**32
+    for wt in _expanse:
+        # avalanche mixing
+        _sum_1 = e_1(IV_H4)
+        _sum_0 = e_0(IV_H0)
+        _ch = ch(
+            (IV_H4, IV_H5, IV_H6)
+        )
+        _kt = next(kt_gen)
+        _maj = maj(IV_H0, IV_H1, IV_H2)
+        T_1 = (mod_add(IV_H7, _sum_1) + mod_add(_ch, _kt) + int.from_bytes(wt)) % MOD
+        T_2 = mod_add(_sum_0, _maj)
+
+        # update variables
+        update_variables(W_VARS, [(4, T_1), (0, mod_add(T_1, T_2))])
+    pass
+
+
+# expand_map = split_block_into_init_expansions(1)
+# print(expand_map)
+
+# print("EXPANSION FOR 16th word:: ")
+# message_expand(16, expand_map)
+# print(expand_map)
+
+# generate_expansions(13)
+# pp.pprint(EXPANSIONS)
+
+
+# def rotate(num, no_of_rotations, bit_len=4):
+#     x = num
+#     for _ in range(no_of_rotations):
+#         rmb = x & 1
+#         x >>= 1
+#         if rmb:
+#             print(x, 2**(bit_len-1))
+#             x += 2**(bit_len-1)
+#     return x
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Lorem ipsum dolor sit amet consectetur adipisicing elit. Fugiat excepturi qui aperiam esse laboriosam laborum quidem rem delectus optio debitis suscipit laudantium, maxime nostrum? Enim blanditiis corporis, eligendi repudiandae iusto eius magni veniam ut expedita nobis doloremque recusandae cum eaque eveniet autem consequuntur neque eum, repellat, ipsam asperiores. Exercitationem, sed cum ipsum ad eveniet perferendis, fuga dolorem eligendi, excepturi similique architecto. Alias, totam repellat temporibus officia aperiam iusto quibusdam harum, soluta sed vero maiores. Corporis optio hic libero molestias iure sed, incidunt architecto deleniti, dignissimos aliquid, totam molestiae animi placeat rem. Esse perspiciatis eligendi adipisci laboriosam enim ab sapiente consequuntur!