md5-nohashlib.py with syntax coloring

This HTML file was generated with Kalle's syntaxcolor.py


   1"""
   2Calculates the MD5 hash of a string without hashlib etc.
   3By Kalle (http://qalle.net)
   4"""
   5
   6import sys
   7import math
   8import struct
   9
  10# initial state of algorithm
  11MD5_INITIAL_STATE = (0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476)
  12
  13# constants for the algorithm
  14MD5_SINE_TABLE = tuple(
  15    math.floor(abs(math.sin(i + 1)) * (2 ** 32))
  16    for i in range(64)
  17)
  18
  19ROTATE_AMOUNTS = (
  20    4 * (7, 12, 17, 22) +
  21    4 * (5, 9, 14, 20) +
  22    4 * (4, 11, 16, 23) +
  23    4 * (6, 10, 15, 21)
  24)
  25
  26CHUNK_PART_INDICES = tuple(
  27    list(range(16)) +
  28    list((5 * i + 1) % 16 for i in range(16)) +
  29    list((3 * i + 5) % 16 for i in range(16)) +
  30    list(7 * i % 16 for i in range(16))
  31)
  32
  33PAD_BYTE = b"\x00"
  34
  35HELP_TEXT = """\
  36Calculates the MD5 hash of a string without hashlib etc.
  37Argument: string (only 7-bit ASCII characters are allowed)\
  38"""
  39
  40def MD5_pad_message(message):
  41    """Prepares the message for the main calculation."""
  42
  43    # remember original length in bytes
  44    originalLength = len(message)
  45
  46    # add one "1" bit (and seven "0" bits)
  47    message += b"\x80"
  48
  49    # add 0-63 pad bytes so that
  50    # (length in bits) modulo 512 = 448
  51    # or
  52    # (length in bytes) modulo 64 = 56
  53    message += (56 - len(message) % 64) % 64 * PAD_BYTE
  54
  55    # add (original length in bits) modulo (2 ** 64)
  56    # 64 bits or 8 bytes, little-endian
  57    originalLengthInBits = (originalLength * 8) & 0xffffffffffffffff
  58    message += struct.pack("<Q", originalLengthInBits)
  59
  60    return message
  61
  62def rotate_left(number, amount):
  63    """
  64    Rotate number left using unsigned 32-bit integer arithmetic.
  65    Assumes input fits in 32 bits.
  66    """
  67
  68    return ((number << amount) & 0xffffffff) | (number >> (32 - amount))
  69
  70def MD5_hash_chunk(state, chunk):
  71    """
  72    Hash one 64-byte chunk.
  73    Args:
  74        state: current state (tuple of four 32-bit ints)
  75        chunk: chunk to hash (tuple of sixteen 32-bit ints)
  76    Returns: values to change the state with (tuple of four 32-bit ints).
  77    """
  78
  79    # extract state to integers
  80    (S0, S1, S2, S3) = state
  81
  82    # change state 64 times
  83    for i in range(64):
  84        if i < 16:
  85            bits = (S1 & S2) | (~S1 & S3)
  86        elif i < 32:
  87            bits = (S3 & S1) | (~S3 & S2)
  88        elif i < 48:
  89            bits = S1 ^ S2 ^ S3
  90        else:
  91            bits = S2 ^ (S1 | ~S3)
  92
  93        chunkPart = chunk[CHUNK_PART_INDICES[i]]
  94        sum_ = (S0 + MD5_SINE_TABLE[i] + bits + chunkPart) & 0xffffffff
  95        rotateAmount = ROTATE_AMOUNTS[i]
  96        (S0, S1, S2, S3) = \
  97        (S3, (S1 + rotate_left(sum_, rotateAmount)) & 0xffffffff, S1, S2)
  98
  99    return (S0, S1, S2, S3)
 100
 101def MD5_hash(message):
 102    """
 103    Calculates the MD5 hash of a message.
 104    Argument: message as bytes.
 105    Returns: hash as a hexadecimal string.
 106    """
 107
 108    # initialize algorithm state (four 32-bit ints)
 109    state = MD5_INITIAL_STATE
 110
 111    # prepare message
 112    message = MD5_pad_message(message)
 113
 114    # read message 64 bytes at a time and interpret each as sixteen 32-bit ints
 115    for chunk in struct.iter_unpack("<16I", message):
 116        change = MD5_hash_chunk(state, chunk)
 117        state = tuple((state[i] + change[i]) & 0xffffffff for i in range(4))
 118
 119    # the algorithm's final state is the hash; convert to bytes; each int is
 120    # encoded in little-endian order
 121    binaryState = b"".join(struct.pack("<I", number) for number in state)
 122
 123    # convert hash to hexadecimal
 124    return "".join(format(byte, "02x") for byte in binaryState)
 125
 126def main():
 127    if len(sys.argv) != 2:
 128        exit(HELP_TEXT)
 129
 130    message = sys.argv[1]
 131
 132    try:
 133        messageBytes = message.encode("ascii")
 134    except UnicodeError:
 135        exit("Only 7-bit ASCII characters are allowed.")
 136
 137    print(MD5_hash(messageBytes))
 138
 139if __name__ == "__main__":
 140    main()