Breder.org

Implementation of MD5 in the C Programming Language

MD5 is a digest or hash algorithm, meaning that it takes a message, comprised of an arbitrary number of bytes, and outputs a fixed-size digest or hash which, for MD5, is a sequence of 16 bytes.

When given the same message, the same digest is always generated. For even a slightly different message -- say, differing by a single bit flip --, a completely different digest is expected.

MD5 was original published in 1992 through the RFC 1321 and has since been considered obsolete for cryptography purposes.

Still, it is simple and good enough for data integrity purposes when it is safe to assume there is no threat of a malicious attacker (for example, finding duplicate files in a directory three by comparing the MD5 hash from the content of the files, instead of the file contents themselves).

I explored implementing the whole algorithm by having as my only reference the original RFC 1321 and I was successful at that. It means that the RFC itself is a full specification of the algorithm and is enough for any (sufficiently knowledgeable) programmer to implement what the authors intended.

One interesting note is that I caught a mistake in my implementation of the padding logic by generating test cases with strings with length from 1 to 64 (namely, one string for each length). The examples provided in the RFC -- which are also implemented as test cases -- wouldn't be sufficient to catch it.

The source code is reproduced below.

Update (2024-01-29): Simplified the code and reduced the line count.

md5.h

// MD5 (Message-Digest Algorithm 5)
// Copyright Victor Breder 2024
// SPDX-License-Identifier: MIT

#pragma once
#include <stdint.h>

typedef struct {
	uint32_t a, b, c, d;
} md5_context;

void md5_init(md5_context* ctx);
void md5_digest(md5_context* ctx, void* buffer, size_t size);
void md5_output(md5_context* ctx, uint8_t out[16]);

md5.c

// MD5 (Message-Digest Algorithm 5)
// Copyright Victor Breder 2024
// SPDX-License-Identifier: MIT

// Implemented from reference RFC 1321 available at
// <https://www.ietf.org/rfc/rfc1321.txt>

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "md5.h"

// from RFC 1321, Section 3.4:
#define F(X, Y, Z) (((X) & (Y)) | ((~(X)) & (Z)))
#define G(X, Y, Z) (((X) & (Z)) | (Y & (~(Z))))
#define H(X, Y, Z) ((X) ^ (Y) ^ (Z))
#define I(X, Y, Z) ((Y) ^ ((X) | (~(Z))))

static
uint32_t rotl(uint32_t x, int s) { return (x << s) | (x >> (32 - s)); }
#define STEP(OP, a, b, c, d, k, s, i) do{ \
	a = b + rotl(a + OP(b, c, d) + X[k] + i, s); \
	}while(0)

#define TO_I32(x,i) ((x[i]) | (x[i+1]<<8) | (x[i+2]<<16) | (x[i+3]<<24))
static
void md5_block(md5_context* ctx, const uint8_t m[64]) {
	assert(ctx != NULL);

	uint32_t X[16] = {
		TO_I32(m,0), TO_I32(m,4), TO_I32(m,8), TO_I32(m,12),
		TO_I32(m,16), TO_I32(m,20), TO_I32(m,24), TO_I32(m,28),
		TO_I32(m,32), TO_I32(m,36), TO_I32(m,40), TO_I32(m,44),
		TO_I32(m,48), TO_I32(m,52), TO_I32(m,56), TO_I32(m,60)
	};

	uint32_t a = ctx->a;
	uint32_t b = ctx->b;
	uint32_t c = ctx->c;
	uint32_t d = ctx->d;

	STEP(F, a, b, c, d, 0,  7, 0xd76aa478);
	STEP(F, d, a, b, c, 1, 12, 0xe8c7b756);
	STEP(F, c, d, a, b, 2, 17, 0x242070db);
	STEP(F, b, c, d, a, 3, 22, 0xc1bdceee);
	STEP(F, a, b, c, d, 4,  7, 0xf57c0faf);
	STEP(F, d, a, b, c, 5, 12, 0x4787c62a);
	STEP(F, c, d, a, b, 6, 17, 0xa8304613);
	STEP(F, b, c, d, a, 7, 22, 0xfd469501);
	STEP(F, a, b, c, d,  8,  7, 0x698098d8);
	STEP(F, d, a, b, c,  9, 12, 0x8b44f7af);
	STEP(F, c, d, a, b, 10, 17, 0xffff5bb1);
	STEP(F, b, c, d, a, 11, 22, 0x895cd7be);
	STEP(F, a, b, c, d, 12,  7, 0x6b901122);
	STEP(F, d, a, b, c, 13, 12, 0xfd987193);
	STEP(F, c, d, a, b, 14, 17, 0xa679438e);
	STEP(F, b, c, d, a, 15, 22, 0x49b40821);
	STEP(G, a, b, c, d,  1,  5, 0xf61e2562);
	STEP(G, d, a, b, c,  6,  9, 0xc040b340);
	STEP(G, c, d, a, b, 11, 14, 0x265e5a51);
	STEP(G, b, c, d, a,  0, 20, 0xe9b6c7aa);
	STEP(G, a, b, c, d,  5,  5, 0xd62f105d);
	STEP(G, d, a, b, c, 10,  9, 0x02441453);
	STEP(G, c, d, a, b, 15, 14, 0xd8a1e681);
	STEP(G, b, c, d, a,  4, 20, 0xe7d3fbc8);
	STEP(G, a, b, c, d,  9,  5, 0x21e1cde6);
	STEP(G, d, a, b, c, 14,  9, 0xc33707d6);
	STEP(G, c, d, a, b,  3, 14, 0xf4d50d87);
	STEP(G, b, c, d, a,  8, 20, 0x455a14ed);
	STEP(G, a, b, c, d, 13,  5, 0xa9e3e905);
	STEP(G, d, a, b, c,  2,  9, 0xfcefa3f8);
	STEP(G, c, d, a, b,  7, 14, 0x676f02d9);
	STEP(G, b, c, d, a, 12, 20, 0x8d2a4c8a);
	STEP(H, a, b, c, d,  5,  4, 0xfffa3942);
	STEP(H, d, a, b, c,  8, 11, 0x8771f681);
	STEP(H, c, d, a, b, 11, 16, 0x6d9d6122);
	STEP(H, b, c, d, a, 14, 23, 0xfde5380c);
	STEP(H, a, b, c, d,  1,  4, 0xa4beea44);
	STEP(H, d, a, b, c,  4, 11, 0x4bdecfa9);
	STEP(H, c, d, a, b,  7, 16, 0xf6bb4b60);
	STEP(H, b, c, d, a, 10, 23, 0xbebfbc70);
	STEP(H, a, b, c, d, 13,  4, 0x289b7ec6);
	STEP(H, d, a, b, c,  0, 11, 0xeaa127fa);
	STEP(H, c, d, a, b,  3, 16, 0xd4ef3085);
	STEP(H, b, c, d, a,  6, 23, 0x04881d05);
	STEP(H, a, b, c, d,  9,  4, 0xd9d4d039);
	STEP(H, d, a, b, c, 12, 11, 0xe6db99e5);
	STEP(H, c, d, a, b, 15, 16, 0x1fa27cf8);
	STEP(H, b, c, d, a,  2, 23, 0xc4ac5665);
	STEP(I, a, b, c, d,  0,  6, 0xf4292244);
	STEP(I, d, a, b, c,  7, 10, 0x432aff97);
	STEP(I, c, d, a, b, 14, 15, 0xab9423a7);
	STEP(I, b, c, d, a,  5, 21, 0xfc93a039);
	STEP(I, a, b, c, d, 12,  6, 0x655b59c3);
	STEP(I, d, a, b, c,  3, 10, 0x8f0ccc92);
	STEP(I, c, d, a, b, 10, 15, 0xffeff47d);
	STEP(I, b, c, d, a,  1, 21, 0x85845dd1);
	STEP(I, a, b, c, d,  8,  6, 0x6fa87e4f);
	STEP(I, d, a, b, c, 15, 10, 0xfe2ce6e0);
	STEP(I, c, d, a, b,  6, 15, 0xa3014314);
	STEP(I, b, c, d, a, 13, 21, 0x4e0811a1);
	STEP(I, a, b, c, d,  4,  6, 0xf7537e82);
	STEP(I, d, a, b, c, 11, 10, 0xbd3af235);
	STEP(I, c, d, a, b,  2, 15, 0x2ad7d2bb);
	STEP(I, b, c, d, a,  9, 21, 0xeb86d391);

	ctx->a += a;
	ctx->b += b;
	ctx->c += c;
	ctx->d += d;

	memset(X, 0, sizeof(X));
}

void md5_init(md5_context* ctx) {
	assert(ctx != NULL);
	memset(ctx, 0, sizeof(md5_context));
	// initialization values from RFC 1321, Section 3.3:
	ctx->a = 0x67452301;
	ctx->b = 0xEFCDAB89;
	ctx->c = 0x98BADCFE;
	ctx->d = 0x10325476;
}

#define TO_U8(x,o,i) do{ \
	o[i] = (x) & 0xFF; \
	o[i+1] = ((x) >> 8) & 0xFF; \
	o[i+2] = ((x) >> 16) & 0xFF; \
	o[i+3] = ((x) >> 24) & 0xFF; }while(0)

void md5_digest(md5_context* ctx, void* buffer, size_t size) {
	uint8_t* bytes = (uint8_t*)buffer;
	uint64_t message_bits = size * 8;
	ssize_t rem_size = size;
	while (rem_size > 64) {
		md5_block(ctx, bytes);
		bytes += 64;
		rem_size -= 64;
	}
	uint8_t scratch[64];
	memset(scratch, 0, 64);
	memcpy(scratch, bytes, rem_size);
	if (rem_size == 64) {
		md5_block(ctx, scratch);
		memset(scratch, 0, 64);
		scratch[0] = 0x80;
	} else {
		scratch[rem_size] = 0x80;
		if (64 - (rem_size + 1) < 8) {
			md5_block(ctx, scratch);
			memset(scratch, 0, 64);
		}
	}
	TO_U8(message_bits, scratch, 56);
	TO_U8(message_bits>>32, scratch, 60);
	md5_block(ctx, scratch);
	memset(scratch, 0x00, 64);
}

void md5_output(md5_context* ctx, uint8_t out[16]) {
	TO_U8(ctx->a, out, 0);
	TO_U8(ctx->b, out, 4);
	TO_U8(ctx->c, out, 8);
	TO_U8(ctx->d, out, 12);
}

md5_test.c

// MD5 (Message-Digest Algorithm 5)
// Copyright Victor Breder 2024
// SPDX-License-Identifier: MIT

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "md5.h"

int hex_digit_to_int(char c) {
	switch (c) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return c - '0';
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return c + 10 - 'A';
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		return c + 10 - 'a';
	default: assert(0 && "invalid hex character");
	}
	return -1;
}

void hex_string_to_bytes(char* hex, uint8_t bytes[16]) {
	assert(strlen(hex) == 32);
	int j = 0;
	for (int i = 0; i < 32; i+= 2) {
		bytes[j] = 16 * hex_digit_to_int(hex[i])
			+ hex_digit_to_int(hex[i+1]);
		j++;
	}
}

void print_bytes(uint8_t bytes[16]) {
	for (int i = 0; i < 16; i++) {
		printf("%02X", bytes[i]);
	}
	printf("\n");
}

void test(char* input, char* expected) {
	md5_context ctx;
	md5_init(&ctx);
	md5_digest(&ctx, input, strlen(input));

	uint8_t md5_actual[16] = {0};
	md5_output(&ctx, md5_actual);

	uint8_t md5_expected[16] = {0};
	hex_string_to_bytes(expected, md5_expected);

	if (memcmp(md5_actual, md5_expected, 16) != 0) {
		printf("test:     %s\n", input);
		printf("actual:   "); print_bytes(md5_actual);
		printf("expected: "); print_bytes(md5_expected);
		assert(0 && "test cases failed");
	}
}

int main() {
	// test cases from RFC 1321
	test("", "d41d8cd98f00b204e9800998ecf8427e");
	test("a", "0cc175b9c0f1b6a831c399e269772661");
	test("abc", "900150983cd24fb0d6963f7d28e17f72");
	test("message digest", "f96b697d7cb7938d525a2f31aaf161d0");
	test("abcdefghijklmnopqrstuvwxyz", "c3fcd3d76192e4007dfb496cca67e13b");
	test("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
		"d174ab98d277d9f5a5611c2c9f419d9f");
	test("12345678901234567890123456789012345678901234567890123456789012345678901234567890",
		"57edf4a22be3c955ac49da2e2107b67a");

	// additional test cases
	test("a", "0cc175b9c0f1b6a831c399e269772661");
	test("ab", "187ef4436122d1cc2f40dc2b92f0eba0");
	test("abc", "900150983cd24fb0d6963f7d28e17f72");
	test("abcd", "e2fc714c4727ee9395f324cd2e7f331f");
	test("abcde", "ab56b4d92b40713acc5af89985d4b786");
	test("abcdef", "e80b5017098950fc58aad83c8c14978e");
	test("abcdefg", "7ac66c0f148de9519b8bd264312c4d64");
	test("abcdefgh", "e8dc4081b13434b45189a720b77b6818");
	test("abcdefghi", "8aa99b1f439ff71293e95357bac6fd94");
	test("abcdefghij", "a925576942e94b2ef57a066101b48876");
	test("abcdefghijk", "92b9cccc0b98c3a0b8d0df25a421c0e3");
	test("abcdefghijkl", "9fc9d606912030dca86582ed62595cf7");
	test("abcdefghijklm", "22aebdd14e72f6b379476a146347d546");
	test("abcdefghijklmn", "0845a5972cd9ad4a46bad66f1253581f");
	test("abcdefghijklmno", "8a7319dbf6544a7422c9e25452580ea5");
	test("abcdefghijklmnop", "1d64dce239c4437b7736041db089e1b9");
	test("abcdefghijklmnopq", "9a8d9845a6b4d82dfcb2c2e35162c830");
	test("abcdefghijklmnopqr", "fbaf6ecf6acbe54aa33858818d351197");
	test("abcdefghijklmnopqrs", "320620468e1c404151741bbbaff2ada5");
	test("abcdefghijklmnopqrst", "6aa8de45918023095f6e831efe48d00b");
	test("abcdefghijklmnopqrstu", "ce7e0645565441733f794b29a30afd05");
	test("abcdefghijklmnopqrstuv", "44a66044834cbe55040089cabfc102d5");
	test("abcdefghijklmnopqrstuvw", "74525c3a55bdcaee9139bd1e33c764fb");
	test("abcdefghijklmnopqrstuvwx", "0c0e84cf0bb7a8688e5238dbbc1c3953");
	test("abcdefghijklmnopqrstuvwxy", "f1784031a03a8f5b11ead16ab90cc18e");
	test("abcdefghijklmnopqrstuvwxyz", "c3fcd3d76192e4007dfb496cca67e13b");
	test("abcdefghijklmnopqrstuvwxyz0", "953f2c6ba8e5293075470b6982386c70");
	test("abcdefghijklmnopqrstuvwxyz01", "af3a8331ab2aa7e0e39ebd34316f3726");
	test("abcdefghijklmnopqrstuvwxyz012", "a754a04fb0a57f19ec918c285f93d594");
	test("abcdefghijklmnopqrstuvwxyz0123", "71da0db811d4c94847964ecef1e28f4e");
	test("abcdefghijklmnopqrstuvwxyz01234", "d51a934194d2ec472ef1d459037027fa");
	test("abcdefghijklmnopqrstuvwxyz012345", "357e82db934fc45f4a25b4b83dc8bd19");
	test("abcdefghijklmnopqrstuvwxyz0123456", "2e34e06618c6dc00a857b09cd22e3ab2");
	test("abcdefghijklmnopqrstuvwxyz01234567", "5a219f31a9f5539c8c3146ef1059cac3");
	test("abcdefghijklmnopqrstuvwxyz012345678", "8aec8672f465ae1503f90428479ace5a");
	test("abcdefghijklmnopqrstuvwxyz0123456789", "6d2286301265512f019781cc0ce7a39f");
	test("abcdefghijklmnopqrstuvwxyz0123456789a", "6900112698cde7b2a6f3febd2a13220c");
	test("abcdefghijklmnopqrstuvwxyz0123456789ab", "bffb54e8622bf911fb71e0d81fdef97e");
	test("abcdefghijklmnopqrstuvwxyz0123456789abc", "f7c79b5457c74a247051d1b976060f9f");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcd", "fa74c9791663dfa4da17226804752bba");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcde", "fe06d350409272a62ec465518e143d5e");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdef", "91a7fb99f3ea046f86de08322d149beb");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefg", "8e9937ced1bf9c0e743e5ac69febc9e6");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefgh", "96c6c3d71f9b47547a6e2ca759163b15");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghi", "c1163d9cb56290f62b5ca3d0f274c396");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghij", "576cbaf35532c05b256223e401345d4b");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijk", "26e6d1cd9f1a9c556549a190a7aa4020");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijkl", "c9e2f788260a5548fcb6b70cba8c56dd");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklm", "acafc98134c87e63f802b7676de1df7c");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmn", "c52b7685f01375182427c6c379e19578");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmno", "1bb1758cd6859b739d3380bc7cea21bd");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnop", "93161812269ced4eb77bc8119a77d095");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopq", "f5090549ecd7350109a7a04bf15f2f9e");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqr", "11f67500e60fdf623ec9c8ccf0dc0499");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrs", "a49d85aaac8495cbb53b120f3b987478");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrst", "0b74570ac5c5b441888f67619534aa88");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstu", "c63717b1c4cb3f3db2e37d5052eccd2a");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuv", "db33a3917d0fa52cd350f8ee2b7ac9d1");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvw", "4afae235610295e426b4bd6dba1a13d5");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwx", "f206a2c7855398982a305aedf776bb70");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxy", "e58ba530cb66f461a57d8fb5061c229e");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz", "d1caadf5c979bfcdbe1b3962ac6015d1");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0", "87d2cdc81ca700a7259acd6bc75abf0b");
	test("abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz01", "bbd17cbd1784152cd93cca62dee11b5b");

	printf("All tests passed!\n");
}

Makefile

test: md5.h md5.c md5_test.c
	gcc -O3 -Wall -o $@ md5.c md5_test.c