I am looking to learn more about hashing algorithms, specifically the Message Digest 5 algorithm. And specifically in a slightly more up-to-date version of Python (3+.)
I know you can do the whole from hashlib import md5
thing, and that implementations of it in Python can be found online. However, I've found that the most recent one that mimics the original C code only works in Python version 2.4.X and that there is also a 3.2 version which is very condensed and not at all what I'm looking for (http://rosettacode.org/wiki/MD5/Implementation.)
After spending a few hours converting from JavaScript to Python 3.5, I can't get it right no matter how hard I try, the resulting hashes are always different than the ones from the hashlib
module itself. Can anyone help me out?
EDIT: This is the JavaScript implementation I am converting from: How does MD5Sum algorithm work? This implementation works, I've verified it using the online tool that w3schools.com has.
Here is my current code (that still won't work.) The issue is most definitely in the bitwise operations, the main hashing function is not the issue since I directly copy and pasted from the JavaScript version which works
def newArray(num):
array=[]
for x in range(num):
array.append(0)
return array
def convertToWordArray(string):
#print(string)
lMessageLength=len(string)
#print(lMessageLength)
lNumberOfWords_temp1=lMessageLength+8
#print(lNumberOfWords_temp1)
lNumberOfWords_temp2=int((lNumberOfWords_temp1-(lNumberOfWords_temp1%64))/64)
#print(lNumberOfWords_temp2)
lNumberOfWords=(lNumberOfWords_temp2+1)*16
#print(lNumberOfWords)
lWordArray=newArray(lNumberOfWords-1)
lBytePosition=0
lByteCount=0
while lByteCount<lMessageLength:
lWordCount=int((lByteCount-(lByteCount%4))/4)
lBytePosition=(lByteCount%4)*8
#print(string[int(lByteCount)])
lWordArray[lWordCount]=(lWordArray[lWordCount]|(ord(string[int(lByteCount)])<<lBytePosition))
lByteCount+=1
lWordCount=int((lByteCount-(lByteCount%4))/4)
lBytePosition=(lByteCount%4)*8
lWordArray[lWordCount]=lWordArray[lWordCount]|(0x80<<lBytePosition)
lWordArray[lNumberOfWords-2]=lMessageLength<<3
lWordArray.append(lMessageLength>>29)
return lWordArray
def F(x,y,z):
return (x & y)|((~x) & z)
def G(x,y,z):
return (x & z)|(y & (~z))
def H(x,y,z):
return (x ^ y ^ z)
def I(x,y,z):
return (y ^ (x|(~z)))
def C(q,a,b,x,s,ac):
return addu(rol(addu(addu(a,q),addu(x,ac)),s),b)
def FF(a,b,c,d,x,s,ac):
return C((b & c)|((~b) & d),a,b,x,s,ac)
def GG(a,b,c,d,x,s,ac):
return C((b & d)|(c & (~d)),a,b,x,s,ac)
def HH(a,b,c,d,x,s,ac):
return C(b ^ c ^ d,a,b,x,s,ac)
def II(a,b,c,d,x,s,ac):
return C(c ^ (b|(~d)),a,b,x,s,ac)
def addu(x,y):
ls=(x & 0xFFFF)+(y & 0xFFFF)
return (((x>>16)+(y>>16)+(ls>>16))<<16)|(ls & 0xFFFF)
def rol(v,s):
return (v<<s)|(v>>(32-s))
def wordToHex(lValue):
wordToHexValue=''
wordToHexValue_temp=''
for lCount in range(4):
lByte=(lValue>>(lCount*8)) & 255
wordToHexValue_temp="0"+format(lByte, 'x')
wordToHexValue=wordToHexValue+wordToHexValue_temp[-2:]
return wordToHexValue
def md5hash(message):
x=convertToWordArray(message)
a=0x67452301
b=0xEFCDAB89
c=0x98BADCFE
d=0x10325476
xl=len(x)
j=0
while j<xl:
aa=a
bb=b
cc=c
dd=d
a=FF(a,b,c,d, x[j+0], 7,0xD76AA478)
d=FF(d,a,b,c, x[j+1],12,0xE8C7B756)
c=FF(c,d,a,b, x[j+2],17,0x242070DB)
b=FF(b,c,d,a, x[j+3],22,0xC1BDCEEE)
a=FF(a,b,c,d, x[j+4], 7,0xF57C0FAF)
d=FF(d,a,b,c, x[j+5],12,0x4787C62A)
c=FF(c,d,a,b, x[j+6],17,0xA8304613)
b=FF(b,c,d,a, x[j+7],22,0xFD469501)
a=FF(a,b,c,d, x[j+8], 7,0x698098D8)
d=FF(d,a,b,c, x[j+9],12,0x8B44F7AF)
c=FF(c,d,a,b,x[j+10],17,0xFFFF5BB1)
b=FF(b,c,d,a,x[j+11],22,0x895CD7BE)
a=FF(a,b,c,d,x[j+12], 7,0x6B901122)
d=FF(d,a,b,c,x[j+13],12,0xFD987193)
c=FF(c,d,a,b,x[j+14],17,0xA679438E)
b=FF(b,c,d,a,x[j+15],22,0x49B40821)
a=GG(a,b,c,d, x[j+1], 5,0xF61E2562)
d=GG(d,a,b,c, x[j+6], 9,0xC040B340)
c=GG(c,d,a,b,x[j+11],14,0x265E5A51)
b=GG(b,c,d,a, x[j+0],20,0xE9B6C7AA)
a=GG(a,b,c,d, x[j+5], 5,0xD62F105D)
d=GG(d,a,b,c,x[j+10], 9,0x2441453)
c=GG(c,d,a,b,x[j+15],14,0xD8A1E681)
b=GG(b,c,d,a, x[j+4],20,0xE7D3FBC8)
a=GG(a,b,c,d, x[j+9], 5,0x21E1CDE6)
d=GG(d,a,b,c,x[j+14], 9,0xC33707D6)
c=GG(c,d,a,b, x[j+3],14,0xF4D50D87)
b=GG(b,c,d,a, x[j+8],20,0x455A14ED)
a=GG(a,b,c,d,x[j+13], 5,0xA9E3E905)
d=GG(d,a,b,c, x[j+2], 9,0xFCEFA3F8)
c=GG(c,d,a,b, x[j+7],14,0x676F02D9)
b=GG(b,c,d,a,x[j+12],20,0x8D2A4C8A)
a=HH(a,b,c,d, x[j+5], 4,0xFFFA3942)
d=HH(d,a,b,c, x[j+8],11,0x8771F681)
c=HH(c,d,a,b,x[j+11],16,0x6D9D6122)
b=HH(b,c,d,a,x[j+14],23,0xFDE5380C)
a=HH(a,b,c,d, x[j+1], 4,0xA4BEEA44)
d=HH(d,a,b,c, x[j+4],11,0x4BDECFA9)
c=HH(c,d,a,b, x[j+7],16,0xF6BB4B60)
b=HH(b,c,d,a,x[j+10],23,0xBEBFBC70)
a=HH(a,b,c,d,x[j+13], 4,0x289B7EC6)
d=HH(d,a,b,c, x[j+0],11,0xEAA127FA)
c=HH(c,d,a,b, x[j+3],16,0xD4EF3085)
b=HH(b,c,d,a, x[j+6],23,0x4881D05)
a=HH(a,b,c,d, x[j+9], 4,0xD9D4D039)
d=HH(d,a,b,c,x[j+12],11,0xE6DB99E5)
c=HH(c,d,a,b,x[j+15],16,0x1FA27CF8)
b=HH(b,c,d,a, x[j+2],23,0xC4AC5665)
a=II(a,b,c,d, x[j+0], 6,0xF4292244)
d=II(d,a,b,c, x[j+7],10,0x432AFF97)
c=II(c,d,a,b,x[j+14],15,0xAB9423A7)
b=II(b,c,d,a, x[j+5],21,0xFC93A039)
a=II(a,b,c,d,x[j+12], 6,0x655B59C3)
d=II(d,a,b,c, x[j+3],10,0x8F0CCC92)
c=II(c,d,a,b,x[j+10],15,0xFFEFF47D)
b=II(b,c,d,a, x[j+1],21,0x85845DD1)
a=II(a,b,c,d, x[j+8], 6,0x6FA87E4F)
d=II(d,a,b,c,x[j+15],10,0xFE2CE6E0)
c=II(c,d,a,b, x[j+6],15,0xA3014314)
b=II(b,c,d,a,x[j+13],21,0x4E0811A1)
a=II(a,b,c,d, x[j+4], 6,0xF7537E82)
d=II(d,a,b,c,x[j+11],10,0xBD3AF235)
c=II(c,d,a,b, x[j+2],15,0x2AD7D2BB)
b=II(b,c,d,a, x[j+9],21,0xEB86D391)
a=addu(a,aa)
b=addu(b,bb)
c=addu(c,cc)
d=addu(d,dd)
j+=16
return (wordToHex(a)+wordToHex(b)+wordToHex(c)+wordToHex(d)).lower()
I've figured it out, here is my new code. This time I tried utilising the 'Master FGHI Function' in the 2.4 version and it worked. Not sure what the true issue was with my original code, but this new method works fine in place of the FF, GG, HH, II functions.
def XX(func, a, b, c, d, x, s, ac):
res=0
res=res+a+func(b,c,d)
res+=x
res+=ac
res=res & 0xffffffff
res=rol(res,s)
res=res & 0xffffffff
res+=b
return res & 0xffffffff
The full code for a Python 3.4 MD5 algorithm implementation is as follows:
def newArray(num):
array=[]
for x in range(num):
array.append(0)
return array
def convertToWordArray(string):
#print(string)
lMessageLength=len(string)
#print(lMessageLength)
lNumberOfWords_temp1=lMessageLength+8
#print(lNumberOfWords_temp1)
lNumberOfWords_temp2=(lNumberOfWords_temp1-(lNumberOfWords_temp1%64))/64
#print(lNumberOfWords_temp2)
lNumberOfWords=int((lNumberOfWords_temp2+1)*16)
#print(lNumberOfWords)
lWordArray=newArray(lNumberOfWords-1)
lBytePosition=0
lByteCount=0
while lByteCount<lMessageLength:
lWordCount=int((lByteCount-(lByteCount%4))/4)
lBytePosition=(lByteCount%4)*8
#print(string[int(lByteCount)])
lWordArray[lWordCount]=(lWordArray[lWordCount]|(ord(string[int(lByteCount)])<<lBytePosition))
lByteCount+=1
lWordCount=int((lByteCount-(lByteCount%4))/4)
lBytePosition=(lByteCount%4)*8
lWordArray[lWordCount]=lWordArray[lWordCount]|(0x80<<lBytePosition)
lWordArray[lNumberOfWords-2]=lMessageLength<<3
lWordArray.append(lMessageLength>>29)
return lWordArray
def F(x,y,z):
print(x,y,x)
return (x & y) | ((~x) & z)
def G(x,y,z):
return (x & z) | (y & (~z))
def H(x,y,z):
return x ^ y ^ z
def I(x,y,z):
return y ^ (x | (~z))
def XX(func, a, b, c, d, x, s, ac):
res=0
res=res+a+func(b,c,d)
res+=x
res+=ac
res=res & 0xffffffff
res=rol(res,s)
res=res & 0xffffffff
res+=b
return res & 0xffffffff
## return addu(rol(addu(addu(a,q),addu(x,ac)),s),b)
##def FF(a,b,c,d,x,s,ac):
## return C((b & c)|((~b) & d),a,b,x,s,ac)
##def GG(a,b,c,d,x,s,ac):
## return C((b & d)|(c & (~d)),a,b,x,s,ac)
##def HH(a,b,c,d,x,s,ac):
## return C(b ^ c ^ d,a,b,x,s,ac)
##def II(a,b,c,d,x,s,ac):
## return C(c ^ (b|(~d)),a,b,x,s,ac)
def addu(x,y):
ls=(x & 0xffffffff)+(y & 0xffffffff)
return (((x>>16)+(y>>16)+(ls>>16))<<16)|(ls & 0xffffffff)
def rol(v,s):
return (v<<s)|(v>>(32-s))
def wordToHex(lValue):
wordToHexValue=''
wordToHexValue_temp=''
for lCount in range(4):
lByte=(lValue>>(lCount*8)) & 255
wordToHexValue_temp="0"+format(lByte, 'x')
wordToHexValue=wordToHexValue+wordToHexValue_temp[-2:]
return wordToHexValue
def md5hash(message):
x=convertToWordArray(message)
a=0x67452301
b=0xEFCDAB89
c=0x98BADCFE
d=0x10325476
xl=len(x)
j=0
while j<xl:
aa=a
bb=b
cc=c
dd=d
a=XX(F,a,b,c,d, x[j+0], 7,0xD76AA478)
d=XX(F,d,a,b,c, x[j+1],12,0xE8C7B756)
c=XX(F,c,d,a,b, x[j+2],17,0x242070DB)
b=XX(F,b,c,d,a, x[j+3],22,0xC1BDCEEE)
a=XX(F,a,b,c,d, x[j+4], 7,0xF57C0FAF)
d=XX(F,d,a,b,c, x[j+5],12,0x4787C62A)
c=XX(F,c,d,a,b, x[j+6],17,0xA8304613)
b=XX(F,b,c,d,a, x[j+7],22,0xFD469501)
a=XX(F,a,b,c,d, x[j+8], 7,0x698098D8)
d=XX(F,d,a,b,c, x[j+9],12,0x8B44F7AF)
c=XX(F,c,d,a,b,x[j+10],17,0xFFFF5BB1)
b=XX(F,b,c,d,a,x[j+11],22,0x895CD7BE)
a=XX(F,a,b,c,d,x[j+12], 7,0x6B901122)
d=XX(F,d,a,b,c,x[j+13],12,0xFD987193)
c=XX(F,c,d,a,b,x[j+14],17,0xA679438E)
b=XX(F,b,c,d,a,x[j+15],22,0x49B40821)
a=XX(G,a,b,c,d, x[j+1], 5,0xF61E2562)
d=XX(G,d,a,b,c, x[j+6], 9,0xC040B340)
c=XX(G,c,d,a,b,x[j+11],14,0x265E5A51)
b=XX(G,b,c,d,a, x[j+0],20,0xE9B6C7AA)
a=XX(G,a,b,c,d, x[j+5], 5,0xD62F105D)
d=XX(G,d,a,b,c,x[j+10], 9,0x2441453)
c=XX(G,c,d,a,b,x[j+15],14,0xD8A1E681)
b=XX(G,b,c,d,a, x[j+4],20,0xE7D3FBC8)
a=XX(G,a,b,c,d, x[j+9], 5,0x21E1CDE6)
d=XX(G,d,a,b,c,x[j+14], 9,0xC33707D6)
c=XX(G,c,d,a,b, x[j+3],14,0xF4D50D87)
b=XX(G,b,c,d,a, x[j+8],20,0x455A14ED)
a=XX(G,a,b,c,d,x[j+13], 5,0xA9E3E905)
d=XX(G,d,a,b,c, x[j+2], 9,0xFCEFA3F8)
c=XX(G,c,d,a,b, x[j+7],14,0x676F02D9)
b=XX(G,b,c,d,a,x[j+12],20,0x8D2A4C8A)
a=XX(H,a,b,c,d, x[j+5], 4,0xFFFA3942)
d=XX(H,d,a,b,c, x[j+8],11,0x8771F681)
c=XX(H,c,d,a,b,x[j+11],16,0x6D9D6122)
b=XX(H,b,c,d,a,x[j+14],23,0xFDE5380C)
a=XX(H,a,b,c,d, x[j+1], 4,0xA4BEEA44)
d=XX(H,d,a,b,c, x[j+4],11,0x4BDECFA9)
c=XX(H,c,d,a,b, x[j+7],16,0xF6BB4B60)
b=XX(H,b,c,d,a,x[j+10],23,0xBEBFBC70)
a=XX(H,a,b,c,d,x[j+13], 4,0x289B7EC6)
d=XX(H,d,a,b,c, x[j+0],11,0xEAA127FA)
c=XX(H,c,d,a,b, x[j+3],16,0xD4EF3085)
b=XX(H,b,c,d,a, x[j+6],23,0x4881D05)
a=XX(H,a,b,c,d, x[j+9], 4,0xD9D4D039)
d=XX(H,d,a,b,c,x[j+12],11,0xE6DB99E5)
c=XX(H,c,d,a,b,x[j+15],16,0x1FA27CF8)
b=XX(H,b,c,d,a, x[j+2],23,0xC4AC5665)
a=XX(I,a,b,c,d, x[j+0], 6,0xF4292244)
d=XX(I,d,a,b,c, x[j+7],10,0x432AFF97)
c=XX(I,c,d,a,b,x[j+14],15,0xAB9423A7)
b=XX(I,b,c,d,a, x[j+5],21,0xFC93A039)
a=XX(I,a,b,c,d,x[j+12], 6,0x655B59C3)
d=XX(I,d,a,b,c, x[j+3],10,0x8F0CCC92)
c=XX(I,c,d,a,b,x[j+10],15,0xFFEFF47D)
b=XX(I,b,c,d,a, x[j+1],21,0x85845DD1)
a=XX(I,a,b,c,d, x[j+8], 6,0x6FA87E4F)
d=XX(I,d,a,b,c,x[j+15],10,0xFE2CE6E0)
c=XX(I,c,d,a,b, x[j+6],15,0xA3014314)
b=XX(I,b,c,d,a,x[j+13],21,0x4E0811A1)
a=XX(I,a,b,c,d, x[j+4], 6,0xF7537E82)
d=XX(I,d,a,b,c,x[j+11],10,0xBD3AF235)
c=XX(I,c,d,a,b, x[j+2],15,0x2AD7D2BB)
b=XX(I,b,c,d,a, x[j+9],21,0xEB86D391)
a=addu(a,aa)
b=addu(b,bb)
c=addu(c,cc)
d=addu(d,dd)
j+=16
return (wordToHex(a)+wordToHex(b)+wordToHex(c)+wordToHex(d)).lower()