Edited to include Altren s solution below.
A modified and more compact version of CrouZ s answer, with a slightly improved performance, using a for loop and file buffering:
def forLoopCrc(fpath):
"""With for loop and buffer."""
crc = 0
with open(fpath, rb , 65536) as ins:
for x in range(int((os.stat(fpath).st_size / 65536)) + 1):
crc = zlib.crc32(ins.read(65536), crc)
return %08X % (crc & 0xFFFFFFFF)
Results, in a 6700k, HDD:
(Note: Retested multiple times and it was consistently faster.)
Warming up the machine...
Finished.
Beginning tests...
File size: 90288KB
Test cycles: 500
With for loop and buffer.
Result 45.24728019630359
CrouZ solution
Result 45.433838356097894
kobor42 solution
Result 104.16215688703986
Altren solution
Result 101.7247863946586
Tested in Python 3.6.4 x64 using the script below:
import os, timeit, zlib, random, binascii
def forLoopCrc(fpath):
"""With for loop and buffer."""
crc = 0
with open(fpath, rb , 65536) as ins:
for x in range(int((os.stat(fpath).st_size / 65536)) + 1):
crc = zlib.crc32(ins.read(65536), crc)
return %08X % (crc & 0xFFFFFFFF)
def crc32(fileName):
"""CrouZ solution"""
with open(fileName, rb ) as fh:
hash = 0
while True:
s = fh.read(65536)
if not s:
break
hash = zlib.crc32(s, hash)
return "%08X" % (hash & 0xFFFFFFFF)
def crc(fileName):
"""kobor42 solution"""
prev = 0
for eachLine in open(fileName,"rb"):
prev = zlib.crc32(eachLine, prev)
return "%X"%(prev & 0xFFFFFFFF)
def crc32altren(filename):
"""Altren solution"""
buf = open(filename, rb ).read()
hash = binascii.crc32(buf) & 0xFFFFFFFF
return "%08X" % hash
fpath = r D: est est.dat
tests = {forLoopCrc: With for loop and buffer. ,
crc32: CrouZ solution , crc: kobor42 solution ,
crc32altren: Altren solution }
count = 500
# CPU, HDD warmup
randomItm = [x for x in tests.keys()]
random.shuffle(randomItm)
print(
Warming up the machine... )
for c in range(count):
randomItm[0](fpath)
print( Finished.
)
# Begin test
print( Beginning tests...
File size: %dKB
Test cycles: %d
% (
os.stat(fpath).st_size/1024, count))
for x in tests:
print(tests[x])
start_time = timeit.default_timer()
for c in range(count):
x(fpath)
print( Result , timeit.default_timer() - start_time,
)
It is faster because for loops are faster than while loops (sources: here and here).