Merge #9984: devtools: Make github-merge compute SHA512 from git, instead of worktree

a327e8e devtools: Make github-merge compute SHA512 from git, instead of worktree (Wladimir J. van der Laan)

Tree-SHA512: 22ec7712876be4ab361015a2dd75a09628ec59105ffe3260126f899d8f3ff8666351b65b9a4dfe83f78eb777730442cd0352b155d7f573424f7fc1c4dbc0ddd2
This commit is contained in:
Wladimir J. van der Laan 2017-03-14 07:33:43 +01:00
commit cce056d729
No known key found for this signature in database
GPG Key ID: 74810B012346C9A6
1 changed files with 37 additions and 8 deletions

View File

@ -78,24 +78,53 @@ def get_symlink_files():
ret.append(f.decode('utf-8').split("\t")[1])
return ret
def tree_sha512sum():
files = sorted(subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', '--name-only', 'HEAD']).splitlines())
def tree_sha512sum(commit='HEAD'):
# request metadata for entire tree, recursively
files = []
blob_by_name = {}
for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines():
name_sep = line.index(b'\t')
metadata = line[:name_sep].split() # perms, 'blob', blobid
assert(metadata[1] == b'blob')
name = line[name_sep+1:]
files.append(name)
blob_by_name[name] = metadata[2]
files.sort()
# open connection to git-cat-file in batch mode to request data for all blobs
# this is much faster than launching it per file
p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
overall = hashlib.sha512()
for f in files:
blob = blob_by_name[f]
# request blob
p.stdin.write(blob + b'\n')
p.stdin.flush()
# read header: blob, "blob", size
reply = p.stdout.readline().split()
assert(reply[0] == blob and reply[1] == b'blob')
size = int(reply[2])
# hash the blob data
intern = hashlib.sha512()
fi = open(f, 'rb')
while True:
piece = fi.read(65536)
if piece:
ptr = 0
while ptr < size:
bs = min(65536, size - ptr)
piece = p.stdout.read(bs)
if len(piece) == bs:
intern.update(piece)
else:
break
fi.close()
raise IOError('Premature EOF reading git cat-file output')
ptr += bs
dig = intern.hexdigest()
assert(p.stdout.read(1) == b'\n') # ignore LF that follows blob data
# update overall hash with file hash
overall.update(dig.encode("utf-8"))
overall.update(" ".encode("utf-8"))
overall.update(f)
overall.update("\n".encode("utf-8"))
p.stdin.close()
if p.wait():
raise IOError('Non-zero return value executing git cat-file')
return overall.hexdigest()