devtools: Make github-merge compute SHA512 from git, instead of worktree

This changes tree_sha512sum() to request the objects for hashing from
git instead of from the working tree.

The change should make the process more deterministic (it hashes what
will be pushed) and hopefully avoid the frequent miscomputed SHA512s
that happen now.
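
For context, the change builds on git's cat-file batch protocol: one object
name per input line, answered by a header line ("<id> <type> <size>") followed
by the raw object bytes and a trailing LF. Below is a minimal standalone sketch
of that round trip (not part of this commit; it assumes it is run inside a git
checkout that has a README.md at HEAD, and README.md is only an example object
name):

import subprocess

# Request a single blob through `git cat-file --batch` and read it back.
p = subprocess.Popen(['git', 'cat-file', '--batch'],
                     stdin=subprocess.PIPE, stdout=subprocess.PIPE)
p.stdin.write(b'HEAD:README.md\n')    # one request per line
p.stdin.flush()
header = p.stdout.readline().split()  # [object id, b'blob', size in bytes]
size = int(header[2])
data = p.stdout.read(size)            # raw blob contents as stored by git
assert p.stdout.read(1) == b'\n'      # LF terminator after the blob data
p.stdin.close()
p.wait()
print('%s: %d bytes' % (header[0].decode(), size))
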
Wladimir J. van der Laan 2017-03-13 16:09:38 +01:00
parent 8040ae6fc5
commit a327e8ea30
1 changed file with 37 additions and 8 deletions

@@ -78,24 +78,53 @@ def get_symlink_files():
             ret.append(f.decode('utf-8').split("\t")[1])
     return ret
 
-def tree_sha512sum():
-    files = sorted(subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', '--name-only', 'HEAD']).splitlines())
+def tree_sha512sum(commit='HEAD'):
+    # request metadata for entire tree, recursively
+    files = []
+    blob_by_name = {}
+    for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines():
+        name_sep = line.index(b'\t')
+        metadata = line[:name_sep].split() # perms, 'blob', blobid
+        assert(metadata[1] == b'blob')
+        name = line[name_sep+1:]
+        files.append(name)
+        blob_by_name[name] = metadata[2]
+    files.sort()
+
+    # open connection to git-cat-file in batch mode to request data for all blobs
+    # this is much faster than launching it per file
+    p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
     overall = hashlib.sha512()
     for f in files:
+        blob = blob_by_name[f]
+        # request blob
+        p.stdin.write(blob + b'\n')
+        p.stdin.flush()
+        # read header: blob, "blob", size
+        reply = p.stdout.readline().split()
+        assert(reply[0] == blob and reply[1] == b'blob')
+        size = int(reply[2])
+        # hash the blob data
         intern = hashlib.sha512()
-        fi = open(f, 'rb')
-        while True:
-            piece = fi.read(65536)
-            if piece:
+        ptr = 0
+        while ptr < size:
+            bs = min(65536, size - ptr)
+            piece = p.stdout.read(bs)
+            if len(piece) == bs:
                 intern.update(piece)
             else:
-                break
-        fi.close()
+                raise IOError('Premature EOF reading git cat-file output')
+            ptr += bs
         dig = intern.hexdigest()
+        assert(p.stdout.read(1) == b'\n') # ignore LF that follows blob data
+        # update overall hash with file hash
         overall.update(dig.encode("utf-8"))
         overall.update("  ".encode("utf-8"))
         overall.update(f)
         overall.update("\n".encode("utf-8"))
+    p.stdin.close()
+    if p.wait():
+        raise IOError('Non-zero return value executing git cat-file')
 
     return overall.hexdigest()
 
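Usage note (illustration only, not part of the diff): with the GIT constant,
hashlib, and subprocess set up as they are elsewhere in github-merge.py, the
reworked function accepts any commit-ish, not just HEAD. A hedged sketch:

# Hypothetical driver, not part of the script; assumes tree_sha512sum() above is in scope.
print('Tree-SHA512: ' + tree_sha512sum())                # defaults to HEAD
print('Tree-SHA512: ' + tree_sha512sum('origin/master')) # 'origin/master' is only an example ref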