// Source: lib/git.js (blob cfcea9e), as deleted by commit
// a6b3929f8e156244dc22cae546a15079c1754f48 "Factor out git internals"
// (cel, Wed, 27 Dec 2017 14:35:42 -1000).

var pull = require('pull-stream')
var paramap = require('pull-paramap')
var lru = require('hashlru')
var memo = require('asyncmemo')
var u = require('./util')
var packidx = require('pull-git-packidx-parser')
var Reader = require('pull-reader')
var toPull = require('stream-to-pull-stream')
var zlib = require('zlib')
var looper = require('looper')
var multicb = require('multicb')
var kvdiff = require('pull-kvdiff')

var ObjectNotFoundError = u.customError('ObjectNotFoundError')

// object types accepted as the "type" header of a tag object (see getTag)
var types = {
  blob: true,
  commit: true,
  tree: true,
}
// id of the zero-length blob; it is special-cased in _findObject because it
// does not need to be located in any particular packfile
var emptyBlobHash = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'

module.exports = Git

// Git object access on top of an app object. The app is expected to provide
// getMsgDecrypted, ensureHasBlobs, readBlob and readBlobSlice (those are the
// only app methods this file calls).
//
// The three lookups below are wrapped with asyncmemo using small LRU caches;
// asString builds the cache key from the call's argument.
function Git(app) {
  this.app = app

  this.findObject = memo({
    cache: lru(5),
    asString: function (opts) {
      return opts.obj + opts.headMsgId
    }
  }, this._findObject.bind(this))

  this.findObjectInMsg = memo({
    cache: lru(5),
    asString: function (opts) {
      return opts.obj + opts.msg
    }
  }, this._findObjectInMsg.bind(this))

  this.getPackIndex = memo({
    cache: lru(4),
    asString: JSON.stringify
  }, this._getPackIndex.bind(this))
}

// open, read, buffer and callback an object
// opts: passed through to openObject; cb(err, buf)
Git.prototype.getObject = function (opts, cb) {
  var self = this
  self.openObject(opts, function (err, obj) {
    if (err) return cb(err)
    pull(
      self.readObject(obj),
      u.pullConcat(cb)
    )
  })
}

// get a message that pushed an object
// opts: {obj, headMsgId}; cb(err, msg)
Git.prototype.getObjectMsg = function (opts, cb) {
  this.findObject(opts, function (err, loc) {
    if (err) return cb(err)
    cb(null, loc.msg)
  })
}

// Locate an object inside the packs of one message (opts.msg) and make sure
// the pack blob is available before calling back.
// opts: {obj, msg, type?, length?}
// cb(err, obj) where obj carries the location (offset, next, packLink, idx,
// msg) plus the caller's expected type and length, ready for readObject.
Git.prototype.openObject = function (opts, cb) {
  var self = this
  self.findObjectInMsg(opts, function (err, loc) {
    if (err) return cb(err)
    self.app.ensureHasBlobs([loc.packLink], function (err) {
      if (err) return cb(err)
      cb(null, {
        type: opts.type,
        length: opts.length,
        offset: loc.offset,
        next: loc.next,
        packLink: loc.packLink,
        idx: loc.idx,
        msg: loc.msg,
      })
    })
  })
}

// Returns a pull-stream source of the decoded contents of an opened object.
// A zero-length slice (offset === next) is an empty object and yields an
// empty stream without touching the pack blob.
Git.prototype.readObject = function (obj) {
  if (obj.offset === obj.next) return pull.empty()
  return pull(
    this.app.readBlobSlice(obj.packLink, {start: obj.offset, end: obj.next}),
    this.decodeObject({
      type: obj.type,
      length: obj.length,
      packLink: obj.packLink,
      idx: obj.idx,
    })
  )
}

// find which packfile contains a git object, and where in the packfile it is
// located
// opts: {obj, headMsgId}; cb(err, {offset, next, packLink, idx, msg})
Git.prototype._findObject = function (opts, cb) {
  if (!opts.headMsgId) return cb(new TypeError('missing head message id'))
  if (!opts.obj) return cb(new TypeError('missing object id'))
  var self = this
  var objId = opts.obj
  if (objId === emptyBlobHash) {
    // special case: the empty blob may be found anywhere
    // Return here: without it the general lookup below would also run and
    // call cb a second time.
    return self.app.getMsgDecrypted(opts.headMsgId, function (err, msg) {
      if (err) return cb(err)
      return cb(null, {
        offset: 0,
        next: 0,
        packLink: null,
        idx: null,
        msg: msg,
      })
    })
  }
  self.findObjectMsgs(opts, function (err, msgs) {
    if (err) return cb(err)
    if (msgs.length === 0)
      return cb(new ObjectNotFoundError('unable to find git object ' + objId))
    self.findObjectInMsgs(objId, msgs, cb)
  })
}

// Same as _findObject, but searches only the packs of the single message
// whose id is opts.msg.
Git.prototype._findObjectInMsg = function (opts, cb) {
  if (!opts.msg) return cb(new TypeError('missing message id'))
  if (!opts.obj) return cb(new TypeError('missing object id'))
  var self = this
  self.app.getMsgDecrypted(opts.msg, function (err, msg) {
    if (err) return cb(err)
    self.findObjectInMsgs(opts.obj, [msg], cb)
  })
}

// Search the pack indexes referenced by msgs for objId.
// cb(err, {offset, next, packLink, idx, msg}) for the first match, or an
// ObjectNotFoundError if no index contains the object.
Git.prototype.findObjectInMsgs = function (objId, msgs, cb) {
  var self = this
  // Buffer.from replaces the deprecated `new Buffer(string, encoding)`
  var objIdBuf = Buffer.from(objId, 'hex')
  // if blobs may need to be fetched, try to ask the user about as many of them
  // at one time as possible
  var packidxs = [].concat.apply([], msgs.map(function (msg) {
    var c = msg.value.content
    var idxs = u.toArray(c.indexes).map(u.toLink)
    // packs and indexes are paired by position; incomplete pairs are dropped
    return u.toArray(c.packs).map(u.toLink).map(function (pack, i) {
      var idx = idxs[i]
      if (pack && idx) return {
        msg: msg,
        packLink: pack,
        idxLink: idx,
      }
    })
  })).filter(Boolean)
  // with a single candidate the pack will be needed anyway, so request it
  // together with its index; otherwise request only the indexes
  var blobLinks = packidxs.length === 1
    ? [packidxs[0].idxLink, packidxs[0].packLink]
    : packidxs.map(function (packidx) {
      return packidx.idxLink
    })
  self.app.ensureHasBlobs(blobLinks, function (err) {
    if (err) return cb(err)
    pull(
      pull.values(packidxs),
      paramap(function (pack, next) {
        self.getPackIndex(pack.idxLink, function (err, idx) {
          if (err) return next(err)
          var offset = idx.find(objIdBuf)
          // no value: this index does not contain the object
          if (!offset) return next()
          next(null, {
            offset: offset.offset,
            next: offset.next,
            packLink: pack.packLink,
            idx: idx,
            msg: pack.msg,
          })
        })
      }, 4),
      // presumably drops the empty results produced for non-matching indexes
      pull.filter(),
      pull.take(1),
      pull.collect(function (err, offsets) {
        if (err) return cb(err)
        if (offsets.length === 0)
          return cb(new ObjectNotFoundError('unable to find git object '
            + objId + ' in ' + msgs.length + ' messages'))
        cb(null, offsets[0])
      })
    )
  })
}

// given an object id and ssb msg id, get a set of messages of which at least one pushed the object.
// opts: {obj, headMsgId}
// cb(err, msgs): msgs is [msg] as soon as a message lists the object in its
// object_ids, tags or commits. Otherwise, once the traversal over repoBranch
// links is finished, msgs is every visited message that has no object_ids
// property (any of those might contain the object).
Git.prototype.findObjectMsgs = function (opts, cb) {
  var self = this
  var objId = opts.obj
  var headMsgId = opts.headMsgId
  var ended = false
  var waiting = 0
  var maybeMsgs = []

  function cbOnce(err, msgs) {
    if (ended) return
    ended = true
    cb(err, msgs)
  }

  // entries are either plain ids or objects with a sha1 property
  function objectMatches(commit) {
    return commit && (commit === objId || commit.sha1 === objId)
  }

  if (!headMsgId) return cb(new TypeError('missing head message id'))
  if (!u.isRef(headMsgId))
    return cb(new TypeError('bad head message id \'' + headMsgId + '\''))

  ;(function getMsg(msgId) {
    waiting++
    self.app.getMsgDecrypted(msgId, function (err, msg) {
      waiting--
      if (ended) return
      // report the message that is actually missing, which may be an
      // ancestor of the head message rather than the head itself
      if (err && err.name === 'NotFoundError')
        return cbOnce(new Error('missing message ' + msgId))
      if (err) return cbOnce(err)
      var c = msg.value.content
      if (typeof c === 'string')
        return cbOnce(new Error('unable to decrypt message ' + msg.key))
      if ((u.toArray(c.object_ids).some(objectMatches))
      || (u.toArray(c.tags).some(objectMatches))
      || (u.toArray(c.commits).some(objectMatches))) {
        // found the object
        return cbOnce(null, [msg])
      } else if (!c.object_ids) {
        // the object might be here
        maybeMsgs.push(msg)
      }
      // traverse the DAG to keep looking for the object
      u.toArray(c.repoBranch).filter(u.isRef).forEach(getMsg)
      if (waiting === 0) {
        cbOnce(null, maybeMsgs)
      }
    })
  })(headMsgId)
}

// Read and parse a pack index blob; cb(err, idx). Wrapped by the memoized
// this.getPackIndex in the constructor.
Git.prototype._getPackIndex = function (idxBlobLink, cb) {
  pull(this.app.readBlob(idxBlobLink), packidx(cb))
}

// object type names, indexed by the 3-bit type field of a packed object header
var objectTypes = [
  'none', 'commit', 'tree', 'blob',
  'tag', 'unused', 'ofs-delta', 'ref-delta'
]

// Read a packed object header: the first byte holds the continuation bit
// (0x80), a 3-bit type and the low 4 bits of the length; each following byte
// contributes 7 more length bits while its continuation bit is set.
// cb(end) on stream end/error, otherwise cb(null, type, length).
function readTypedVarInt(reader, cb) {
  var type, value, shift
  reader.read(1, function (end, buf) {
    if (end) return cb(end)
    var firstByte = buf[0]
    type = objectTypes[(firstByte >> 4) & 7]
    value = firstByte & 15
    shift = 4
    checkByte(firstByte)
  })

  function checkByte(byte) {
    if (byte & 0x80)
      reader.read(1, gotByte)
    else
      cb(null, type, value)
  }

  function gotByte(end, buf) {
    if (end) return cb(end)
    var byte = buf[0]
    // multiply instead of `<<`: a 32-bit shift wraps (and can go negative)
    // once shift reaches 25
    value += (byte & 0x7f) * Math.pow(2, shift)
    shift += 7
    checkByte(byte)
  }
}

// Read a little-endian base-128 integer (7 bits per byte, 0x80 = more bytes
// follow). cb(end) on stream end/error, otherwise cb(null, value).
function readVarInt(reader, cb) {
  var value = 0, shift = 0
  reader.read(1, function gotByte(end, buf) {
    if (end) return cb(end)
    var byte = buf[0]
    // multiply instead of `<<` so values of 2^31 and above do not wrap
    value += (byte & 0x7f) * Math.pow(2, shift)
    shift += 7
    if (byte & 0x80)
      reader.read(1, gotByte)
    else
      cb(null, value)
  })
}

// pull-stream through that zlib-inflates the data it is given
function inflate(read) {
  return toPull(zlib.createInflate())(read)
}

// Returns a pull-stream through that turns the raw pack slice of one object
// into the object's contents.
// opts.type / opts.length: when set, the header must match them.
// opts.packLink / opts.idx: pack blob and parsed index, used to resolve the
// base object of a ref-delta.
// Note: only 'ref-delta' is resolved here; there is no branch for 'ofs-delta'.
Git.prototype.decodeObject = function (opts) {
  var self = this
  return function (read) {
    var reader = Reader()
    reader(read)
    return u.readNext(function (cb) {
      readTypedVarInt(reader, function (end, type, length) {
        if (end === true) cb(new Error('Missing object type'))
        else if (end) cb(end)
        else if (type === 'ref-delta') getObjectFromRefDelta(length, cb)
        else if (opts.type && type !== opts.type)
          cb(new Error('expected type \'' + opts.type + '\' ' +
            'but found \'' + type + '\''))
        else if (opts.length && length !== opts.length)
          cb(new Error('expected length ' + opts.length + ' ' +
            'but found ' + length))
        else cb(null, inflate(reader.read()))
      })
    })

    // A ref-delta is the 20-byte id of the base object followed by the
    // deflated delta, which starts with the source and target lengths.
    function getObjectFromRefDelta(length, cb) {
      reader.read(20, function (end, sourceHash) {
        if (end) return cb(end)
        var inflatedReader = Reader()
        pull(reader.read(), inflate, inflatedReader)
        readVarInt(inflatedReader, function (err, expectedSourceLength) {
          if (err) return cb(err)
          readVarInt(inflatedReader, function (err, expectedTargetLength) {
            if (err) return cb(err)
            var offset = opts.idx.find(sourceHash)
            // report a missing base object as an error (it used to be passed
            // as the result value, with a misspelled variable name)
            if (!offset) return cb(new Error('missing source object ' +
              sourceHash.toString('hex')))
            var readSource = pull(
              self.app.readBlobSlice(opts.packLink, {
                start: offset.offset,
                end: offset.next
              }),
              self.decodeObject({
                type: opts.type,
                length: expectedSourceLength,
                packLink: opts.packLink,
                idx: opts.idx
              })
            )
            cb(null, patchObject(inflatedReader, length, readSource, expectedTargetLength))
          })
        })
      })
    }
  }
}

// Read the operands of a delta copy command. Bits 0-3 of cmd say which of the
// four offset bytes are present, bits 4-6 which of the three size bytes;
// absent bytes are zero. A size of 0 means 0x10000.
// readCb(err) on read failure, otherwise readCb(null, offset, size).
function readOffsetSize(cmd, reader, readCb) {
  var offset = 0, size = 0

  function addByte(bit, outPos, cb) {
    if (cmd & (1 << bit))
      reader.read(1, function (err, buf) {
        if (err) readCb(err)
        else cb(buf[0] << (outPos << 3))
      })
    else
      cb(0)
  }

  addByte(0, 0, function (val) {
    offset = val
    addByte(1, 1, function (val) {
      offset |= val
      addByte(2, 2, function (val) {
        offset |= val
        addByte(3, 3, function (val) {
          offset |= val
          addSize()
        })
      })
    })
  })
  function addSize() {
    addByte(4, 0, function (val) {
      size = val
      addByte(5, 1, function (val) {
        size |= val
        addByte(6, 2, function (val) {
          size |= val
          // `>>> 0`: a top offset byte >= 0x80 makes the int32 negative,
          // which Buffer#slice would treat as "from the end"
          readCb(null, offset >>> 0, size || 0x10000)
        })
      })
    })
  }
}

// Returns a pull-stream source producing the target object obtained by
// applying the delta commands in deltaReader to the source object.
// readSource is buffered completely before the first command is read.
// deltaLength and targetLength are accepted but not used.
function patchObject(deltaReader, deltaLength, readSource, targetLength) {
  var srcBuf
  var ended

  return u.readNext(function (cb) {
    pull(readSource, u.pullConcat(function (err, buf) {
      if (err) return cb(err)
      srcBuf = buf
      cb(null, read)
    }))
  })

  function read(abort, cb) {
    if (abort) {
      // propagate aborts upstream instead of reading on (same as readTree)
      ended = abort
      return deltaReader.abort(abort, cb)
    }
    if (ended) return cb(ended)
    deltaReader.read(1, function (end, dBuf) {
      if (ended = end) return cb(end)
      var cmd = dBuf[0]
      if (cmd & 0x80)
        // skip a variable amount and then pass through a variable amount
        readOffsetSize(cmd, deltaReader, function (err, offset, size) {
          if (err) return earlyEnd(err)
          var buf = srcBuf.slice(offset, offset + size)
          cb(null, buf)
        })
      else if (cmd)
        // insert `cmd` bytes from delta
        deltaReader.read(cmd, cb)
      else
        cb(new Error("unexpected delta opcode 0"))
    })

    function earlyEnd(err) {
      cb(err === true ? new Error('stream ended early') : err)
    }
  }
}

// "name <localpart@rest> timestamp tz"
var gitNameRegex = /^(.*) <(([^>@]*)(@[^>]*)?)> (.*) (.*)$/
// Parse an author/committer/tagger value. Returns null when the line does not
// match; feed is set only when the part from '@' onwards is a ref.
function parseName(line) {
  var m = gitNameRegex.exec(line)
  if (!m) return null
  return {
    name: m[1],
    email: m[2],
    localpart: m[3],
    feed: u.isRef(m[4]) && m[4] || undefined,
    date: new Date(m[5] * 1000),
    tz: m[6],
  }
}

// Read and parse a commit object.
// cb(err, commit) with commit = {msg, parents, tree, committer, author?,
// gpgsig?, body}; author is set only when it differs from the committer.
// Any header other than tree/parent/author/committer/gpgsig is an error.
Git.prototype.getCommit = function (obj, cb) {
  pull(this.readObject(obj), u.pullConcat(function (err, buf) {
    if (err) return cb(err)
    var commit = {
      msg: obj.msg,
      parents: [],
    }
    var authorLine, committerLine
    var lines = buf.toString('utf8').split('\n')
    // headers end at the first empty line; the rest is the body
    for (var line; (line = lines.shift()); ) {
      var parts = line.split(' ')
      var prop = parts.shift()
      var value = parts.join(' ')
      switch (prop) {
        case 'tree':
          commit.tree = value
          break
        case 'parent':
          commit.parents.push(value)
          break
        case 'author':
          authorLine = value
          break
        case 'committer':
          committerLine = value
          break
        case 'gpgsig':
          // continuation lines of the signature start with a space
          var sigLines = [value]
          while (lines[0] && lines[0][0] == ' ')
            sigLines.push(lines.shift().slice(1))
          commit.gpgsig = sigLines.join('\n')
          break
        default:
          return cb(new TypeError('unknown git object property ' + prop))
      }
    }
    commit.committer = parseName(committerLine)
    if (authorLine !== committerLine) commit.author = parseName(authorLine)
    commit.body = lines.join('\n')
    cb(null, commit)
  }))
}

// Read and parse a tag object.
// cb(err, tag) with tag = {msg, object, type, tag, tagger, body}.
// Any header other than object/type/tag/tagger is an error.
Git.prototype.getTag = function (obj, cb) {
  pull(this.readObject(obj), u.pullConcat(function (err, buf) {
    if (err) return cb(err)
    var tag = {
      msg: obj.msg,
    }
    var lines = buf.toString('utf8').split('\n')
    // headers end at the first empty line; the rest is the body
    for (var line; (line = lines.shift()); ) {
      var parts = line.split(' ')
      var prop = parts.shift()
      var value = parts.join(' ')
      switch (prop) {
        case 'object':
          tag.object = value
          break
        case 'type':
          // report the offending value (this used to reference an undefined
          // variable and throw a ReferenceError instead)
          if (!types[value])
            return cb(new TypeError('unknown git object type ' + value))
          tag.type = value
          break
        case 'tag':
          tag.tag = value
          break
        case 'tagger':
          tag.tagger = parseName(value)
          break
        default:
          return cb(new TypeError('unknown git object property ' + prop))
      }
    }
    tag.body = lines.join('\n')
    cb(null, tag)
  }))
}

// Read bytes up to (and consuming) a NUL byte; cb(err, string) with the bytes
// before the NUL decoded as UTF-8.
function readCString(reader, cb) {
  var chars = []
  var loop = looper(function () {
    reader.read(1, next)
  })
  function next(err, ch) {
    if (err) return cb(err)
    if (ch[0] === 0) return cb(null, Buffer.concat(chars).toString('utf8'))
    chars.push(ch)
    loop()
  }
  loop()
}

// tree entry modes; parsed from octal strings because legacy octal literals
// (0040000) are a syntax error in strict mode and ES modules
var MODE_TREE = parseInt('40000', 8)
var MODE_COMMIT = parseInt('160000', 8)

// Returns a pull-stream source of the entries of a tree object:
// {name, mode, hash, type}. Each entry is "<octal mode> <name>\0" followed by
// a 20-byte hash.
Git.prototype.readTree = function (obj) {
  var reader = Reader()
  reader(this.readObject(obj))
  return function (abort, cb) {
    if (abort) return reader.abort(abort, cb)
    readCString(reader, function (err, str) {
      if (err) return cb(err)
      var parts = str.split(' ')
      var mode = parseInt(parts[0], 8)
      var name = parts.slice(1).join(' ')
      reader.read(20, function (err, hash) {
        if (err) return cb(err)
        cb(null, {
          name: name,
          mode: mode,
          hash: hash.toString('hex'),
          type: mode === MODE_TREE ? 'tree' :
            mode === MODE_COMMIT ? 'commit' : 'blob',
        })
      })
    })
  }
}

// Returns a pull-stream source of the recursive tree diff between a commit
// and its parents (the trees are passed to diffTreesRecursive in the order
// parents..., commit).
Git.prototype.readCommitChanges = function (commit) {
  var self = this
  return u.readNext(function (cb) {
    var gotCommits = multicb({pluck: 1})
    commit.parents.forEach(function (rev) {
      var next = gotCommits()
      self.getObjectMsg({
        obj: rev,
        headMsgId: commit.msg.key,
        type: 'commit',
      }, function (err, msg) {
        if (err) return next(err)
        self.openObject({
          obj: rev,
          msg: msg.key,
        }, function (err, obj) {
          if (err) return next(err)
          self.getCommit(obj, next)
        })
      })
    })
    // the commit itself goes last, after its parents
    gotCommits()(null, commit)
    gotCommits(function (err, commits) {
      if (err) return cb(err)
      var gotTrees = multicb({pluck: 1})
      commits.forEach(function (c) {
        var next = gotTrees()
        if (!c.tree) return next(null, pull.empty())
        self.getObjectMsg({
          obj: c.tree,
          headMsgId: c.msg.key,
          type: 'tree',
        }, function (err, msg) {
          if (err) return next(err)
          // open the tree in the message that getObjectMsg found, as the
          // parent-commit lookup above and diffTreesRecursive do; the tree
          // is not necessarily in the commit's own message
          self.openObject({
            obj: c.tree,
            msg: msg.key,
          }, next)
        })
      })
      gotTrees(function (err, trees) {
        if (err) return cb(err)
        cb(null, self.diffTreesRecursive(trees))
      })
    })
  })
}

// Returns a pull-stream source of the entries that differ between the given
// trees (one level only), keyed by entry name. hash and type are arrays with
// one element per input tree; the last tree is treated as the head.
Git.prototype.diffTrees = function (objs) {
  var self = this
  return pull(
    kvdiff(objs.map(function (obj) {
      return self.readTree(obj)
    }), 'name'),
    pull.map(function (item) {
      var diff = item.diff || {}
      var head = item.values[item.values.length-1]
      // created: absent from every tree except (possibly) the head
      var created = true
      for (var k = 0; k < item.values.length-1; k++)
        if (item.values[k]) created = false
      return {
        name: item.key,
        // values may be missing for trees without this entry (that is what
        // created/deleted test for), so guard the property reads
        hash: item.values.map(function (val) { return val && val.hash }),
        mode: diff.mode,
        type: item.values.map(function (val) { return val && val.type }),
        deleted: !head,
        created: created
      }
    })
  )
}

// Like diffTrees, but descends into entries that are a tree on at least one
// side; nested entry names are prefixed with the parent name and '/'.
Git.prototype.diffTreesRecursive = function (objs) {
  var self = this
  return pull(
    self.diffTrees(objs),
    paramap(function (item, cb) {
      if (!item.type.some(function (t) { return t === 'tree' }))
        return cb(null, [item])
      var done = multicb({pluck: 1})
      item.type.forEach(function (type, i) {
        var next = done()
        // sides that are not a tree are passed on as a placeholder stream
        if (type !== 'tree') return next(null, pull.once(item))
        var hash = item.hash[i]
        self.getObjectMsg({
          obj: hash,
          headMsgId: objs[i].msg.key,
        }, function (err, msg) {
          if (err) return next(err)
          self.openObject({
            obj: hash,
            msg: msg.key,
          }, next)
        })
      })
      done(function (err, subtrees) {
        if (err) return cb(err)
        cb(null, pull(
          self.diffTreesRecursive(subtrees),
          pull.map(function (f) {
            f.name = item.name + '/' + f.name
            return f
          })
        ))
      })
    }, 4),
    pull.flatten()
  )
}