diff options
| author | Nishanth Aravamudan <nish.aravamudan@canonical.com> | 2016-09-16 22:37:02 (GMT) |
|---|---|---|
| committer | Nishanth Aravamudan <nish.aravamudan@canonical.com> | 2016-09-20 17:03:42 (GMT) |
| commit | dc27af24c7abfe9eaf6c51f63f6c47af076a414a (patch) | |
| tree | e6acbf52d31a57626eb34fa04a7129a8205a313c | |
| parent | 182187d9965e7dc04cd476778cdb6fcbb82398d7 (diff) | |
usd-import: convert from python3-git to python3-pygit2
When loading an existing directory's git tree, python3-git can take a
*very* long time, as it runs `git cat-file --batch` and `git cat-file
--batch-check`, which effectively iterates over every object in the
repository at the specified directory, in order to validate it. While
there are use cases for that depth of git support, we do not
actually need it in the importer. Switch to python3-pygit2, as it's also
used by Launchpad (which means it's probably fast) and provides
basically the same level of API as python3-git. As there is no longer
direct access to a repository's `git` command, convert the subprocess
calls to be shell-based.
Signed-off-by: Nishanth Aravamudan <nish.aravamudan@canonical.com>
| -rwxr-xr-x | usd-import | 254 |
1 files changed, 127 insertions, 127 deletions
@@ -39,8 +39,8 @@ import tempfile try: pkg = 'python3-debian' import debian - pkg = 'python3-git' - import git + pkg = 'python3-pygit2' + import pygit2 pkg = 'python3-launchpadlib' from launchpadlib.launchpad import Launchpad pkg = 'python3-ubuntutools' @@ -77,40 +77,33 @@ class USDGitRepository: except FileExistsError: pass - # Unpleasant, but I could not find a cleaner way to set up an - # xgit-like environment with gitpython - # First, use a direct git invocation for the init() - g = git.Git(local_repo_dir) - g.update_environment(GIT_DIR=local_repo_dir) - g.update_environment(GIT_WORK_TREE=local_repo_wdir) - - # check if the repository has already been initialized - try: - g.status() - except git.exc.GitCommandError: - g.init() + self._local_repo = pygit2.init_repository(local_repo_dir, + flags=pygit2.GIT_REPOSITORY_INIT_NO_DOTGIT_DIR, + workdir_path=local_repo_wdir + ) - # Second, create a repo object which will be passed around that - # points to the same .git directory - self._local_repo = git.Repo(local_repo_dir) - self._local_repo.git.update_environment(GIT_DIR=local_repo_dir) - self._local_repo.git.update_environment(GIT_WORK_TREE=local_repo_wdir) + self._env = os.environ.copy() + self._env['GIT_DIR'] = self._local_repo.path + self._env['GIT_WORK_TREE'] = self._local_repo.workdir try: + self._local_repo.remotes.set_url('%s-%s' % (owner, pkgname), remote_url) self._target_remote = self._local_repo.remotes['%s-%s' % (owner, pkgname)] - except IndexError: - self._target_remote = self._local_repo.create_remote( + except KeyError: + self._target_remote = self._local_repo.remotes.create( '%s-%s' % (owner, pkgname), remote_url) try: + # this is throwing a malformed URL, possibly due to git+ssh + # or other lp quirks (+source)? 
self._target_remote.fetch() - for ref in self._target_remote.refs: - local_head_name = str(ref)[len('%s-%s/' % (owner, pkgname)):] + for branch in self.listall_branches(pygit2.GIT_BRANCH_REMOTE): + local_head_name = branch[len('%s-%s/' % (owner, pkgname)):] # ensure local heads exist, tracking remotes if they are # being freshly created - self.get_or_create_head(local_head_name, ref) - except git.exc.GitCommandError: + self.get_or_create_head(local_head_name, branch) + except pygit2.GitError: logging.warning('No objects found in remote %s', remote_url) @property @@ -126,11 +119,11 @@ class USDGitRepository: return self._target_remote def garbage_collect(self): - self._local_repo.git.gc() + subprocess.run('git gc', shell=True, env=self._env) def push(self, force_push=None): # handle force_push - for branch in self._local_repo.heads: + for branch in self._local_repo.listall_branches(): if force_push is True: self._target_remote.push(branch, force=True) else: @@ -146,21 +139,21 @@ class USDGitRepository: last_version = None else: try: - # test that the ref exists - self._local_repo.git.cat_file('-e', ref) cp = subprocess.run( - ['dpkg-parsechangelog', '-l', '-', '-n', '1', '-S', 'Version'], - input=self._local_repo.git.show('%s:debian/changelog' % ref), + 'git show %s:debian/changelog | dpkg-parsechangelog -l- -n1 -SVersion' % ref, stdout=subprocess.PIPE, universal_newlines=True, - check=True) + check=True, + shell=True, + env=self._env) current_version = cp.stdout.strip() cp = subprocess.run( - ['dpkg-parsechangelog', '-l', '-', '-n', '1', '-o', '1', '-S', 'Version'], - input=self._local_repo.git.show('%s:debian/changelog' % ref), + 'git show %s:debian/changelog | dpkg-parsechangelog -l- -n1 -o1 -SVersion' % ref, stdout=subprocess.PIPE, universal_newlines=True, - check=True) + check=True, + shell=True, + env=self._env) last_version = cp.stdout.strip() except Exception: logging.exception('Cannot get changelog versions') @@ -171,49 +164,46 @@ class USDGitRepository: 
def get_heads_and_versions(self, head_prefix=None): versions = dict() - for head in self._local_repo.heads: + for head in self._local_repo.listall_branches(): if (head_prefix is not None and - not head.name.startswith(head_prefix) + not head.startswith(head_prefix) ): continue - versions[head], _ = self.get_changelog_versions_from_treeish(head) + versions[head] = dict() + versions[head]['head'] = self._local_repo.lookup_branch(head) + versions[head]['version'], _ = self.get_changelog_versions_from_treeish(versions[head]['head'].peel().tree.id) return versions def treeishs_identical(self, ref1, ref2): if ref1 is None or ref2 is None: return False - try: - self._local_repo.git.diff('--quiet', ref1, ref2) - return True - except git.exc.GitCommandError as e: - if e.status == 1: - return False - else: - raise + return len(self._local_repo.diff(self._local_repo.get(ref1), self._local_repo.get(ref2))) == 0 def get_head(self, name): - if name in self._local_repo.heads: - return self._local_repo.heads[name] + if name in self._local_repo.listall_branches(): + return self._local_repo.lookup_branch(name) return None - def get_or_create_head(self, local_head_name, remote_ref): + def get_or_create_head(self, local_head_name, remote_branch): try: - for head in self._local_repo.heads: - if head.name == local_head_name: - return head - return self._local_repo.create_head( - local_head_name, - remote_ref - ).set_tracking_branch(remote_ref) + if local_head_name in self._local_repo.listall_branches(): + return self._local_repo.lookup_branch(local_head_name) + branch = self._local_repo.create_branch(local_head_name) + branch.remote = self._target_remote + branch.upstream = remote_branch + return branch except IndexError: - return self._local_repo.create_head(local_head_name) + return self._local_repo.create_branch(local_head_name) def tag_exists(self, tag): - return tag in self._local_repo.tags + try: + return self._local_repo.lookup_reference('refs/tags/%s' % tag) + except (KeyError, 
ValueError): + return None @staticmethod def tag_commit_hash_equals(tag, ref): - return tag.commit.hexsha == ref.commit.hexsha + return self.tag_exists(tag).peel().id == self._local_repo.lookup_reference(ref).peel().id @staticmethod def import_tag(version): @@ -228,30 +218,31 @@ class USDGitRepository: return 'orphan/%s' % git_dsc_commit_tag(version) def get_import_tag(self, version): - if self.tag_exists(self.import_tag(version)): - return self._local_repo.tags[self.import_tag(version)] - if self.tag_exists(self.upload_tag(version)): - return self._local_repo.tags[self.upload_tag(version)] - return None + tag = self.tag_exists(self.import_tag(version)) + if tag is None: + tag = self.tag_exists(self.upload_tag(version)) + return tag def nearest_tag(self, commit): - return self._local_repo.git.describe('--tags', commit) + return self._local_repo.describe(committish=self._local_repo.get(commit), describe_strategy=pygit2.GIT_DESCRIBE_TAGS) def get_commit_authorship(self, ref): cp = subprocess.run( - ['dpkg-parsechangelog', '-l', '-', '-S', 'Maintainer'], - input=self._local_repo.git.show('%s:debian/changelog' % ref), + 'git show %s:debian/changelog | dpkg-parsechangelog -l- -SMaintainer' % ref, stdout=subprocess.PIPE, universal_newlines=True, - check=True) + check=True, + shell=True, + env=self._env) author = cp.stdout.strip() m = re.match(r'(?P<name>.*) <(?P<email>.*)>', author) cp = subprocess.run( - ['dpkg-parsechangelog', '-l', '-', '-S', 'Date'], - input=self._local_repo.git.show('%s:debian/changelog' % ref), + 'git show %s:debian/changelog | dpkg-parsechangelog -l- -SDate' % ref, stdout=subprocess.PIPE, universal_newlines=True, - check=True) + check=True, + shell=True, + env=self._env) date = cp.stdout.strip() return (m.group('name'), m.group('email'), date) @@ -272,33 +263,36 @@ class USDGitRepository: 'GIT_COMMITTER_DATE':committer_date} def update_head_to_commit(self, head_name, commit_hash): - if head_name not in self._local_repo.heads: + if head_name not in 
self._local_repo.listall_branches(): # first commit in this series - self._local_repo.create_head(head_name, commit_hash) + self._local_repo.create_branch(head_name, self._local_repo.get(commit_hash)) else: - self._local_repo.heads[head_name].reference = commit_hash + self._local_repo.lookup_branch(head_name).set_target(commit_hash) def commit_import(self, publish_parent_commit, changelog_parent_commit, tree_hash, head_name, spi): tag = None if publish_parent_commit is None and \ changelog_parent_commit is None and \ - head_name in self._local_repo.heads: + head_name in self._local_repo.listall_branches(): tag = self.orphan_tag(spi.version) elif self.get_import_tag(spi.version) is None: tag = self.import_tag(spi.version) - commit_tree_args = (tree_hash, - '-m', - 'Import version %s to %s\n\nImported using usd-importer.' % - (spi.version, head_name) - ) + commit_tree = 'git commit-tree %s -m \'Import version %s to %s\n\nImported using usd-importer.\'' % (tree_hash, spi.version, head_name) if publish_parent_commit is not None: - commit_tree_args += ('-p', publish_parent_commit) + commit_tree += ' -p %s' % publish_parent_commit if changelog_parent_commit is not None: - commit_tree_args += ('-p', changelog_parent_commit) - with self._local_repo.git.custom_environment( - **self.get_commit_environment(tree_hash, spi) - ): - commit_hash = self._local_repo.git.commit_tree(*commit_tree_args) + commit_tree += ' -p %s' % changelog_parent_commit + commit_env = self._env + commit_env.update(self.get_commit_environment(tree_hash, spi)) + cp = subprocess.run( + commit_tree, + stdout=subprocess.PIPE, + universal_newlines=True, + check=True, + shell=True, + env=commit_env + ) + commit_hash = cp.stdout.strip() self.update_head_to_commit(head_name, commit_hash) logging.info('Committed import of %s as %s in %s', @@ -306,10 +300,15 @@ class USDGitRepository: ) if tag is not None: + # should be annotated to use create_tag API logging.info('Creating tag %s pointing to %s', tag, 
commit_hash) - self._local_repo.create_tag(tag, ref=commit_hash) - - self.update_head_to_commit(head_name, commit_hash) + cp = subprocess.run( + 'git tag %s %s' % (tag, commit_hash), + stdout=subprocess.PIPE, + universal_newlines=True, + check=True, + shell=True, + env=self._env) @staticmethod def version_compare(a, b): @@ -324,17 +323,13 @@ class USDGitRepository: def get_versions_from_changelog(self, treeish): try: cp = subprocess.run( - ['dpkg-parsechangelog', - '-l', '-', - '--format', 'rfc822', - '-S', 'Version', - '--all' - ], - input=self._local_repo.git.show('%s:debian/changelog' % treeish), + 'git show %s:debian/changelog | dpkg-parsechangelog -l- --format rfc822 -SVersion --all' % treeish, stdout=subprocess.PIPE, stderr=None, universal_newlines=True, - check=True + check=True, + shell=True, + env=self._env ) except: logging.error( @@ -371,9 +366,14 @@ class USDGitRepository: raise e # relies on this API being present, handle error if not? - import_tree_hash = self._local_repo.git.dsc_commit( - '--tree-only', srcpkg.dsc_pathname - ) + cp = subprocess.run( + 'git dsc-commit --tree-only %s' % srcpkg.dsc_pathname, + stdout=subprocess.PIPE, + universal_newlines=True, + check=True, + shell=True, + env=self._env) + import_tree_hash = cp.stdout.strip() logging.info('Imported version %s as tree %s', spi.version, import_tree_hash ) @@ -392,18 +392,18 @@ class USDGitRepository: 'source pkg version: {} != changelog version: {}'.format( spi.version, changelog_version)) + tip_version = None # check if the version to import has already been imported to # this head if tip_head is not None: if self.treeishs_identical( - import_tree_hash, tip_head.commit.tree.hexsha + import_tree_hash, tip_head.peel().id ): logging.warn('%s is identical to %s', head_name, spi.version ) return - - tip_version, _ = self.get_changelog_versions_from_treeish(tip_head) + tip_version, _ = self.get_changelog_versions_from_treeish(tip_head.peel().id) logging.info('Tip version is %s', tip_version) 
previous_changelog_version_tag = \ @@ -424,7 +424,7 @@ class USDGitRepository: parent_overrides[spi.version]['publish_parent'] ) parent_publish_version, _ = \ - self.get_changelog_versions_from_treeish(publish_parent_tag.commit.tree.hexsha) + self.get_changelog_versions_from_treeish(publish_parent_tag.peel().id) if parent_publish_version != parent_overrides[spi.version]['publish_parent']: logging.error('Found a tag corresponding to ' 'version %s, but d/changelog ' @@ -459,7 +459,7 @@ class USDGitRepository: parent_overrides[spi.version]['changelog_parent'] ) parent_changelog_version, _ = \ - self.get_changelog_versions_from_treeish(changelog_parent_tag.commit.tree.hexsha) + self.get_changelog_versions_from_treeish(changelog_parent_tag.peel().id) if parent_changelog_version != parent_overrides[spi.version]['changelog_parent']: logging.error('Found a tag corresponding to ' 'version %s, but d/changelog ' @@ -493,13 +493,13 @@ class USDGitRepository: parent_head = self.get_head(spi.parent_head_name) if parent_head is not None: logging.info('Publishing parent (tag) is %s', - self.nearest_tag(parent_head.commit) + self.nearest_tag(parent_head.peel().id) ) - publish_parent_commit = parent_head.commit + publish_parent_commit = parent_head.peel().id else: logging.info('Publishing parent (tag) is %s', - self.nearest_tag(tip_head.commit)) - publish_parent_commit = tip_head.commit + self.nearest_tag(tip_head.peel().id)) + publish_parent_commit = tip_head.peel().id if self.version_compare(str(srcpkg.version), tip_version) <= 0: logging.warn('Version to import (%s) is before %s tip (%s)', @@ -514,7 +514,7 @@ class USDGitRepository: # sanity check that version from d/changelog of the # tagged parent matches ours parent_changelog_version, _ = \ - self.get_changelog_versions_from_treeish(changelog_parent_tag.commit.tree.hexsha) + self.get_changelog_versions_from_treeish(changelog_parent_tag.peel(pygit2.Tree).id) if parent_changelog_version != version: logging.error('Found a tag 
corresponding to ' 'version %s, but d/changelog ' @@ -525,8 +525,8 @@ class USDGitRepository: ) ) else: - changelog_parent_commit = changelog_parent_tag.commit - logging.info('Changelog parent (tag) is %s', changelog_parent_tag) + changelog_parent_commit = changelog_parent_tag.peel().id + logging.info('Changelog parent (tag) is %s', self.nearest_tag(changelog_parent_tag.peel().id)) break # If the two parents are tree-identical, then favor publication @@ -639,14 +639,15 @@ class USDSourceInformation: @staticmethod def _head_version_is_equal(head_versions, spi): - for head in head_versions: - if head.name == spi.head_name: - if (head_versions[head] == spi.version and - (spi.date_published is None or - int(spi.date_published.timestamp()) == head.commit.committed_date) - ): - return True - return False + try: + if (head_versions[spi.head_name]['version'] == spi.version and + (spi.date_published is None or + int(spi.date_published.timestamp()) == head_versions[spi.head_name]['head'].peel().commit_time) + ): + return True + return False + except KeyError: + return False def launchpad_versions_published_after(self, head_versions): args = { @@ -833,23 +834,22 @@ def main(): ubuntu_source_information = USDSourceInformation('ubuntu', pkgname) debian_head_versions = local_repo.get_heads_and_versions('debian') - for head in debian_head_versions: + for head_name in debian_head_versions: logging.info('Last %s version is %s', - head.name, - debian_head_versions[head] + head_name, + debian_head_versions[head_name]['version'] ) ubuntu_head_versions = local_repo.get_heads_and_versions('ubuntu') - for head in ubuntu_head_versions: + for head_name in ubuntu_head_versions: logging.info('Last %s version is %s', - head.name, - ubuntu_head_versions[head] + head_name, + ubuntu_head_versions[head_name]['version'] ) oldcwd = os.getcwd() os.chdir(local_repo.local_dir) - history_found = [] for distname, versions, dist_sinfo in ( ("debian", debian_head_versions, debian_source_information), |
