summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nishanth Aravamudan <nish.aravamudan@canonical.com> 2016-09-16 22:37:02 (GMT)
committer: Nishanth Aravamudan <nish.aravamudan@canonical.com> 2016-09-20 17:03:42 (GMT)
commit: dc27af24c7abfe9eaf6c51f63f6c47af076a414a (patch)
tree: e6acbf52d31a57626eb34fa04a7129a8205a313c
parent: 182187d9965e7dc04cd476778cdb6fcbb82398d7 (diff)
usd-import: convert from python3-git to python3-pygit2
When loading an existing directory's git tree, python3-git can take a *very* long time, as it runs `git cat-file --batch` and `git cat-file --batch-check`, which effectively iterates over every object in the repository at the specified directory, in order to validate it. While there are use-cases for that level of depth of git support, we do not actually need it in the importer. Switch to python3-pygit2, as it's also used by Launchpad (which means it's probably fast) and provides basically the same level of API as python3-git. As there is no longer direct access to a repository's `git` command, convert the subprocess calls to be shell-based.

Signed-off-by: Nishanth Aravamudan <nish.aravamudan@canonical.com>
-rwxr-xr-x usd-import 254
1 files changed, 127 insertions, 127 deletions
diff --git a/usd-import b/usd-import
index 02d6063..ee2eab3 100755
--- a/usd-import
+++ b/usd-import
@@ -39,8 +39,8 @@ import tempfile
try:
pkg = 'python3-debian'
import debian
- pkg = 'python3-git'
- import git
+ pkg = 'python3-pygit2'
+ import pygit2
pkg = 'python3-launchpadlib'
from launchpadlib.launchpad import Launchpad
pkg = 'python3-ubuntutools'
@@ -77,40 +77,33 @@ class USDGitRepository:
except FileExistsError:
pass
- # Unpleasant, but I could not find a cleaner way to set up an
- # xgit-like environment with gitpython
- # First, use a direct git invocation for the init()
- g = git.Git(local_repo_dir)
- g.update_environment(GIT_DIR=local_repo_dir)
- g.update_environment(GIT_WORK_TREE=local_repo_wdir)
-
- # check if the repository has already been initialized
- try:
- g.status()
- except git.exc.GitCommandError:
- g.init()
+ self._local_repo = pygit2.init_repository(local_repo_dir,
+ flags=pygit2.GIT_REPOSITORY_INIT_NO_DOTGIT_DIR,
+ workdir_path=local_repo_wdir
+ )
- # Second, create a repo object which will be passed around that
- # points to the same .git directory
- self._local_repo = git.Repo(local_repo_dir)
- self._local_repo.git.update_environment(GIT_DIR=local_repo_dir)
- self._local_repo.git.update_environment(GIT_WORK_TREE=local_repo_wdir)
+ self._env = os.environ.copy()
+ self._env['GIT_DIR'] = self._local_repo.path
+ self._env['GIT_WORK_TREE'] = self._local_repo.workdir
try:
+ self._local_repo.remotes.set_url('%s-%s' % (owner, pkgname), remote_url)
self._target_remote = self._local_repo.remotes['%s-%s' % (owner, pkgname)]
- except IndexError:
- self._target_remote = self._local_repo.create_remote(
+ except KeyError:
+ self._target_remote = self._local_repo.remotes.create(
'%s-%s' % (owner, pkgname),
remote_url)
try:
+ # this is throwing a malformed URL, possibly due to git+ssh
+ # or other lp quirks (+source)?
self._target_remote.fetch()
- for ref in self._target_remote.refs:
- local_head_name = str(ref)[len('%s-%s/' % (owner, pkgname)):]
+ for branch in self.listall_branches(pygit2.GIT_BRANCH_REMOTE):
+ local_head_name = branch[len('%s-%s/' % (owner, pkgname)):]
# ensure local heads exist, tracking remotes if they are
# being freshly created
- self.get_or_create_head(local_head_name, ref)
- except git.exc.GitCommandError:
+ self.get_or_create_head(local_head_name, branch)
+ except pygit2.GitError:
logging.warning('No objects found in remote %s', remote_url)
@property
@@ -126,11 +119,11 @@ class USDGitRepository:
return self._target_remote
def garbage_collect(self):
- self._local_repo.git.gc()
+ subprocess.run('git gc', shell=True, env=self._env)
def push(self, force_push=None):
# handle force_push
- for branch in self._local_repo.heads:
+ for branch in self._local_repo.listall_branches():
if force_push is True:
self._target_remote.push(branch, force=True)
else:
@@ -146,21 +139,21 @@ class USDGitRepository:
last_version = None
else:
try:
- # test that the ref exists
- self._local_repo.git.cat_file('-e', ref)
cp = subprocess.run(
- ['dpkg-parsechangelog', '-l', '-', '-n', '1', '-S', 'Version'],
- input=self._local_repo.git.show('%s:debian/changelog' % ref),
+ 'git show %s:debian/changelog | dpkg-parsechangelog -l- -n1 -SVersion' % ref,
stdout=subprocess.PIPE,
universal_newlines=True,
- check=True)
+ check=True,
+ shell=True,
+ env=self._env)
current_version = cp.stdout.strip()
cp = subprocess.run(
- ['dpkg-parsechangelog', '-l', '-', '-n', '1', '-o', '1', '-S', 'Version'],
- input=self._local_repo.git.show('%s:debian/changelog' % ref),
+ 'git show %s:debian/changelog | dpkg-parsechangelog -l- -n1 -o1 -SVersion' % ref,
stdout=subprocess.PIPE,
universal_newlines=True,
- check=True)
+ check=True,
+ shell=True,
+ env=self._env)
last_version = cp.stdout.strip()
except Exception:
logging.exception('Cannot get changelog versions')
@@ -171,49 +164,46 @@ class USDGitRepository:
def get_heads_and_versions(self, head_prefix=None):
versions = dict()
- for head in self._local_repo.heads:
+ for head in self._local_repo.listall_branches():
if (head_prefix is not None and
- not head.name.startswith(head_prefix)
+ not head.startswith(head_prefix)
):
continue
- versions[head], _ = self.get_changelog_versions_from_treeish(head)
+ versions[head] = dict()
+ versions[head]['head'] = self._local_repo.lookup_branch(head)
+ versions[head]['version'], _ = self.get_changelog_versions_from_treeish(versions[head]['head'].peel().tree.id)
return versions
def treeishs_identical(self, ref1, ref2):
if ref1 is None or ref2 is None:
return False
- try:
- self._local_repo.git.diff('--quiet', ref1, ref2)
- return True
- except git.exc.GitCommandError as e:
- if e.status == 1:
- return False
- else:
- raise
+ return len(self._local_repo.diff(self._local_repo.get(ref1), self._local_repo.get(ref2))) == 0
def get_head(self, name):
- if name in self._local_repo.heads:
- return self._local_repo.heads[name]
+ if name in self._local_repo.listall_branches():
+ return self._local_repo.lookup_branch(name)
return None
- def get_or_create_head(self, local_head_name, remote_ref):
+ def get_or_create_head(self, local_head_name, remote_branch):
try:
- for head in self._local_repo.heads:
- if head.name == local_head_name:
- return head
- return self._local_repo.create_head(
- local_head_name,
- remote_ref
- ).set_tracking_branch(remote_ref)
+ if local_head_name in self._local_repo.listall_branches():
+ return self._local_repo.lookup_branch(local_head_name)
+ branch = self._local_repo.create_branch(local_head_name)
+ branch.remote = self._target_remote
+ branch.upstream = remote_branch
+ return branch
except IndexError:
- return self._local_repo.create_head(local_head_name)
+ return self._local_repo.create_branch(local_head_name)
def tag_exists(self, tag):
- return tag in self._local_repo.tags
+ try:
+ return self._local_repo.lookup_reference('refs/tags/%s' % tag)
+ except (KeyError, ValueError):
+ return None
@staticmethod
def tag_commit_hash_equals(tag, ref):
- return tag.commit.hexsha == ref.commit.hexsha
+ return self.tag_exists(tag).peel().id == self._local_repo.lookup_reference(ref).peel().id
@staticmethod
def import_tag(version):
@@ -228,30 +218,31 @@ class USDGitRepository:
return 'orphan/%s' % git_dsc_commit_tag(version)
def get_import_tag(self, version):
- if self.tag_exists(self.import_tag(version)):
- return self._local_repo.tags[self.import_tag(version)]
- if self.tag_exists(self.upload_tag(version)):
- return self._local_repo.tags[self.upload_tag(version)]
- return None
+ tag = self.tag_exists(self.import_tag(version))
+ if tag is None:
+ tag = self.tag_exists(self.upload_tag(version))
+ return tag
def nearest_tag(self, commit):
- return self._local_repo.git.describe('--tags', commit)
+ return self._local_repo.describe(committish=self._local_repo.get(commit), describe_strategy=pygit2.GIT_DESCRIBE_TAGS)
def get_commit_authorship(self, ref):
cp = subprocess.run(
- ['dpkg-parsechangelog', '-l', '-', '-S', 'Maintainer'],
- input=self._local_repo.git.show('%s:debian/changelog' % ref),
+ 'git show %s:debian/changelog | dpkg-parsechangelog -l- -SMaintainer' % ref,
stdout=subprocess.PIPE,
universal_newlines=True,
- check=True)
+ check=True,
+ shell=True,
+ env=self._env)
author = cp.stdout.strip()
m = re.match(r'(?P<name>.*) <(?P<email>.*)>', author)
cp = subprocess.run(
- ['dpkg-parsechangelog', '-l', '-', '-S', 'Date'],
- input=self._local_repo.git.show('%s:debian/changelog' % ref),
+ 'git show %s:debian/changelog | dpkg-parsechangelog -l- -SDate' % ref,
stdout=subprocess.PIPE,
universal_newlines=True,
- check=True)
+ check=True,
+ shell=True,
+ env=self._env)
date = cp.stdout.strip()
return (m.group('name'), m.group('email'), date)
@@ -272,33 +263,36 @@ class USDGitRepository:
'GIT_COMMITTER_DATE':committer_date}
def update_head_to_commit(self, head_name, commit_hash):
- if head_name not in self._local_repo.heads:
+ if head_name not in self._local_repo.listall_branches():
# first commit in this series
- self._local_repo.create_head(head_name, commit_hash)
+ self._local_repo.create_branch(head_name, self._local_repo.get(commit_hash))
else:
- self._local_repo.heads[head_name].reference = commit_hash
+ self._local_repo.lookup_branch(head_name).set_target(commit_hash)
def commit_import(self, publish_parent_commit, changelog_parent_commit, tree_hash, head_name, spi):
tag = None
if publish_parent_commit is None and \
changelog_parent_commit is None and \
- head_name in self._local_repo.heads:
+ head_name in self._local_repo.listall_branches():
tag = self.orphan_tag(spi.version)
elif self.get_import_tag(spi.version) is None:
tag = self.import_tag(spi.version)
- commit_tree_args = (tree_hash,
- '-m',
- 'Import version %s to %s\n\nImported using usd-importer.' %
- (spi.version, head_name)
- )
+ commit_tree = 'git commit-tree %s -m \'Import version %s to %s\n\nImported using usd-importer.\'' % (tree_hash, spi.version, head_name)
if publish_parent_commit is not None:
- commit_tree_args += ('-p', publish_parent_commit)
+ commit_tree += ' -p %s' % publish_parent_commit
if changelog_parent_commit is not None:
- commit_tree_args += ('-p', changelog_parent_commit)
- with self._local_repo.git.custom_environment(
- **self.get_commit_environment(tree_hash, spi)
- ):
- commit_hash = self._local_repo.git.commit_tree(*commit_tree_args)
+ commit_tree += ' -p %s' % changelog_parent_commit
+ commit_env = self._env
+ commit_env.update(self.get_commit_environment(tree_hash, spi))
+ cp = subprocess.run(
+ commit_tree,
+ stdout=subprocess.PIPE,
+ universal_newlines=True,
+ check=True,
+ shell=True,
+ env=commit_env
+ )
+ commit_hash = cp.stdout.strip()
self.update_head_to_commit(head_name, commit_hash)
logging.info('Committed import of %s as %s in %s',
@@ -306,10 +300,15 @@ class USDGitRepository:
)
if tag is not None:
+ # should be annotated to use create_tag API
logging.info('Creating tag %s pointing to %s', tag, commit_hash)
- self._local_repo.create_tag(tag, ref=commit_hash)
-
- self.update_head_to_commit(head_name, commit_hash)
+ cp = subprocess.run(
+ 'git tag %s %s' % (tag, commit_hash),
+ stdout=subprocess.PIPE,
+ universal_newlines=True,
+ check=True,
+ shell=True,
+ env=self._env)
@staticmethod
def version_compare(a, b):
@@ -324,17 +323,13 @@ class USDGitRepository:
def get_versions_from_changelog(self, treeish):
try:
cp = subprocess.run(
- ['dpkg-parsechangelog',
- '-l', '-',
- '--format', 'rfc822',
- '-S', 'Version',
- '--all'
- ],
- input=self._local_repo.git.show('%s:debian/changelog' % treeish),
+ 'git show %s:debian/changelog | dpkg-parsechangelog -l- --format rfc822 -SVersion --all' % treeish,
stdout=subprocess.PIPE,
stderr=None,
universal_newlines=True,
- check=True
+ check=True,
+ shell=True,
+ env=self._env
)
except:
logging.error(
@@ -371,9 +366,14 @@ class USDGitRepository:
raise e
# relies on this API being present, handle error if not?
- import_tree_hash = self._local_repo.git.dsc_commit(
- '--tree-only', srcpkg.dsc_pathname
- )
+ cp = subprocess.run(
+ 'git dsc-commit --tree-only %s' % srcpkg.dsc_pathname,
+ stdout=subprocess.PIPE,
+ universal_newlines=True,
+ check=True,
+ shell=True,
+ env=self._env)
+ import_tree_hash = cp.stdout.strip()
logging.info('Imported version %s as tree %s',
spi.version, import_tree_hash
)
@@ -392,18 +392,18 @@ class USDGitRepository:
'source pkg version: {} != changelog version: {}'.format(
spi.version, changelog_version))
+ tip_version = None
# check if the version to import has already been imported to
# this head
if tip_head is not None:
if self.treeishs_identical(
- import_tree_hash, tip_head.commit.tree.hexsha
+ import_tree_hash, tip_head.peel().id
):
logging.warn('%s is identical to %s',
head_name, spi.version
)
return
-
- tip_version, _ = self.get_changelog_versions_from_treeish(tip_head)
+ tip_version, _ = self.get_changelog_versions_from_treeish(tip_head.peel().id)
logging.info('Tip version is %s', tip_version)
previous_changelog_version_tag = \
@@ -424,7 +424,7 @@ class USDGitRepository:
parent_overrides[spi.version]['publish_parent']
)
parent_publish_version, _ = \
- self.get_changelog_versions_from_treeish(publish_parent_tag.commit.tree.hexsha)
+ self.get_changelog_versions_from_treeish(publish_parent_tag.peel().id)
if parent_publish_version != parent_overrides[spi.version]['publish_parent']:
logging.error('Found a tag corresponding to '
'version %s, but d/changelog '
@@ -459,7 +459,7 @@ class USDGitRepository:
parent_overrides[spi.version]['changelog_parent']
)
parent_changelog_version, _ = \
- self.get_changelog_versions_from_treeish(changelog_parent_tag.commit.tree.hexsha)
+ self.get_changelog_versions_from_treeish(changelog_parent_tag.peel().id)
if parent_changelog_version != parent_overrides[spi.version]['changelog_parent']:
logging.error('Found a tag corresponding to '
'version %s, but d/changelog '
@@ -493,13 +493,13 @@ class USDGitRepository:
parent_head = self.get_head(spi.parent_head_name)
if parent_head is not None:
logging.info('Publishing parent (tag) is %s',
- self.nearest_tag(parent_head.commit)
+ self.nearest_tag(parent_head.peel().id)
)
- publish_parent_commit = parent_head.commit
+ publish_parent_commit = parent_head.peel().id
else:
logging.info('Publishing parent (tag) is %s',
- self.nearest_tag(tip_head.commit))
- publish_parent_commit = tip_head.commit
+ self.nearest_tag(tip_head.peel().id))
+ publish_parent_commit = tip_head.peel().id
if self.version_compare(str(srcpkg.version), tip_version) <= 0:
logging.warn('Version to import (%s) is before %s tip (%s)',
@@ -514,7 +514,7 @@ class USDGitRepository:
# sanity check that version from d/changelog of the
# tagged parent matches ours
parent_changelog_version, _ = \
- self.get_changelog_versions_from_treeish(changelog_parent_tag.commit.tree.hexsha)
+ self.get_changelog_versions_from_treeish(changelog_parent_tag.peel(pygit2.Tree).id)
if parent_changelog_version != version:
logging.error('Found a tag corresponding to '
'version %s, but d/changelog '
@@ -525,8 +525,8 @@ class USDGitRepository:
)
)
else:
- changelog_parent_commit = changelog_parent_tag.commit
- logging.info('Changelog parent (tag) is %s', changelog_parent_tag)
+ changelog_parent_commit = changelog_parent_tag.peel().id
+ logging.info('Changelog parent (tag) is %s', self.nearest_tag(changelog_parent_tag.peel().id))
break
# If the two parents are tree-identical, then favor publication
@@ -639,14 +639,15 @@ class USDSourceInformation:
@staticmethod
def _head_version_is_equal(head_versions, spi):
- for head in head_versions:
- if head.name == spi.head_name:
- if (head_versions[head] == spi.version and
- (spi.date_published is None or
- int(spi.date_published.timestamp()) == head.commit.committed_date)
- ):
- return True
- return False
+ try:
+ if (head_versions[spi.head_name]['version'] == spi.version and
+ (spi.date_published is None or
+ int(spi.date_published.timestamp()) == head_versions[spi.head_name]['head'].peel().commit_time)
+ ):
+ return True
+ return False
+ except KeyError:
+ return False
def launchpad_versions_published_after(self, head_versions):
args = {
@@ -833,23 +834,22 @@ def main():
ubuntu_source_information = USDSourceInformation('ubuntu', pkgname)
debian_head_versions = local_repo.get_heads_and_versions('debian')
- for head in debian_head_versions:
+ for head_name in debian_head_versions:
logging.info('Last %s version is %s',
- head.name,
- debian_head_versions[head]
+ head_name,
+ debian_head_versions[head_name]['version']
)
ubuntu_head_versions = local_repo.get_heads_and_versions('ubuntu')
- for head in ubuntu_head_versions:
+ for head_name in ubuntu_head_versions:
logging.info('Last %s version is %s',
- head.name,
- ubuntu_head_versions[head]
+ head_name,
+ ubuntu_head_versions[head_name]['version']
)
oldcwd = os.getcwd()
os.chdir(local_repo.local_dir)
-
history_found = []
for distname, versions, dist_sinfo in (
("debian", debian_head_versions, debian_source_information),