普通文本  |  489行  |  16.11 KB

"""
Module with abstraction layers to revision control systems.

With this library, autotest developers can handle source code checkouts and
updates on both client as well as server code.
"""

import os, warnings, logging
import error, utils
from autotest_lib.client.bin import os_dep


class RevisionControlError(Exception):
    """Local exception to be raised by code in this file."""


class GitError(RevisionControlError):
    """Exceptions raised for general git errors."""


class GitCloneError(GitError):
    """Exceptions raised for git clone errors."""


class GitFetchError(GitError):
    """Exception raised for git fetch errors."""


class GitPullError(GitError):
    """Exception raised for git pull errors."""


class GitResetError(GitError):
    """Exception raised for git reset errors."""


class GitCommitError(GitError):
    """Exception raised for git commit errors."""


class GitRepo(object):
    """
    This class represents a git repo.

    It is used to pull down a local copy of a git repo, check if the local
    repo is up-to-date, if not update.  It delegates the install to
    implementation classes.
    """

    def __init__(self, repodir, giturl=None, weburl=None, abs_work_tree=None):
        """
        Initialized reposotory.

        @param repodir: destination repo directory.
        @param giturl: master repo git url.
        @param weburl: a web url for the master repo.
        @param abs_work_tree: work tree of the git repo. In the
            absence of a work tree git manipulations will occur
            in the current working directory for non bare repos.
            In such repos the -git-dir option should point to
            the .git directory and -work-tree should point to
            the repos working tree.
        Note: a bare reposotory is one which contains all the
        working files (the tree) and the other wise hidden files
        (.git) in the same directory. This class assumes non-bare
        reposotories.
        """
        if repodir is None:
            raise ValueError('You must provide a path that will hold the'
                             'git repository')
        self.repodir = utils.sh_escape(repodir)
        self._giturl = giturl
        if weburl is not None:
            warnings.warn("Param weburl: You are no longer required to provide "
                          "a web URL for your git repos", DeprecationWarning)

        # path to .git dir
        self.gitpath = utils.sh_escape(os.path.join(self.repodir,'.git'))

        # Find git base command. If not found, this will throw an exception
        self.git_base_cmd = os_dep.command('git')
        self.work_tree = abs_work_tree

        # default to same remote path as local
        self._build = os.path.dirname(self.repodir)


    @property
    def giturl(self):
        """
        A giturl is necessary to perform certain actions (clone, pull, fetch)
        but not others (like diff).
        """
        if self._giturl is None:
            raise ValueError('Unsupported operation -- this object was not'
                             'constructed with a git URL.')
        return self._giturl


    def gen_git_cmd_base(self):
        """
        The command we use to run git cannot be set. It is reconstructed
        on each access from it's component variables. This is it's getter.
        """
        # base git command , pointing to gitpath git dir
        gitcmdbase = '%s --git-dir=%s' % (self.git_base_cmd,
                                          self.gitpath)
        if self.work_tree:
            gitcmdbase += ' --work-tree=%s' % self.work_tree
        return gitcmdbase


    def _run(self, command, timeout=None, ignore_status=False):
        """
        Auxiliary function to run a command, with proper shell escaping.

        @param timeout: Timeout to run the command.
        @param ignore_status: Whether we should supress error.CmdError
                exceptions if the command did return exit code !=0 (True), or
                not supress them (False).
        """
        return utils.run(r'%s' % (utils.sh_escape(command)),
                         timeout, ignore_status)


    def gitcmd(self, cmd, ignore_status=False, error_class=None,
               error_msg=None):
        """
        Wrapper for a git command.

        @param cmd: Git subcommand (ex 'clone').
        @param ignore_status: If True, ignore the CmdError raised by the
                underlying command runner. NB: Passing in an error_class
                impiles ignore_status=True.
        @param error_class: When ignore_status is False, optional error
                error class to log and raise in case of errors. Must be a
                (sub)type of GitError.
        @param error_msg: When passed with error_class, used as a friendly
                error message.
        """
        # TODO(pprabhu) Get rid of the ignore_status argument.
        # Now that we support raising custom errors, we always want to get a
        # return code from the command execution, instead of an exception.
        ignore_status = ignore_status or error_class is not None
        cmd = '%s %s' % (self.gen_git_cmd_base(), cmd)
        rv = self._run(cmd, ignore_status=ignore_status)
        if rv.exit_status != 0 and error_class is not None:
            logging.error('git command failed: %s: %s',
                          cmd, error_msg if error_msg is not None else '')
            logging.error(rv.stderr)
            raise error_class(error_msg if error_msg is not None
                              else rv.stderr)

        return rv


    def clone(self, remote_branch=None):
        """
        Clones a repo using giturl and repodir.

        Since we're cloning the master repo we don't have a work tree yet,
        make sure the getter of the gitcmd doesn't think we do by setting
        work_tree to None.

        @param remote_branch: Specify the remote branch to clone. None if to
                              clone master branch.

        @raises GitCloneError: if cloning the master repo fails.
        """
        logging.info('Cloning git repo %s', self.giturl)
        cmd = 'clone %s %s ' % (self.giturl, self.repodir)
        if remote_branch:
            cmd += '-b %s' % remote_branch
        abs_work_tree = self.work_tree
        self.work_tree = None
        try:
            rv = self.gitcmd(cmd, True)
            if rv.exit_status != 0:
                logging.error(rv.stderr)
                raise GitCloneError('Failed to clone git url', rv)
            else:
                logging.info(rv.stdout)
        finally:
            self.work_tree = abs_work_tree


    def pull(self, rebase=False):
        """
        Pulls into repodir using giturl.

        @param rebase: If true forces git pull to perform a rebase instead of a
                        merge.
        @raises GitPullError: if pulling from giturl fails.
        """
        logging.info('Updating git repo %s', self.giturl)
        cmd = 'pull '
        if rebase:
            cmd += '--rebase '
        cmd += self.giturl

        rv = self.gitcmd(cmd, True)
        if rv.exit_status != 0:
            logging.error(rv.stderr)
            e_msg = 'Failed to pull git repo data'
            raise GitPullError(e_msg, rv)


    def commit(self, msg='default'):
        """
        Commit changes to repo with the supplied commit msg.

        @param msg: A message that goes with the commit.
        """
        rv = self.gitcmd('commit -a -m %s' % msg)
        if rv.exit_status != 0:
            logging.error(rv.stderr)
            raise GitCommitError('Unable to commit', rv)


    def reset(self, branch_or_sha):
        """
        Reset repo to the given branch or git sha.

        @param branch_or_sha: Name of a local or remote branch or git sha.

        @raises GitResetError if operation fails.
        """
        self.gitcmd('reset --hard %s' % branch_or_sha,
                    error_class=GitResetError,
                    error_msg='Failed to reset to %s' % branch_or_sha)


    def reset_head(self):
        """
        Reset repo to HEAD@{0} by running git reset --hard HEAD.

        TODO(pprabhu): cleanup. Use reset.

        @raises GitResetError: if we fails to reset HEAD.
        """
        logging.info('Resetting head on repo %s', self.repodir)
        rv = self.gitcmd('reset --hard HEAD')
        if rv.exit_status != 0:
            logging.error(rv.stderr)
            e_msg = 'Failed to reset HEAD'
            raise GitResetError(e_msg, rv)


    def fetch_remote(self):
        """
        Fetches all files from the remote but doesn't reset head.

        @raises GitFetchError: if we fail to fetch all files from giturl.
        """
        logging.info('fetching from repo %s', self.giturl)
        rv = self.gitcmd('fetch --all')
        if rv.exit_status != 0:
            logging.error(rv.stderr)
            e_msg = 'Failed to fetch from %s' % self.giturl
            raise GitFetchError(e_msg, rv)


    def reinit_repo_at(self, remote_branch):
        """
        Does all it can to ensure that the repo is at remote_branch.

        This will try to be nice and detect any local changes and bail early.
        OTOH, if it finishes successfully, it'll blow away anything and
        everything so that local repo reflects the upstream branch requested.

        @param remote_branch: branch to check out.
        """
        if not self.is_repo_initialized():
            self.clone()

        # Play nice. Detect any local changes and bail.
        # Re-stat all files before comparing index. This is needed for
        # diff-index to work properly in cases when the stat info on files is
        # stale. (e.g., you just untarred the whole git folder that you got from
        # Alice)
        rv = self.gitcmd('update-index --refresh -q',
                         error_class=GitError,
                         error_msg='Failed to refresh index.')
        rv = self.gitcmd(
                'diff-index --quiet HEAD --',
                error_class=GitError,
                error_msg='Failed to check for local changes.')
        if rv.stdout:
            logging.error(rv.stdout)
            e_msg = 'Local checkout dirty. (%s)'
            raise GitError(e_msg % rv.stdout)

        # Play the bad cop. Destroy everything in your path.
        # Don't trust the existing repo setup at all (so don't trust the current
        # config, current branches / remotes etc).
        self.gitcmd('config remote.origin.url %s' % self.giturl,
                    error_class=GitError,
                    error_msg='Failed to set origin.')
        self.gitcmd('checkout -f',
                    error_class=GitError,
                    error_msg='Failed to checkout.')
        self.gitcmd('clean -qxdf',
                    error_class=GitError,
                    error_msg='Failed to clean.')
        self.fetch_remote()
        self.reset('origin/%s' % remote_branch)


    def get(self, **kwargs):
        """
        This method overrides baseclass get so we can do proper git
        clone/pulls, and check for updated versions.  The result of
        this method will leave an up-to-date version of git repo at
        'giturl' in 'repodir' directory to be used by build/install
        methods.

        @param kwargs: Dictionary of parameters to the method get.
        """
        if not self.is_repo_initialized():
            # this is your first time ...
            self.clone()
        elif self.is_out_of_date():
            # exiting repo, check if we're up-to-date
            self.pull()
        else:
            logging.info('repo up-to-date')

        # remember where the source is
        self.source_material = self.repodir


    def get_local_head(self):
        """
        Get the top commit hash of the current local git branch.

        @return: Top commit hash of local git branch
        """
        cmd = 'log --pretty=format:"%H" -1'
        l_head_cmd = self.gitcmd(cmd)
        return l_head_cmd.stdout.strip()


    def get_remote_head(self):
        """
        Get the top commit hash of the current remote git branch.

        @return: Top commit hash of remote git branch
        """
        cmd1 = 'remote show'
        origin_name_cmd = self.gitcmd(cmd1)
        cmd2 = 'log --pretty=format:"%H" -1 ' + origin_name_cmd.stdout.strip()
        r_head_cmd = self.gitcmd(cmd2)
        return r_head_cmd.stdout.strip()


    def is_out_of_date(self):
        """
        Return whether this branch is out of date with regards to remote branch.

        @return: False, if the branch is outdated, True if it is current.
        """
        local_head = self.get_local_head()
        remote_head = self.get_remote_head()

        # local is out-of-date, pull
        if local_head != remote_head:
            return True

        return False


    def is_repo_initialized(self):
        """
        Return whether the git repo was already initialized.

        Counts objects in .git directory, since these will exist even if the
        repo is empty. Assumes non-bare reposotories like the rest of this file.

        @return: True if the repo is initialized.
        """
        cmd = 'count-objects'
        rv = self.gitcmd(cmd, True)
        if rv.exit_status == 0:
            return True

        return False


    def get_latest_commit_hash(self):
        """
        Get the commit hash of the latest commit in the repo.

        We don't raise an exception if no commit hash was found as
        this could be an empty repository. The caller should notice this
        methods return value and raise one appropriately.

        @return: The first commit hash if anything has been committed.
        """
        cmd = 'rev-list -n 1 --all'
        rv = self.gitcmd(cmd, True)
        if rv.exit_status == 0:
            return rv.stdout
        return None


    def is_repo_empty(self):
        """
        Checks for empty but initialized repos.

        eg: we clone an empty master repo, then don't pull
        after the master commits.

        @return True if the repo has no commits.
        """
        if self.get_latest_commit_hash():
            return False
        return True


    def get_revision(self):
        """
        Return current HEAD commit id
        """
        if not self.is_repo_initialized():
            self.get()

        cmd = 'rev-parse --verify HEAD'
        gitlog = self.gitcmd(cmd, True)
        if gitlog.exit_status != 0:
            logging.error(gitlog.stderr)
            raise error.CmdError('Failed to find git sha1 revision', gitlog)
        else:
            return gitlog.stdout.strip('\n')


    def checkout(self, remote, local=None):
        """
        Check out the git commit id, branch, or tag given by remote.

        Optional give the local branch name as local.

        @param remote: Remote commit hash
        @param local: Local commit hash
        @note: For git checkout tag git version >= 1.5.0 is required
        """
        if not self.is_repo_initialized():
            self.get()

        assert(isinstance(remote, basestring))
        if local:
            cmd = 'checkout -b %s %s' % (local, remote)
        else:
            cmd = 'checkout %s' % (remote)
        gitlog = self.gitcmd(cmd, True)
        if gitlog.exit_status != 0:
            logging.error(gitlog.stderr)
            raise error.CmdError('Failed to checkout git branch', gitlog)
        else:
            logging.info(gitlog.stdout)


    def get_branch(self, all=False, remote_tracking=False):
        """
        Show the branches.

        @param all: List both remote-tracking branches and local branches (True)
                or only the local ones (False).
        @param remote_tracking: Lists the remote-tracking branches.
        """
        if not self.is_repo_initialized():
            self.get()

        cmd = 'branch --no-color'
        if all:
            cmd = " ".join([cmd, "-a"])
        if remote_tracking:
            cmd = " ".join([cmd, "-r"])

        gitlog = self.gitcmd(cmd, True)
        if gitlog.exit_status != 0:
            logging.error(gitlog.stderr)
            raise error.CmdError('Failed to get git branch', gitlog)
        elif all or remote_tracking:
            return gitlog.stdout.strip('\n')
        else:
            branch = [b[2:] for b in gitlog.stdout.split('\n')
                      if b.startswith('*')][0]
            return branch