# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of Unix-like du command for cloud storage providers."""

from __future__ import absolute_import

import sys

from gslib.boto_translation import S3_DELETE_MARKER_GUID
from gslib.bucket_listing_ref import BucketListingObject
from gslib.command import Command
from gslib.command_argument import CommandArgument
from gslib.cs_api_map import ApiSelector
from gslib.exception import CommandException
from gslib.ls_helper import LsHelper
from gslib.storage_url import ContainsWildcard
from gslib.storage_url import StorageUrlFromString
from gslib.util import MakeHumanReadable
from gslib.util import NO_MAX
from gslib.util import UTF8

_SYNOPSIS = """
  gsutil du url...
"""

_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The du command displays the amount of space (in bytes) being used by the
  objects in the file or object hierarchy under a given URL. The syntax emulates
  the Linux du command (which stands for disk usage). For example, the command:

  gsutil du -s gs://your-bucket/dir

  will report the total space used by all objects under gs://your-bucket/dir and
  any sub-directories.


<B>OPTIONS</B>
  -0          Ends each output line with a 0 byte rather than a newline. This
              can be useful to make the output more easily machine-readable.

  -a          Includes non-current object versions / generations in the listing
              (only useful with a versioning-enabled bucket). Also prints
              generation and metageneration for each listed object.

  -c          Produce a grand total.

  -e          A pattern to exclude from reporting. Example: -e "*.o" would
              exclude any object that ends in ".o". Can be specified multiple
              times.

  -h          Prints object sizes in human-readable format (e.g., 1 KiB,
              234 MiB, 2GiB, etc.)

  -s          Display only a summary total for each argument.

  -X          Similar to -e, but excludes patterns from the given file. The
              patterns to exclude should be one per line.


<B>EXAMPLES</B>
  To list the size of all objects in a bucket:

    gsutil du gs://bucketname

  To list the size of all objects underneath a prefix:

    gsutil du gs://bucketname/prefix/*

  To print the total number of bytes in a bucket, in human-readable form:

    gsutil du -ch gs://bucketname

  To see a summary of the total bytes in the two given buckets:

    gsutil du -s gs://bucket1 gs://bucket2

  To list the size of all objects in a versioned bucket, including objects that
  are not the latest:

    gsutil du -a gs://bucketname

  To list all objects in a bucket, except objects that end in ".bak",
  with each object printed ending in a null byte:

    gsutil du -e "*.bak" -0 gs://bucketname

  To get a total of all buckets in a project with a grand total for an entire
  project:

      gsutil -o GSUtil:default_project_id=project-name du -shc
""")


class DuCommand(Command):
  """Implementation of gsutil du command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'du',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='0ace:hsX:',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='du',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary='Display object size usage',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _PrintSummaryLine(self, num_bytes, name):
    size_string = (MakeHumanReadable(num_bytes)
                   if self.human_readable else str(num_bytes))
    sys.stdout.write('%(size)-10s  %(name)s%(ending)s' % {
        'size': size_string, 'name': name, 'ending': self.line_ending})

  def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
    """Print listing info for given bucket_listing_ref.

    Args:
      bucket_listing_ref: BucketListing being listed.

    Returns:
      Tuple (number of objects, object size)

    Raises:
      Exception: if calling bug encountered.
    """
    obj = bucket_listing_ref.root_object
    url_str = bucket_listing_ref.url_string
    if (obj.metadata and S3_DELETE_MARKER_GUID in
        obj.metadata.additionalProperties):
      size_string = '0'
      num_bytes = 0
      num_objs = 0
      url_str += '<DeleteMarker>'
    else:
      size_string = (MakeHumanReadable(obj.size)
                     if self.human_readable else str(obj.size))
      num_bytes = obj.size
      num_objs = 1

    if not self.summary_only:
      sys.stdout.write('%(size)-10s  %(url)s%(ending)s' % {
          'size': size_string,
          'url': url_str.encode(UTF8),
          'ending': self.line_ending})

    return (num_objs, num_bytes)

  def RunCommand(self):
    """Command entry point for the du command."""
    self.line_ending = '\n'
    self.all_versions = False
    self.produce_total = False
    self.human_readable = False
    self.summary_only = False
    self.exclude_patterns = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-0':
          self.line_ending = '\0'
        elif o == '-a':
          self.all_versions = True
        elif o == '-c':
          self.produce_total = True
        elif o == '-e':
          self.exclude_patterns.append(a)
        elif o == '-h':
          self.human_readable = True
        elif o == '-s':
          self.summary_only = True
        elif o == '-X':
          if a == '-':
            f = sys.stdin
          else:
            f = open(a, 'r')
          try:
            for line in f:
              line = line.strip()
              if line:
                self.exclude_patterns.append(line)
          finally:
            f.close()

    if not self.args:
      # Default to listing all gs buckets.
      self.args = ['gs://']

    total_bytes = 0
    got_nomatch_errors = False

    def _PrintObjectLong(blr):
      return self._PrintInfoAboutBucketListingRef(blr)

    def _PrintNothing(unused_blr=None):
      pass

    def _PrintDirectory(num_bytes, name):
      if not self.summary_only:
        self._PrintSummaryLine(num_bytes, name)

    for url_arg in self.args:
      top_level_storage_url = StorageUrlFromString(url_arg)
      if top_level_storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s'
                               % self.command_name)
      bucket_listing_fields = ['size']

      ls_helper = LsHelper(
          self.WildcardIterator, self.logger,
          print_object_func=_PrintObjectLong, print_dir_func=_PrintNothing,
          print_dir_header_func=_PrintNothing,
          print_dir_summary_func=_PrintDirectory,
          print_newline_func=_PrintNothing, all_versions=self.all_versions,
          should_recurse=True, exclude_patterns=self.exclude_patterns,
          fields=bucket_listing_fields)

      # ls_helper expands to objects and prefixes, so perform a top-level
      # expansion first.
      if top_level_storage_url.IsProvider():
        # Provider URL: use bucket wildcard to iterate over all buckets.
        top_level_iter = self.WildcardIterator(
            '%s://*' % top_level_storage_url.scheme).IterBuckets(
                bucket_fields=['id'])
      elif top_level_storage_url.IsBucket():
        top_level_iter = self.WildcardIterator(
            '%s://%s' % (top_level_storage_url.scheme,
                         top_level_storage_url.bucket_name)).IterBuckets(
                             bucket_fields=['id'])
      else:
        top_level_iter = [BucketListingObject(top_level_storage_url)]

      for blr in top_level_iter:
        storage_url = blr.storage_url
        if storage_url.IsBucket() and self.summary_only:
          storage_url = StorageUrlFromString(
              storage_url.CreatePrefixUrl(wildcard_suffix='**'))
        _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
        if (storage_url.IsObject() and exp_objs == 0 and
            ContainsWildcard(url_arg) and not self.exclude_patterns):
          got_nomatch_errors = True
        total_bytes += exp_bytes

        if self.summary_only:
          self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/'))

    if self.produce_total:
      self._PrintSummaryLine(total_bytes, 'total')

    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')

    return 0