# -*- coding: utf-8 -*-
# Copyright 2012 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of setmeta command for setting cloud object metadata."""
from __future__ import absolute_import
from gslib.cloud_api import AccessDeniedException
from gslib.cloud_api import PreconditionException
from gslib.cloud_api import Preconditions
from gslib.command import Command
from gslib.command_argument import CommandArgument
from gslib.cs_api_map import ApiSelector
from gslib.exception import CommandException
from gslib.name_expansion import NameExpansionIterator
from gslib.storage_url import StorageUrlFromString
from gslib.translation_helper import CopyObjectMetadata
from gslib.translation_helper import ObjectMetadataFromHeaders
from gslib.translation_helper import PreconditionsFromHeaders
from gslib.util import GetCloudApiInstance
from gslib.util import NO_MAX
from gslib.util import Retry
# Usage synopsis for the command. It is embedded in _DETAILED_HELP_TEXT below
# and also passed as usage_synopsis in the command specification.
_SYNOPSIS = """
gsutil setmeta -h [header:value|header] ... url...
"""
# Full help text for the command; supplied as help_text in the command's help
# specification. The <B>...</B> markers delimit the section headings.
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """
<B>DESCRIPTION</B>
The gsutil setmeta command allows you to set or remove the metadata on one
or more objects. It takes one or more header arguments followed by one or
more URLs, where each header argument is in one of two forms:
- if you specify header:value, it will set the given header on all
named objects.
- if you specify header (with no value), it will remove the given header
from all named objects.
For example, the following command would set the Content-Type and
Cache-Control and remove the Content-Disposition on the specified objects:
gsutil setmeta -h "Content-Type:text/html" \\
-h "Cache-Control:public, max-age=3600" \\
-h "Content-Disposition" gs://bucket/*.html
If you have a large number of objects to update you might want to use the
gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
update:
gsutil -m setmeta -h "Content-Type:text/html" \\
-h "Cache-Control:public, max-age=3600" \\
-h "Content-Disposition" gs://bucket/*.html
You can also use the setmeta command to set custom metadata on an object:
gsutil setmeta -h "x-goog-meta-icecreamflavor:vanilla" gs://bucket/object
See "gsutil help metadata" for details about how you can set metadata
while uploading objects, what metadata fields can be set and the meaning of
these fields, use of custom metadata, and how to view currently set metadata.
NOTE: By default, publicly readable objects are served with a Cache-Control
header allowing such objects to be cached for 3600 seconds. For more details
about this default behavior see the CACHE-CONTROL section of
"gsutil help metadata". If you need to ensure that updates become visible
immediately, you should set a Cache-Control header of "Cache-Control:private,
max-age=0, no-transform" on such objects. You can do this with the command:
gsutil setmeta -h "Content-Type:text/html" \\
-h "Cache-Control:private, max-age=0, no-transform" gs://bucket/*.html
The setmeta command reads each object's current generation and metageneration
and uses those as preconditions unless they are otherwise specified by
top-level arguments. For example:
gsutil -h "x-goog-if-metageneration-match:2" setmeta
-h "x-goog-meta-icecreamflavor:vanilla"
will set the icecreamflavor:vanilla metadata if the current live object has a
metageneration of 2.
<B>OPTIONS</B>
-h Specifies a header:value to be added, or header to be removed,
from each named object.
""")
# Non-custom headers that setmeta is willing to set or remove. Setmeta works
# with a header-like model that does not map directly onto the JSON API, so
# this list mirrors what gsutil3 supported when gsutil4 was released.
SETTABLE_FIELDS = [
    'cache-control',
    'content-disposition',
    'content-encoding',
    'content-language',
    'content-md5',
    'content-type',
]
def _SetMetadataExceptionHandler(cls, e):
"""Exception handler that maintains state about post-completion status."""
cls.logger.error(e)
cls.everything_set_okay = False
def _SetMetadataFuncWrapper(cls, name_expansion_result, thread_state=None):
cls.SetMetadataFunc(name_expansion_result, thread_state=thread_state)
class SetMetaCommand(Command):
  """Implementation of gsutil setmeta command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'setmeta',
      command_name_aliases=['setheader'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='h:rR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='setmeta',
      help_name_aliases=['setheader'],
      help_type='command_help',
      help_one_line_summary='Set metadata on already uploaded objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the setmeta command.

    Returns:
      0 once metadata has been set on every matched object.

    Raises:
      CommandException: if a canned ACL header is supplied, if the header
                        arguments are invalid, if a single non-recursive URL
                        does not name a cloud object, or if metadata could
                        not be set on some objects.
    """
    headers = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-h':
          if 'x-goog-acl' in a or 'x-amz-acl' in a:
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
          headers.append(a)

    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

    # Headers to remove are carried in the same dict as headers to set, with
    # an empty string as their value.
    self.metadata_change = metadata_plus
    for header in metadata_minus:
      self.metadata_change[header] = ''

    if len(self.args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(self.args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL (%s) must name an object' % self.args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    self.preconditions = PreconditionsFromHeaders(self.headers)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        self.args, self.recursion_requested, all_versions=self.all_versions,
        continue_on_error=self.parallel_operations)

    try:
      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
                 _SetMetadataExceptionHandler, fail_on_error=True)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0

  @Retry(PreconditionException, tries=3, timeout_secs=1)
  def SetMetadataFunc(self, name_expansion_result, thread_state=None):
    """Sets metadata on an object.

    The object's current generation and metageneration are read first and
    used as patch preconditions, unless the corresponding precondition is
    already present in self.preconditions.

    Args:
      name_expansion_result: NameExpansionResult describing target object.
      thread_state: gsutil Cloud API instance to use for the operation.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Setting metadata on %s...', exp_src_url)

    fields = ['generation', 'metadata', 'metageneration']
    cloud_obj_metadata = gsutil_api.GetObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name,
        generation=exp_src_url.generation, provider=exp_src_url.scheme,
        fields=fields)

    # Build per-object preconditions so the shared self.preconditions is
    # never mutated.
    preconditions = Preconditions(
        gen_match=self.preconditions.gen_match,
        meta_gen_match=self.preconditions.meta_gen_match)
    if preconditions.gen_match is None:
      preconditions.gen_match = cloud_obj_metadata.generation
    if preconditions.meta_gen_match is None:
      preconditions.meta_gen_match = cloud_obj_metadata.metageneration

    # Patch handles the patch semantics for most metadata, but we need to
    # merge the custom metadata field manually.
    patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

    api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
    # For XML we only want to patch through custom metadata that has
    # changed. For JSON we need to build the complete set.
    if api == ApiSelector.XML:
      pass
    elif api == ApiSelector.JSON:
      CopyObjectMetadata(patch_obj_metadata, cloud_obj_metadata,
                         override=True)
      patch_obj_metadata = cloud_obj_metadata
      # Patch body does not need the object generation and metageneration.
      patch_obj_metadata.generation = None
      patch_obj_metadata.metageneration = None

    gsutil_api.PatchObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name, patch_obj_metadata,
        generation=exp_src_url.generation, preconditions=preconditions,
        provider=exp_src_url.scheme)

  def _ParseMetadataHeaders(self, headers):
    """Validates and parses metadata changes from the headers argument.

    Args:
      headers: Iterable of header arguments, each of the form 'header:value'
               (set the header) or 'header' (remove the header). Only the
               first ':' separates header from value, so values may
               themselves contain colons.

    Returns:
      (metadata_minus, metadata_plus): Tuple of the set of lowercased header
      names to remove and the dict mapping lowercased header names to the
      values to set. Custom metadata headers are included in both.

    Raises:
      CommandException: if a header name (or the value of a non-custom
                        header) is not ASCII, if a header is specified more
                        than once, or if a header is neither one of
                        SETTABLE_FIELDS nor a custom metadata header.
    """
    metadata_minus = set()
    cust_metadata_minus = set()
    metadata_plus = {}
    cust_metadata_plus = {}
    # Every (lowercased) header seen so far, across both the set and remove
    # forms, so that any repeated header can be rejected, including a custom
    # metadata header that is both set and removed.
    seen_headers = set()
    found_duplicate = False

    for md_arg in headers:
      # Split on the first ':' only; the value may legitimately contain
      # further colons (e.g. a URL or a timestamp).
      (header, _, value) = md_arg.partition(':')
      _InsistAsciiHeader(header)
      # Translate headers to lowercase to match the casing assumed by our
      # sanity-checking operations.
      header = header.lower()
      if header in seen_headers:
        found_duplicate = True
      seen_headers.add(header)

      if value:
        if _IsCustomMeta(header):
          # Allow non-ASCII data for custom metadata fields.
          cust_metadata_plus[header] = value
        else:
          # Don't unicode encode other fields because that would perturb their
          # content (e.g., adding %2F's into the middle of a Cache-Control
          # value).
          _InsistAsciiHeaderValue(header, value)
          metadata_plus[header] = str(value)
      elif _IsCustomMeta(header):
        cust_metadata_minus.add(header)
      else:
        metadata_minus.add(header)

    # Reported only after every argument has been parsed, so that ASCII
    # validation errors take precedence over the duplicate error.
    if found_duplicate:
      raise CommandException('Each header must appear at most once.')

    other_than_base_fields = set(metadata_plus).difference(SETTABLE_FIELDS)
    other_than_base_fields.update(
        metadata_minus.difference(SETTABLE_FIELDS))
    for f in other_than_base_fields:
      # This check is overly simple; it would be stronger to check, for each
      # URL argument, whether f.startswith the
      # provider metadata_prefix, but here we just parse the spec
      # once, before processing any of the URLs. This means we will not
      # detect if the user tries to set an x-goog-meta- field on an another
      # provider's object, for example.
      if not _IsCustomMeta(f):
        raise CommandException(
            'Invalid or disallowed header (%s).\nOnly these fields (plus '
            'x-goog-meta-* fields) can be set or unset:\n%s' % (
                f, sorted(SETTABLE_FIELDS)))

    metadata_plus.update(cust_metadata_plus)
    metadata_minus.update(cust_metadata_minus)
    return (metadata_minus, metadata_plus)
def _InsistAscii(string, message):
if not all(ord(c) < 128 for c in string):
raise CommandException(message)
def _InsistAsciiHeader(header):
  """Rejects a header name that contains non-ASCII characters.

  Args:
    header: Header name to validate.
  """
  error_text = 'Invalid non-ASCII header (%s).' % header
  _InsistAscii(header, error_text)
def _InsistAsciiHeaderValue(header, value):
  """Rejects a header value that contains non-ASCII characters.

  Args:
    header: Name of the header the value belongs to (used in the error text).
    value: Header value to validate.
  """
  error_text = (
      'Invalid non-ASCII value (%s) was provided for header %s.'
      % (value, header))
  _InsistAscii(value, error_text)
def _IsCustomMeta(header):
return header.startswith('x-goog-meta-') or header.startswith('x-amz-meta-')