# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
import throttler_lib
import utils_lib
# File extensions that cannot be shrunk, as partial content would corrupt the
# file.
UNSHRINKABLE_EXTENSIONS = {
    '.bin',
    '.data',
    '.dmp',
    '.gz',
    '.htm',
    '.html',
    '.img',
    '.journal',
    '.jpg',
    '.json',
    '.png',
    '.tar',
    '.tgz',
    '.xml',
    '.xz',
    '.zip',
}

# Regexes (matched against the file name with re.match) for files that should
# not be shrunk.
UNSHRINKABLE_FILE_PATTERNS = [
]

# First line written to every trimmed file; also used to detect files that
# have already been trimmed.
TRIMMED_FILE_HEADER = '!!! This file is trimmed !!!\n'
# Line written right after TRIMMED_FILE_HEADER to record the pre-trim size.
ORIGINAL_SIZE_TEMPLATE = 'Original size: %d bytes\n\n'
# Regex pattern to retrieve the original size of the file. Kept as a raw
# string so that `\d` is not treated as a (invalid) string escape sequence.
ORIGINAL_SIZE_REGEX = r'Original size: (\d+) bytes'
# Marker inserted where the middle of the file was removed; %d is the number
# of characters dropped.
TRIMMED_FILE_INJECT_TEMPLATE = """
========================================================================
< %d > characters are trimmed here.
========================================================================
"""

# Percent of file content to keep at the beginning and end of the file, default
# to 20%.
HEAD_SIZE_PERCENT = 0.20

# Default size in bytes to trim the file down to.
DEFAULT_FILE_SIZE_LIMIT_BYTE = 100 * 1024
def _trim_file(file_info, file_size_limit_byte):
    """Remove the file content in the middle to reduce the file size.

    The trimmed content is first written to a sibling file named
    `<name>_trimmed`: a header recording the original size (added only if the
    file is not already trimmed), the head of the old file, a marker stating
    how many characters were dropped, and the tail of the old file. The
    sibling file then replaces the original and is given the original's
    access and modification times.

    @param file_info: A ResultInfo object containing summary for the file to be
            shrunk. Its `path`, `name`, `original_size` and `size` attributes
            are read, and `trimmed_size` is updated on success.
    @param file_size_limit_byte: Maximum file size in bytes after trimming.
    """
    utils_lib.LOG('Trimming file %s to reduce size from %d bytes to %d bytes' %
                  (file_info.path, file_info.original_size,
                   file_size_limit_byte))
    # The trimmed copy is built next to the original and swapped in afterwards.
    new_path = os.path.join(os.path.dirname(file_info.path),
                            file_info.name + '_trimmed')
    original_size_bytes = file_info.original_size
    with open(new_path, 'w') as new_file, open(file_info.path) as old_file:
        # Read the beginning part of the old file, if it's already started with
        # TRIMMED_FILE_HEADER, no need to add the header again.
        header = old_file.read(len(TRIMMED_FILE_HEADER))
        if header != TRIMMED_FILE_HEADER:
            new_file.write(TRIMMED_FILE_HEADER)
            new_file.write(ORIGINAL_SIZE_TEMPLATE % file_info.original_size)
        else:
            # Already trimmed before: recover the size recorded by the earlier
            # trim from the line following the header, if it can be parsed.
            line = old_file.readline()
            match = re.match(ORIGINAL_SIZE_REGEX, line)
            if match:
                original_size_bytes = int(match.group(1))
        # Size of the freshly written header; 0 when the old file was already
        # trimmed, since in that case nothing has been written yet and the old
        # header is copied below as part of the head content.
        header_size_bytes = new_file.tell()
        # Move old file reader to the beginning of the file.
        old_file.seek(0, os.SEEK_SET)

        # Copy the head: HEAD_SIZE_PERCENT of the budget left after the header.
        new_file.write(old_file.read(
                int((file_size_limit_byte - header_size_bytes) *
                    HEAD_SIZE_PERCENT)))
        # Position to seek from the end of the file. It is the negated budget
        # that remains after the header, the head and the inject template.
        seek_pos = -(file_size_limit_byte - new_file.tell() -
                     len(TRIMMED_FILE_INJECT_TEMPLATE))
        # First estimate of the skipped amount, needed only to learn how many
        # digits the number will take once formatted into the template.
        bytes_to_skip = original_size_bytes + seek_pos - old_file.tell()
        # Adjust seek position based on string TRIMMED_FILE_INJECT_TEMPLATE:
        # the two-character `%d` placeholder expands to len(str(...)) digits,
        # so the tail is shortened by the difference.
        seek_pos += len(str(bytes_to_skip)) - 2
        # Recompute with the adjusted seek position.
        bytes_to_skip = original_size_bytes + seek_pos - old_file.tell()
        new_file.write(TRIMMED_FILE_INJECT_TEMPLATE % bytes_to_skip)
        # NOTE(review): Python 3 text-mode files reject non-zero end-relative
        # seeks; this presumably targets Python 2 -- confirm before porting.
        old_file.seek(seek_pos, os.SEEK_END)
        # Copy the tail of the old file.
        new_file.write(old_file.read())
    # Capture the original timestamps before the original file is deleted.
    stat = os.stat(file_info.path)
    if not throttler_lib.try_delete_file_on_disk(file_info.path):
        # Clean up the intermediate file.
        throttler_lib.try_delete_file_on_disk(new_path)
        utils_lib.LOG('Failed to shrink %s' % file_info.path)
        return

    os.rename(new_path, file_info.path)
    # Modify the new file's timestamp to the old one.
    os.utime(file_info.path, (stat.st_atime, stat.st_mtime))
    # Update the trimmed_size.
    # NOTE(review): presumably file_info.size reflects the file now on disk --
    # verify against ResultInfo.
    file_info.trimmed_size = file_info.size
def _get_shrinkable_files(file_infos, file_size_limit_byte):
    """Yield the files whose content may be trimmed.

    A file is skipped when its (case-insensitive) extension is listed in
    UNSHRINKABLE_EXTENSIONS, when its name matches any regex in
    UNSHRINKABLE_FILE_PATTERNS, or when its trimmed size does not exceed the
    given limit.

    @param file_infos: A list of ResultInfo objects.
    @param file_size_limit_byte: Minimum file size in bytes to be throttled.

    @yield: ResultInfo objects that can be shrunk.
    """
    for candidate in file_infos:
        _, extension = os.path.splitext(candidate.name)
        if extension.lower() in UNSHRINKABLE_EXTENSIONS:
            continue

        # any() stops at the first matching pattern.
        if any(re.match(regex, candidate.name)
               for regex in UNSHRINKABLE_FILE_PATTERNS):
            continue

        # Only files still larger than the limit are worth trimming.
        if candidate.trimmed_size > file_size_limit_byte:
            yield candidate
def throttle(summary, max_result_size_KB,
             file_size_limit_byte=DEFAULT_FILE_SIZE_LIMIT_BYTE,
             skip_autotest_log=False):
    """Throttle the files in summary by trimming file content.

    Files are trimmed one at a time. Throttling stops as soon as the result
    size drops under max_result_size_KB, or once every candidate file has
    been processed.

    @param summary: A ResultInfo object containing result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    @param file_size_limit_byte: Limit each file's size in the summary to be
            under the given threshold, until all files are processed or the
            result size is under the given max_result_size_KB.
    @param skip_autotest_log: True to skip shrinking Autotest logs, default is
            False.
    """
    sorted_infos, _ = throttler_lib.sort_result_files(summary)

    # Autotest logs are excluded only on request.
    patterns_to_skip = []
    if skip_autotest_log:
        patterns_to_skip = [throttler_lib.AUTOTEST_LOG_PATTERN]

    throttleable = throttler_lib.get_throttleable_files(
            sorted_infos, patterns_to_skip)
    candidates = _get_shrinkable_files(throttleable, file_size_limit_byte)

    for candidate in candidates:
        _trim_file(candidate, file_size_limit_byte)
        # Stop early once the overall result size is within the limit.
        if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
            return