# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import re
import subprocess
import threading
import time
# A path of the log file
_PATH_LOG_FILE = '/var/log/messages'
# Time margin as a number of seconds. It is used during parsing of the log file.
# Sometimes, logs are not in strict datetime order. This value defines maximum
# possible datetime error in logs sequence.
_TIME_MARGIN = 2
# Max time in seconds to wait for CUPS logs to appear in the log file.
_TIME_MAX_WAIT = 100
# A list of filters that are installed by loading components.
# Currently, they are Star and Epson drivers.
_COMPONENT_FILTERS = ['rastertostar', 'rastertostarlm', 'epson-escpr-wrapper']
class LogReader():
"""
An instance of this class is responsible for gathering data about CUPS jobs
and extracting their results and pipelines by parsing the main log file. It
can check for internal errors during a job execution and build a bash script
simulating an executed pipeline. CUPS server must have logging level set to
'debug' to make a pipeline extraction possible.
"""
def _read_last_lines(self, lines_count):
"""
The function reads given number of lines from the end of the log file.
The last line is omitted if it is not ended with EOL character. If a
whole file was read, count of returned lines may be smaller than
requested number. Always at least one line is returned; if the file has
no EOL characters an exception is raised.
@param lines_count: a number of lines to read
@returns a list of lines
"""
assert lines_count > 0
argv = ['tail', '-n', '%d' % (lines_count+1), _PATH_LOG_FILE]
p1 = subprocess.Popen(argv, stdout=subprocess.PIPE)
out,err = p1.communicate()
lines = out.split('\n')
lines.pop()
if len(lines) > lines_count:
if len(lines) == 0:
raise Exception('The file %s is empty' % _PATH_LOG_FILE)
lines = lines[1:]
return lines
def _get_datetime_from_line(self, line):
"""
Parse datetime from the given line read from the log file.
@param line_count: single line from the log files
@returns a datetime as a number of seconds from the epoch (float)
"""
return time.mktime(time.strptime(line[:20], "%Y-%m-%dT%H:%M:%S."))
def __init__(self):
"""
Constructor.
"""
# the last line read from the log file
self._last_line_read = self._read_last_lines(1)[0]
# logs obtained from the log file after preprocessing
# key = a tuple (CUPS pid, job id)
# value = list with logs
self._logs = dict()
# data extracted from logs
# key = a tuple (CUPS pid, job id)
# value = a tuple (result(bool),envp(dict),argv(list),filters(list))
self._results = dict()
# mapping from printer's name to a list of CUPS jobs
# key = printer name
# value = a list of tuples (CUPS pid, job id)
self._printer_name_to_key = dict()
# a lock for synchronization
self._critical_section = threading.Lock()
def _parse_logs(self):
"""
This method read and parses new lines from the log file (all complete
lines after the line saved in self._last_line_read). The lines are
filtered and saved in self._logs. Dictionaries self._results and
self._printer_name_to_key are updated accordingly.
"""
# ==============================================
# try to read lines from the end of the log file until the line from
# self._last_line_read is found
dt_last_line_read = self._get_datetime_from_line(self._last_line_read)
lines_count = 1024
pos_last_line_read = None
while True:
lines = self._read_last_lines(lines_count)
# search for self._last_line_read in lines read from the file
for index,line in enumerate(lines):
dt_line = self._get_datetime_from_line(line)
if dt_line > dt_last_line_read + _TIME_MARGIN:
# a datetime of the line is far behind, exit the loop
break
if line == self._last_line_read:
# the line was found, save its index and exit the loop
pos_last_line_read = index
break
# if the self._last_line_read was found, exit the 'while' loop
if pos_last_line_read is not None:
break
# check if we can try to read more lines from the file to repeat
# the search, if not then throw an exception
if ( len(lines) < lines_count or
self._get_datetime_from_line(lines[0]) +
_TIME_MARGIN < dt_last_line_read ):
raise Exception('Cannot find the last read line in the log'
' file (maybe it has been erased?)')
# increase the number of lines to read
lines_count = lines_count * 2
# check if there are any new lines, exit if not
if pos_last_line_read + 1 == len(lines):
return
# update the field with the last line read from the log file
self._last_line_read = lines[-1]
# ==============================================
# parse logs, select these ones from CUPS jobs and save them to
# self._logs
updated_jobs = set()
for line in lines[pos_last_line_read+1:]:
tokens = line.split(' ', 5)
if ( len(tokens) < 6 or tokens[2][:6] != 'cupsd[' or
tokens[3] != '[Job' ):
continue
pid = int(tokens[2][6:-2])
job_id = int(tokens[4][:-1])
message = tokens[5]
key = (pid,job_id)
if key not in self._logs:
self._logs[key] = []
self._logs[key].append(message)
updated_jobs.add(key)
# ==============================================
# try to interpret logs
for key in updated_jobs:
number_of_processes_to_check = None
printer_name = None
result = True
envp = dict()
argv = []
filters = []
for msg in self._logs[key]:
if number_of_processes_to_check is None:
regexp = re.match(r'(\d) filters for job:', msg)
if regexp is not None:
number_of_processes_to_check = int(regexp.group(1)) + 1
assert number_of_processes_to_check > 0
elif printer_name is None:
regexp = re.match(r'(\S+) \(\S+ to (\S+), cost \d+\)', msg)
if regexp is not None:
fltr = regexp.group(1)
if fltr == '-':
number_of_processes_to_check -= 1
else:
filters.append(fltr)
trg = regexp.group(2)
if trg.startswith('printer/') and '/' not in trg[8:]:
printer_name = trg[8:]
else:
regexp = re.match(r'envp\[\d+\]="(\S+)=(\S+)"', msg)
if regexp is not None:
envp[regexp.group(1)] = regexp.group(2)
continue
regexp = re.match(r'argv\[(\d+)\]="(.+)"', msg)
if regexp is not None:
if len(argv) != int(regexp.group(1)):
raise Exception('Error when parsing argv for %s'
% printer_name)
argv.append(regexp.group(2))
continue
if number_of_processes_to_check > 0:
regexp = re.match(r'PID \d+ \(\S+\) ', msg)
if regexp is not None:
number_of_processes_to_check -= 1
if ( not msg.endswith(' exited with no errors.') and
'was terminated normally with signal'
not in msg ):
result = False
if number_of_processes_to_check == 0:
# if it is a new job, update self._printer_name_to_key
if key not in self._results:
if printer_name not in self._printer_name_to_key:
self._printer_name_to_key[printer_name] = [key]
else:
self._printer_name_to_key[printer_name].append(key)
# update results
self._results[key] = (result,envp,argv,filters)
def _build_pipeline(
self, printer_name, envp, argv, filters,
path_ppd, path_doc, path_workdir):
"""
This function tries to build a bash script containing a pipeline used
internally by CUPS. All printer's parameters are passed by the field
self._printers_data. Resultant script is saved in self._pipelines.
@param printer_name: a printer's name
@param envp: a dictionary with environment variables for the pipeline
@param argv: a list with command line parameters for filters
@param filters: a list of filters
@param path_ppd: a path to PPD file (string)
@param path_doc: a path to input document (string)
@param path_workdir: a path to a directory, that should be used as home
directory and temporary location by filters
@returns a pipeline as a bash script (string)
"""
# update paths set in environment variables
envp['PPD'] = path_ppd
envp['HOME'] = path_workdir
envp['TMPDIR'] = path_workdir
# drop the last argument, it is an input file
argv.pop()
# replace filter names by full path for Star and Epson drivers
if filters[-1] in _COMPONENT_FILTERS:
find_cmd = ['find', '/run/imageloader/', '-type', 'f', '-name']
find_cmd.append(filters[-1])
filters[-1] = subprocess.check_output(find_cmd).rstrip()
# build and return the script
script = '#!/bin/bash\nset -e\nset -o pipefail\n'
for name, value in envp.iteritems():
script += ('export %s=%s\n' % (name, value))
for ind, filt in enumerate(filters):
if ind > 0:
script += ('gzip -dc %d.doc.gz | ' % ind)
script += ('(exec -a "%s" %s' % (argv[0], filt))
for arg in argv[1:]:
script += (' "%s"' % arg)
if ind == 0:
script += (' "%s"' % path_doc)
script += (') 2>%d.err | gzip -9 >%d.doc.gz\n' % (ind+1, ind+1))
return script
def extract_result(
self, printer_name,
path_ppd=None, path_doc=None, path_workdir=None):
"""
This function extract from the log file CUPS logs related to printer
with given name. Extracted logs are also interpreted, the function
checks for any failures. If all optional parameters are set, a pipeline
used by CUPS is extracted from the logs. If some of required results
cannot be extracted, an exception is raised. This function is
thread-safe, it can be called simultaneously by many Python threads.
However, all simultaneous calls must have different values of the
printer_name parameter (calls with the same printer_name must be done
sequentially).
@param printer_name: a printer's name
@param path_ppd: a path to PPD file (string), required for pipelinie
extraction only
@param path_doc: a path to input document (string), required for
pipelinie extraction only
@param path_workdir: a path to a directory, that should be used as home
directory and temporary location by filters, required for
pipelinie extraction only
@returns a tuple of three variables:
1. True if no errors were detected, False otherwise
2. logs extracted from the log file (string)
3. pipeline as a bash script (string), None if not requested
@raises Exception in case of any errors
"""
# time when we started efforts to get logs
time_start = time.time()
# all three parameters must be set to extract a pipeline
pipeline_required = (path_ppd is not None and path_doc is not None
and path_workdir is not None)
try:
# try to get and parse logs
while True:
with self._critical_section:
# leave the loop if we have everything
if printer_name in self._printer_name_to_key:
if not pipeline_required:
break
key = self._printer_name_to_key[printer_name][0]
envp = self._results[key][1]
argv = self._results[key][2]
if len(envp) > 0 and len(argv) > 0:
break
# save current time and try to parse the log file
time_last_read = time.time()
self._parse_logs()
# leave the loop if we have everything
if printer_name in self._printer_name_to_key:
if not pipeline_required:
break
key = self._printer_name_to_key[printer_name][0]
envp = self._results[key][1]
argv = self._results[key][2]
if len(envp) > 0 and len(argv) > 0:
break
# raise an exception if we have been waiting too long
if time_last_read - time_start > _TIME_MAX_WAIT:
raise Exception('Cannot parse logs for %s or they did not'
' appear in %s within %d seconds from printing the'
' document' % (printer_name, _PATH_LOG_FILE,
_TIME_MAX_WAIT))
# wait 1 second and try again to parse acquired logs
time.sleep(1)
# retrieve required data
with self._critical_section:
if pipeline_required:
# key, envp, argv are already set in the loop above
filters = self._results[key][3]
else:
key = self._printer_name_to_key[printer_name][0]
no_errors = self._results[key][0]
logs = '\n'.join(self._logs[key])
# build a pipeline script if required
if pipeline_required:
pipeline = self._build_pipeline(printer_name, envp, argv,
filters, path_ppd, path_doc, path_workdir)
else:
pipeline = None
# return results
return no_errors, logs, pipeline
finally:
# cleanup
if printer_name in self._printer_name_to_key:
with self._critical_section:
key = self._printer_name_to_key[printer_name].pop(0)
if len(self._printer_name_to_key[printer_name]) == 0:
self._printer_name_to_key.pop(printer_name)
self._logs.pop(key)
self._results.pop(key)