# vim: set encoding=UTF-8 fileencoding=UTF-8 :

'''Store, load, and handle problem reports.'''

# Copyright (C) 2006 - 2012 Canonical Ltd.
# Author: Martin Pitt
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.  See http://www.gnu.org/copyleft/gpl.html for
# the full text of the license.

import zlib, base64, time, sys, gzip, struct, os

from email.encoders import encode_base64
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from io import BytesIO

# Compare the numeric major version; comparing the first character of
# sys.version as a string only works for single-digit major versions.
if sys.version_info[0] < 3:
    from UserDict import IterableUserDict as UserDict
    UserDict  # pyflakes
    _python2 = True
else:
    from collections import UserDict
    _python2 = False

# First three bytes of every gzip member: ID1, ID2, CM=deflate.
_GZIP_MAGIC = b'\037\213\010'

# Chunk size used when streaming file contents.
_BLOCK_SIZE = 1048576


class CompressedValue:
    '''Represent a ProblemReport value which is gzip compressed.'''

    def __init__(self, value=None, name=None):
        '''Initialize an empty CompressedValue object with an optional name.

        If value is given (and non-empty), it is compressed immediately.
        name is passed to gzip as the file name stored in the gzip header.
        '''
        self.gzipvalue = None
        self.name = name
        # By default, compressed values are in gzip format. Earlier versions of
        # problem_report used zlib format (without gzip header). If you have
        # such a case, set legacy_zlib to True.
        self.legacy_zlib = False

        if value:
            self.set_value(value)

    def set_value(self, value):
        '''Set uncompressed value.'''

        out = BytesIO()
        gz = gzip.GzipFile(self.name, mode='wb', fileobj=out)
        # Close explicitly: the gzip trailer (CRC + size) is only written on
        # close(), so relying on garbage collection can yield a truncated
        # stream on interpreters without immediate finalization.
        try:
            gz.write(value)
        finally:
            gz.close()
        self.gzipvalue = out.getvalue()
        self.legacy_zlib = False

    def get_value(self):
        '''Return uncompressed value, or None if no value is set.'''

        if not self.gzipvalue:
            return None

        if self.legacy_zlib:
            return zlib.decompress(self.gzipvalue)
        return gzip.GzipFile(fileobj=BytesIO(self.gzipvalue)).read()

    def write(self, file):
        '''Write uncompressed value into given file-like object.'''

        assert self.gzipvalue

        if self.legacy_zlib:
            file.write(zlib.decompress(self.gzipvalue))
            return

        # stream in blocks so that large values need not fit into memory twice
        gz = gzip.GzipFile(fileobj=BytesIO(self.gzipvalue))
        while True:
            block = gz.read(_BLOCK_SIZE)
            if not block:
                break
            file.write(block)

    def __len__(self):
        '''Return length of uncompressed value.'''

        assert self.gzipvalue
        if self.legacy_zlib:
            return len(self.get_value())
        # NOTE(review): the unpack arguments were lost in the extracted source
        # and are reconstructed here: a gzip member ends with ISIZE, the
        # uncompressed size modulo 2**32 as a little-endian 32 bit integer.
        return int(struct.unpack('<L', self.gzipvalue[-4:])[0])

    # NOTE(review): splitlines() is not visible in the extracted source (it
    # falls into a span that was lost); restored from the upstream module --
    # verify against version control.
    def splitlines(self):
        '''Behaves like splitlines() for a normal string.'''

        return self.get_value().splitlines()


# NOTE(review): the class statement, __init__() and the first part of load()
# were lost in the extracted source. They are reconstructed from the parts
# that survived (use of self.data / self.old_keys, the tail of load(), the
# module imports) and from the upstream module -- verify against version
# control before merging.
class ProblemReport(UserDict):
    '''Dictionary-like container for the fields of a problem report.

    Values are strings, CompressedValue objects, or file reference tuples
    (see write()).
    '''

    def __init__(self, type='Crash', date=None):
        '''Initialize a fresh problem report.

        type is stored in the 'ProblemType' field. date is the desired
        date/time string; if None (default), the current local time is used.
        '''
        if date is None:
            date = time.asctime()
        self.data = {'ProblemType': type, 'Date': date}

        # keeps track of keys which were added since the last ctor or load()
        self.old_keys = set()

    def load(self, file, binary=True):
        '''Initialize problem report from a file-like object.

        If binary is False, binary data is not loaded; the dictionary key is
        created, but its value will be an empty string. If it is True, it is
        transparently uncompressed and available as dictionary byte array
        values. If binary is 'compressed', the compressed value is retained,
        and the dictionary value will be a CompressedValue object. This is
        useful if the compressed value is still useful (to avoid recompression
        if the file needs to be written back).

        file needs to be opened in binary mode.

        Files are in RFC822 format.
        '''
        self._assert_bin_mode(file)

        self.data.clear()
        key = None
        value = None
        b64_block = False
        bd = None  # decompressor of the current base64 block, created lazily
        for line in file:
            # continuation line
            if line.startswith(b' '):
                if b64_block and not binary:
                    continue
                assert (key is not None and value is not None)
                if b64_block:
                    chunk = base64.b64decode(line)
                    if bd is not None:
                        value += bd.decompress(chunk)
                    elif binary == 'compressed':
                        # check gzip header; if absent, we have legacy zlib
                        # data
                        if value.gzipvalue == b'' and not chunk.startswith(_GZIP_MAGIC):
                            value.legacy_zlib = True
                        value.gzipvalue += chunk
                    else:
                        # lazy initialization of bd
                        # skip gzip header, if present
                        if chunk.startswith(_GZIP_MAGIC):
                            bd = zlib.decompressobj(-zlib.MAX_WBITS)
                            value = bd.decompress(self._strip_gzip_header(chunk))
                        else:
                            # legacy zlib-only format used default block
                            # size
                            bd = zlib.decompressobj()
                            value += bd.decompress(chunk)
                else:
                    if len(value) > 0:
                        value += b'\n'
                    # drop the leading space and the trailing newline
                    if line.endswith(b'\n'):
                        value += line[1:-1]
                    else:
                        value += line[1:]
            else:
                # a new "Key: value" line terminates the previous field
                if b64_block:
                    if bd is not None:
                        value += bd.flush()
                    b64_block = False
                    bd = None
                if key:
                    assert value is not None
                    self.data[key] = self._try_unicode(value)
                (key, value) = line.split(b':', 1)
                if not _python2:
                    key = key.decode('ASCII')
                value = value.strip()
                if value == b'base64':
                    if binary == 'compressed':
                        # The key is the gzip *name*; passing it as the first
                        # argument would compress the key itself as a value.
                        value = CompressedValue(name=key)
                        value.gzipvalue = b''
                    else:
                        value = b''
                    b64_block = True

        if key is not None:
            self.data[key] = self._try_unicode(value)

        self.old_keys = set(self.data.keys())

    def has_removed_fields(self):
        '''Check if the report has any keys which were not loaded.

        This could happen when using binary=False in load().
        '''
        return ('' in self.values())

    @classmethod
    def _is_binary(klass, string):
        '''Check if the given strings contains binary data.'''

        if _python2:
            return klass._is_binary_py2(string)

        # only byte strings can be binary; control characters other than
        # whitespace mark them as such
        if isinstance(string, bytes):
            for c in string:
                if c < 32 and not chr(c).isspace():
                    return True
        return False

    @classmethod
    def _is_binary_py2(klass, string):
        '''Check if the given strings contains binary data. (Python 2)'''

        if isinstance(string, unicode):
            return False

        for c in string:
            if c < ' ' and not c.isspace():
                return True
        return False

    @classmethod
    def _try_unicode(klass, value):
        '''Try to convert bytearray value to unicode

        Non-bytes values, binary data and data which is not valid UTF-8 are
        returned unchanged.
        '''
        if isinstance(value, bytes) and not klass._is_binary(value):
            try:
                return value.decode('UTF-8')
            except UnicodeDecodeError:
                return value
        return value

    def write(self, file, only_new=False):
        '''Write information into the given file-like object.

        If only_new is True, only keys which have been added since the last
        load() are written (i. e. those returned by new_keys()).

        If a value is a string, it is written directly. Otherwise it must be a
        tuple of the form (file, encode=True, limit=None, fail_on_empty=False).
        The first argument can be a file name or a file-like object,
        which will be read and its content will become the value of this key.
        'encode' specifies whether the contents will be
        gzip compressed and base64-encoded (this defaults to True). If limit is
        set to a positive integer, the file is not attached if it's larger
        than the given limit, and the entire key will be removed. If
        fail_on_empty is True, reading zero bytes will cause an IOError.

        file needs to be opened in binary mode.

        Files are written in RFC822 format.
        '''
        self._assert_bin_mode(file)

        # sort keys into ASCII non-ASCII/binary attachment ones, so that
        # the base64 ones appear last in the report
        asckeys = []
        binkeys = []
        for k in self.data.keys():
            if only_new and k in self.old_keys:
                continue
            v = self.data[k]
            if hasattr(v, 'find'):
                if self._is_binary(v):
                    binkeys.append(k)
                else:
                    asckeys.append(k)
            else:
                if not isinstance(v, CompressedValue) and len(v) >= 2 and not v[1]:
                    # force uncompressed
                    asckeys.append(k)
                else:
                    binkeys.append(k)

        asckeys.sort()
        if 'ProblemType' in asckeys:
            asckeys.remove('ProblemType')
            asckeys.insert(0, 'ProblemType')
        binkeys.sort()

        # write the ASCII keys first
        for k in asckeys:
            v = self.data[k]

            # if it's a tuple, we have a file reference; read the contents
            if not hasattr(v, 'find'):
                if len(v) >= 3 and v[2] is not None:
                    limit = v[2]
                else:
                    limit = None

                fail_on_empty = len(v) >= 4 and v[3]

                if hasattr(v[0], 'read'):
                    v = v[0].read()  # file-like object
                else:
                    with open(v[0], 'rb') as f:  # file name
                        v = f.read()

                if fail_on_empty and len(v) == 0:
                    raise IOError('did not get any data for field ' + k)

                if limit is not None and len(v) > limit:
                    del self.data[k]
                    continue

            if _python2:
                if isinstance(v, unicode):
                    # unicode -> str
                    v = v.encode('UTF-8')
            else:
                if isinstance(v, str):
                    # str -> bytes
                    v = v.encode('UTF-8')

            file.write(k.encode('ASCII'))
            if b'\n' in v:
                # multiline value
                file.write(b':\n ')
                file.write(v.replace(b'\n', b'\n '))
            else:
                file.write(b': ')
                file.write(v)
            file.write(b'\n')

        # now write the binary keys with gzip compression and base64 encoding
        for k in binkeys:
            v = self.data[k]
            limit = None
            size = 0
            # set when the field exceeded its limit and was removed again
            rolled_back = False

            curr_pos = file.tell()
            file.write(k.encode('ASCII'))
            file.write(b': base64\n ')

            # CompressedValue
            if isinstance(v, CompressedValue):
                file.write(base64.b64encode(v.gzipvalue))
                file.write(b'\n')
                continue

            # write gzip header
            gzip_header = b'\037\213\010\010\000\000\000\000\002\377' + k.encode('UTF-8') + b'\000'
            file.write(base64.b64encode(gzip_header))
            file.write(b'\n ')
            crc = zlib.crc32(b'')

            # raw deflate stream (negative wbits); header and trailer are
            # written by hand
            bc = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                                  zlib.DEF_MEM_LEVEL, 0)
            # direct value
            if hasattr(v, 'find'):
                size += len(v)
                crc = zlib.crc32(v, crc)
                outblock = bc.compress(v)
                if outblock:
                    file.write(base64.b64encode(outblock))
                    file.write(b'\n ')
            # file reference
            else:
                if len(v) >= 3 and v[2] is not None:
                    limit = v[2]

                own_file = not hasattr(v[0], 'read')
                if own_file:
                    f = open(v[0], 'rb')  # file name
                else:
                    f = v[0]  # file-like object
                try:
                    while True:
                        block = f.read(_BLOCK_SIZE)
                        size += len(block)
                        crc = zlib.crc32(block, crc)
                        if limit is not None and size > limit:
                            # roll back
                            file.seek(curr_pos)
                            file.truncate(curr_pos)
                            del self.data[k]
                            rolled_back = True
                            break
                        if not block:
                            break
                        outblock = bc.compress(block)
                        if outblock:
                            file.write(base64.b64encode(outblock))
                            file.write(b'\n ')
                finally:
                    # only close files we opened ourselves, also on errors
                    if own_file:
                        f.close()

                if len(v) >= 4 and v[3]:
                    if size == 0:
                        raise IOError('did not get any data for field %s from %s' % (k, str(v[0])))

            # flush compressor and write the rest
            if not rolled_back:
                block = bc.flush()
                # append gzip trailer: crc (32 bit) and size (32 bit)
                # NOTE(review): the pack() arguments were lost in the extracted
                # source; reconstructed per the comment above and the gzip
                # format. The trailer is written unconditionally: 0 is a valid
                # CRC32 (e. g. for empty data) and must not suppress it.
                block += struct.pack('<L', crc & 0xFFFFFFFF)
                block += struct.pack('<L', size & 0xFFFFFFFF)

                file.write(base64.b64encode(block))
                file.write(b'\n')

    # NOTE(review): add_to_existing() and write_mime() are not visible in the
    # extracted source (they fall into a span that was lost). They are
    # restored from the upstream module so that the os and email imports keep
    # their users -- verify against version control before merging.
    def add_to_existing(self, reportfile, keep_times=False):
        '''Add this report's data to an already existing report file.

        The file will be temporarily chmod'ed to 000 to prevent frontends
        from picking up a half-updated report file. If keep_times is True,
        then the file's atime and mtime are restored after updating.
        '''
        st = os.stat(reportfile)
        try:
            f = open(reportfile, 'ab')
            try:
                os.chmod(reportfile, 0)
                self.write(f)
            finally:
                f.close()
        finally:
            if keep_times:
                os.utime(reportfile, (st.st_atime, st.st_mtime))
            os.chmod(reportfile, st.st_mode)

    def write_mime(self, file, attach_treshold=5, extra_headers={},
                   skip_keys=None, priority_fields=None):
        '''Write MIME/Multipart RFC 2822 formatted data into file.

        file must be a file-like object, not a path.  It needs to be opened in
        binary mode.

        If a value is a string or a CompressedValue, it is written directly.
        Otherwise it must be a tuple containing the source file and an optional
        boolean value (in that order); the first argument can be a file name or
        a file-like object, which will be read and its content will become the
        value of this key.  The file will be gzip compressed, unless the key
        already ends in .gz.

        attach_treshold specifies the maximum number of lines for a value to be
        included into the first inline text part. All bigger values (as well as
        all binary ones) will become an attachment, as well as text
        values bigger than 1 kB.

        Extra MIME preamble headers can be specified, too, as a dictionary
        (it is only read, never modified).

        skip_keys is a set/list specifying keys which are filtered out and not
        written to the destination file.

        priority_fields is a set/list specifying the order in which keys should
        appear in the destination file.
        '''
        self._assert_bin_mode(file)

        keys = sorted(self.data.keys())

        text = ''
        attachments = []

        if 'ProblemType' in keys:
            keys.remove('ProblemType')
            keys.insert(0, 'ProblemType')

        if priority_fields:
            counter = 0
            for priority_field in priority_fields:
                if priority_field in keys:
                    keys.remove(priority_field)
                    keys.insert(counter, priority_field)
                    counter += 1

        for k in keys:
            if skip_keys and k in skip_keys:
                continue
            v = self.data[k]
            attach_value = None

            # compressed values are ready for attaching in gzip form
            if isinstance(v, CompressedValue):
                attach_value = v.gzipvalue

            # if it's a tuple, we have a file reference; read the contents
            # and gzip it
            elif not hasattr(v, 'find'):
                attach_value = ''
                if hasattr(v[0], 'read'):
                    f = v[0]  # file-like object
                else:
                    f = open(v[0], 'rb')  # file name
                if k.endswith('.gz'):
                    attach_value = f.read()
                else:
                    out = BytesIO()
                    gf = gzip.GzipFile(k, mode='wb', fileobj=out)
                    while True:
                        block = f.read(_BLOCK_SIZE)
                        if block:
                            gf.write(block)
                        else:
                            gf.close()
                            break
                    attach_value = out.getvalue()
                f.close()

            # binary value
            elif self._is_binary(v):
                if k.endswith('.gz'):
                    attach_value = v
                else:
                    attach_value = CompressedValue(v, k).gzipvalue

            # if we have an attachment value, create an attachment
            if attach_value:
                att = MIMEBase('application', 'x-gzip')
                if k.endswith('.gz'):
                    att.add_header('Content-Disposition', 'attachment', filename=k)
                else:
                    att.add_header('Content-Disposition', 'attachment', filename=k + '.gz')
                att.set_payload(attach_value)
                encode_base64(att)
                attachments.append(att)
            else:
                # plain text value
                size = len(v)

                # ensure that byte arrays are valid UTF-8
                if type(v) == bytes:
                    v = v.decode('UTF-8', 'replace')
                # convert unicode to UTF-8 str
                if _python2:
                    assert isinstance(v, unicode)
                    v = v.encode('UTF-8')

                lines = len(v.splitlines())
                if size <= 1000 and lines == 1:
                    v = v.rstrip()
                    text += k + ': ' + v + '\n'
                elif size <= 1000 and lines <= attach_treshold:
                    text += k + ':\n '
                    if not v.endswith('\n'):
                        v += '\n'
                    text += v.strip().replace('\n', '\n ') + '\n'
                else:
                    # too large, separate attachment
                    att = MIMEText(v, _charset='UTF-8')
                    att.add_header('Content-Disposition', 'attachment', filename=k + '.txt')
                    attachments.append(att)

        # create initial text attachment
        att = MIMEText(text, _charset='UTF-8')
        att.add_header('Content-Disposition', 'inline')
        attachments.insert(0, att)

        msg = MIMEMultipart()
        for k, v in extra_headers.items():
            msg.add_header(k, v)
        for a in attachments:
            msg.attach(a)

        file.write(msg.as_string().encode('UTF-8'))
        file.write(b'\n')

    # NOTE(review): the signature and the first part of the value assertion
    # were lost in the extracted source; only the tail starting at
    # "= 2 and v[1] in (True, False)" survived. The head is reconstructed from
    # the upstream module -- verify against version control.
    def __setitem__(self, k, v):
        '''Set field k to v, after checking key and value for validity.'''

        assert hasattr(k, 'isalnum')
        assert k.replace('.', '').replace('-', '').replace('_', '').isalnum()
        # value must be a string or a CompressedValue or a file reference
        # (tuple (string|file [, bool]))
        assert (isinstance(v, CompressedValue) or hasattr(v, 'isalnum') or
                (hasattr(v, '__getitem__') and (
                    len(v) == 1 or (len(v) >= 2 and v[1] in (True, False))) and
                    (hasattr(v[0], 'isalnum') or hasattr(v[0], 'read'))))

        return self.data.__setitem__(k, v)

    def new_keys(self):
        '''Return newly added keys.

        Return the set of keys which have been added to the report since it
        was constructed or loaded.
        '''
        return set(self.data.keys()) - self.old_keys

    @classmethod
    def _strip_gzip_header(klass, line):
        '''Strip gzip header from line and return the rest.

        line must start with a complete gzip member header; the optional
        header fields are skipped according to the FLG byte.
        '''
        if _python2:
            return klass._strip_gzip_header_py2(line)

        flags = line[3]

        # fixed part: ID1 ID2 CM FLG MTIME(4) XFL OS
        offset = 10
        if flags & 4:  # FLG.FEXTRA
            # two byte little-endian XLEN, followed by XLEN bytes of data
            offset += 2 + (line[offset] | (line[offset + 1] << 8))
        if flags & 8:  # FLG.FNAME
            # zero-terminated file name
            while line[offset] != 0:
                offset += 1
            offset += 1
        if flags & 16:  # FLG.FCOMMENT
            # zero-terminated comment
            while line[offset] != 0:
                offset += 1
            offset += 1
        if flags & 2:  # FLG.FHCRC
            offset += 2

        return line[offset:]

    @classmethod
    def _strip_gzip_header_py2(klass, line):
        '''Strip gzip header from line and return the rest. (Python 2)'''

        # indexing a Python 2 str yields one-character strings, hence ord()
        flags = ord(line[3])

        # fixed part: ID1 ID2 CM FLG MTIME(4) XFL OS
        offset = 10
        if flags & 4:  # FLG.FEXTRA
            # two byte little-endian XLEN, followed by XLEN bytes of data
            offset += 2 + (ord(line[offset]) | (ord(line[offset + 1]) << 8))
        if flags & 8:  # FLG.FNAME
            while ord(line[offset]) != 0:
                offset += 1
            offset += 1
        if flags & 16:  # FLG.FCOMMENT
            while ord(line[offset]) != 0:
                offset += 1
            offset += 1
        if flags & 2:  # FLG.FHCRC
            offset += 2

        return line[offset:]

    @classmethod
    def _assert_bin_mode(klass, file):
        '''Assert that given file object is in binary mode'''

        if _python2:
            assert (type(file) == BytesIO or 'b' in file.mode), 'file stream must be in binary mode'
        else:
            # text mode streams carry an "encoding" attribute
            assert not hasattr(file, 'encoding'), 'file stream must be in binary mode'