#!/usr/bin/python3 # Collect information about a crash and create a report in the directory # specified by apport.fileutils.report_dir. # See https://wiki.ubuntu.com/Apport for details. # # Copyright (c) 2006 - 2011 Canonical Ltd. # Author: Martin Pitt # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2 of the License, or (at your # option) any later version. See http://www.gnu.org/copyleft/gpl.html for # the full text of the license. import sys, os, os.path, subprocess, time, traceback, pwd, io import signal, inspect, grp, fcntl import errno import apport, apport.fileutils ################################################################# # # functions # ################################################################# def check_lock(): '''Abort if another instance of apport is already running. This avoids bringing down the system to its knees if there is a series of crashes.''' # create a lock file lockfile = os.path.join(apport.fileutils.report_dir, '.lock') try: fd = os.open(lockfile, os.O_WRONLY | os.O_CREAT | os.O_NOFOLLOW) except OSError as e: error_log('cannot create lock file (uid %i): %s' % (os.getuid(), str(e))) sys.exit(1) try: fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError: error_log('another apport instance is already running, aborting') sys.exit(1) (pidstat, real_uid, real_gid, cwd) = (None, None, None, None) def get_pid_info(pid): '''Read /proc information about pid''' global pidstat, real_uid, real_gid, cwd # unhandled exceptions on missing or invalidly formatted files are okay # here -- we want to know in the log file pidstat = os.stat('/proc/%s/stat' % pid) # determine real UID of the target process; do *not* use the owner of # /proc/pid/stat, as that will be root for setuid or unreadable programs! # (this matters when suid_dumpable is enabled) with open('/proc/%s/status' % pid) as f: for line in f: if line.startswith('Uid:'): real_uid = int(line.split()[1]) elif line.startswith('Gid:'): real_gid = int(line.split()[1]) break assert real_uid is not None, 'failed to parse Uid' assert real_gid is not None, 'failed to parse Gid' cwd = os.readlink('/proc/' + pid + '/cwd') def drop_privileges(pid, real_only=False): '''Change user and group to match the given target process Normally that irrevocably drops privileges to the real user/group of the target process. With real_only=True only the real IDs are changed, but the effective IDs remain. ''' if real_only: os.setregid(real_gid, -1) os.setreuid(real_uid, -1) else: os.setgid(real_gid) os.setuid(real_uid) assert os.getegid() == real_gid assert os.geteuid() == real_uid assert os.getgid() == real_gid assert os.getuid() == real_uid def init_error_log(): '''Open a suitable error log if sys.stderr is not a tty.''' if not os.isatty(2): log = os.environ.get('APPORT_LOG_FILE', '/var/log/apport.log') try: f = os.open(log, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600) try: admgid = grp.getgrnam('adm')[2] os.chown(log, -1, admgid) os.chmod(log, 0o640) except KeyError: pass # if group adm doesn't exist, just leave it as root except OSError: # on a permission error, don't touch stderr return os.dup2(f, 1) os.dup2(f, 2) sys.stderr = os.fdopen(2, 'wb') if sys.version_info.major >= 3: sys.stderr = io.TextIOWrapper(sys.stderr) sys.stdout = sys.stderr def error_log(msg): '''Output something to the error log.''' apport.error('apport (pid %s) %s: %s', os.getpid(), time.asctime(), msg) def _log_signal_handler(sgn, frame): '''Internal apport signal handler. Just log the signal handler and exit.''' # reset handler so that we do not get stuck in loops signal.signal(sgn, signal.SIG_IGN) try: error_log('Got signal %i, aborting; frame:' % sgn) for s in inspect.stack(): error_log(str(s)) except: pass sys.exit(1) def setup_signals(): '''Install a signal handler for all crash-like signals, so that apport is not called on itself when apport crashed.''' signal.signal(signal.SIGILL, _log_signal_handler) signal.signal(signal.SIGABRT, _log_signal_handler) signal.signal(signal.SIGFPE, _log_signal_handler) signal.signal(signal.SIGSEGV, _log_signal_handler) signal.signal(signal.SIGPIPE, _log_signal_handler) signal.signal(signal.SIGBUS, _log_signal_handler) def write_user_coredump(pid, cwd, limit, from_report=None): '''Write the core into the current directory if ulimit requests it.''' # three cases: # limit == 0: do not write anything # limit < 0: unlimited, write out everything # limit nonzero: crashed process' core size ulimit in bytes if limit == 0: return # don't write a core dump for suid/sgid/unreadable or otherwise # protected executables, in accordance with core(5) # (suid_dumpable==2 and core_pattern restrictions); when this happens, # /proc/pid/stat is owned by root (or the user suid'ed to), but we already # changed to the crashed process' real uid assert pidstat, 'pidstat not initialized' if pidstat.st_uid != os.getuid() or pidstat.st_gid != os.getgid(): error_log('disabling core dump for suid/sgid/unreadable executable') return core_path = os.path.join(cwd, 'core') try: with open('/proc/sys/kernel/core_uses_pid') as f: if f.read().strip() != '0': core_path += '.' + str(pid) core_file = os.open(core_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600) except (OSError, IOError): return error_log('writing core dump to %s (limit: %s)' % (core_path, str(limit))) written = 0 # Priming read if from_report: r = apport.Report() r.load(from_report) core_size = len(r['CoreDump']) if limit > 0 and core_size > limit: error_log('aborting core dump writing, size %i exceeds current limit' % core_size) os.close(core_file) os.unlink(core_path) return error_log('writing core dump %s of size %i' % (core_path, core_size)) os.write(core_file, r['CoreDump']) else: # read from stdin block = os.read(0, 1048576) while True: size = len(block) if size == 0: break written += size if limit > 0 and written > limit: error_log('aborting core dump writing, size exceeds current limit %i' % limit) os.close(core_file) os.unlink(core_path) return if os.write(core_file, block) != size: error_log('aborting core dump writing, could not write') os.close(core_file) os.unlink(core_path) return block = os.read(0, 1048576) os.close(core_file) return core_path def usable_ram(): '''Return how many bytes of RAM is currently available that can be allocated without causing major thrashing.''' # abuse our excellent RFC822 parser to parse /proc/meminfo r = apport.Report() with open('/proc/meminfo', 'rb') as f: r.load(f) memfree = int(r['MemFree'].split()[0]) cached = int(r['Cached'].split()[0]) writeback = int(r['Writeback'].split()[0]) return (memfree + cached - writeback) * 1024 def is_closing_session(pid, uid): '''Check if pid is in a closing user session. During that, crashes are common as the session D-BUS and X.org are going away, etc. These crash reports are mostly noise, so should be ignored. ''' with open('/proc/%s/environ' % pid, 'rb') as e: env = e.read().split(b'\0') for e in env: if e.startswith(b'DBUS_SESSION_BUS_ADDRESS='): dbus_addr = e.split(b'=', 1)[1].decode() break else: error_log('is_closing_session(): no DBUS_SESSION_BUS_ADDRESS in environment') return False orig_uid = os.geteuid() os.setresuid(-1, os.getuid(), -1) try: gdbus = subprocess.Popen(['/usr/bin/gdbus', 'call', '-e', '-d', 'org.gnome.SessionManager', '-o', '/org/gnome/SessionManager', '-m', 'org.gnome.SessionManager.IsSessionRunning'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env={'DBUS_SESSION_BUS_ADDRESS': dbus_addr}) (out, err) = gdbus.communicate() if err: error_log('gdbus call error: ' + err.decode('UTF-8')) except OSError as e: error_log('gdbus call failed, cannot determine running session: ' + str(e)) return False finally: os.setresuid(-1, orig_uid, -1) error_log('debug: session gdbus call: ' + out.decode('UTF-8')) if out.startswith(b'(false,'): return True return False def is_container_pid(pid): if not os.path.exists('/proc/self/ns/pid') or \ not os.path.exists('/proc/%s/ns/pid' % pid): return False try: # Crashed process is in the same namespace as apport, not a container if os.readlink('/proc/%s/ns/pid' % pid) == os.readlink('/proc/self/ns/pid'): return False except OSError as e: if e.errno == errno.ENOENT: return False else: raise return True ################################################################# # # main # ################################################################# if len(sys.argv) not in (5, 6): try: print('Usage: %s [global pid]' % sys.argv[0]) print('The core dump is read from stdin.') except IOError: # sys.stderr might not actually exist, expecially not when being called # from the kernel pass sys.exit(1) init_error_log() # Check if we received a valid global PID (kernel >= 3.12). If we do, # then compare it with the local PID. If they don't match, it's an # indication that the crash originated from another PID namespace. # Simply log an entry in the host error log and exit 0. if len(sys.argv) == 6: host_pid = int(sys.argv[5]) if is_container_pid(host_pid): error_log('pid %s crashed in a container' % host_pid) sys.exit(0) else: # If it doesn't look like the crash originated from within a # full container or if the is_container_pid() function fails open (returning # False), then take the global pid and replace the local pid with it, # then move on to normal handling. sys.argv[1] = str(host_pid) check_lock() try: setup_signals() (pid, signum, core_ulimit, dump_mode) = sys.argv[1:5] # drop our process priority level to not disturb userspace so much try: os.nice(10) except OSError: pass # *shrug*, we tried get_pid_info(pid) # Partially drop privs to gain proper os.access() checks drop_privileges(pid, True) error_log('called for pid %s, signal %s, core limit %s, dump mode %s' % (pid, signum, core_ulimit, dump_mode)) try: core_ulimit = int(core_ulimit) except ValueError: error_log('core limit is invalid, disabling core files') core_ulimit = 0 # clamp core_ulimit to a sensible size, for -1 the kernel reports something # absurdly big if core_ulimit > 9223372036854775807: error_log('ignoring implausibly big core limit, treating as unlimited') core_ulimit = -1 # ulimit specifies blocks, which are kB if core_ulimit > 0: core_ulimit *= 1024 if dump_mode == '2': error_log('not creating core for pid with dump mode of %s' % (dump_mode)) # a report should be created but not a core file core_ulimit = 0 # ignore SIGQUIT (it's usually deliberately generated by users) if signum == str(signal.SIGQUIT): drop_privileges(pid) write_user_coredump(pid, cwd, core_ulimit) sys.exit(0) # check if the executable was modified after the process started (e. g. # package got upgraded in between) exe_mtime = os.stat('/proc/%s/exe' % pid).st_mtime process_start = os.lstat('/proc/%s/cmdline' % pid).st_mtime if not os.path.exists(os.readlink('/proc/%s/exe' % pid)) or exe_mtime > process_start: error_log('executable was modified after program start, ignoring') sys.exit(1) info = apport.Report('Crash') info['Signal'] = signum if sys.version_info.major < 3: info['CoreDump'] = (sys.stdin, True, usable_ram() * 3 / 4, True) else: # read binary data from stdio info['CoreDump'] = (sys.stdin.detach(), True, usable_ram() * 3 / 4, True) # We already need this here to figure out the ExecutableName (for scripts, # etc). info.add_proc_info(pid) if 'ExecutablePath' not in info: error_log('could not determine ExecutablePath, aborting') sys.exit(1) subject = info['ExecutablePath'].replace('/', '_') base = '%s.%s.%s.hanging' % (subject, str(pidstat.st_uid), pid) hanging = os.path.join(apport.fileutils.report_dir, base) if os.path.exists(hanging): if (os.stat('/proc/uptime').st_ctime < os.stat(hanging).st_mtime): info['ProblemType'] = 'Hang' os.unlink(hanging) if 'InterpreterPath' in info: error_log('script: %s, interpreted by %s (command line "%s")' % (info['ExecutablePath'], info['InterpreterPath'], info['ProcCmdline'])) else: error_log('executable: %s (command line "%s")' % (info['ExecutablePath'], info['ProcCmdline'])) # ignore non-package binaries (unless configured otherwise) if not apport.fileutils.likely_packaged(info['ExecutablePath']): if not apport.fileutils.get_config('main', 'unpackaged', False, bool=True): error_log('executable does not belong to a package, ignoring') # check if the user wants a core dump drop_privileges(pid) write_user_coredump(pid, cwd, core_ulimit) sys.exit(1) # ignore SIGXCPU and SIGXFSZ since this indicates some external # influence changing soft RLIMIT values when running programs. if signum in [str(signal.SIGXCPU), str(signal.SIGXFSZ)]: error_log('Ignoring signal %s (caused by exceeding soft RLIMIT)' % signum) drop_privileges(pid) write_user_coredump(pid, cwd, core_ulimit) sys.exit(0) # ignore blacklisted binaries if info.check_ignored(): error_log('executable version is blacklisted, ignoring') sys.exit(1) if is_closing_session(pid, pidstat.st_uid): error_log('happens for shutting down session, ignoring') sys.exit(1) crash_counter = 0 # Create crash report file descriptor for writing the report into # report_dir try: report = '%s/%s.%i.crash' % (apport.fileutils.report_dir, info['ExecutablePath'].replace('/', '_'), pidstat.st_uid) if os.path.exists(report): if apport.fileutils.seen_report(report): # do not flood the logs and the user with repeated crashes with open(report, 'rb') as f: crash_counter = apport.fileutils.get_recent_crashes(f) crash_counter += 1 if crash_counter > 1: drop_privileges(pid) write_user_coredump(pid, cwd, core_ulimit) error_log('this executable already crashed %i times, ignoring' % crash_counter) sys.exit(1) # remove the old file, so that we can create the new one with # os.O_CREAT|os.O_EXCL os.unlink(report) else: error_log('apport: report %s already exists and unseen, doing nothing to avoid disk usage DoS' % report) drop_privileges(pid) write_user_coredump(pid, cwd, core_ulimit) sys.exit(1) # we prefer having a file mode of 0 while writing; this doesn't work # for suid binaries as we completely drop privs and thus can't chmod # them later on if pidstat.st_uid != os.getuid(): mode = 0o640 else: mode = 0 reportfile = os.fdopen(os.open(report, os.O_RDWR | os.O_CREAT | os.O_EXCL, mode), 'w+b') assert reportfile.fileno() > sys.stderr.fileno() # Make sure the crash reporting daemon can read this report try: gid = pwd.getpwnam('whoopsie').pw_gid os.chown(report, pidstat.st_uid, gid) except (OSError, KeyError): os.chown(report, pidstat.st_uid, pidstat.st_gid) except (OSError, IOError) as e: error_log('Could not create report file: %s' % str(e)) sys.exit(1) # Totally drop privs before writing out the reportfile. drop_privileges(pid) info.add_user_info() info.add_os_info() if crash_counter > 0: info['CrashCounter'] = '%i' % crash_counter try: info.write(reportfile) if reportfile != sys.stderr: # Ensure that the file gets written to disk in the event of an # Upstart crash. if info.get('ExecutablePath', '') == '/sbin/init': reportfile.flush() os.fsync(reportfile.fileno()) parent_directory = os.path.dirname(report) try: fd = os.open(parent_directory, os.O_RDONLY) os.fsync(fd) finally: os.close(fd) except IOError: if reportfile != sys.stderr: os.unlink(report) raise if report and mode == 0: # for non-suid programs, make the report writable now, when it's # completely written os.chmod(report, 0o640) if reportfile != sys.stderr: error_log('wrote report %s' % report) # Check if the user wants a core file. We need to create that from the # written report, as we can only read stdin once and write_user_coredump() # might abort reading from stdin and remove the written core file when # core_ulimit is > 0 and smaller than the core size. reportfile.seek(0) write_user_coredump(pid, cwd, core_ulimit, from_report=reportfile) except (SystemExit, KeyboardInterrupt): raise except Exception as e: error_log('Unhandled exception:') traceback.print_exc() error_log('pid: %i, uid: %i, gid: %i, euid: %i, egid: %i' % ( os.getpid(), os.getuid(), os.getgid(), os.geteuid(), os.getegid())) error_log('environment: %s' % str(os.environ))