# # Parse::DebianChangelog # # Copyright 1996 Ian Jackson # Copyright 2005,2011 Frank Lichtenheld # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # =head1 NAME Parse::DebianChangelog - parse Debian changelogs and output them in other formats =head1 SYNOPSIS use Parse::DebianChangelog; my $chglog = Parse::DebianChangelog->init( { infile => 'debian/changelog', HTML => { outfile => 'changelog.html' } ); $chglog->html; # the following is semantically equivalent my $chglog = Parse::DebianChangelog->init(); $chglog->parse( { infile => 'debian/changelog' } ); $chglog->html( { outfile => 'changelog.html' } ); my $changes = $chglog->dpkg_str( { since => '1.0-1' } ); print $changes; =head1 DESCRIPTION Parse::DebianChangelog parses Debian changelogs as described in the Debian policy (version 3.6.2.1 at the time of this writing). See section L<"SEE ALSO"> for locations where to find this definition. The parser tries to ignore most cruft like # or /* */ style comments, CVS comments, vim variables, emacs local variables and stuff from older changelogs with other formats at the end of the file. NOTE: most of these are ignored silently currently, there is no parser error issued for them. This should become configurable in the future. Beside giving access to the details of the parsed file via the L<"data"> method, Parse::DebianChangelog also supports converting these changelogs to various other formats. These are currently: =over 4 =item dpkg Format as known from L. All requested entries (see L<"METHODS"> for an explanation what this means) are returned in the usual Debian control format, merged in one stanza, ready to be used in a F<.changes> file. =item rfc822 Similar to the C format, but the requested entries are returned as one stanza each, i.e. they are not merged. This is probably the format to use if you want a machine-usable representation of the changelog. =item xml Just a simple XML dump of the changelog data. Without any schema or DTD currently, just some made up XML. The actual format might still change. Comments and Improvements welcome. =item html The changelog is converted to a somewhat nice looking HTML file with some nice features as a quick-link bar with direct links to every entry. NOTE: This is not very configurable yet and was specifically designed to be used on L. This is planned to be changed until version 1.0. =back =head2 METHODS =cut package Parse::DebianChangelog; use 5.010; use strict; use warnings; use Fcntl qw( :flock ); use Locale::gettext; use Date::Parse; use Parse::DebianChangelog::Util qw( :all ); use Parse::DebianChangelog::Entry; our $VERSION = '1.2.0'; =pod =head3 init Creates a new object instance. Takes a reference to a hash as optional argument, which is interpreted as configuration options. There are currently no supported general configuration options, but see the other methods for more specific configuration options which can also specified to C. If C or C are specified (see L), C is called from C. If a fatal error is encountered during parsing (e.g. the file can't be opened), C will not return a valid object but C! =cut sub init { my $classname = shift; my $config = shift || {}; my $self = {}; bless( $self, $classname ); $config->{verbose} = 1 if $config->{debug}; $self->{config} = $config; $self->init_filters; $self->reset_parse_errors; if ($self->{config}{infile} || $self->{config}{instring}) { defined($self->parse) or return undef; } return $self; } =pod =head3 reset_parse_errors Can be used to delete all information about errors ocurred during previous L runs. Note that C also calls this method. =cut sub reset_parse_errors { my ($self) = @_; $self->{errors}{parser} = []; } sub _do_parse_error { my ($self, $file, $line_nr, $error, $line) = @_; shift; push @{$self->{errors}{parser}}, [ @_ ]; $file = substr $file, 0, 20; unless ($self->{config}{quiet}) { if ($line) { warn "WARN: $file(l$.): $error\nLINE: $line\n"; } else { warn "WARN: $file(l$.): $error\n"; } } } =pod =head3 get_parse_errors Returns all error messages from the last L run. If called in scalar context returns a human readable string representation. If called in list context returns an array of arrays. Each of these arrays contains =over 4 =item 1. the filename of the parsed file or C if a string was parsed directly =item 2. the line number where the error occurred =item 3. an error description =item 4. the original line =back NOTE: This format isn't stable yet and may change in later versions of this module. =cut sub get_parse_errors { my ($self) = @_; if (wantarray) { return @{$self->{errors}{parser}}; } else { my $res = ""; foreach my $e (@{$self->{errors}{parser}}) { if ($e->[3]) { $res .= __g( "WARN: %s(l%s): %s\nLINE: %s\n", @$e ); } else { $res .= __g( "WARN: %s(l%s): %s\n", @$e ); } } return $res; } } sub _do_fatal_error { my ($self, @msg) = @_; $self->{errors}{fatal} = "@msg"; warn __g( "FATAL: %s", "@msg")."\n" unless $self->{config}{quiet}; } =pod =head3 get_error Get the last non-parser error (e.g. the file to parse couldn't be opened). =cut sub get_error { my ($self) = @_; return $self->{errors}{fatal}; } =pod =head3 parse Parses either the file named in configuration item C or the string saved in configuration item C. Accepts a hash ref as optional argument which can contain configuration items. Returns C in case of error (e.g. "file not found", B parse errors) and the object if successful. If C was returned, you can get the reason for the failure by calling the L method. =cut sub __g { my $string = shift; return sprintf( dgettext( 'Parse-DebianChangelog', $string ), @_ ); } sub parse { my ($self, $config) = @_; foreach my $c (keys %$config) { $self->{config}{$c} = $config->{$c}; } my ($fh, $file); if ($file = $self->{config}{infile}) { open $fh, '<', $file or do { $self->_do_fatal_error( __g( "can't open file %s: %s", $file, $! )); return undef; }; flock $fh, LOCK_SH or do { $self->_do_fatal_error( __g( "can't lock file %s: %s", $file, $! )); return undef; }; } elsif (my $string = $self->{config}{instring}) { eval { require IO::String }; if ($@) { $self->_do_fatal_error( __g( "can't load IO::String: %s", $@ )); return undef; } $fh = IO::String->new( $string ); $file = 'String'; } else { $self->_do_fatal_error( __g( 'no changelog file specified' )); return undef; } $self->reset_parse_errors; $self->{data} = []; # based on /usr/lib/dpkg/parsechangelog/debian my $expect='first heading'; my $entry = Parse::DebianChangelog::Entry->init(); my $blanklines = 0; my $unknowncounter = 1; # to make version unique, e.g. for using as id local $_; while (<$fh>) { s/\s*\n$//; # printf(STDERR "%-39.39s %-39.39s\n",$expect,$_); if (m/^(?\w[-+0-9a-z.]*) \((?[^\(\) \t]+)\)(?(?:\s+[-+0-9a-z.]+)+)\;\s*(?.*)$/i) { unless ($expect eq 'first heading' || $expect eq 'next heading or eof') { $entry->{ERROR} = [ $file, $., __g( "found start of entry where expected %s", $expect ), "$_" ]; $self->_do_parse_error(@{$entry->{ERROR}}); } unless ($entry->is_empty) { $entry->{'Closes'} = find_closes( $entry->{Changes} ); # print STDERR, Dumper($entry); push @{$self->{data}}, $entry; $entry = Parse::DebianChangelog::Entry->init(); } { $entry->{'Source'} = $+{Source}; $entry->{'Version'} = $+{Version}; $entry->{'Header'} = "$_"; ($entry->{'Distribution'} = $+{Distribution}) =~ s/^\s+//; $entry->{'Changes'} = $entry->{'Urgency_Comment'} = ''; $entry->{'Urgency'} = $entry->{'Urgency_LC'} = 'unknown'; } my %kvdone; for my $kv (split(/\s*,\s*/,$+{kv})) { $kv =~ m/^([-0-9a-z]+)\=\s*(.*\S)$/i || $self->_do_parse_error($file, $., __g( "bad key-value after \`;': \`%s'", $kv )); my $k = ucfirst $1; my $v = $2; $kvdone{$k}++ && $self->_do_parse_error($file, $., __g( "repeated key-value %s", $k )); if ($k eq 'Urgency') { $v =~ m/^([-0-9a-z]+)((\s+.*)?)$/i || $self->_do_parse_error($file, $., __g( "badly formatted urgency value" ), $v); $entry->{'Urgency'} = "$1"; $entry->{'Urgency_LC'} = lc("$1"); $entry->{'Urgency_Comment'} = "$2"; } elsif ($k =~ m/^X[BCS]+-/i) { # Extensions - XB for putting in Binary, # XC for putting in Control, XS for putting in Source $entry->{$k}= $v; } else { $self->_do_parse_error($file, $., __g( "unknown key-value key %s - copying to XS-%s", $k, $k )); $entry->{ExtraFields}{"XS-$k"} = $v; } } $expect= 'start of change data'; $blanklines = 0; } elsif (m/^(;;\s*)?Local variables:/io) { last; # skip Emacs variables at end of file } elsif (m/^vim:/io) { last; # skip vim variables at end of file } elsif (m/^\$\w+:.*\$/o) { next; # skip stuff that look like a CVS keyword } elsif (m/^\# /o) { next; # skip comments, even that's not supported } elsif (m,^/\*.*\*/,o) { next; # more comments } elsif (m/^(\w+\s+\w+\s+\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}\s+[\w\s]*\d{4})\s+(.*)\s+(<|\()(.*)(\)|>)/o || m/^(\w+\s+\w+\s+\d{1,2},?\s*\d{4})\s+(.*)\s+(<|\()(.*)(\)|>)/o || m/^(\w[-+0-9a-z.]*) \(([^\(\) \t]+)\)\;?/io || m/^([\w.+-]+)(-| )(\S+) Debian (\S+)/io || m/^Changes from version (.*) to (.*):/io || m/^Changes for [\w.+-]+-[\w.+-]+:?$/io || m/^Old Changelog:$/io || m/^(?:\d+:)?\w[\w.+~-]*:?$/o) { # save entries on old changelog format verbatim # we assume the rest of the file will be in old format once we # hit it for the first time $self->{oldformat} = "$_\n"; $self->{oldformat} .= join "", <$fh>; } elsif (m/^\S/) { $self->_do_parse_error($file, $., __g( "badly formatted heading line" ), "$_"); } elsif (m/^ \-\- (?.*) <(?.*)>(? ?)(?(?:\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/o) { $expect eq 'more change data or trailer' || $self->_do_parse_error($file, $., __g( "found trailer where expected %s", $expect ), "$_"); if ($+{sep} ne ' ') { $self->_do_parse_error($file, $., __g( "badly formatted trailer line" ), "$_"); } $entry->{'Trailer'} = $_; $entry->{'Maintainer'} = "$+{name} <$+{email}>" unless $entry->{'Maintainer'}; unless($entry->{'Date'} && defined $entry->{'Timestamp'}) { $entry->{'Date'} = "$+{date}"; $entry->{'Timestamp'} = str2time($+{date}); unless (defined $entry->{'Timestamp'}) { $self->_do_parse_error( $file, $., __g( "couldn't parse date %s", "$+{date}" ) ); } } $expect = 'next heading or eof'; } elsif (m/^ \-\-/) { $entry->{ERROR} = [ $file, $., __g( "badly formatted trailer line" ), "$_" ]; $self->_do_parse_error(@{$entry->{ERROR}}); # $expect = 'next heading or eof' # if $expect eq 'more change data or trailer'; } elsif (m/^\s{2,}(\S)/) { $expect eq 'start of change data' || $expect eq 'more change data or trailer' || do { $self->_do_parse_error($file, $., __g( "found change data where expected %s", $expect ), "$_"); if (($expect eq 'next heading or eof') && !$entry->is_empty) { # lets assume we have missed the actual header line $entry->{'Closes'} = find_closes( $entry->{Changes} ); # print STDERR, Dumper($entry); push @{$self->{data}}, $entry; $entry = Parse::DebianChangelog::Entry->init(); $entry->{Source} = $entry->{Distribution} = $entry->{Urgency} = $entry->{Urgency_LC} = 'unknown'; $entry->{Version} = 'unknown'.($unknowncounter++); $entry->{Urgency_Comment} = ''; $entry->{ERROR} = [ $file, $., __g( "found change data where expected %s", $expect ), "$_" ]; } }; $entry->{'Changes'} .= (" \n" x $blanklines)." $_\n"; if (!$entry->{'Items'} || ($1 eq '*')) { $entry->{'Items'} ||= []; push @{$entry->{'Items'}}, "$_\n"; } else { $entry->{'Items'}[-1] .= (" \n" x $blanklines)." $_\n"; } $blanklines = 0; $expect = 'more change data or trailer'; } elsif (!m/\S/) { next if $expect eq 'start of change data' || $expect eq 'next heading or eof'; $expect eq 'more change data or trailer' || $self->_do_parse_error($file, $., __g( "found blank line where expected %s", $expect )); $blanklines++; } else { $self->_do_parse_error($file, $., __g( "unrecognised line" ), "$_"); ($expect eq 'start of change data' || $expect eq 'more change data or trailer') && do { # lets assume change data if we expected it $entry->{'Changes'} .= (" \n" x $blanklines)." $_\n"; if (!$entry->{'Items'}) { $entry->{'Items'} ||= []; push @{$entry->{'Items'}}, "$_\n"; } else { $entry->{'Items'}[-1] .= (" \n" x $blanklines)." $_\n"; } $blanklines = 0; $expect = 'more change data or trailer'; $entry->{ERROR} = [ $file, $., __g( "unrecognised line" ), "$_" ]; }; } } $expect eq 'next heading or eof' || do { $entry->{ERROR} = [ $file, $., __g( "found eof where expected %s", $expect ) ]; $self->_do_parse_error( @{$entry->{ERROR}} ); }; unless ($entry->is_empty) { $entry->{'Closes'} = find_closes( $entry->{Changes} ); push @{$self->{data}}, $entry; } if ($self->{config}{infile}) { close $fh or do { $self->_do_fatal_error( __g( "can't close file %s: %s", $file, $! )); return undef; }; } # use Data::Dumper; # print Dumper( $self ); return $self; } =pod =head3 data C returns an array (if called in list context) or a reference to an array of Parse::DebianChangelog::Entry objects which each represent one entry of the changelog. This is currently merely a placeholder to enable users to get to the raw data, expect changes to this API in the near future. This method supports the common output options described in section L<"COMMON OUTPUT OPTIONS">. =cut sub data { my ($self, $config) = @_; my $data = $self->{data}; if ($config) { $self->{config}{DATA} = $config if $config; $data = $self->_data_range( $config ) or return undef; } return @$data if wantarray; return $data; } sub __sanity_check_range { my ( $data, $from, $to, $since, $until, $start, $end ) = @_; if (($$start || $$end) && ($$from || $$since || $$to || $$until)) { warn( __g( "you can't combine 'count' or 'offset' with any other range option" ) ."\n"); $$from = $$since = $$to = $$until = ''; } if ($$from && $$since) { warn( __g( "you can only specify one of 'from' and 'since'" ) ."\n"); $$from = ''; } if ($$to && $$until) { warn( __g( "you can only specify one of 'to' and 'until'" ) ."\n"); $$to = ''; } if ($$since && ($data->[0]{Version} eq $$since)) { warn( __g( "'since' option specifies most recent version" ) ."\n"); $$since = ''; } if ($$until && ($data->[$#{$data}]{Version} eq $$until)) { warn( __g( "'until' option specifies oldest version" ) ."\n"); $$until = ''; } $$start = 0 if $$start < 0; return if $$start > $#$data; $$end = $#$data if $$end > $#$data; return if $$end < 0; $$end = $$start if $$end < $$start; #TODO: compare versions return 1; } sub _data_range { my ($self, $config) = @_; my $data = $self->data or return undef; return [ @$data ] if $config->{all}; my $since = $config->{since} || ''; my $until = $config->{until} || ''; my $from = $config->{from} || ''; my $to = $config->{to} || ''; my $count = $config->{count} || 0; my $offset = $config->{offset} || 0; return if $offset and not $count; if ($offset > 0) { $offset -= ($count < 0); } elsif ($offset < 0) { $offset = $#$data + ($count > 0) + $offset; } else { $offset = $#$data if $count < 0; } my $start = my $end = $offset; $start += $count+1 if $count < 0; $end += $count-1 if $count > 0; return unless __sanity_check_range( $data, \$from, \$to, \$since, \$until, \$start, \$end ); unless ($from or $to or $since or $until or $start or $end) { return [ @$data ] if $config->{default_all} and not $count; return [ $data->[0] ]; } return [ @{$data}[$start .. $end] ] if $start or $end; my @result; my $include = 1; $include = 0 if $to or $until; foreach (@$data) { my $v = $_->{Version}; $include = 1 if $v eq $to; last if $v eq $since; push @result, $_ if $include; $include = 1 if $v eq $until; last if $v eq $from; } return \@result; } =pod =head3 dpkg (and B) C returns a hash (in list context) or a hash reference (in scalar context) where the keys are field names and the values are field values. The following fields are given: =over 4 =item Source package name (in the first entry) =item Version packages' version (from first entry) =item Distribution target distribution (from first entry) =item Urgency urgency (highest of all printed entries) =item Maintainer person that created the (first) entry =item Date date of the (first) entry =item Closes bugs closed by the entry/entries, sorted by bug number =item Changes content of the the entry/entries =back C returns a stringified version of this hash which should look exactly like the output of L. The fields are ordered like in the list above. Both methods only support the common output options described in section L<"COMMON OUTPUT OPTIONS">. =head3 dpkg_str See L. =cut our ( %FIELDIMPS, %URGENCIES ); BEGIN { my $i=100; grep($FIELDIMPS{$_}=$i--, qw(Source Version Distribution Urgency Maintainer Date Closes Changes)); $i=1; grep($URGENCIES{$_}=$i++, qw(low medium high critical emergency)); } sub dpkg { my ($self, $config) = @_; $self->{config}{DPKG} = $config if $config; $config = $self->{config}{DPKG} || {}; my $data = $self->_data_range( $config ) or return undef; my %f; foreach my $field (qw( Urgency Source Version Distribution Maintainer Date )) { $f{$field} = $data->[0]{$field}; } $f{Changes} = get_dpkg_changes( $data->[0] ); my %closes = map { $_ => 1 } @{$data->[0]{Closes}}; my $first = 1; my $urg_comment = ''; foreach my $entry (@$data) { $first = 0, next if $first; my $oldurg = $f{Urgency} || ''; my $oldurgn = $URGENCIES{$f{Urgency}} || -1; my $newurg = $entry->{Urgency_LC} || ''; my $newurgn = $URGENCIES{$entry->{Urgency_LC}} || -1; $f{Urgency} = ($newurgn > $oldurgn) ? $newurg : $oldurg; $urg_comment .= $entry->{Urgency_Comment}; $f{Changes} .= "\n .".get_dpkg_changes( $entry ); $closes{ $_ } = 1 for @{$entry->{Closes}}; } $f{Closes} = join " ", sort { $a <=> $b } keys %closes; $f{Urgency} .= $urg_comment; return %f if wantarray; return \%f; } sub dpkg_str { return data2rfc822( scalar dpkg(@_), \%FIELDIMPS ); } =pod =head3 rfc822 (and B) C returns an array of hashes (in list context) or a reference to this array (in scalar context) where each hash represents one entry in the changelog. For the format of such a hash see the description of the L<"dpkg"> method (while ignoring the remarks about which values are taken from the first entry). C returns a stringified version of this hash which looks similar to the output of dpkg-parsechangelog but instead of one stanza the output contains one stanza for each entry. Both methods only support the common output options described in section L<"COMMON OUTPUT OPTIONS">. =head3 rfc822_str See L. =cut sub rfc822 { my ($self, $config) = @_; $self->{config}{RFC822} = $config if $config; $config = $self->{config}{RFC822} || {}; my $data = $self->_data_range( $config ) or return undef; my @out_data; foreach my $entry (@$data) { my %f; foreach my $field (qw( Urgency Source Version Distribution Maintainer Date )) { $f{$field} = $entry->{$field}; } $f{Urgency} .= $entry->{Urgency_Comment}; $f{Changes} = get_dpkg_changes( $entry ); $f{Closes} = join " ", sort { $a <=> $b } @{$entry->{Closes}}; push @out_data, \%f; } return @out_data if wantarray; return \@out_data; } sub rfc822_str { return data2rfc822_mult( scalar rfc822(@_), \%FIELDIMPS ); } sub __version2id { my $version = shift; $version =~ s/[^\w.:-]/_/go; return "version$version"; } =pod =head3 xml (and B) C converts the changelog to some free-form (i.e. there is neither a DTD or a schema for it) XML. The method C is an alias for C. Both methods support the common output options described in section L<"COMMON OUTPUT OPTIONS"> and additionally the following configuration options (as usual to give in a hash reference as parameter to the method call): =over 4 =item outfile directly write the output to the file specified =back =head3 xml_str See L. =cut sub xml { my ($self, $config) = @_; $self->{config}{XML} = $config if $config; $config = $self->{config}{XML} || {}; $config->{default_all} = 1 unless exists $config->{all}; my $data = $self->_data_range( $config ) or return undef; my %out_data; $out_data{Entry} = []; require XML::Simple; import XML::Simple qw( :strict ); foreach my $entry (@$data) { my %f; foreach my $field (qw( Urgency Source Version Distribution Closes )) { $f{$field} = $entry->{$field}; } foreach my $field (qw( Maintainer Changes )) { $f{$field} = [ $entry->{$field} ]; } $f{Urgency} .= $entry->{Urgency_Comment}; $f{Date} = { timestamp => $entry->{Timestamp}, content => $entry->{Date} }; push @{$out_data{Entry}}, \%f; } my $xml_str; my %xml_opts = ( SuppressEmpty => 1, KeyAttr => {}, RootName => 'Changelog' ); $xml_str = XMLout( \%out_data, %xml_opts ); if ($config->{outfile}) { open my $fh, '>', $config->{outfile} or return undef; flock $fh, LOCK_EX or return undef; print $fh $xml_str; close $fh or return undef; } return $xml_str; } sub xml_str { return xml(@_); } =pod =head3 html (and B) C converts the changelog to a HTML file with some nice features such as a quick-link bar with direct links to every entry. The HTML is generated with the help of HTML::Template. If you want to change the output you should use the default template provided with this module as a base and read the documentation of HTML::Template to understand how to edit it. The method C is an alias for C. Both methods support the common output options described in section L<"COMMON OUTPUT OPTIONS"> and additionally the following configuration options (as usual to give in a hash reference as parameter to the method call): =over 4 =item outfile directly write the output to the file specified =item template template file to use, defaults to tmpl/default.tmpl, so you most likely want to override that. NOTE: The plan is to provide a configuration file for the module later to be able to use sane defaults here. =item style path to the CSS stylesheet to use (a default might be specified in the template and will be honoured, see the default template for an example) =item print_style path to the CSS stylesheet to use for printing (see the notes for C