postgresql/src/tools/git_changelog
Tom Lane 0245f8db36 Pre-beta mechanical code beautification.
Run pgindent, pgperltidy, and reformat-dat-files.

This set of diffs is a bit larger than typical.  We've updated to
pg_bsd_indent 2.1.2, which properly indents variable declarations that
have multi-line initialization expressions (the continuation lines are
now indented one tab stop).  We've also updated to perltidy version
20230309 and changed some of its settings, which reduces its desire to
add whitespace to lines to make assignments etc. line up.  Going
forward, that should make for fewer random-seeming changes to existing
code.

Discussion: https://postgr.es/m/20230428092545.qfb3y5wcu4cm75ur@alvherre.pgsql
2023-05-19 17:24:48 -04:00

419 lines
11 KiB
Perl
Executable file

#!/usr/bin/perl
# Copyright (c) 2021-2023, PostgreSQL Global Development Group
#
# src/tools/git_changelog
#
# Display all commits on active branches, merging together commits from
# different branches that occur close together in time and with identical
# log messages.
#
# By default, commits are annotated with branch and release info thus:
# Branch: REL8_3_STABLE Release: REL8_3_2 [92c3a8004] 2008-03-29 00:15:37 +0000
# This shows that the commit on REL8_3_STABLE was released in 8.3.2.
# Commits on master will usually instead have notes like
# Branch: master Release: REL8_4_BR [6fc9d4272] 2008-03-29 00:15:28 +0000
# showing that this commit is ancestral to release branches 8.4 and later.
# If no Release: marker appears, the commit hasn't yet made it into any
# release.
#
# The --brief option shortens that to a format like:
# YYYY-MM-DD [hash] abbreviated commit subject line
# Since the branch isn't shown, this is mainly useful in conjunction
# with --master-only.
#
# Most of the time, matchable commits occur in the same order on all branches,
# and we print them out in that order. However, if commit A occurs before
# commit B on branch X and commit B occurs before commit A on branch Y, then
# there's no ordering which is consistent with both branches. In such cases
# we sort a merged commit according to its timestamp on the newest branch
# it appears in.
#
# The default output of this script is meant for generating minor release
# notes, where we need to know which branches a merged commit affects.
#
# To generate major release notes, use:
# git_changelog --master-only --brief --oldest-first --since='start-date'
# To find the appropriate start date, use:
# git show --summary $(git merge-base REL_12_STABLE master)
# where the branch to mention is the previously forked-off branch. This
# shows the last commit before that branch was made.
#
# Note that --master-only is an imperfect filter, since it will not detect
# cases where a master patch was back-patched a while later or with a slightly
# different commit message. To find such cases, it's a good idea to look
# through the output of
# git_changelog --non-master-only --oldest-first --since='start-date'
# and then remove anything from the --master-only output that would be
# duplicative.
use strict;
use warnings;
require Time::Local;
require Getopt::Long;
require IPC::Open2;
# The branches this script reports on.  Update the list whenever the set of
# interesting branches changes.  (It could be derived from "git branch", but
# that's not worth the trouble.)
# NB: master must be the first entry!
my @BRANCHES = qw(
  master
  REL_15_STABLE REL_14_STABLE REL_13_STABLE
  REL_12_STABLE REL_11_STABLE REL_10_STABLE REL9_6_STABLE REL9_5_STABLE
  REL9_4_STABLE REL9_3_STABLE REL9_2_STABLE REL9_1_STABLE REL9_0_STABLE
  REL8_4_STABLE REL8_3_STABLE REL8_2_STABLE REL8_1_STABLE REL8_0_STABLE
  REL7_4_STABLE REL7_3_STABLE REL7_2_STABLE REL7_1_STABLE REL7_0_PATCHES
  REL6_5_PATCHES REL6_4
);

# Two commits are only merged into one entry when their timestamps are
# less than this many seconds apart.  Might want to make this parameter
# user-settable.
my $timestamp_slop = 24 * 60 * 60;

# Command-line switches; all of them are off unless requested.
my ($brief, $details_after, $post_date) = (0, 0, 0);
my ($master_only, $non_master_only, $oldest_first) = (0, 0, 0);
my $since;

# With --oldest-first, output is accumulated here and printed at the end:
# $output_line collects the text of the entry being built, @output_buffer
# holds the finished entries.
my @output_buffer;
my $output_line = '';

Getopt::Long::GetOptions(
	'brief' => \$brief,
	'details-after' => \$details_after,
	'master-only' => \$master_only,
	'non-master-only' => \$non_master_only,
	'post-date' => \$post_date,
	'oldest-first' => \$oldest_first,
	'since=s' => \$since
) or usage();

# No non-option arguments are accepted.
usage() if @ARGV;

# Base "git log" command line shared by all branches.
my @git = qw(git log --format=fuller --date=iso);
push @git, "--since=$since" if defined $since;
# Collect the release tag data: %rel_tags maps the hash of each tagged
# commit to the name of its release tag.
my %rel_tags;

{
	my $cmd = "git for-each-ref refs/tags";
	my $pid = IPC::Open2::open2(my $git_out, my $git_in, $cmd)
	  || die "can't run $cmd: $!";
	while (my $line = <$git_out>)
	{
		# Only lines of the form "<hash> commit refs/tags/<tag>" matter.
		next
		  unless $line =~ m|^([a-f0-9]+)\s+commit\s+refs/tags/(\S+)|;
		my ($commit, $tag) = ($1, $2);

		# Keep only tags that look like release tags, e.g. REL_15_3,
		# REL9_6 or REL9_6_24.
		if (   $tag =~ /^REL_\d+_\d+$/
			|| $tag =~ /^REL\d+_\d+$/
			|| $tag =~ /^REL\d+_\d+_\d+$/)
		{
			$rel_tags{$commit} = $tag;
		}
	}
	waitpid($pid, 0);

	# Test the complete wait status, not just the exit code in the high
	# byte: a child that was killed by a signal has exit code 0 there and
	# would otherwise be mistaken for a success.
	die "$cmd failed" if $? != 0;
}
# Collect the commit data.  %all_commits maps a commit's author/message
# key to the list of merged entries sharing that key;
# %all_commits_by_branch maps a branch name to its merged entries in
# "git log" order.  Both are filled in by push_commit().
my %all_commits;
my %all_commits_by_branch;

# This remembers where each branch sprouted from master.  Note the values
# will be wrong if --since terminates the log listing before the branch
# sprouts; but in that case it doesn't matter since we also won't reach
# the part of master where it would matter.
my %sprout_tags;

for my $branch (@BRANCHES)
{
	my @cmd = @git;
	if ($branch eq "master")
	{
		push @cmd, "origin/$branch";
	}
	else
	{
		# For back branches, list only the commits not reachable from
		# master, and ask git to append parent hashes to "commit" lines.
		push @cmd, "--parents";
		push @cmd, "master..origin/$branch";
	}
	my $pid = IPC::Open2::open2(my $git_out, my $git_in, @cmd)
	  || die "can't run @cmd: $!";

	# Most recent release tag seen so far while reading this branch's log.
	my $last_tag = undef;
	# First parent listed on the most recently read "commit" line, if any.
	my $last_parent;
	# The commit currently being assembled from the log output.
	my %commit;

	while (my $line = <$git_out>)
	{
		if ($line =~ /^commit\s+(\S+)(?:\s+(\S+))?/)
		{
			# Save the captures right away: push_commit() performs regex
			# matches of its own, so don't depend on $1/$2 afterwards.
			my ($hash, $parent) = ($1, $2);

			# A new "commit" line ends the previous commit, if there is one.
			push_commit(\%commit) if %commit;
			$last_tag = $rel_tags{$hash} if defined $rel_tags{$hash};
			%commit = (
				'branch' => $branch,
				'commit' => $hash,
				'last_tag' => $last_tag,
				'message' => '',);

			# undef when the line carries no parent hash (as for master,
			# which is listed without --parents).
			$last_parent = $parent;
		}
		elsif ($line =~ /^Author:\s+(.*)/)
		{
			$commit{'author'} = $1;
		}
		elsif ($line =~ /^CommitDate:\s+(.*)/)
		{
			$commit{'date'} = $1;
		}
		elsif ($line =~ /^\s\s/)
		{
			# Lines starting with at least two whitespace characters are
			# accumulated as the commit message.
			$commit{'message'} .= $line;
		}
	}
	push_commit(\%commit) if %commit;

	# The parent of the last commit listed for the branch is recorded as
	# the point where the branch sprouted.
	$sprout_tags{$last_parent} = $branch if defined $last_parent;
	waitpid($pid, 0);

	# Test the complete wait status, not just the exit code in the high
	# byte: a child that was killed by a signal has exit code 0 there and
	# would otherwise be mistaken for a success.
	die "@cmd failed" if $? != 0;
}
# Walk the master branch and assign each commit its tag.  The back
# branches were tagged while their logs were read, but master has to wait
# until the sprout_tags data is complete.  In post-date mode we also add
# phony entries for the branches that sprouted after a given master commit
# was made.
{
	my $current_tag;
	my %sprouted_branches;

	for my $entry (@{ $all_commits_by_branch{'master'} })
	{
		my $hash = $entry->{'commit'};
		my $master_commit = $entry->{'commits'}->[0];
		my $sprout = $sprout_tags{$hash};

		$current_tag = $rel_tags{$hash} if defined $rel_tags{$hash};
		if (defined $sprout)
		{
			# A branch sprouted here: derive the sprout tag from the
			# branch name by normalizing it to REL_nn_BR / RELn_n_BR.
			($current_tag = $sprout) =~ s/^(REL_\d+).*/$1_BR/;
			$current_tag =~ s/^(REL\d+_\d+).*/$1_BR/;

			# The set of sprouted branches is only consulted in
			# post-date mode.
			$sprouted_branches{$sprout} = 1 if $post_date;
		}
		$master_commit->{'last_tag'} = $current_tag;

		next unless $post_date;

		# Insert one copy of the master commit per sprouted branch,
		# placed between the master commit and any other commits.
		my ($first, @rest) = @{ $entry->{'commits'} };
		my @phony =
		  map { +{ %{$master_commit}, 'branch' => $_ } }
		  reverse sort keys %sprouted_branches;
		$entry->{'commits'} = [ $first, @phony, @rest ];
	}
}
# Merge the per-branch commit lists and print them.  $position{branch} is
# the index of the next not-yet-processed entry of that branch's list.
my %position = map { $_ => 0 } @BRANCHES;

while (1)
{
	# Pick the branch whose next entry has the largest timestamp; when
	# timestamps are equal the branch listed earlier in @BRANCHES wins.
	my ($best_branch, $best_timestamp);
	foreach my $br (@BRANCHES)
	{
		my $head = $all_commits_by_branch{$br}->[ $position{$br} ];
		next unless defined $head;
		next
		  if defined $best_branch
		  && $head->{'timestamp'} <= $best_timestamp;
		($best_branch, $best_timestamp) = ($br, $head->{'timestamp'});
	}

	# All branches exhausted?
	last unless defined $best_branch;

	my $winner =
	  $all_commits_by_branch{$best_branch}->[ $position{$best_branch} ];

	# Decide whether the selected entry passes the branch filters.
	my $print_it;
	if ($master_only)
	{
		# Wanted only if master is the sole branch carrying the commit.
		$print_it = (@{ $winner->{'commits'} } == 1)
		  && ($winner->{'commits'}[0]->{'branch'} eq 'master');
	}
	elsif ($non_master_only)
	{
		# Wanted only if no member commit is on master.
		my $on_master =
		  grep { $_->{'branch'} eq 'master' } @{ $winner->{'commits'} };
		$print_it = !$on_master;
	}
	else
	{
		$print_it = 1;
	}

	if ($print_it)
	{
		if ($details_after)
		{
			output_str("%s", $winner->{'message'} . "\n");
			output_details($winner);
		}
		else
		{
			output_details($winner);
			output_str("%s", $winner->{'message'} . "\n");
		}

		# In oldest-first mode the finished entry is put at the front of
		# the buffer, reversing the overall order.
		unshift(@output_buffer, $output_line) if $oldest_first;
		$output_line = '';
	}

	# Mark the entry as processed, then step every branch past any
	# entries at its current position that are already done.
	$winner->{'done'} = 1;
	foreach my $br (@BRANCHES)
	{
		while (1)
		{
			my $head = $all_commits_by_branch{$br}->[ $position{$br} ];
			last unless defined $head && $head->{'done'};
			++$position{$br};
		}
	}
}

print @output_buffer if $oldest_first;
# Record one commit, given as a hash ref with the keys 'branch', 'commit',
# 'date', 'last_tag', 'author' and 'message'.  The commit is attached to an
# existing merged entry with the same author and message if a suitable one
# exists, otherwise a new merged entry is created for it.
sub push_commit
{
	my ($c) = @_;
	my $key = hash_commit($c);
	my $when = parse_datetime($c->{'date'});
	my $branch = $c->{'branch'};

	# Two commits on the same branch are never merged, even if they have
	# the same key (author/message) and nearby timestamps.  Hence several
	# entries may qualify when a commit from another branch arrives; take
	# the one closest in time (the earliest such entry on a tie).
	my ($match, $match_gap);
	foreach my $cand (@{ $all_commits{$key} })
	{
		my $gap = abs($when - $cand->{'timestamp'});
		next if $gap >= $timestamp_slop;
		next if exists $cand->{'branch_position'}{$branch};
		next if defined $match && $gap >= $match_gap;
		($match, $match_gap) = ($cand, $gap);
	}

	# Nothing suitable: start a new merged entry.
	if (!defined $match)
	{
		$match = {
			'author' => $c->{'author'},
			'message' => $c->{'message'},
			'commit' => $c->{'commit'},
			'commits' => [],
			'timestamp' => $when
		};
		push @{ $all_commits{$key} }, $match;
	}

	# Keep only the per-commit fields that are needed later.
	push @{ $match->{'commits'} },
	  {
		'branch' => $branch,
		'commit' => $c->{'commit'},
		'date' => $c->{'date'},
		'last_tag' => $c->{'last_tag'}
	  };

	# Append the entry to the branch's list and remember its index there.
	push @{ $all_commits_by_branch{$branch} }, $match;
	$match->{'branch_position'}{$branch} =
	  $#{ $all_commits_by_branch{$branch} };
	return;
}
# Build the key used to recognize matching commits: the author and the
# message of the given commit (a hash ref), separated by a NUL character.
sub hash_commit
{
	my ($c) = @_;
	return join("\0", @{$c}{ 'author', 'message' });
}
# Convert a date string of the form "YYYY-MM-DD HH:MM:SS +ZZZZ" (the layout
# this script requests from git with --date=iso) into seconds since the
# epoch.
#
# Dies if the string is undefined or does not have that layout; without
# that check, the capture variables left over from some earlier successful
# match would silently be used to build a bogus timestamp.
sub parse_datetime
{
	my ($dt) = @_;
	die "parse_datetime: no date given\n" if !defined $dt;

	# A failed match returns the empty list in list context, leaving all
	# of these variables undefined.
	my ($year, $mon, $mday, $hour, $min, $sec, $sign, $tz_hh, $tz_mm) =
	  $dt =~
	  /^(\d\d\d\d)-(\d\d)-(\d\d)\s+(\d\d):(\d\d):(\d\d)\s+([-+])(\d\d)(\d\d)$/;
	die "parse_datetime: unrecognized date format: \"$dt\"\n"
	  if !defined $year;

	# Treat the broken-down time as if it were UTC ...
	my $gm = Time::Local::timegm($sec, $min, $hour, $mday, $mon - 1, $year);

	# ... then remove the zone offset: a zone east of UTC (+) is ahead of
	# UTC, so its offset is subtracted, and vice versa.
	my $tzoffset = ($tz_hh * 60 + $tz_mm) * 60;
	$tzoffset = -$tzoffset if $sign eq '-';
	return $gm - $tzoffset;
}
# Emit printf-style formatted text.  Arguments are a format string followed
# by its values.  In oldest-first mode the text is appended to $output_line
# instead of being printed, so that the caller can reorder the output.
sub output_str
{
	my ($format, @args) = @_;
	if ($oldest_first)
	{
		$output_line .= sprintf($format, @args);
	}
	else
	{
		printf($format, @args);
	}
	return;
}
# Output the author line and the per-commit detail lines of one merged
# entry (a hash ref with the keys 'author', 'message' and 'commits'),
# followed by an empty line.
sub output_details
{
	my $item = shift;

	if ($details_after)
	{
		# output only author name, not email address.  If the author
		# string does not end in "<...>", fall back to printing it as-is
		# instead of using whatever $1 holds from an earlier match.
		my $name = $item->{'author'};
		$name = $1 if $name =~ m{^(.*?)\s*<[^>]*>$};
		output_str("(%s)\n", $name);
	}
	else
	{
		output_str("Author: %s\n", $item->{'author'});
	}

	# In brief mode every line shows the first line of the commit message
	# without leading whitespace; it is the same for all commits of the
	# entry, so extract it just once.
	my $subject = '';
	if ($brief && $item->{'message'} =~ m/^\s*(.*)/)
	{
		$subject = $1;
	}

	foreach my $c (@{ $item->{'commits'} })
	{
		if ($brief)
		{
			# date without time, abbreviated hash, truncated subject
			output_str(
				"%s [%s] %s\n",
				substr($c->{'date'}, 0, 10),
				substr($c->{'commit'}, 0, 9),
				substr($subject, 0, 56));
		}
		else
		{
			# The branch name is omitted in master-only mode.
			output_str("Branch: %s ", $c->{'branch'})
			  if (!$master_only);
			output_str("Release: %s ", $c->{'last_tag'})
			  if (defined $c->{'last_tag'});
			output_str("[%s] %s\n", substr($c->{'commit'}, 0, 9),
				$c->{'date'});
		}
	}
	output_str("\n");
	return;
}
# Print the command-line synopsis on standard error and exit with status 1.
sub usage
{
	my $help = <<'EOM';
Usage: git_changelog [--brief/-b] [--details-after/-d] [--master-only/-m] [--non-master-only/-n] [--oldest-first/-o] [--post-date/-p] [--since=SINCE]
--brief Shorten commit descriptions, omitting branch identification
--details-after Show branch and author info after the commit description
--master-only Show only commits made just in the master branch
--non-master-only Show only commits made just in back branches
--oldest-first Show oldest commits first
--post-date Show branches made after a commit occurred
--since Show only commits dated since SINCE
EOM
	print STDERR $help;
	exit 1;
}