postgresql/src/common/unicode/generate-unicode_combining_table.pl
Tom Lane 5cbfce562f Initial pgindent and pgperltidy run for v13.
Includes some manual cleanup of places that pgindent messed up,
most of which weren't per project style anyway.

Notably, it seems some people didn't absorb the style rules of
commit c9d297751, because there were a bunch of new occurrences
of function calls with a newline just after the left paren, all
with faulty expectations about how the rest of the call would get
indented.
2020-05-14 13:06:50 -04:00

53 lines
1.1 KiB
Perl

#!/usr/bin/perl
#
# Generate sorted list of non-overlapping intervals of non-spacing
# characters (Unicode general categories Mn and Me), using Unicode data
# files as input. Only code points up to U+FFFF are considered. Pass
# UnicodeData.txt as argument. The output is on stdout.
#
# Copyright (c) 2019, PostgreSQL Global Development Group
use strict;
use warnings;
# Scan UnicodeData.txt-format lines from the given filehandle and return
# the runs of combining characters as a list of [first, last] array refs.
#
# A run is opened by an entry whose general category (third field) is Mn
# or Me, and is closed by the next entry with any other category.  Code
# points that are simply absent from the input do not interrupt a run, so
# a returned range may span code points the input never mentions.
# Entries above U+FFFF are ignored.  The input is expected to be in
# ascending code point order.
sub combining_ranges
{
    my ($fh) = @_;

    my @ranges;
    my $range_start;
    my $range_end;

    # Read line by line instead of slurping the whole file into a list.
    while (my $line = <$fh>)
    {
        chomp $line;
        my @fields = split ';', $line;

        # Blank or truncated lines carry no category; skip them rather
        # than feeding undefined values to hex() and eq.
        next if @fields < 3;

        my $codepoint = hex $fields[0];
        next if $codepoint > 0xFFFF;

        if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
        {
            # combining character: open a range if needed, and remember
            # the last combining code point seen as the range's end
            $range_start = $codepoint if !defined($range_start);
            $range_end = $codepoint;
        }
        elsif (defined($range_start))
        {
            # not a combining character: close the pending range
            push @ranges, [ $range_start, $range_end ];
            $range_start = undef;
        }
    }

    # A range still open here was never followed by a non-combining entry
    # at or below U+FFFF; emit it instead of silently dropping it.
    push @ranges, [ $range_start, $range_end ] if defined($range_start);

    return @ranges;
}

print
  "/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n";

print "static const struct mbinterval combining[] = {\n";

# \*ARGV reads the files named on the command line (or stdin if none).
foreach my $range (combining_ranges(\*ARGV))
{
    printf "\t{0x%04X, 0x%04X},\n", @$range;
}

print "};\n";