#! /usr/bin/env perl

=begin COPYRIGHT

----------------------------------------------------------------------------

    Copyright (C) 2025 Marc Penninga.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation, either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to
        Free Software Foundation, Inc.,
        59 Temple Place,
        Suite 330,
        Boston, MA 02111-1307,
        USA

----------------------------------------------------------------------------

=end COPYRIGHT

=cut

use strict;
use warnings;

use Encode;
use File::Basename;
use Getopt::Long;
use Pod::Usage;

my $VERSION = "20250909";

parse_commandline();

use constant {
    SIZEOF_OFFSET_TABLE =>  12,
    SIZEOF_TABLE_RECORD =>  16,
};

my $filename_in = $ARGV[0];
my ($basename, undef, undef) = fileparse($filename_in, qw(.ttc .otc));

open my $fh, '<:raw', $filename_in
    or die "[ERROR] cannot open $filename_in: $!";
my $ttc = do { local $/; <$fh> };
close $fh
    or die "[ERROR] something went wrong when closing $filename_in: $!";

# unpack TTC header; we ignore the DSIG fields in version 2.0 headers
my ($ttc_tag, $major_version, $minor_version, $num_fonts)
    = unpack 'A4 nnN', $ttc;
my $ttc_header_version = sprintf "%d.%d", $major_version, $minor_version;
die "$ARGV[0] is not an OpenType Collection file" if $ttc_tag ne 'ttcf';
die "$ARGV[0]: unknown TTC Header Version $ttc_header_version"
    if $ttc_header_version !~ m/\A [12] [.] 0 \z/xms;

my $offsets_template = sprintf "\@%d N%d", SIZEOF_OFFSET_TABLE, $num_fonts;
my @offsets_of_offset_table = unpack $offsets_template, $ttc;

# collect the data for each font and write to separate file
for my $i_font (1..$num_fonts) {
    my $offset_of_offset_table = $offsets_of_offset_table[$i_font - 1];
    my $offset_table
        = substr $ttc, $offset_of_offset_table, SIZEOF_OFFSET_TABLE;

    # unpack relevant fields from Offset Table
    my ($sfnt_version, $num_tables) = unpack 'a4 n', $offset_table;
    my $file_ext = $sfnt_version eq 'OTTO'             ? 'otf'
                 : $sfnt_version eq "\x00\x01\x00\x00" ? 'ttf'
                 :                                       die
                 ;
    my $filename_out = sprintf "%s%d.%s", $basename, $i_font, $file_ext;

    # unpack Table Records and create list of tables
    my $table_records
        = substr $ttc,
                 $offset_of_offset_table + SIZEOF_OFFSET_TABLE,
                 $num_tables * SIZEOF_TABLE_RECORD,
                 ;
    my $table_records_template
        = sprintf "(a%d)%d", SIZEOF_TABLE_RECORD, $num_tables;
    my @table_records = unpack $table_records_template, $table_records;
    for my $table_record (@table_records) {
        my ($table_tag, $checksum, $offset, $length)
            = unpack 'a4 N3', $table_record;
        my $padding = "\x00" x ((4 - ($length % 4)) % 4);
        my $table = substr($ttc, $offset, $length) . $padding;
        $table_record = {
            table_tag   =>  $table_tag,
            checksum    =>  $checksum,
            offset      =>  $offset,
            length      =>  $length,
            table       =>  $table,
        };
    }

    my $file_pos = SIZEOF_OFFSET_TABLE + length $table_records;
    for my $table_record (@table_records) {
        $table_record->{offset} = $file_pos;
        $file_pos += length $table_record->{table};

        if ($table_record->{table_tag} eq 'name') {
            my $postscript_name = get_postscript_name($table_record->{table});
            if ($postscript_name) {
                $filename_out = $postscript_name . q(.) . $file_ext;
            }
        }
    }

    open my $filehandle_out, '>:raw', $filename_out
        or die "[ERROR] Cannot create $filename_out: $!";
    print {$filehandle_out} $offset_table;
    for my $table_record (@table_records) {
        print {$filehandle_out}
            pack 'a4 N3',
                 @{$table_record}{ qw(table_tag checksum offset length) }
            ;
    }
    for my $table_record (@table_records) {
        print {$filehandle_out} $table_record->{table};
    }
    close $filehandle_out
        or die "[ERROR] something went wrong when closing $filename_out: $!";
}


# --------------------------------------------------------------------------
#   Reads the command-line options.
# --------------------------------------------------------------------------
sub parse_commandline {
    Getopt::Long::GetOptions(
        'help|?'            =>  sub { pod2usage(-verbose => 1) },
        'version'           =>  sub { print "$VERSION\n"; exit },
        'output-prefix=s'   => \$ARGV{output_prefix},
    )
    or pod2usage(-verbose => 0);

    pod2usage(-verbose => 0) if @ARGV != 1;

    return;
}


# --------------------------------------------------------------------------
#   Parses the 'name' table and returns the font's PostScript name.
# --------------------------------------------------------------------------
sub get_postscript_name {
    my $name_table = shift;

    my ($version, $count, $string_offset) = unpack '@0n3', $name_table;
    if ($version > 1) {
        # Don't try to parse future versions
        return;
    }

    my @name_records = unpack "\@6(a12)$count", $name_table;
    for my $name_record (@name_records) {
        my ($platform_id, $encoding_id, $language_id,
            $name_id, $length, $offset) = unpack 'n6', $name_record;

        if ($name_id != 6) {
            next;
        }

        my $postscript_data
            = substr $name_table, $string_offset + $offset, $length;
        my $postscript_name;
        if ($platform_id == 3
                and $encoding_id == 1
                and $language_id == 0x409) {
            $postscript_name = Encode::decode('UTF-16BE', $postscript_data);
        }
        elsif ($platform_id == 1
                and $encoding_id == 0
                and $language_id == 0) {
            $postscript_name = Encode::decode('MacRoman', $postscript_data);
        }
        else {
            # According to the 'Recommendations' in the OpenType spec,
            # any PostScript names other than the above two may be ignored.
            next;
        }

        return $postscript_name;
    }

    return;
}


__END__


############################################################################

    To create the documentation:

    pod2man --center="Marc Penninga" --release="fontools" --section=1 \
        splitttc - | groff -Tps -man - | ps2pdf - splitttc.pdf

=pod

=head1 NAME

splitttc - split an OpenType Collection F<ttc> or F<otc> file

=head1 SYNOPSIS

=over 8

=item B<splitttc>
[B<-help>]
[B<-version>]
B<< <ttc-or-otc-file> >>

=back


=head1 DESCRIPTION

B<splitttc> splits an OpenType Collection file.

The OpenType specification allows for multiple fonts to be contained
in the same file. This might be advantageous if fonts share a lot of data,
as this then needs to be stored only once. Probably the best known
example of an OpenType Collection file is Microsoft's Cambria,
where the Regular weight shares a file with the Math font.

Unfortunately some tools (including Eddie Kohler's LCDF TypeTools)
cannot handle such OpenType Collections; they only work for individual fonts.
B<splitttc> takes an OpenType Collection file
and splits it into its constituent parts.


=head1 OPTIONS AND ARGUMENTS

=over 4

=item B<-help>

Print a short description of the syntax and exit.

=item B<-version>

Print version number and exit.

=item B<< <ttc-or-otc-file> >>

The F<ttc> or F<otc> file to be split.

=back

You may use either one or two dashes before options,
and option names may be shortened to a unique prefix.


=head1 OUTPUT

B<splitttc> tries to determine the 'PostScript name' of the resulting fonts
and uses these to name the output files.
If it cannot determine the PostScript names, it uses the basename of the input
font collection file plus a three-digit sequence number:
F<<< I<< <input> >> 001.otf >>>, F<<< I<< <input> >> 002.otf >>> etc.


=head1 AUTHOR

Marc Penninga <marcpenninga@gmail.com>


=head1 COPYRIGHT

Copyright (C) 2019-2025 Marc Penninga.


=head1 LICENSE

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
by the Free Software Foundation, either version 2 of the License,
or (at your option) any later version.
A copy of the GNU General Public License is included with B<splitttc>;
see the file F<GPLv2.txt>.


=head1 DISCLAIMER

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.


=head1 VERSION

This document describes B<splitttc> version 20250909.


=head1 RECENT CHANGES

(See the source code for the rest of the story.)

=over 12

=item I<2020-05-11>

Use the 'PostScript name' to name the output fonts.

=item I<2019-06-25>

First release.

=back

=cut