#!/usr/bin/perl
# $Id: compress_files 271507 2010-12-15 00:42:52Z peroyvind $
# compress man and info pages.

use strict;
use warnings;
use Cwd;
use File::Find;
use File::Basename;
use Fcntl ':mode';

my $ext = $ARGV[0] ||= '.gz';
die "Unknown extension $ext" unless $ext =~ /^\.(?:gz|bz2|lzma|xz)$/;

my $buildroot = $ENV{RPM_BUILD_ROOT};
die "No build root defined" unless $buildroot;
die "Invalid build root" unless -d $buildroot;
# normalize build root
$buildroot =~ s|/$||;

my $exclude_pattern = $ENV{EXCLUDE_FROM_COMPRESS} ?
    qr/$ENV{EXCLUDE_FROM_COMPRESS}/ : undef;

my @sodirs = map { "$buildroot/$_" } qw{
    usr/man
    usr/X11R6/man
    usr/lib/perl5/man
};
my @mandirs = map { "$buildroot/$_" } qw{
    usr/info
    usr/share/info
    usr/man
    usr/share/man
    usr/X11/man
    usr/lib/perl5/man
};

# Convert man pages from old locations just consisting
# of a single include directive to a symlink
my (@sofiles, @sodests);
my $so_function = sub {
    # skip symlinks
    return if -l $_;
    # skip directories
    return if -d $_;
    # The -s test is becuase a .so file tends to be small. We don't want
    # to open every man page. 1024 is arbitrary.
    return if -s $_ > 1024;
    # skip excluded files
    return if $exclude_pattern && $File::Find::name =~ $exclude_pattern;

    # Test first line of file for the .so thing.
    open(my $in, $_);
    my $line = <$in>;
    close ($in);
    if ($line =~ m/\.so\s+(.*)/) {
        my $solink = $1;
        # This test is here to prevent links like ... man8/../man8/foo.8
        if (basename($File::Find::dir) eq dirname($solink)) {
            $solink = basename($solink);
        } else {
            $solink = "../$solink";
        }

        push @sofiles, $File::Find::name;
        push @sodests, $solink;
    }
};
foreach my $dir (@sodirs) {
    find($so_function, $dir) if -e $dir;
}
foreach my $sofile (@sofiles) {
    my $sodest = shift(@sodests);
    unlink $sofile;
    symlink $sodest, $sofile;
}

# find non-compressed info/man pages
my @files;
my $function = sub {
    # skip symlinks
    return if -l $_;
    # skip directories
    return if -d $_;
    # skip excluded files
    return if $exclude_pattern && $File::Find::name =~ $exclude_pattern;
    # skip compressed files
    return if $_ =~ /\.(?:gz|bz2|lzma|xz)$/;
    # skip particular files
    return if $_ eq 'dir' || $_ eq 'whatis';

    push @files, $File::Find::name;
};
foreach my $dir (@mandirs) {
    find($function, $dir) if -e $dir;
}

# uncompress info/man pages using another format
uncompress_files('.gz', 'gzip') if $ext ne '.gz';
uncompress_files('.bz2', 'bzip2') if $ext ne '.bz2';
uncompress_files('.lzma', 'lzma') if $ext ne '.lzma';
uncompress_files('.xz', 'xz') if $ext ne '.xz';

# drop executable bits
foreach my $file (@files) {
    my $mode = (stat($file))[2];
    chmod $mode & ~S_IXUSR & ~S_IXGRP & ~S_IXOTH, $file;
}

if (@files) {

    my @command = $ext eq '.gz'   ? qw/gzip -9f/
                : $ext eq '.bz2'  ? qw/bzip2 -9f/
		# do not change compression level with --text, it will only
		# increase the size for smaller files due to larger dictionary.
                : $ext eq '.lzma' ? qw/lzma -0f --text/
                : $ext eq '.xz'   ? qw/xz -0f --text/
                :                   qw//
                ;
    xargs(\@files, @command);
}

# Fix up symlinks that were pointing to the uncompressed files.
my $link_function = sub {
    # skip everything but symlinks
    return unless -l $_;
    # skip non-dangling symlinks
    my $linkval = readlink($_);
    return if -e "$File::Find::dir/$linkval";

    if (-e "$File::Find::dir/$linkval$ext") {
        unlink $_;
        symlink "$linkval$ext", "$_$ext";
    } elsif ($File::Find::dir =~ m|man/|)  {
        # Bad link go on nowhere (any better idea) ?
        unlink $_;
    }
};
find($link_function, $buildroot);

# Run a command that may have a huge number of arguments, like xargs does.
# Pass in a reference to an array containing the arguments, and then other
# parameters that are the command and any parameters that should be passed to
# it each time.
sub xargs {
    my $args = shift;

    # The kernel can accept command lines up to 20k worth of characters.
    my $command_max = 20000;

    # Figure out length of static portion of command.
    my $static_length = 0;
    foreach (@_) {
        $static_length += length($_) + 1;
    }

    my @collect;
    my $length = $static_length;
    foreach my $arg (@$args) {
        if (length($arg) + 1 + $static_length > $command_max) {
            error(qq(This command is greater than the maximum command size allowed by the kernel, and cannot be split up further. What on earth are you doing? "@_ $_"));
        }
        $length += length($arg) + 1;
        if ($length < $command_max) {
            push @collect, $arg;
        } else {
            system(@_, @collect) if $#collect > -1;
            @collect = $arg;
            $length = $static_length + length($arg) + 1;
        }
    }
    system(@_, @collect) if $#collect > -1;
}

# uncompress info/man pages with a given extension
sub uncompress_files {
    my ($extension, $command) = @_;

    my @compressed_files;

    my $function = sub {
        # skip symlinks
        return if -l $_;
        # skip directories
        return if -d $_;
        # skip excluded files
        return if $exclude_pattern && $File::Find::name =~ $exclude_pattern;
        # skip everything but files with wanted extension
        return if $_ !~ /$extension$/;

        push @compressed_files, $File::Find::name;
    };

    foreach my $dir (@mandirs) {
        find($function, $dir) if -e $dir;
    }

    if (@compressed_files) {
        xargs(\@compressed_files, $command, "-d");
        die "Something wrong with the decompression of the $extension man/info file"
            if $?;
        my $length = length($extension);
        push(@files, map { substr($_, 0, -$length) } @compressed_files);
    }
}
