#!/usr/bin/perl

# Copyright (C) 2010 Modestas Vainius <modax@debian.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>

use strict;
use warnings;

use Debian::PkgKde;
use Getopt::Long;
use File::Copy qw();
use File::Temp qw();
use File::Spec;
use IO::Uncompress::Inflate qw(inflate);

# Load extra modules (from libwww-perl and its dependencies)
INIT {
    eval "use HTTP::Request";
    if ($@) {
	error "in order to use this utility, you have to install libwww-perl package";
    }
    eval "use URI; use URI::QueryParam; use URI::Escape;";
    eval "use HTTP::Response; use LWP::UserAgent; use HTML::LinkExtor; use HTML::Parser;";
}

sub usage {
    usageerr "[ -d destdir ] [ -v version ] [ -a arch ] [ -o ] [ package ] [ distribution ]";
}

sub construct_url {
    my ($path, %params) = @_;

    my $url = URI->new($path);
    foreach my $param (keys %params) {
	my @value = (ref $params{$param} eq "ARRAY") ?
	    @{$params{$param}} : ( $params{$param} );
	$url->query_param_append($param, @value);
    }
    return $url->as_string();
}

sub as_array {
    my $scalar = shift;
    my @ret;
    if (defined $scalar) {
	if (ref($scalar) eq 'ARRAY') {
	    @ret = @$scalar;
	} else {
	    push @ret, $scalar;
	}
    }
    return @ret;
}

sub get_command_output {
    my @lines;
    open(my $cmd, "-|", @_) or syserr("unable to execute command %s", $_[0]);
    while (<$cmd>) {
	chop;
	push @lines, $_;
    }
    close $cmd;
    return @lines;
}

sub get_rfc822_field_value {
    my ($field, $input) = @_;
    foreach my $line (@$input) {
	if ($line =~ /^\Q$field\E:\s*(.*)$/) {
	    return "$1"
	}
    }
}

sub html2text {
    my ($in, $out) = @_;
    my $body;
    my $parser = HTML::Parser->new( api_version => 3,
	start_h => [ sub { if (shift() eq "body") { $body = 1 } }, "tagname" ],
	end_h   => [ sub { if (shift() eq "body") { $body = 0 } }, "tagname" ],
	text_h  => [ sub { if ($body) { print $out shift(); } }, "dtext" ]
    );
    $parser->ignore_elements("head", "a", "img");
    return defined($parser->parse_file($in)) && defined($body);
}

sub download_logs {
    my ($destdir, $pkg, %opts) = @_;
    my $distro = $opts{distro};
    my $url;

    # Construct index URL
    if (defined $distro) {
        $url = construct_url('https://buildd.debian.org/pkg.cgi',
	    pkg => $pkg, dist => $distro, arch => [ as_array($opts{arch}) ]);
    } elsif (defined $opts{ver}) {
        $url = construct_url('https://buildd.debian.org/build.cgi',
	    pkg => $pkg, ver => [ as_array($opts{ver}) ],
	    arch => [ as_array($opts{arch}) ])
    } else {
	error "neither version(s) nor distribution was specified";
    }

    # Download index document and extract links
    info "Downloading build log index from $url ...";
    my $browser = LWP::UserAgent->new(
	agent => get_program_name(),
	timeout => 10,
	keep_alive => 1,
	env_proxy => 1,
    );
    my $request = HTTP::Request->new(GET => $url);
    if (my $response = $browser->request($request)) {
	error "unable to access log index at URL $url: ".$response->status_line
	    unless $response->is_success();
	my $linkextor = HTML::LinkExtor->new(undef, "https://buildd.debian.org/");
	$linkextor->parse($response->content());
	if (my @links = grep { $_->[0] eq "a" } $linkextor->links()) {
	    @links = map { shift @{$_}; +{ @{$_} }->{href} } @links;
	    my @ok;
	    my @failed;
	    foreach my $link (@links) {
		# Check if it is the link we need
		if ($link =~ m,/fetch\.cgi(\?[^/]+)$,) {
		    my ($ok, @status);
		    my $filename = $1 . ".build";
		    $filename =~ s/[?;&][^=]+=([^?;&]+)/_$1/g;
		    $filename =~ s/^_\.*//;
		    $filename = uri_unescape($filename);
		    my $file = File::Spec->catfile($destdir, $filename);

		    if ($opts{overwrite} || ! -e $file) {
			# Create a temporary file
			my $tmpfile1 = File::Temp->new(TEMPLATE => $filename . ".XXXXXX",
			    DIR => $destdir);
			my $tmpfile2 = File::Temp->new(TEMPLATE => $filename . ".XXXXXX",
			    DIR => $destdir);

			info "Fetching build log to $filename ...";
			$request = HTTP::Request->new(GET => $link);
			$request->header("Accept-Encoding" => "deflate, identity");
			$browser->show_progress(1);
			$tmpfile1->close();
			$response = $browser->request($request, $tmpfile1->filename);
			if ($response->is_success()) {
			    my $is_deflated = $response->header("Content-Encoding");
			    $is_deflated = defined $is_deflated && $is_deflated eq "deflate";
			    # Inflate contents if needed
			    if ($is_deflated) {
				push @status, "deflate";
				if (inflate($tmpfile1->filename => $tmpfile2, BinModeOut => 1)) {
				    $tmpfile2->close();
				    ($tmpfile1, $tmpfile2) = ($tmpfile2, $tmpfile1);
				    open($tmpfile2, ">:utf8", $tmpfile2->filename) or
					syserr "unable to reopen temporary file";
				    $ok = 1;
				} else {
				    unlink $filename;
				}
			    } else {
				$ok = 1;
			    }
			    if ($ok) {
				open($tmpfile1, "<:utf8", $tmpfile1->filename);
				if ($ok = html2text($tmpfile1 => $tmpfile2)) {
				    $tmpfile1->close();
				    $tmpfile1 = $tmpfile2;
				} else {
				    push @status, "html unstripped";
				}
				$tmpfile1->close();
				$tmpfile2->close();
				File::Copy::move($tmpfile1->filename, $file) or
				    error "unable to rename '%s' to '%s'", $tmpfile1->filename, $file;
			    }
			}
		    } else {
			info "Not overwriting existing build log $filename ...";
			push @status, "exists, ignored";
		    }
		    if ($ok) {
			push @ok, [ $filename, @status ];
		    } else {
			push @failed, [ $filename, @status ];
		    }
		}
	    }
	    return (@ok || @failed) ?  (\@ok, \@failed) : ();
	}
	return ();
    } else {
	error "unable to access log index URL $url";
    }
}

sub print_summary {
    my $logs = shift;
    my $is_warning = shift;
    my $msg = shift;
    if (@$logs) {
	info $msg, @_ unless $is_warning;
	warning $msg, @_ if $is_warning;
	foreach my $log_info (@$logs) {
	    my ($filename, @info) = @$log_info;
	    if (@info) {
		printmsg "  - %s [%s]", $filename, join(", ", @info);
	    } else {
		printmsg "  - %s", $filename;
	    }
	}
    }
}

my $opt_destdir;
my @opt_versions;
my @opt_archs;
my $opt_force;

# Get and verify options
unless (GetOptions(
	"destdir|d=s" => \$opt_destdir,
	"version|v=s" => \@opt_versions,
	"arch|a=s" => \@opt_archs,
	"force|f!" => \$opt_force))
{
    usage();
}

my ($opt_package, $opt_distro) = @ARGV;
my @dpkg_parsechangelog;

if (!$opt_package && -f "debian/changelog") {
    @dpkg_parsechangelog = get_command_output("dpkg-parsechangelog");
    $opt_package = get_rfc822_field_value("Source", \@dpkg_parsechangelog);
}

if (!$opt_package) {
    errormsg "source package was not specified and could not be autoguessed";
    usage();
}

if ($opt_distro && @opt_versions) {
    errormsg "version and distribution options are mutually exclusive";
    usage();
}

if (!@opt_versions) {
    if (!$opt_distro && -f "debian/changelog") {
	@dpkg_parsechangelog = get_command_output("dpkg-parsechangelog") unless @dpkg_parsechangelog;
	$opt_distro = get_rfc822_field_value("Distribution", \@dpkg_parsechangelog);
	if ($opt_distro eq "UNRELEASED") {
	    # Get distro from the next to current entry
	    $opt_distro = get_rfc822_field_value("Distribution",
		[ get_command_output("dpkg-parsechangelog", "-c1", "-o1") ]);
	}
    }
    if (!$opt_distro) {
	errormsg "neither distribution nor version(s) was specified and could not be autoguessed";
	usage();
    }
}

# Determine destination directory to store logs
unless ($opt_destdir) {
    $opt_destdir = sprintf("%s_%s_logs", $opt_package,
	($opt_distro) ? $opt_distro : $opt_versions[0]);
}

info("Selected output directory for logs: %s/", $opt_destdir);
unless (-d $opt_destdir) {
    mkdir $opt_destdir;
}

my ($ok_logs, $failed_logs) =
    download_logs($opt_destdir, $opt_package, overwrite => $opt_force,
	distro => $opt_distro, ver => \@opt_versions, arch => \@opt_archs);

if (defined $ok_logs) {
    print_summary $ok_logs, 0, "Successfully downloaded build logs (stored to %s):", $opt_destdir;
    print_summary $failed_logs, 1, "Failed to fetch/ignored the following build logs:";
} else {
    error "no build logs referenced in the build log index";
}

END {
    rmdir $opt_destdir if $opt_destdir && $opt_destdir ne ".";
}

exit 0

# vim: noexpandtab tabstop=8 shiftwidth=4
