#!/usr/bin/perl -w

=pod

=head1 NAME

tv_grab_es_digital - Grab TV listings for Spain (Digital+).

=head1 SYNOPSIS

tv_grab_es_digital --help

tv_grab_es_digital [--config-file FILE] --configure

tv_grab_es_digital [--config-file FILE] [--output FILE] [--days N]
           [--offset N] [--quiet]

tv_grab_es_digital --list-channels

=head1 DESCRIPTION

Output TV listings for several channels available in Spain (Digital+).
Currently those channels are available either at Hispasat and Astra on DVB 
format.
The listings are currently coming directly from Digital+ (cplus.es)
The grabber relies on parsing HTML so it might stop working at any time.

First run B<tv_grab_es_digital --configure> to choose, which channels you want
to download. Then running B<tv_grab_es_digital> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_es_digital.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--output FILE> write to FILE rather than standard output.

B<--days N> grab N days.  The default is 3.

B<--offset N> start N days in the future.  The default is to start
from today.

B<--quiet> suppress the progress messages normally written to standard
error.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

Ramon Roca, Ramon.Roca@XCombo.com, based on tv_grab_es

=head1 BUGS

=cut

# Author's TODOs & thoughts
#
# get the icons of each grabbed channel from the website
#
# findout how to setup properly the language, (catalan, basque, galician, vo)
#
# get also the program descriptions. to make it faster, and even more complete
#       this can be accomplished by merging pdf files provided at the 
#       site. That will be also smarter for the website since it will 
#	avoid numerous gets and make run the grabber much faster.
#       
#
# get channel ids in RFC2838 format (I don't, actually the Id comes directly
# 	web site, i don't know where to go for getting th id's for spanish
#	tv broadcasters.
#
# do the listings for cable digital TV. Good source for it looks to me 
#       www.mediapark.es
# 
#


######################################################################
# initializations

use strict;
use XMLTV::Version '$Id: tv_grab_es_digital,v 1.3 2004/04/10 22:00:30 epaepa Exp $ ';
use Getopt::Long;
use Date::Manip;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use IO::File;

use XMLTV;
use XMLTV::Memoize;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Get_nice;
use XMLTV::Mode;
# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get Spanish (Digital+) television listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
        [--offset N] [--quiet]
To list channels: $0 --list-channels
END
  ;

# Use Term::ProgressBar if installed.
use constant Have_bar => eval { require Term::ProgressBar; 1 };

# Attributes of the root element in output.
my $HEAD = { 'source-info-url'     => 'http://http://www.plus.es/codigo/television/rejilla_dia_dia.asp',
	     'source-data-url'     => "http://http://www.plus.es/codigo/television/rejilla_dia_dia.asp",
	     'generator-info-name' => 'XMLTV',
	     'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
	   };
		   
# Whether zero-length programmes should be included in the output.
my $WRITE_ZERO_LENGTH = 0;

# default language
my $LANG="es";

# Global channel_data
our @ch_all;

######################################################################
# get options

# Get options, including undocumented --cache option.
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
my ($opt_days, $opt_offset, $opt_help, $opt_output,
    $opt_configure, $opt_config_file, $opt_quiet,
    $opt_list_channels);
$opt_days  = 3; # default
$opt_offset = 0; # default
$opt_quiet  = 0; # default
GetOptions('days=i'        => \$opt_days,
	   'offset=i'      => \$opt_offset,
	   'help'          => \$opt_help,
	   'configure'     => \$opt_configure,
	   'config-file=s' => \$opt_config_file,
	   'output=s'      => \$opt_output,
	   'quiet'         => \$opt_quiet,
	   'list-channels' => \$opt_list_channels
	  )
  or usage(0);
die 'number of days must not be negative'
  if (defined $opt_days && $opt_days < 0);
usage(1) if $opt_help;

my $mode = XMLTV::Mode::mode('grab', # default
			     $opt_configure => 'configure',
			     $opt_list_channels => 'list-channels',
			    );

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_es_digital', $opt_quiet);

my @config_lines; # used only in grab mode
if ($mode eq 'configure') {
    XMLTV::Config_file::check_no_overwrite($config_file);
}
elsif ($mode eq 'grab') {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}
elsif ($mode eq 'list-channels') {
    # Config file not used.
}
else { die }

# Whatever we are doing, we need the channels data.
my %channels = get_channels(); # sets @ch_all
my @channels;

######################################################################
# write configuration

if ($mode eq 'configure') {
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";

    # Ask about each channel.
    my @chs = sort keys %channels;
    my @names = map { $channels{$_} } @chs;
    my @qs = map { "add channel $_?" } @names;
    my @want = askManyBooleanQuestions(1, @qs);
    foreach (@chs) {
	my $w = shift @want;
	warn("cannot read input, stopping channel questions"), last
	  if not defined $w;
	# No need to print to user - XMLTV::Ask is verbose enough.

	# Print a config line, but comment it out if channel not wanted.
	print CONF '#' if not $w;
	my $name = shift @names;
	print CONF "channel $_ $name\n";
	# TODO don't store display-name in config file.
    }

    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");

    exit();
}


# Not configuration, we must be writing something, either full
# listings or just channels.
#
die if $mode ne 'grab' and $mode ne 'list-channels';

# Options to be used for XMLTV::Writer.
my %w_args;
if (defined $opt_output) {
    my $fh = new IO::File(">$opt_output");
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'ISO-8859-1';
my $writer = new XMLTV::Writer(%w_args);
$writer->start($HEAD);

if ($mode eq 'list-channels') {
    $writer->write_channel($_) foreach @ch_all;
    $writer->end();
    exit();
}

######################################################################
# We are producing full listings.
die if $mode ne 'grab';

# Read configuration
my $line_num = 1;
foreach (@config_lines) {
    ++ $line_num;
    next if not defined;
    if (/^channel:?\s+(\S+)\s+([^\#]+)/) {
	my $ch_did = $1;
	my $ch_name = $2;
	$ch_name =~ s/\s*$//;
	push @channels, $ch_did;
	$channels{$ch_did} = $ch_name;
    }
    else {
	warn "$config_file:$line_num: bad line\n";
    }
}

######################################################################
# begin main program

# Assume the listings source uses CET (see BUGS above).
my $now = DateCalc(ParseDate('now'), "$opt_offset days");
die "No channels specified, run me with --configure\n"
  if not keys %channels;
my @to_get;


# the order in which we fetch the channels matters
foreach my $ch_did (@channels) {
    my $ch_name=$channels{$ch_did};
    my $ch_xid="$ch_did.cplus.es";
    $writer->write_channel({ id => $ch_xid,
		    		'display-name' => [ [ $ch_name ] ] });
    my $day=UnixDate($now,'%Q');
    for (my $i=0;$i<$opt_days;$i++) {
        push @to_get, [ $day, $ch_xid, $ch_did ];
        #for each day
        $day=nextday($day); die if not defined $day;
    }
}

# This progress bar is for both downloading and parsing.  Maybe
# they could be separate.
#
my $bar = new Term::ProgressBar('getting listings', scalar @to_get)
  if Have_bar && not $opt_quiet;
foreach (@to_get) {
	foreach (process_table($_->[0], $_->[1], $_->[2])) {
		$writer->write_programme($_);
	}
	update $bar if Have_bar && not $opt_quiet;
}
$writer->end();

######################################################################
# subroutine definitions

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
	*t = sub {};
	*d = sub { '' };
    }
    else {
	*t = \&Log::TraceMessages::t;
	*d = \&Log::TraceMessages::d;
	Log::TraceMessages::check_argv();
    }
}

####
# process_table: fetch a URL and process it
#
# arguments:
#    Date::Manip object giving the day to grab
#    xmltv id of channel
#    cplus.es id of channel
#
# returns: list of the programme hashes to write
#
sub process_table {
    my ($date, $ch_xmltv_id, $ch_es_id) = @_;

    $ch_es_id =~ s/\+/\%2B/gi;
    my $today = UnixDate($date, '%d/%m/%Y');
    my $url = "http://www.plus.es/codigo/television/rejilla_dia_dia.asp?Canales=$ch_es_id&Fecha=$today";
    t $url;
    my $data=get_nice($url);
    if (not defined $data) {
	die "could not fetch $url, aborting\n";
    }
    local $SIG{__WARN__} = sub {
	warn "$url: $_[0]";
    };

    # parse the page to a document object
    my $tree = HTML::TreeBuilder->new();
    $tree->parse($data);
    my @program_data = get_program_data($tree);
    my $bump_start_day=0;

    my @r;
    while (@program_data) {
	my $cur = shift @program_data;
	my $next = shift @program_data;
	unshift @program_data,$next if $next;
	
	push @r, make_programme_hash($date, $ch_xmltv_id, $ch_es_id, $cur, $next);
	if (!$bump_start_day && bump_start_day($cur,$next)) {
	    $bump_start_day=1;
	    $date = UnixDate(DateCalc($date,"+ 1 day"),'%Q');
	}
    }
    return @r;
}

sub make_programme_hash {
    my ($date, $ch_xmltv_id, $ch_es_id, $cur, $next) = @_;

    my %prog;

    $prog{channel}=$ch_xmltv_id;
    $prog{title}=[ [ $cur->{title}, $LANG ] ];
    $prog{"sub-title"}=[ [ $cur->{subtitle}, $LANG ] ] if defined $cur->{subtitle};
    $prog{category}=[ [ $cur->{category}, $LANG ] ];

    $prog{start}=utc_offset("$date $cur->{time}", 'CET');
    if (not defined $prog{start}) {
	warn "bad time string: $cur->{time}";
	return undef;
    }

    # FIXME: parse description field further

    die if defined $cur->{desc} and $cur->{desc} !~ /\S/;
    $prog{desc}=[ [ $cur->{desc}, $LANG ] ] if defined $cur->{desc};

    return \%prog;
}
sub bump_start_day {
    my ($cur,$next) = @_;
    if (!defined($next)) {
	return undef;
    }
    my $start = UnixDate($cur->{time},'%H:%M');
    my $stop = UnixDate($next->{time},'%H:%M');
    if (Date_Cmp($start,$stop)>0) {
	return 1;
    } else {
	return 0;
    }
}


#
# program data is split as follows:
# - program listings begin with a fixed header with "Hora", "Género" and 
#   "Título".
sub get_program_data {
    my ($tree) = @_;
    my @data;

    my @txt_elems = get_txt_elems($tree);

    # Actually time and title are required, but we don't check that.

    my $index = 0;
    while (defined $txt_elems[$index]) {
	if (       ($txt_elems[$index] eq "Hora")
		&& ($txt_elems[$index + 1] =~ /nero/)
		&& ($txt_elems[$index + 2] =~ /tulo/) )
		{
	   t "Program listing comes below";
	   $index = $index + 3;
	   while ( $txt_elems[$index] =~ /^\d\d:\d\d/ ) {
		t "Program found: Hora: $txt_elems[$index] Programa: $txt_elems[$index+2]";
                my $p_stime     = $txt_elems[$index];
                my $p_category  = $txt_elems[$index + 1];
		$p_category  =~ s/\240//gi;
    		if ($p_category !~ /\S/) {
			$p_category = "SIN CLASIFICAR";
    		}
                my $p_title     = $txt_elems[$index + 2];
                my $p_desc;
#                my $p_subtitle;

                if (not ( $txt_elems[$index + 3] =~ /^\d\d:\d\d/ ) ) {
                # Program has Description, or at least whitespace
                # where there should be a description.
                        $p_desc = $txt_elems[$index + 3];
			undef $p_desc if $p_desc !~ /\S/;
                        $index = $index + 4;
                } else {
                # Program don't have Description
                        $index = $index + 3;
                }

		# Check that a title is present, if not we can use
		# description instead.
		#
		if ($p_title !~ /\S/) {
		    if (defined $p_desc) {
			$p_title = $p_desc;
			undef $p_desc;
		    }
		    else {
			warn "programme with no title at $p_stime, not writing";
			next;
		    }
		}

                my %h = (       time =>         $p_stime,
                                category=>      $p_category,
                                title=>         $p_title,
                                # subtitle=>      $p_subtitle,
			);
		$h{desc} = $p_desc if defined $p_desc;
                push @data, \%h;
#               t "Next time?: $txt_elems[$index]";
           } # end while prof the program
        }
        t $txt_elems[$index];
        $index = $index + 1;
    }
    return @data;
}
sub get_txt_elems {
    my ($tree) = @_;

    my @txt_elem;
    my @txt_cont = $tree->look_down(
                        sub { ($_[0]->descendants() eq 0  ) },       
			sub { defined($_[0]->attr ("_content") ) } );
	foreach my $txt (@txt_cont) {
        	my @children=$txt->content_list;
		for (my $tmp=$children[0]) {
			s/^\s+//;s/\s+$//;
			push @txt_elem, $_;
		}
	}
    return @txt_elem;
}

# get channel listing
sub get_channels {
    my $bar = new Term::ProgressBar('getting list of channels', 1)
	if Have_bar && not $opt_quiet;
    my %channels;
    my $url="http://www.plus.es/codigo/television/plataformas/digitalplus/portada2.asp";
    t $url;
    my $local_data=get_nice($url);
    die "could not get channel listing $url, aborting\n"
      if not defined $local_data;

    my $tree = HTML::TreeBuilder->new();
    $tree->parse($local_data);
    my @menus = $tree->find_by_tag_name("_tag"=>"select");
    
    foreach my $elem (@menus) {
	my $cname = $elem->attr('name');
	if ($cname eq "Canales") {
	    my @ocanals = $elem->find_by_tag_name("_tag"=>"option");
	    @ocanals = sort @ocanals;
	    foreach my $opt (@ocanals) {
		t $opt->attr('value');
		t $opt->attr('text');
		if ((not $opt->attr('value') eq "") and (not $opt->attr('value') eq "OKPLUS")) {
		    my $channel_id = $opt->attr('value');
		    my @children=$opt->content_list;
		    my $channel_name=$children[0];
		    if (length $channel_id eq 1) {
			$channel_id = "0" . $channel_id
		    }
		    $channels{$channel_id}=$channel_name;
		    push @ch_all, { 'display-name' => [ [ $channel_name,
							  $LANG ] ],
				    'id'=> "$channel_id.cplus.es" };
		}
	    }
	}
    }
    die "no channels could be found" if not keys %channels;
    update $bar if Have_bar && not $opt_quiet;
    return %channels;
}


# Bump a YYYYMMDD date by one.
sub nextday {
    my $d = shift;
    my $p = ParseDate($d);
    my $n = DateCalc($p, '+ 1 day');
    return UnixDate($n, '%Q');
}

