package Bio::RNA::Barriers::Minimum;
our $VERSION = '0.03';

use 5.012;
use strict;
use warnings;

use Moose;
use MooseX::StrictConstructor;
use Moose::Util::TypeConstraints;
use namespace::autoclean;

use autodie qw(:all);
use overload q{""} => 'stringify';

use Scalar::Util    qw(blessed);
use List::Util      qw(max);
use List::MoreUtils qw(zip);

#### Special types for attribute checking.
subtype 'RNAStruct' => (
    as 'Str',
    where { m{ ^ [(.)]+ $ }x },
    message {
        "Only '(', ')', and '.' allowed in structure string, found '$_'"
    },
);

subtype 'DisconSaddle' => (
    as 'Str',
    where { m{ ^ ~+ $ }x },
    message {
        "Only '~' allowed in disconnected saddle string, found '$_'"
    },
);

# index  - index of basins ordered by energy; 1 is lowest
# struct - struct of lowest energy in minimums
# mfe    - free energy of the basin's local minimum
# father_index  - index of father basin (the basin this one is merged to)
# barrier_height - height of energy barrier (in kcal/mol) to minimum this
#                 one is merged to (relative to this minimum)
my @default_attribs = qw( index struct mfe father_index barrier_height);
my @default_attrib_args = (is => 'rw', required => 1);
my %default_attrib_isa
    = &zip(\@default_attribs, [qw(Int RNAStruct Num Int Num)]);
has $_ => (@default_attrib_args, isa => $default_attrib_isa{$_})
    foreach @default_attribs;

# Return true iff this is the mfe basin 1.
sub is_global_min {
    my $self = shift;
    my $is_global_min = $self->index == 1;
    return $is_global_min;
}

# Optional attributes generated by Barriers options --bsize and --saddle.
# Descriptions in quotes are from Barriers tutorial at
# https://www.tbi.univie.ac.at/RNA/tutorial/#sec4_2
# merged_struct_count - 'numbers of structures in the basin we merge with'
#       Given is the number of structures in the *current* basin
#       (including merged ones) *at the time of merging*. For minimum 1,
#       this is close to the total number of input structures (except for
#       disconnected structures and other missing ones (???).
# father_struct_count - 'number of basin which we merge to'
#       Actually, it's the number of *structures* in the basin that we
#       merge to (father basin) *at the time of merging*.
# merged_basin_energy - 'free energy of the basin'
#       This seems to be the free energy of (the partition function of)
#       the basin---including all merged basins---at the time this basin
#       is merged. For minimum 1, this corresponds to the ensemble free
#       energy as far as it was enumerated by RNAsubopt (excluding
#       disconnected structures).
# grad_struct_count - 'number of structures in this basin using gradient walk'
#       This seems to be the actual number of structures only in this
#       basin, excluding merged basins. What about --minh merging? Why
#       doesn't this column sum up to exactly to the total number of
#       structs if --max==Inf (some are missing)? Issues due to degenerate
#       energies?
# grad_basin_energy - 'gradient basin (consisting of all structures where
#                      gradientwalk ends in the minimum)'
#       This seems to be free energy of the basin without any merged
#       basins. Summing up the partition functions corresponding to these
#       energies, one obtains a free energy almost equal to the ensemble
#       energy (up to rounding errors due to 6 digit precision).
my @bsize_attributes = qw(
    merged_struct_count father_struct_count merged_basin_energy
    grad_struct_count grad_basin_energy
);
my @opt_attributes = (@bsize_attributes, qw(saddle_struct));
# Define a bsize predicate only for the first bsize attribute. Ensure in
# BUILD that either all or none of the attributes are set.
my @common_attrib_args = (
    is => 'ro',
    lazy => 1,
    default => sub { confess 'attribute undefined, did you use --bsize/--saddle?' },
);
# has $bsize_attributes[0] => (is => 'ro', predicate => 'has_bsize' );
# has $_ => (is => 'ro') foreach @bsize_attributes[1..$#bsize_attributes];
# has 'saddle_struct' => (is => 'ro', predicate => 'has_saddle_struct');
has $bsize_attributes[0] => (
    @common_attrib_args,
    isa       => 'Num',
    predicate => 'has_bsize',
    writer    => "_$bsize_attributes[0]",           # private writer
);
has $_ => (@common_attrib_args, writer => "_$_")    # private writer
    foreach @bsize_attributes[1..$#bsize_attributes];
has 'saddle_struct' => (
    @common_attrib_args,
    isa       => 'RNAStruct | DisconSaddle',
    predicate => 'has_saddle_struct',
);

# Optional reference to the father minimum.
has 'father' => (
    is        => 'rw',
    trigger   => \&_check_father,
    # Use name 'has_father_REF' because has_father==false reads as if the
    # min does not have a father at all.
    # We don't need this, we always have it if we have a father.
    # predicate => 'has_father_ref',
);

sub _check_father {
    my ($self, $father) = @_;
    confess 'Need a reference to another minimum to set father attribute'
        unless blessed $father and $father->isa( __PACKAGE__ );
    confess "Father's index does not match the index used during construction"
        unless $self->father_index == $father->index;
}

# Returns true iff the minimum has a father minimum it has been merged to.
sub has_father {
    my $self = shift;
    my $has_father = $self->father_index > 0;
    return $has_father;
}


# Minimum is connected to basin 1 (mfe).
has 'is_connected' => (
    is => 'ro',
    lazy => 1,
    init_arg => undef,          # cannot be set manually
    builder => '_build_is_connected',
);

sub _build_is_connected {
    my $self = shift;

    return 1 if $self->index == 1;              # this is the mfe basin
    return 0 unless $self->has_father;          # basin has no father

    # confess 'Reference to father minimum has not been set, cannot proceed.'
    #     unless $self->has_father_ref;

    my $is_connected = $self->father->is_connected;
    return $is_connected;
}

# Parse passed line read from barriers file.
around BUILDARGS => sub {
    my $orig  = shift;
    my $class = shift;

    my @args;                               # as passed to the constructor
    if ( @_ == 1 && !ref $_[0] ) {          # process line from bar file
        my $input_line = shift;
        my @fields = split /\s+/, $input_line;
        shift @fields if $fields[0] eq q{}; # drop empty first field

        if (@fields < @default_attribs) {
            confess "Input line has not enough fields: $input_line";
        }

        # Add default args
        push @args, $_ => shift @fields foreach @default_attribs;
        # @args = map { $_ => shift @fields } @attributes;

        # Add saddle struct if present
        if (@fields == 1 or @fields == @opt_attributes) {
            push @args, saddle_struct => shift @fields;
        }

        # Add bsize attributes if present
        if (@fields == @bsize_attributes) {
            push @args, $_ => shift @fields foreach @bsize_attributes;
        }

        confess "Unrecognized number of fields on input line:\n$input_line"
            unless @fields == 0;            # all fields used up?
    }
    else {
        @args = @_;
    }
    return $class->$orig(@args);
};

sub BUILD {
    my $self = shift;

    # Ensure presence or absence of all bsize attributes
    my $defined_count = grep {defined $self->{$_}} @bsize_attributes;
    confess "Need to define all or none of the --bsize attributes ",
          join q{, }, @bsize_attributes
        unless $defined_count == 0 or $defined_count == @bsize_attributes;
}

# Determine all ancestor minima of this minimum, i.e. this minimum's
# father, grand father, grand grand father etc. in this order.
# Returns list of all ancestors (may be empty if min is disconnected).
sub ancestors {
    my ($self) = @_;

    my $ancestor = $self;
    my @ancestors;
    while ($ancestor->has_father) {
        # confess 'Need father reference to determine ancestors'
        #     unless $ancestor->has_father_ref;
        push @ancestors, $ancestor->father;
        $ancestor = $ancestor->father;
    }

    return @ancestors;
}

# Stringify minimum to equal an entry line in the barriers output file.
# Format strings are taken from Barriers' C source code.
sub stringify {
    my $self = shift;

    # Default attributes
    my $min_string = $self->brief;

    # Add saddle struct if defined.
    if ($self->has_saddle_struct) {
        $min_string .= q{ } . $self->saddle_struct;
    }

    # Add bsize attributes if defined.
    if ($self->has_bsize) {
        $min_string .= sprintf " %12ld %8ld %10.6f %8ld %10.6f",
                               map {$self->{$_}} @bsize_attributes;
    }

    return $min_string;
}

# Stringification method returning a more brief representation of the
# minimum containing only the index, min struct, its energy, the father's
# index, and the barrier height. This equals the output of Barriers if
# neither --bsize nor --saddle is given.
sub brief {
    my $self = shift;

    # Default attributes
    my $brief_string = sprintf "%4d %s %6.2f %4d %6.2f",
                                map {$self->{$_}} @default_attribs;

    return $brief_string;
}

# ABSOLUTE energy of the lowest structure connecting this basin to another
# one. The barrier height, in contrast, is the RELATIVE energy of the same
# structure w.r.t. to the basin's (local) mfe.
# BEWARE: if the basin does not have a father (father == 0), then the
# barrier height is (as reported by Barriers) given with respect to the global exploration
# threshold.
# Since this gives unexpected results, the saddle height is set to the
# global mfe for basin 1, and to Inf for disconnected basins.
sub saddle_height {
    my $self = shift;

    # Mfe basin is connected to itself with a barrier of 0. Other
    # fatherless basins are disconnected and thus have an unknown saddle
    # height -- set to Inf.
    my $barrier_height =   $self->has_father    ? $self->barrier_height
                         : $self->is_global_min ? 0
                                                : 'Inf'
                         ;

    # Energy values from Bar file have only 2 digits precision.
    my $saddle_height = sprintf "%.2f", $self->mfe + $barrier_height;
    return $saddle_height;
}

# Saddle height as described for saddle_height(), but with respect to the
# global mfe structure (basin 1).
sub global_saddle_height {
    my $self = shift;

    # Move up in barrier tree until reachin basin 1 or realizing we are
    # disconnected. Global saddle height is maximal encountered height.
    my $ancestor         = $self;
    my $glob_sadd_height = $self->saddle_height;
    while ($ancestor->father_index > 1) {
        $ancestor = $ancestor->father;
        $glob_sadd_height
            = max $glob_sadd_height, $ancestor->saddle_height;
    }

    return $glob_sadd_height;
}

__PACKAGE__->meta->make_immutable;

1;


__END__

=pod

=encoding UTF-8

=head1 NAME

Bio::RNA::Barriers::Minimum - Store a single local minimum
(macrostate) from the output of I<Barriers>

=head1 SYNOPSIS

    use Bio::RNA::Barriers;

    my $min_string = '...';            # single line from .bar file
    my $min = Bio::RNA::Barriers::Minimum->new($min_string);
    # my $min2 = $results->get_min(3)  # usually used like this

    print "$min\n";                    # prints minimum as in the results file

    if ($min->has_bsize and $min->is_connected)
        print "Minimum contributes an energy of ", $min->grad_basin_energy(),
              " to the partition function."


=head1 DESCRIPTION

Objects of this class repesent the individual local minima (macrostates) from
the results file of I<Barriers>. The construction is usually done
automatically by the results objects (cf. L<Bio::RNA::Barriers::Results>). The
methods can be used for various queries.


=head1 METHODS

=head3 $min->new($results_file_line)

Construct a minimum object from a single line of the I<Barriers> results file.

=head3 $min->ancestors()

Determine all ancestor minima of this minimum, i.e. this minimum's father,
grand father, grand grand father etc. in this order.  Returns a list of all
ancestors (may be empty if min is disconnected).

=head3 $min->has_bsize()

Boolean. True iff the minimum provides information about the basin size as
computed by the I<Barriers> option C<--bsize>.

=head3 $min->merged_struct_count()

Given is the number of structures in the current basin (including merged ones)
B<at the time of merging>. For minimum 1, this is close to the total number of
input structures (except for disconnected structures and other missing ones
(???)).

This attribute is only available if Barriers was used with the C<--bsize>
option. Use C<$min-E<gt>has_bsize()> to query this.

=head3 $min->father_struct_count()

The number of structures in the basin that we merge into (father basin) B<at
the time of merging>.

This attribute is only available if Barriers was used with the C<--bsize>
option. Use C<$min-E<gt>has_bsize()> to query this.

=head3 $min->merged_basin_energy()

The free energy of (the partition function of) the basin -- including all
merged basins -- at the time B<this> basin is merged. For minimum 1, this
corresponds to the ensemble's free energy as far as it was enumerated by
RNAsubopt (excluding disconnected structures).

This attribute is only available if Barriers was used with the C<--bsize>
option. Use C<$min-E<gt>has_bsize()> to query this.

=head3 $min->grad_struct_count()

The number of structures only in this gradient basin, excluding merged basins.

Open questions: What about --minh merging? Why doesn't this column sum up to
exactly to the total number of structs if --max==Inf (some are missing)?
Issues due to degenerate energies?

This attribute is only available if Barriers was used with the C<--bsize>
option. Use C<$min-E<gt>has_bsize()> to query this.

=head3 $min->grad_basin_energy()

Free energy of the basin without any merged basins. Summing
up the partition functions corresponding to these energies, one obtains a free
energy almost equal to the ensemble energy (up to rounding errors due to 6
digit precision, and of course up to the enumeration threshold used for
I<RNAsubopt>).

This attribute is only available if Barriers was used with the C<--bsize>
option. Use C<$min-E<gt>has_bsize()> to query this.

=head3 $min->is_global_min()

Returns true iff this is the global minimum (i.e. basin 1).

=head3 $min->index()

1-based index of the minimum (as is the Barriers file).

=head3 $min->struct()

Returns the dot-bracket structure string of the minimum.

=head3 $min->mfe()

B<Local> minimum free energy of the basin (i.e. the minimum's energy).

=head3 $min->father_index()

Returns the index of the father minimum (i.e. the one this minimum has been
merged to).

=head3 $min->barrier_height()

Returns the barrier height (B<relative> energy difference of the saddle point
to the local minimum). For the B<absolute> energy of the saddle point, see
C<saddle_height()>.

=head3 $min->saddle_height()

B<Absolute> energy of the lowest structure connecting this basin to another
one. The barrier height, in contrast, is the B<relative> energy of the same
structure w.r.t. to the basin's (local) mfe.

B<Beware>: if the basin does not have a father (father == 0), then the
reported saddle height is given with respect to the global exploration
threshold. This is strange but consistent with original Barriers files.

=head3 $min->global_saddle_height()

Saddle height as described for saddle_height(), but not with respect to
any neighbor minimum, but to the global mfe structure (basin 1).

=head3 $min->saddle_struct()

Returns the saddle structure via which it was merged to its father minimum.
If this attribute was not set (i.e. I<Barriers> was run without the
C<--saddle> option), it croaks when accessed.

Use the C<has_saddle()> predicate to query the status of this attribute.

=head3 $min->has_saddle_struct()

Predicate for the C<saddle> attribute. True iff the minimum provides the
saddle structure via which it was merged to its father minimum, as computed by
the I<Barriers> option C<--saddle>. It can be queried via the C<saddle_struct>
method.

=head3 $min->father()

Returns (a reference to) the father minimum object.

=head3 $min->has_father()

Returns true iff the minimum has a father minimum it has been merged to.

=head3 $min->is_connected()

Boolean. True iff the minimum is connected to basin 1 (the mfe basin).

=head3 $min->ancestors()

Determine all ancestor minima of this minimum, i.e. this minimum's father,
grand father, grand grand father etc. in this order.

=head3 $min->stringify()

Stringify minimum to equal an entry line in the barriers output file.
Format strings are taken from Barriers' C source code.

=head3 $min->brief()

Stringification method returning a more brief representation of the
minimum containing only the index, min struct, its energy, the father's
index, and the barrier height. This equals the output of Barriers if
neither C<--bsize> nor C<--saddle> is given.


=head1 AUTHOR

Felix Kuehnl, C<< <felix at bioinf.uni-leipzig.de> >>

=head1 BUGS

Please report any bugs or feature requests by raising an issue at
L<https://github.com/xileF1337/Bio-RNA-Barriers/issues>.

You can also do so by mailing to C<bug-bio-rna-barmap at rt.cpan.org>,
or through the web interface at
L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-BarMap>.  I will be
notified, and then you'll automatically be notified of progress on your bug as
I make changes.


=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc Bio::RNA::Barriers


You can also look for information at the official Barriers website:

L<https://www.tbi.univie.ac.at/RNA/Barriers/>


=over 4

=item * Github: the official repository

L<https://github.com/xileF1337/Bio-RNA-Barriers>


=item * RT: CPAN's request tracker (report bugs here)

L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-Barriers>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/Bio-RNA-Barriers>

=item * CPAN Ratings

L<https://cpanratings.perl.org/d/Bio-RNA-Barriers>

=item * Search CPAN

L<https://metacpan.org/release/Bio-RNA-Barriers>

=back


=head1 LICENSE AND COPYRIGHT

Copyright 2019-2021 Felix Kuehnl.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see L<http://www.gnu.org/licenses/>.


=cut


# End of Bio::RNA::Barriers::Minimum
