#!/usr/bin/perl

# Copyright (C) 2022 Bruno Postle <bruno@postle.net>
#
# ifcmerge is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ifcmerge is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

use strict;
use warnings;
use DateTime;
use DateTime::TimeZone;
use 5.010;

BEGIN {
    eval "use PAR";
}

if (!caller || (defined $PAR::Packer::VERSION)) {

# simple command-line parameter parsing
my $prioritise_local = 0;  # default is to prioritise remote

if (@ARGV > 0 && $ARGV[0] eq '--prioritise-local') {
    $prioritise_local = 1;
    shift @ARGV;  # remove the flag from arguments
}

sub print_help {
    my ($fh) = @_;
    say $fh "Usage: $0 [--prioritise-local] base.ifc local.ifc remote.ifc merged.ifc";
    say $fh "";
    say $fh "Performs a three-way merge of IFC files, analogous to a source-control merge.";
    say $fh "";
    say $fh "Default behaviour (remote wins):";
    say $fh "  - When the same attribute is changed by both sides, the REMOTE value is kept.";
    say $fh "  - When both sides add new entities and their step-IDs overlap, LOCAL entities";
    say $fh "    are renumbered to avoid the conflict; REMOTE step-IDs are preserved as-is.";
    say $fh "  - IfcLocalPlacement conflicts are resolved automatically, keeping the REMOTE";
    say $fh "    placement and adjusting the LOCAL one.";
    say $fh "";
    say $fh "Options:";
    say $fh "  --prioritise-local    Reverse the priority: LOCAL attribute values and step-IDs";
    say $fh "                        are preserved; REMOTE entities are renumbered instead.";
    say $fh "                        Use this when LOCAL is main/master and REMOTE is a";
    say $fh "                        temporary branch being merged in, so that step-IDs in";
    say $fh "                        main/master are never renumbered.  When merging from";
    say $fh "                        main/master/origin into a working branch, use the default";
    say $fh "                        (remote wins) instead.";
    say $fh "  --help                Show this help message.";
    say $fh "";
    say $fh "On success: writes merged.ifc and exits 0.";
    say $fh "On conflict: writes a JSON error report to STDOUT and exits 1.";
}

if (@ARGV > 0 && $ARGV[0] eq '--help') {
    print_help(\*STDOUT);
    exit 0;
}

unless (scalar(@ARGV) == 4) {
    print_help(\*STDERR);
    exit 1;
}

# initialize error tracking structure
my $errors = {
    status => "success",
    message => "",
    conflicts => []
};

my $base = Ifc->new;
$base->load($ARGV[0]);
my $local = Ifc->new;
$local->load($ARGV[1]);
my $remote = Ifc->new;
$remote->load($ARGV[2]);
my $merged = Ifc->new;
$merged->load($ARGV[0]); # note: initially the same as base

# track original ID mappings before any renumbering
my $id_mapping = {
    local => {},
    remote => {}
};

# store original IDs for tracking
foreach my $id ($local->file_ids) {
    $id_mapping->{local}->{$id} = $id;
}
foreach my $id ($remote->file_ids) {
    $id_mapping->{remote}->{$id} = $id;
}

$local->compare($base);
$remote->compare($base);

# if both files have added entities, renumber local added entities to make space

my $offset = $remote->last - $base->last;
my $max = $base->last;

if ($offset > 0 && !$prioritise_local)
{
    # default: prioritise remote by renumbering local IDs
    for my $id (reverse ($local->added_ids))
    {
        my $new_id = _add_offset($id, $max, $offset);
        my $text = $local->{file}->{$id};
        $text =~ s/#([0-9]+)/'#'. _add_offset($1, $max, $offset)/ge;
        $local->{file}->{$new_id} = $text;
        delete $local->{file}->{$id};

        # update ID mapping to track original IDs
        $id_mapping->{local}->{$new_id} = $id;
    }
    for my $id ($local->modified_ids)
    {
        my $text = $local->{file}->{$id};
        $text =~ s/#([0-9]+)/'#'. _add_offset($1, $max, $offset)/ge;
        $local->{file}->{$id} = $text;
    }
}
elsif ($local->last - $base->last > 0 && $prioritise_local)
{
    # alternative: prioritise local by renumbering remote IDs
    my $local_offset = $local->last - $base->last;
    my $local_max = $base->last;

    for my $id (reverse ($remote->added_ids))
    {
        my $new_id = _add_offset($id, $local_max, $local_offset);
        my $text = $remote->{file}->{$id};
        $text =~ s/#([0-9]+)/'#'. _add_offset($1, $local_max, $local_offset)/ge;
        $remote->{file}->{$new_id} = $text;
        delete $remote->{file}->{$id};

        # update ID mapping to track original IDs
        $id_mapping->{remote}->{$new_id} = $id;
    }
    for my $id ($remote->modified_ids)
    {
        my $text = $remote->{file}->{$id};
        $text =~ s/#([0-9]+)/'#'. _add_offset($1, $local_max, $local_offset)/ge;
        $remote->{file}->{$id} = $text;
    }
}

# recompare after potential renumbering
if ($prioritise_local) {
    $remote->compare($base);
} else {
    $local->compare($base);
}

# copy added entities
for my $id ($local->added_ids)
{
    $merged->{file}->{$id} = $local->{file}->{$id};
}
for my $id ($remote->added_ids)
{
    $merged->{file}->{$id} = $remote->{file}->{$id};
}

# delete deleted entities

for my $id ($local->deleted_ids)
{
    if (defined $remote->{modified}->{$id})
    {
        my ($remote_class) = $remote->class_attributes($id);
        if ($remote_class =~ /^IfcRel/i)
        {
            # IfcRelationship may be deleted overzealously, reinsert empty
            $local->{file}->{$id} = $remote->{file}->{$id};
            $local->{file}->{$id} =~ s/\([0-9#,]+\)/\(\)/;
            delete $local->{deleted}->{$id};
            $local->{modified}->{$id} = 1;
        }
        else
        {
            add_conflict($errors, "entity_deleted_and_modified", {
                entity_id => $id,
                original_local_id => get_original_id('local', $id),
                original_remote_id => get_original_id('remote', $id),
                message => "Entity deleted in local but modified in remote",
                local_file => $ARGV[1],
                remote_file => $ARGV[2],
                entity_class => $remote_class
            });
        }
    }
    else
    {
        delete $merged->{file}->{$id};
    }
}
for my $id ($remote->deleted_ids)
{
    if (defined $local->{modified}->{$id})
    {
        my ($local_class) = $local->class_attributes($id);
        if ($local_class =~ /^IfcRel/i)
        {
            # IfcRelationship may be deleted overzealously, reinsert empty
            $remote->{file}->{$id} = $local->{file}->{$id};
            $remote->{file}->{$id} =~ s/\([0-9#,]+\)/\(\)/;
            delete $remote->{deleted}->{$id};
            $remote->{modified}->{$id} = 1;
        }
        else
        {
            add_conflict($errors, "entity_deleted_and_modified", {
                entity_id => $id,
                original_local_id => get_original_id('local', $id),
                original_remote_id => get_original_id('remote', $id),
                message => "Entity deleted in remote but modified in local",
                local_file => $ARGV[1],
                remote_file => $ARGV[2],
                entity_class => $local_class
            });
        }
    }
    else
    {
        delete $merged->{file}->{$id};
    }
}

# update modified entities
# FIXME this will fail if the final entity has been deleted and a new entity created with the same id

for my $id ($local->modified_ids)
{
    my ($base_class, @base_attr) = $base->class_attributes($id);
    my ($local_class, @local_attr) = $local->class_attributes($id);

    if ($base_class ne $local_class) {
        add_conflict($errors, "class_changed", {
            entity_id => $id,
            original_local_id => get_original_id('local', $id),
            original_remote_id => get_original_id('remote', $id),
            message => "Entity class changed in local",
            local_file => $ARGV[1],
            base_class => $base_class,
            modified_class => $local_class
        });
    }
    $merged->{file}->{$id} = $local->{file}->{$id};
}
for my $id ($remote->modified_ids)
{
    my ($base_class, @base_attr) = $base->class_attributes($id);
    my ($remote_class, @remote_attr) = $remote->class_attributes($id);

    if ($base_class ne $remote_class) {
        add_conflict($errors, "class_changed", {
            entity_id => $id,
            original_local_id => get_original_id('local', $id),
            original_remote_id => get_original_id('remote', $id),
            message => "Entity class changed in remote",
            remote_file => $ARGV[2],
            base_class => $base_class,
            modified_class => $remote_class
        });
    }

    if (defined $local->{modified}->{$id})
    {
        # entity is modified in both, try and merge attributes
        my ($local_class, @local_attr) = $local->class_attributes($id);
        my @merged_attr;
        for my $i (0 .. scalar(@base_attr) -1)
        {
            if ($base_attr[$i] eq $local_attr[$i]
            and $base_attr[$i] eq $remote_attr[$i])
            {
                # simple case attribute not modified
                $merged_attr[$i] = $base_attr[$i];
            }
            elsif ($base_attr[$i] ne $local_attr[$i]
               and $base_attr[$i] ne $remote_attr[$i]
               and $local_attr[$i] ne $remote_attr[$i])
            {
                # attribute modified in local and remote
                if ($base_attr[$i] =~ /^\([#,0-9]*\)$/)
                {
                    # attribute is a list of ids
                    my @base_ids = $base_attr[$i] =~ /(#[0-9]+)/g;
                    my @local_ids = $local_attr[$i] =~ /(#[0-9]+)/g;
                    my @remote_ids = $remote_attr[$i] =~ /(#[0-9]+)/g;
                    my (%base_ids, %local_ids, %remote_ids, %merged_ids);
                    $base_ids{$_} = 1 for @base_ids;
                    $local_ids{$_} = 1 for @local_ids;
                    $remote_ids{$_} = 1 for @remote_ids;
                    for my $local_id (@local_ids)
                    {
                        # id exists in local
                        $merged_ids{$local_id} = 1;
                    }
                    for my $remote_id (@remote_ids)
                    {
                        # id exists in remote
                        $merged_ids{$remote_id} = 1;
                    }
                    for my $base_id (@base_ids)
                    {
                        $merged_ids{$base_id} = 1;
                        if (not defined $local_ids{$base_id} or
                            not defined $remote_ids{$base_id})
                        {
                            # id has been deleted in local or remote
                            delete $merged_ids{$base_id};
                        }
                    }
                    # FIXME this should be a numeric sort
                    $merged_attr[$i] = '('. join(',', sort(keys %merged_ids)) .')';
                }
                elsif ($local_class =~ /^IfcOwnerHistory$/i)
                {
                    # for conflict resolution, prioritise based on the parameter
                    $merged_attr[$i] = $prioritise_local ? $local_attr[$i] : $remote_attr[$i];
                }
                elsif ($local_attr[$i] =~ /^#([0-9]+)/)
                {
                    # if attribute is a reference to a single entity and that entity is an Object Placement
                    # resolve the placement conflict by discarding one
                    my ($attribute_class) = $local->class_attributes($1);
                    if ($attribute_class =~ /^Ifc(Local|Linear|Grid)Placement$/i)
                    {
                        $merged_attr[$i] = $prioritise_local ? $local_attr[$i] : $remote_attr[$i];
                    }
                }
                else
                {
                    # attribute is not mergeable
                    # for conflict resolution, prioritise based on the parameter
                    $merged_attr[$i] = $prioritise_local ? $local_attr[$i] : $remote_attr[$i];

                    add_conflict($errors, "attribute_conflict", {
                        entity_id => $id,
                        original_local_id => get_original_id('local', $id),
                        original_remote_id => get_original_id('remote', $id),
                        attribute_index => $i + 1,
                        message => "Attribute conflict in both local and remote" .
                                  ($prioritise_local ? " (prioritising local)" : " (prioritising remote)"),
                        entity_class => $local_class,
                        base_value => $base_attr[$i],
                        local_value => $local_attr[$i],
                        remote_value => $remote_attr[$i],
                        selected_value => $prioritise_local ? $local_attr[$i] : $remote_attr[$i]
                    });
                }
            }
            elsif ($base_attr[$i] ne $local_attr[$i])
            {
                # local only modified, or local and base both identically modified
                $merged_attr[$i] = $local_attr[$i];
            }
            else
            {
                # remote only modified
                $merged_attr[$i] = $remote_attr[$i];
            }
        }
        $merged->{file}->{$id} = $base_class .'('. join(',', @merged_attr) .')';
    }
    else
    {
        # entity is modified in remote only
        $merged->{file}->{$id} = $remote->{file}->{$id};
    }
}

# collect ids used by modified/added entities
# FIXME will find ids in string attributes

my $local_required_ids = {};
for my $id ($local->modified_ids, $local->added_ids)
{
    foreach ($merged->{file}->{$id} =~ /#([0-9]+)/g)
    {
        $local_required_ids->{$_} = 1;
    }
}

my $remote_required_ids = {};
for my $id ($remote->modified_ids, $remote->added_ids)
{
    foreach ($merged->{file}->{$id} =~ /#([0-9]+)/g)
    {
        $remote_required_ids->{$_} = 1;
    }
}

# sanity check needed entities haven't been deleted

for my $id ($local->deleted_ids)
{
    if (defined $remote_required_ids->{$id})
    {
        add_conflict($errors, "required_entity_deleted", {
            entity_id => $id,
            original_local_id => get_original_id('local', $id),
            original_remote_id => get_original_id('remote', $id),
            message => "Entity required by remote was deleted in local",
            local_file => $ARGV[1],
            remote_file => $ARGV[2]
        });
    }
}

for my $id ($remote->deleted_ids)
{
    if (defined $local_required_ids->{$id})
    {
        add_conflict($errors, "required_entity_deleted", {
            entity_id => $id,
            original_local_id => get_original_id('local', $id),
            original_remote_id => get_original_id('remote', $id),
            message => "Entity required by local was deleted in remote",
            local_file => $ARGV[1],
            remote_file => $ARGV[2]
        });
    }
}

# check if merge is possible or report errors
if (@{$errors->{conflicts}}) {
    $errors->{status} = "failed";
    $errors->{message} = "Merge failed due to conflicts";
    print STDOUT json_encode($errors) . "\n";
    exit 1;
}

say "Success!";

$merged->write($ARGV[3]);

exit 0;

# simple JSON encoder function to avoid requiring the JSON module
sub json_encode {
    my ($data, $indent_level) = @_;
    $indent_level //= 0;
    my $indent = "  " x $indent_level;  # 2 spaces per level
    my $next_indent = "  " x ($indent_level + 1);

    return 'null' unless defined $data;

    if (ref $data eq 'HASH') {
        return '{}' unless keys %$data;
        my @pairs;
        foreach my $key (sort keys %$data) {
            my $value = json_encode($data->{$key}, $indent_level + 1);
            $key =~ s/"/\\"/g;
            push @pairs, "$next_indent\"$key\": $value";
        }
        return "{\n" . join(",\n", @pairs) . "\n$indent}";
    }
    elsif (ref $data eq 'ARRAY') {
        return '[]' unless @$data;
        my @values = map { json_encode($_, $indent_level + 1) } @$data;
        return "[\n$next_indent" . join(",\n$next_indent", @values) . "\n$indent]";
    }
    elsif (ref $data eq '') {
        if ($data =~ /^-?\d+$/) {
            return $data;
        }
        elsif ($data =~ /^-?\d*\.\d+$/) {
            return $data;
        }
        else {
            $data =~ s/\\/\\\\/g;
            $data =~ s/"/\\"/g;
            $data =~ s/\n/\\n/g;
            $data =~ s/\r/\\r/g;
            $data =~ s/\t/\\t/g;
            $data =~ s/\f/\\f/g;
            return "\"$data\"";
        }
    }
    else {
        return '{}';
    }
}

# helper function to get original ID before renumbering
sub get_original_id {
    my ($branch, $current_id) = @_;
    return $id_mapping->{$branch}->{$current_id} || $current_id;
}

# helper function to add a conflict to the error report
sub add_conflict {
    my ($errors, $type, $details) = @_;
    $details->{type} = $type;
    push @{$errors->{conflicts}}, $details;
}

}

sub _add_offset
{
    my ($id, $max, $offset) = @_;
    return $id + $offset if $id > $max;
    return $id;
}

package Ifc;

sub new
{
    my $class = shift;
    my $self = {headers => [], file => {}, added => {}, deleted => {}, modified => {}};
    bless $self, $class;
    return $self;
}

sub load
{
    my $self = shift;
    my $path = shift;
    open my $IN, '<', $path or die "$!";
    for my $line (<$IN>)
    {
        if ($line =~ /^#([0-9]+)=(.*);/)
        {
            $self->{file}->{$1} = $2;
        }
        elsif ($line =~ /\/\*.*\*\//)
        {
            # we discard comments
        }
        else
        {
            push @{$self->{headers}}, $line;
        }
    }
    close $IN;
}

sub write
{
    my $self = shift;
    my $path = shift;
    my $dt = DateTime->now;
    my $tz = DateTime::TimeZone->new(name => 'local');
    my $tz_offset = $tz->offset_as_string($tz->offset_for_datetime($dt));
    $tz_offset =~ s/(..)(..)$/$1:$2/;
    my $now = $dt.$tz_offset;
    open my $OUT, '>', $path or die "$!";
    for my $line (@{$self->{headers}})
    {
        if ($line =~ /^FILE_NAME/)
        {
            $line =~ s/....-..-..T..:..:..[+-]..:../$now/;
        }
        print $OUT $line;
        if ($line =~ /^DATA;/)
        {
            for my $id ($self->file_ids)
            {
                say $OUT "#$id=". $self->{file}->{$id} .";";
            }
        }
    }
}

sub compare
{
    my ($self, $other) = @_;
    $self->{added} = {};
    $self->{modified} = {};
    $self->{deleted} = {};
    for my $id ($self->file_ids)
    {
        if (not defined $other->{file}->{$id})
        {
            $self->{added}->{$id} = 1;
        }
        elsif ($self->{file}->{$id} ne $other->{file}->{$id}
               && _normalise_floats($self->{file}->{$id}) ne _normalise_floats($other->{file}->{$id}))
        {
            $self->{modified}->{$id} = 1;
        }
    }
    for my $id ($other->file_ids)
    {
        if (not defined $self->{file}->{$id})
        {
            $self->{deleted}->{$id} = 1;
        }
    }
}

sub last
{
    my $self = shift;
    my @sorted = sort {$a <=> $b} $self->file_ids;
    return $sorted[-1];
}

sub file_ids
{
    my $self = shift;
    return sort {$a <=> $b} keys %{$self->{file}};
}

sub added_ids
{
    my $self = shift;
    return sort {$a <=> $b} keys %{$self->{added}};
}

sub modified_ids
{
    my $self = shift;
    return sort {$a <=> $b} keys %{$self->{modified}};
}

sub deleted_ids
{
    my $self = shift;
    return sort {$a <=> $b} keys %{$self->{deleted}};
}

sub class_attributes
{
    my $self = shift;
    my $id = shift;
    my ($class, $attributes) = $self->{file}->{$id} =~ /^([_[:alnum:]]+)\((.*)\)$/;
    my @attributes = _dissemble($attributes);
    return $class, @attributes;
}

sub _normalise_floats
{
    my $str = shift;
    $str =~ s/(-?\d*\.\d+(?:[eE][+-]?\d+)?|-?\d+\.(?:\d+)?(?:[eE][+-]?\d+)?)/sprintf("%.10g", $1)/ge;
    return $str;
}

sub _dissemble
{
    my $text = shift;
    my $NAME = '[_[:alnum:]]+';
    my $SPACE = '[[:space:]]*';
    my $COMMA = "[[:space:],]+";
    my $QUOTED = "'.*?'";
    my $SIMPLE = "[^',)(]+";
    my $PARAMVALUE = "$NAME\\(.*?\\)";
    my $PARAMQUOTED = "$NAME\\('.*?'\\)";
    my $BRACKETED = "\\((?:$COMMA|$PARAMQUOTED|$PARAMVALUE|$QUOTED|$SIMPLE)*\\)";
    my @tokens = $text =~ /($PARAMQUOTED|$PARAMVALUE|^\(.*\)$|$BRACKETED|$QUOTED|$SIMPLE)/xg;
    my @out;
    for my $token (@tokens)
    {
        $token =~ s/^$COMMA//x;
        $token =~ s/$SPACE$//x;
        push @out, $token;
    }
    return @out;
}
