package CorScorer;

# Copyright (C) 2009-2011, Emili Sapena esapena <at> lsi.upc.edu
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version. This program is distributed in the hope that
# it will be useful, but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Modified  in 2013 for v1.07 by Sebastian Martschat, 
# 	sebastian.martschat <at> h-its.org
#

use strict;
use Algorithm::Munkres;

our $VERSION = '1.07';
print "version: ".$VERSION."\n";

# 1.07 Modifications to implement BCUB and CEAFM 
#			exactly as proposed by (Cai & Strube, 2010).
# 1.06 ?
# 1.05	Modification of IdentifMentions in order to correctly evaluate the
#			outputs with detected mentions. Based on (Cai & Strubbe, 2010)
# 1.04	Some output corrections in BLANC functions. Changed package name to "Scorer"
# 1.03	Detects mentions that start in a document but do not end
# 1.02	Corrected Bcub bug. It fails when the key file does not have any mention



# global variables
my $VERBOSE = 1;
my $HEAD_COLUMN = 8;
my $RESPONSE_COLUMN = -1;
my $KEY_COLUMN = -1;


#my %greatWeights = ("PN-PN"=>1.0, "NN-NN"=>1.0, "NR-NR"=>1.0,
#               	  "NN-PN"=>1.0, "PN-NN"=>1.0,
#                  "NR-NN"=>1.0, "NN-NR"=>1.0,
#                  "NR-PN"=>1.0, "PN-NR"=>1.0);

#my %fineWeights = ("PN-PN"=>0.5, "NN-NN"=>0.75, "NR-NR"=>1.0,
#               	  "NN-PN"=>0.75, "PN-NN"=>0.75,
#                  "NR-NN"=>1.0, "NN-NR"=>1.0,
#                  "NR-PN"=>1.0, "PN-NR"=>1.0);

#my %greatWeights = ("NR-NR"=>1.0, "NR-PN"=>1.0, "PN-NR"=>1.0, "NN-NR"=>1.0, "NR-NN"=>1.0,
#		    "NN-NN"=>1.0, "NN-PN"=>1, "PN-NN"=>1,
#		    "PN-PN"=>1);

#my %fineWeights = ("NR-NR"=>1.0, "NR-PN"=>1.0, "PN-NR"=>1.0, "NN-NR"=>1.0, "NR-NN"=>1.0,
#                    "NN-NN"=>1, "NN-PN"=>1, "PN-NN"=>1,
#                    "PN-PN"=>1);

my %greatWeights = ("NR-NR"=>1.0, "NR-PN"=>1.0, "PN-NR"=>1.0, "NN-NR"=>1.0, "NR-NN"=>1.0,
#                    "NN-NN"=>1, "NN-PN"=>1, "PN-NN"=>1,
#                    "PN-PN"=>1
                   "NN-NN"=>.75, "NN-PN"=>.75, "PN-NN"=>.75,
		    "PN-PN"=>.5
);

my %fineWeights = ("NR-NR"=>1.0, "NR-PN"=>1.0, "PN-NR"=>1.0, "NN-NR"=>1.0, "NR-NN"=>1.0,
#                    "NN-NN"=>1, "NN-PN"=>1, "PN-NN"=>1,
#                    "PN-PN"=>1
		    "NN-NN"=>.75, "NN-PN"=>.75, "PN-NN"=>.75,
                    "PN-PN"=>.5
);


my %typeMap = ("PN"=>1, "NN"=>2, "NR"=>3);

#my $non_anaphor_weight =   0.00000000000000000001;
my $non_anaphor_weight = 1;
our $v = 7;
our $Sj = "a";
# Score. Scores the results of a coreference resultion system
# Input: Metric, keys file, response file, [name]
#        Metric: the metric desired to evaluate:
#                muc: MUCScorer (Vilain et al, 1995)
#                bcub: B-Cubed (Bagga and Baldwin
#                ceafm: CEAF (Luo et al, 2005) using mention-based similarity
#                ceafe: CEAF (Luo et al, 2005) using entity-based similarity
#         keys file: file with expected coreference chains in SemEval format
#        response file: file with output of corefrence system (SemEval format)
#        name: [optional] the name of the document to score. If name is not
#              given, all the documents in the dataset will be scored.
#
# Output: an array with numerators and denominators of recall and precision
#         (recall_num, recall_den, precision_num, precision_den)
#
#   Final scores:
# Recall = recall_num / recall_den
# Precision = precision_num / precision_den
# F1 = 2 * Recall * Precision / (Recall + Precision)
sub Score
{
	my ($metric, $kFile, $rFile, $name) = @_;

	if (lc($metric) eq 'blanc') {
		return ScoreBLANC($kFile, $rFile, $name);
	}

	my %idenTotals = (recallDen => 0, recallNum => 0, precisionDen => 0, precisionNum => 0);
	my ($acumNR, $acumDR, $acumNP, $acumDP) = (0,0,0,0);

	if (defined($name) && $name ne 'none') {
		print "$name:\n" if ($VERBOSE);
		my $keys = GetCoreference($kFile, $KEY_COLUMN, $name);
		my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $name);
		my ($keyChains, $keyChainsWithSingletonsFromResponse, $responseChains, $responseChainsWithoutMentionsNotInKey, $rc) = IdentifMentions($keys, $response, \%idenTotals);
		($acumNR, $acumDR, $acumNP, $acumDP) = Eval($metric, $keyChains, $keyChainsWithSingletonsFromResponse, $responseChains, $responseChainsWithoutMentionsNotInKey, $rc);
	}
	else {
		my $kIndexNames = GetFileNames($kFile);
		my $rIndexNames = GetFileNames($rFile);

		$VERBOSE = 0 if ($name eq 'none');
		foreach my $iname (keys(%{$kIndexNames})) {
			my $keys = GetCoreference($kFile, $KEY_COLUMN, $iname, $kIndexNames->{$iname});
			my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $iname, $rIndexNames->{$iname});

			print "$iname:\n" if ($VERBOSE);
			my ($keyChains, $keyChainsWithSingletonsFromResponse, $responseChains, $responseChainsWithoutMentionsNotInKey, $rc) = IdentifMentions($keys, $response, \%idenTotals);
			my ($nr, $dr, $np, $dp) = Eval($metric, $keyChains, $keyChainsWithSingletonsFromResponse, $responseChains, $responseChainsWithoutMentionsNotInKey, $rc);

			$acumNR += $nr;
			$acumDR += $dr;
			$acumNP += $np;
			$acumDP += $dp;
		}
	}

	if ($VERBOSE || $name eq 'none') {
		print "\n====== TOTALS =======\n";
		print "Identification of Mentions: ";
		ShowRPF($idenTotals{recallNum}, $idenTotals{recallDen}, $idenTotals{precisionNum},
				  $idenTotals{precisionDen});
		print "Coreference: ";
		ShowRPF($acumNR, $acumDR, $acumNP, $acumDP);
	}

	return ($acumNR, $acumDR, $acumNP, $acumDP);
}

sub ScoreBLANC
{
	my ($kFile, $rFile, $name) = @_;
	my ($acumNRa, $acumDRa, $acumNPa, $acumDPa) = (0,0,0,0);
	my ($acumNRr, $acumDRr, $acumNPr, $acumDPr) = (0,0,0,0);
	my %idenTotals = (recallDen => 0, recallNum => 0, precisionDen => 0, precisionNum => 0);

	if (defined($name) && $name ne 'none') {
		print "$name:\n" if ($VERBOSE);
		my $keys = GetCoreference($kFile, $KEY_COLUMN, $name);
		my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $name);
		my ($keyChains, $keyChainsWithSingletonsFromResponse, $responseChains, $responseChainsWithoutMentionsNotInKey, $rc) = IdentifMentions($keys, $response, \%idenTotals);
		($acumNRa, $acumDRa, $acumNPa, $acumDPa, $acumNRr, $acumDRr, $acumNPr, $acumDPr) = BLANC($keyChainsWithSingletonsFromResponse, $responseChains);
	}
	else {
		my $kIndexNames = GetFileNames($kFile);
		my $rIndexNames = GetFileNames($rFile);

		$VERBOSE = 0 if ($name eq 'none');
		foreach my $iname (keys(%{$kIndexNames})) {
			my $keys = GetCoreference($kFile, $KEY_COLUMN, $iname, $kIndexNames->{$iname});
			my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $iname, $rIndexNames->{$iname});

			print "$name:\n" if ($VERBOSE);
			my ($keyChains, $keyChainsWithSingletonsFromResponse, $responseChains, $responseChainsWithoutMentionsNotInKey, $rc) = IdentifMentions($keys, $response, \%idenTotals);
			my ($nra, $dra, $npa, $dpa, $nrr, $drr, $npr, $dpr) = BLANC($keyChainsWithSingletonsFromResponse, $responseChains);

			$acumNRa += $nra;
			$acumDRa += $dra;
			$acumNPa += $npa;
			$acumDPa += $dpa;
			$acumNRr += $nrr;
			$acumDRr += $drr;
			$acumNPr += $npr;
			$acumDPr += $dpr;
		}
	}

	if ($VERBOSE || $name eq 'none') {
		print "\n====== TOTALS =======\n";
		print "Identification of Mentions: ";
		ShowRPF($idenTotals{recallNum}, $idenTotals{recallDen}, $idenTotals{precisionNum},
				  $idenTotals{precisionDen});
		print "\nCoreference:\n";
		print "Coreference links: ";
		ShowRPF($acumNRa, $acumDRa, $acumNPa, $acumDPa);
		print "Non-coreference links: ";
		ShowRPF($acumNRr, $acumDRr, $acumNPr, $acumDPr);
		print "BLANC: ";

		my $Ra = ($acumDRa) ? $acumNRa/$acumDRa : -1;
		my $Rr = ($acumDRr) ? $acumNRr/$acumDRr : -1;
		my $Pa = ($acumDPa) ? $acumNPa/$acumDPa : 0;
		my $Pr = ($acumDPr) ? $acumNPr/$acumDPr : 0;

		my $R = ($Ra + $Rr) / 2;
		my $P = ($Pa + $Pr) / 2;

		my $Fa = ($Pa + $Ra) ? 2 * $Pa * $Ra / ($Pa + $Ra) : 0;
		my $Fr = ($Pr + $Rr) ? 2 * $Pr * $Rr / ($Pr + $Rr) : 0;

		my $f1 = ($Fa + $Fr) / 2;

		if ($Ra == -1 && $Rr == -1) {
			$R = 0;
			$P = 0;
			$f1 = 0;
		}
		elsif ($Ra == -1) {
			$R = $Rr;
			$P = $Pr;
			$f1 = $Fr;
		}
		elsif ($Rr == -1) {
			$R = $Ra;
			$P = $Pa;
			$f1 = $Fa;
		}

		ShowRPF($R, 1, $P, 1, $f1);
	}

	return ($acumNRa, $acumDRa, $acumNPa, $acumDPa, $acumNRr, $acumDRr, $acumNPr, $acumDPr);
}


sub GetIndex
{
	my ($ind, $i) = @_;
	if (!defined($ind->{$i})) {
		my $n = $ind->{nexti} || 0;
		$ind->{$i} = $n;
		$n++;
		$ind->{nexti} = $n;
	}

	return $ind->{$i};
}

# Get the coreference information from column $column of the file $file
# If $name is defined, only keys between "#begin document $name" and
# "#end file $name" are taken.
# The output is an array of entites, where each entity is an array
# of mentions and each mention is an array with two values corresponding
# to the mention's begin and end. For example:
# @entities = ( [ [1,3], [45,45], [57,62] ], # <-- entity 0
#               [ [5,5], [25,27], [31,31] ], # <-- entity 1
# ...
# );
# entity 0 is composed by 3 mentions: from token 1 to 3, token 45 and
# from token 57 to 62 (both included)
#
# if $name is not specified, the output is a hash including each file
# found in the document:
# $coref{$file} = \@entities
sub GetCoreference
{
	my ($file, $column, $name, $pos) = @_;
	my %coref;
	my %ind;

	open (F, $file) || die "Can not open $file: $!";
	if ($pos) {
		seek(F, $pos, 0);
	}
	my $fName;
	my $getout = 0;
	do {
		# look for the begin of a file
		while (my $l = <F>) {
			chomp($l);
			$l =~ s/\r$//; # m$ format jokes
			if ($l =~ /^\#\s*begin document (.*?)$/) {
				if (defined($name)) {
					if ($name eq $1) {
						$fName = $name;
						$getout = 1;
						last;
					}
				}
				else {
					$fName = $1;
					last;
				}
			}
		}
		print "====> $fName:\n" if ($VERBOSE > 1);

		# Extract the keys from the file until #end is found
		my $lnumber = 0;
		my @entities;
		my @half;
		my @head;
		my @sentId;
		while (my $l = <F>) {
			chomp($l);
			next if ($l eq '');
			if ($l =~ /\#\s*end document/) {
				foreach my $h (@half) {
					if (defined($h) && @$h) {
						die "Error: some mentions in the document ($name) do not close\n";
					}
				}
				last;
			}
			my @columns = split(/\t/, $l);
			my $cInfo = $columns[$column];
			push (@head, $columns[$HEAD_COLUMN]);
			push (@sentId, $columns[0]);
			if ($cInfo ne '_') {
				
				#discard double antecedent
				while ($cInfo =~ s/\((\d+\+\d)\)//) {
					print "Discarded ($1)\n" if ($VERBOSE > 1);
				}
				
				# one-token mention(s)
				while ($cInfo =~ s/\((\d+)(\@([\w\-\+]*))?\)//) {
					my $ie = GetIndex(\%ind, $1);
					my $type = $3;
					# print "type: $type\n";
					push(@{$entities[$ie]}, [ $lnumber, $lnumber, $lnumber, $type ]);
					print "+mention (entity $ie): ($lnumber,$lnumber)\n" if ($VERBOSE > 2);
				}
				
				# begin of mention(s)
				while ($cInfo =~ s/\((\d+)(\@([\w\-\+]*))?//) {
					my $ie = GetIndex(\%ind, $1);
					my $type = $3;
					push(@{$half[$ie]}, [$lnumber, $type]);
					print "+init mention (entity $ie): ($lnumber\n" if ($VERBOSE > 2);
				}
				
				# end of mention(s)
				while ($cInfo =~ s/(\d+)\)//) {
					my $numberie = $1;
					my $ie = GetIndex(\%ind, $numberie);
					my $p = pop(@{$half[$ie]});
					my ($start, $type) = @$p if (defined($p));
					if (defined($start)) {
						my $inim = $sentId[$start];
						my $endm = $sentId[$lnumber];
						my $tHead = $start;
						# the token whose head is outside the mention is the head of the mention
						for (my $t = $start; $t <= $lnumber; $t++) {
							if ($head[$t] < $inim || $head[$t] > $endm) {
								$tHead = $t;
								last;
							}
						}
						push(@{$entities[$ie]}, [ $start, $lnumber, $tHead, $type ]);
					}
					else {
						die "Detected the end of a mention [$numberie]($ie) without begin (?,$lnumber)";
					}
					print "+mention (entity $ie): ($start,$lnumber)\n" if ($VERBOSE > 2);
				}
			}
			$lnumber++;
		}

		# verbose
		if ($VERBOSE > 1) {
			print "File $fName:\n";
			for (my $e = 0; $e < scalar(@entities); $e++) {
				print "Entity $e:";
				foreach my $mention (@{$entities[$e]}) {
					print " ($mention->[0],$mention->[1])";
				}
				print "\n";
			}
		}

		$coref{$fName} = \@entities;
	} while (!$getout && !eof(F));

	if (defined($name)) {
		return $coref{$name};
	}
	return \%coref;
}

sub GetFileNames {
	my $file = shift;
	my %hash;
	my $last = 0;
	open (F, $file) || die "Can not open $file: $!";
	while (my $l = <F>) {
		chomp($l);
		$l =~ s/\r$//; # m$ format jokes
		if ($l =~ /^\#\s*begin document (.*?)$/) {
			my $name = $1;
			$hash{$name} = $last;
		}
		$last = tell(F);
	}
	close (F);
	return \%hash;
}

sub IdentifMentions
{
	my ($keys, $response, $totals) = @_;
	my @kChains;
	my @kChainsWithSingletonsFromResponse;
	my @rChains;
	my @rChainsWithoutMentionsNotInKey;
	my %id;
	my %map;
	my $idCount = 0;
	my @assigned;



# assign ordered IDs
	my @orderedMs;
	my %set;	

	foreach my $entity (@$keys) {
		foreach my $m (@$entity) {
			if (!defined($set{"$m->[0],$m->[1]"})) {
				my $s = $m->[0];
				my $e = $m->[1];
				my $insert = scalar(@orderedMs);
				for (my $i=0;$i<scalar(@orderedMs);$i++) {
					my $tmp = @orderedMs[$i];
					my $tmpS = $tmp->[0];
					my $tmpE = $tmp->[1];
					if($tmpS>$s || $tmpE>$e) {
						$insert = $i;
						last;					
					}			
				}
				splice(@orderedMs, $insert, 0, $m);
				$set{"$m->[0],$m->[1]"} = -1;
			}
		}
	}

	foreach my $entity (@$response) {
		foreach my $m (@$entity) {
			if (!defined($set{"$m->[0],$m->[1]"})) {
				my $s = $m->[0];
				my $e = $m->[1];
				my $insert = scalar(@orderedMs);
				for (my $i=0;$i<scalar(@orderedMs);$i++) {
					my $tmp = @orderedMs[$i];
					my $tmpS = $tmp->[0];
					my $tmpE = $tmp->[1];
					if($tmpS>$s || $tmpE>$e) {
						$insert = $i;
						last;					
					}			
				}
				splice(@orderedMs, $insert, 0, $m);
				$set{"$m->[0],$m->[1]"} = -1;
			}
		}
	}
	
	for (my $i=0;$i<scalar(@orderedMs);$i++) {
		my $m = @orderedMs[$i];
		$set{"$m->[0],$m->[1]"} = $i;
	}

	# for each mention found in keys an ID is generated
	foreach my $entity (@$keys) {
		foreach my $mention (@$entity) {
			if (defined($id{"$mention->[0],$mention->[1]"})) {
				print "Repe: $mention->[0], $mention->[1] ", $id{"$mention->[0],$mention->[1]"}, $idCount, "\n";
			}
#			$id{"$mention->[0],$mention->[1]"} = $idCount;
			$id{"$mention->[0],$mention->[1]"} = $set{"$mention->[0],$mention->[1]"};
			$idCount++;
		}
	}

#
# bug: someone can add multiple entities of the same id, and can inflate score
#

# 	# correct identification: Exact bound limits
# 	my $exact = 0;
# 	foreach my $entity (@$response) {
# 		foreach my $mention (@$entity) {
# 			if (defined($id{"$mention->[0],$mention->[1]"}) &&
# 				!$assigned[$id{"$mention->[0],$mention->[1]"}]) {
# 				$assigned[$id{"$mention->[0],$mention->[1]"}] = 1;
# 				$map{"$mention->[0],$mention->[1]"} = $id{"$mention->[0],$mention->[1]"};
# 				$exact++;
# 			}
# 		}
# 	}


#
# fix: remove duplicate mentions
#
  # correct identification: Exact bound limits
	my $exact = 0;
	foreach my $entity (@$response) {
		my $i = 0;
		my @remove;

		foreach my $mention (@$entity) {
			if (defined($map{"$mention->[0],$mention->[1]"})) {
				print "Repeated mention: $mention->[0], $mention->[1] ",$map{"$mention->[0],$mention->[1]"}, $id{"$mention->[0],$mention->[1]"},"\n";
				push(@remove, $i);
        		}
			elsif (defined($id{"$mention->[0],$mention->[1]"}) && !$assigned[$id{"$mention->[0],$mention->[1]"}]) {
				$assigned[$id{"$mention->[0],$mention->[1]"}] = 1;
				$map{"$mention->[0],$mention->[1]"} = $id{"$mention->[0],$mention->[1]"};
				$exact++;
          		}
			$i++;
		}
		# Remove repeated mentions in the response
		foreach my $i (sort { $b <=> $a } (@remove)) {
			splice(@$entity, $i, 1);
		}
	}

	# Partial identificaiton: Inside bounds and including the head
	my $part = 0;

# since we will not be giving partial credit for partial mentions in
# the official version of CoNLL evaluation, the following block has
# been commented out

# 	foreach my $entity (@$response) {
# 		foreach my $mention (@$entity) {
# 			my $ini = $mention->[0];
# 			my $end = $mention->[1];
# 			my $head = $mention->[2];
# 			next if (defined($map{"$ini,$end"}));
# 			foreach my $ent (@$keys) {
# 				foreach my $m (@$ent) {
# 					next if ($assigned[$id{"$m->[0],$m->[1]"}]);
# 					if ($ini >= $m->[0] && $ini <= $m->[1] &&
# 						$end >= $m->[0] && $end <= $m->[1] &&
# 						$ini <= $m->[2] && $end >= $m->[2]) {
# 						$map{"$ini,$end"} = $id{"$m->[0],$m->[1]"};
# 						$assigned[$id{"$m->[0],$m->[1]"}] = 1;
# 						$part++;
# 						last;
# 					}
# 					last if (defined($map{"$ini,$end"}));
# 				}
# 			}
# 		}
# 	}

	# Each mention in response not included in keys has a new ID
	my $mresp = 0;
	foreach my $entity (@$response) {
		foreach my $mention (@$entity) {
			my $ini = $mention->[0];
			my $end = $mention->[1];
			if (!defined($map{"$mention->[0],$mention->[1]"})) {
#				$map{"$mention->[0],$mention->[1]"} = $idCount;
				$map{"$mention->[0],$mention->[1]"} = $set{"$mention->[0],$mention->[1]"};
				$idCount++;
			}
			$mresp++;
		}
	}

	if ($VERBOSE) {
		print "Total key mentions: " . scalar(keys(%id)) . "\n";
		print "Total response mentions: " . scalar(keys(%map)) . "\n";
		print "Strictly correct identified mentions: $exact\n";
		print "Partially correct identified mentions: $part\n";
		print "No identified: " . (scalar(keys(%id)) - $exact - $part) . "\n";
		print "Invented: " . ($idCount - scalar(keys(%id))) . "\n";
	}

	if (defined($totals)) {
		$totals->{recallDen} += scalar(keys(%id));
		#$totals->{recallNum} += $exact + 0.5 * $part;
		$totals->{recallNum} += $exact;
		$totals->{precisionDen} += scalar(keys(%map));
		#$totals->{precisionNum} += $exact + 0.5 * $part;
		$totals->{precisionNum} += $exact;
		$totals->{precisionExact} += $exact;
		$totals->{precisionPart} += $part;
	}

	# The coreference chains arrays are generated again with ID of mentions
	# instead of token coordenates
	my $e = 0;
	my %RC = ();

	foreach my $entity (@$keys) {
		foreach my $mention (@$entity) {
			push(@{$kChains[$e]}, $id{"$mention->[0],$mention->[1]"});
			$RC{$id{"$mention->[0],$mention->[1]"}} = $mention->[3] if (defined($mention->[3]));
			if(!defined($mention->[3])) {
				die("Assign type to gold Mention!");
			}
		}
		$e++;
	}
	$e = 0;
	foreach my $entity (@$response) {
		foreach my $mention (@$entity) {
			push(@{$rChains[$e]}, $map{"$mention->[0],$mention->[1]"});
			if (defined($mention->[3]) && !defined($RC{$map{"$mention->[0],$mention->[1]"}})) {
				$RC{$map{"$mention->[0],$mention->[1]"}} = $mention->[3];
			}
			if(!defined($mention->[3])) {
				die("Assign type to system Mention!!");
			}
		}
		$e++;
	}

	# In order to use the metrics as in (Cai & Strube, 2010):
	# 1. Include the non-detected key mentions into the response as singletons
	# 2. Discard the detected mentions not included in key resolved as singletons
	# 3a. For computing precision: put twinless system mentions in key
	# 3b. For computing recall: discard twinless system mentions in response

	my $kIndex = Indexa(\@kChains);
	my $rIndex = Indexa(\@rChains);

	if($v eq '7') {
		# 1. Include the non-detected key mentions into the response as singletons
		my $addkey = 0;
		if (scalar(keys(%id)) - $exact - $part > 0) {
			foreach my $kc (@kChains) {
				foreach my $m (@$kc) {
					if (!defined($rIndex->{$m})) {
						push(@rChains, [$m]);
						$addkey++;
					}
				}
			}
		}

		@kChainsWithSingletonsFromResponse = @kChains;
		@rChainsWithoutMentionsNotInKey;

		# 2. Discard the detected mentions not included in key resolved as singletons
		my $delsin = 0;
	
		if ($idCount - scalar(keys(%id)) > 0) {
			foreach my $rc (@rChains) {
				if (scalar(@$rc) == 1) {
					if (!defined($kIndex->{$rc->[0]})) {
						@$rc = ();
						$delsin++;
					}
				}
		  	}
		}

		# 3a. For computing precision: put twinless system mentions in key as singletons
		my $addinv = 0;

		if ($idCount - scalar(keys(%id)) > 0) {
			foreach my $rc (@rChains) {
				if (scalar(@$rc) > 1) {
					foreach my $m (@$rc) {
						if (!defined($kIndex->{$m})) {
							push(@kChainsWithSingletonsFromResponse, [$m]);
							$addinv++;
						}
					}
				}
			}
		}

		# 3b. For computing recall: discard twinless system mentions in response
		my $delsys = 0;
		foreach my $rc (@rChains) {
			my @temprc;
			my $i = 0;
			foreach my $m (@$rc) {
				if (defined($kIndex->{$m})) {
					push(@temprc, $m);
					$i++;
				}
				else {
					$delsys++;
				}
			}
			if ($i > 0) {
				push(@rChainsWithoutMentionsNotInKey,\@temprc);
			}			
		}

		# We clean the empty chains
		my @newrc;
		foreach my $rc (@rChains) {
			if (scalar(@$rc) > 0) {
				push(@newrc, $rc);
			}
		}
		@rChains = @newrc;
	} elsif ($v eq '6') {
		# CoNLL2012 version
		# 1. Include the non-detected key mentions into the response as singletons
		my $addkey = 0;
		if (scalar(keys(%id)) - $exact - $part > 0) {
			foreach my $kc (@kChains) {
				foreach my $m (@$kc) {
					if (!defined($rIndex->{$m})) {
						push(@rChains, [$m]);
						$addkey++;
					}
				}
			}
		}
	
		# 2. Discard the detected mentions not included in key resolved as singletons
		# 3. Add to the key (as singletons) the detected mentions included in some entity in response
		my $delsin = 0;
		my $addinv = 0;
		if ($idCount - scalar(keys(%id)) > 0) {
			foreach my $rc (@rChains) {
				if (scalar(@$rc) == 1) {
					if (!defined($kIndex->{$rc->[0]})) {
						@$rc = ();
						$delsin++;
					}
				}
				else {
					foreach my $m (@$rc) {
						if (!defined($kIndex->{$m})) {
							push(@kChains, [$m]);
							$addinv++;
						}
					}
				}
			}
		
			# We clean the empty chains
			my @newrc;
			foreach my $rc (@rChains) {
				if (scalar(@$rc) > 0) {
					push(@newrc, $rc);
				}
			}
			@rChains = @newrc;
		}
		@kChainsWithSingletonsFromResponse = @kChains;
		@rChainsWithoutMentionsNotInKey = @rChains;
	}
# 	print "Addkey: $addkey, addinv: $addinv, delsin: $delsin\n" if ($VERBOSE);

	return (\@kChains, \@kChainsWithSingletonsFromResponse, \@rChains, \@rChainsWithoutMentionsNotInKey, \%RC);
}

sub Eval
{
	my ($scorer, $keys, $keysPrecision, $response, $responseRecall, $rc) = @_;
	my $s = scalar($rc);
	$scorer = lc($scorer);
	my ($nr, $dr, $np, $dp);
	if ($scorer eq 'muc') {
		($nr, $dr, $np, $dp) = MUCScorer($keys, $keysPrecision, $response, $responseRecall);
	}
	elsif ($scorer eq 'mucl') {
		($nr, $dr, $np, $dp) = MUCCD($keys, $keysPrecision, $response,  $responseRecall, $rc);
	}
	elsif ($scorer eq 'bcub') {
		($nr, $dr, $np, $dp) = BCUBED($keys, $keysPrecision, $response, $responseRecall);
	}
	elsif ($scorer eq 'bcubl') {
		($nr, $dr, $np, $dp) = BCUBEDCD($keys, $keysPrecision, $response, $responseRecall, $rc);
	}
	elsif ($scorer =~ /^ceafm?$/) {
		($nr, $dr, $np, $dp) = CEAFM($keys, $keysPrecision, $response, $responseRecall);
	}
	elsif ($scorer eq 'ceafml') {
		($nr, $dr, $np, $dp) = CEAFMCD($keys, $keysPrecision, $response, $responseRecall, $rc);
	}
	elsif ($scorer eq 'ceafe') {
		($nr, $dr, $np, $dp) = CEAFE($keys, $keysPrecision, $response, $responseRecall);
	}
	elsif ($scorer eq 'ceafel') {
		($nr, $dr, $np, $dp) = CEAFECD($keys, $keysPrecision, $response, $responseRecall, $rc);
	}
# 	elsif ($scorer eq 'blanc') {
# 		($nr, $dr, $np, $dp) = BLANC($keys, $response);
# 	}
	else {
		die "Metric $scorer not implemented yet\n";
	}
	return ($nr, $dr, $np, $dp);
}

# Indexes an array of arrays, in order to easily know the position of an element
sub Indexa
{
	my ($arrays) = @_;
	my %index;

	for (my $i = 0; $i < @$arrays; $i++) {
		foreach my $e (@{$arrays->[$i]}) {
			$index{$e} = $i;
		}
	}
	return \%index;
}




sub findTheRoot 
{
	my ($chain, $rc) = @_;
	my $root = @$chain[0];
        my $maxType = %typeMap->{$rc->{$root}};
        foreach my $m (@$chain) {
                my $type = %typeMap->{$rc->{$m}};
                if($type>$maxType) {
                        $maxType = $type;
                        $root = $m;
                }
        }
	return $root;
}


sub formGoldGraphs
{
	my ($chain, $rc, $maps) = @_;
	my $root = findTheRoot($chain, $rc);
	my $maxType = %typeMap->{$rc->{$root}};

	my $currentType = $maxType;
	foreach my $m1 (@$chain) {
		my @arr;
		my $tmpMaxType = $maxType;
		while($tmpMaxType!=0) {
			my @subArr;
			foreach my $m2 (@$chain) {
	                        my $type2 = %typeMap->{$rc->{$m2}};
        	                if($type2==$tmpMaxType) {
					push(@subArr, $m2);
				}		
                        }
			my $size = scalar(@subArr);
			if ($size!=0) {
				push(@arr, \@subArr);
			}
			$tmpMaxType--;
		}		
		$maps->{$m1} = \@arr;	
	}
}

sub getGoldChainWeight
{
	my ($chain, $rc) = @_;
	my $root = findTheRoot($chain, $rc);
	my $links = 0;
        foreach my $m (@$chain) {
                if($m!=$root) {
                        $links += getMyWeight($m, $root, $rc, \%greatWeights);
                }
        }
	return $links;
}


sub getSubResponseChainWeight
{
	my ($chain, $rc, $maps) = @_;

	my $root = findTheRoot($chain, $rc);
	
	my $rootType = %typeMap->{$rc->{$root}};

	my @arr = $maps->{$root};
	my @subArr = @arr->[0];

	my $maxType = %typeMap->{$rc->{$maps->{$root}[0][0]}};
	my $links = 0;

	if($maxType==$rootType) {
		foreach my $m (@$chain) {
			if($m != $root) {
				$links += getMyWeight($m, $root, $rc, \%greatWeights);	
			}
		}
	} else {
		foreach my $m (@$chain) {
			if($m != $root) {
				$links += getMyWeight($m, $root, $rc, \%fineWeights);
			}
		}
	}
	return $links;
}

sub splitSystemChain
{
	my ($chain, $kIndex) = @_;
	my @copy = ();
	foreach my $ma (@$chain) {
		push(@copy, $ma);
	}
	my @subChains;
	while(scalar(@copy)!=0) {
		my @tmp = ();
		my $mb = @copy->[0];
 		push(@tmp, $mb);
		splice(@copy, 0, 1);
                for (my $j = 0;$j<scalar(@copy);$j++) {
			my $ma = @copy->[$j];
			if($kIndex->{$ma} == $kIndex->{$mb}) {
				push(@tmp, $ma) ;
				splice(@copy, $j, 1);
				$j--;
			}
		}
		push(@subChains, \@tmp);
	}
	return \@subChains;
}

sub MUCCD
{
	my ($keys, $keysPrecision, $response, $responseRecall, $rc) = @_;
	my $kIndex = Indexa($keys);
	my $kIndexPrec = Indexa($keysPrecision);

	# Links in key
	my $keylinks = 0;
	my $reslinks = 0;
	my %goldSingletons = {};
	my %goldMapsRec;
	foreach my $kEntity (@$keys) {
		next if (!defined($kEntity));
		formGoldGraphs($kEntity, $rc, \%goldMapsRec);
		my $info = getGoldChainWeight($kEntity, $rc);
		my $root = findTheRoot($kEntity);
		$keylinks += $info;
		
		if(scalar(@$kEntity)==1) {
			$keylinks += $non_anaphor_weight;
			my $m = $kEntity->[0];
			print "Singleton : $m \n";
			$goldSingletons{$m} = 1;
		}
		# plus the non-anaphoricity weight
	}

	my $correctRec = 0;

	foreach my $rEntity (@$responseRecall) {
		next if (!defined($rEntity));
		if(scalar(@$rEntity)==1) {
			my $m = $rEntity->[0];
			$correctRec += $non_anaphor_weight if(defined($goldSingletons{$m}));
		}
		# for each possible pair
		# partition response key file
		my $subChains = splitSystemChain($rEntity, $kIndex);
		foreach my $subChain (@$subChains) {
			my $info = getSubResponseChainWeight($subChain, $rc, \%goldMapsRec);
			$correctRec += $info;
		}
	}

	my %goldRootSetPrec = {};
	my %goldMapsPrec;
	my $correctPrec = 0;
	foreach my $kEntity (@$keysPrecision) {
		next if (!defined($kEntity));
		formGoldGraphs($kEntity, $rc, \%goldMapsPrec);
		my $root = findTheRoot($kEntity);
		$goldRootSetPrec{$root} = 1;
	}

	foreach my $rEntity (@$response) {
		next if (!defined($rEntity));
		if(scalar(@$rEntity)==1) {
			$reslinks += $non_anaphor_weight;
			my $m = $rEntity->[0];
			$correctPrec += $non_anaphor_weight if(defined($goldSingletons{$m}));	
		}
		my $subChains = splitSystemChain($rEntity, $kIndexPrec);
		foreach my $subChain (@$subChains) {
			my $info = getSubResponseChainWeight($subChain, $rc, \%goldMapsPrec);
			$correctPrec += $info;

			if($Sj eq 'a') {
				$reslinks += $info;
			}
		}
		if($Sj eq 'a') {
			$reslinks += getLinkSubclustersWeight($rEntity, $subChains, $rc);
		} else {
			$reslinks += getGoldChainWeight($rEntity, $rc);
		}	

		print "Res: $reslinks \n";
	}
	$kIndexPrec = undef;
	$kIndex = undef;
	ShowRPF($correctRec, $keylinks, $correctPrec, $reslinks) if ($VERBOSE);
	return ($correctRec, $keylinks, $correctPrec, $reslinks);
}


sub getMyWeight
{
	my ($m1, $m2, $rc, $weight) = @_;
	if(!defined($rc->{$m1}) || !defined($rc->{$m2})) {
		die ("Please assign type to every mention");
	}


	my $type1 = $rc->{$m1};
	my $type2 = $rc->{$m2};
	my $pair = $type1 . "-" . $type2;
	my $w = $weight->{$pair};
	return $w;
}

# Es consideren els "links" dintre de cada cadena de coreferents. La cadena
# A-B-C-D te 3 links: A-B, B-C i C-D. Recall: num links correctes / esperats
# Precisio: num links correctes / marcats
sub MUCScorer
{
	my ($keys, $keysPrecision, $response, $responseRecall) = @_;

	my $kIndex = Indexa($keys);

	# Calculate correct links
	my $correct = 0;
	foreach my $rEntity (@$response) {
		next if (!defined($rEntity));
		# for each possible pair
		for (my $i = 0; $i < @$rEntity; $i++) {
			my $id_i = $rEntity->[$i];
			for (my $j = $i+1; $j < @$rEntity; $j++) {
				my $id_j = $rEntity->[$j];
				if (defined($kIndex->{$id_i}) && defined($kIndex->{$id_j}) &&
					$kIndex->{$id_i} == $kIndex->{$id_j}) {
					$correct++;
					last;
				}
# 				else {
# 					print "$i $id_i $kIndex->{$id_i} =? $j $id_j $kIndex->{$id_j}\n";
# 				}
			}
		}
	}

	# Links in key
	my $keylinks = 0;
	foreach my $kEntity (@$keys) {
		next if (!defined($kEntity));
		$keylinks += scalar(@$kEntity) - 1 if (scalar(@$kEntity));
	}

	# Links in response
	my $reslinks = 0;
	foreach my $rEntity (@$response) {
		next if (!defined($rEntity));
		$reslinks += scalar(@$rEntity) - 1 if (scalar(@$rEntity));
	}

	ShowRPF($correct, $keylinks, $correct, $reslinks) if ($VERBOSE);
	return ($correct, $keylinks, $correct, $reslinks);
}

# Per cada mencio de la resposta es calcula la precisio i per cada mencio a les
# keys es calcula el recall
sub BCUBED
{
	my ($keys, $keysWithSingletonsFromResponse, $response, $responseWithoutMentionsNotInKey) = @_;

	my $kIndex = Indexa($keys);
	my $kIndexWithSingletonsFromResponse = Indexa($keysWithSingletonsFromResponse);
	my $rIndex = Indexa($response);
	my $rIndexWithoutMentionsNotInKey = Indexa($responseWithoutMentionsNotInKey);

	my $acumP = 0;
	my $acumR = 0;

	# first compute precision: use keysPrecision instead of keys
	foreach my $rChain (@$response) {
		foreach my $m (@$rChain) {
			my $kChain = (defined($kIndexWithSingletonsFromResponse->{$m})) ? $keysWithSingletonsFromResponse->[$kIndexWithSingletonsFromResponse->{$m}] : [];
			my $ci = 0;
			my $ri = scalar(@$rChain);
			my $ki = scalar(@$kChain);

			# common mentions in rChain and kChain => Ci
			foreach my $mr (@$rChain) {
				foreach my $mk (@$kChain) {
					if ($mr == $mk) {
						$ci++;
						last;
					}
				}
			}

			$acumP += $ci / $ri if ($ri);
		}
	}

	# for recall we use responseRecall instead of response
	foreach my $rChain (@$responseWithoutMentionsNotInKey) {
		foreach my $m (@$rChain) {
			my $kChain = (defined($kIndex->{$m})) ? $keys->[$kIndex->{$m}] : [];
			my $ci = 0;
			my $ri = scalar(@$rChain);
			my $ki = scalar(@$kChain);

			# common mentions in rChain and kChain => Ci
			foreach my $mr (@$rChain) {
				foreach my $mk (@$kChain) {
					if ($mr == $mk) {
						$ci++;
						last;
					}
				}
			}

			$acumR += $ci / $ki if ($ki);
		}
	}

	# Mentions in key
	my $keymentions = 0;
	foreach my $kEntity (@$keys) {
		$keymentions += scalar(@$kEntity);
	}

	# Mentions in response
	my $resmentions = 0;
	foreach my $rEntity (@$response) {
		$resmentions += scalar(@$rEntity);
	}

	ShowRPF($acumR, $keymentions, $acumP, $resmentions) if ($VERBOSE);
	return($acumR, $keymentions, $acumP, $resmentions);
}

sub getMaximumWeight
{
	my ($c1, $c2, $rc) = @_;
	my $maxWeight = 0;
	foreach my $m1 (@$c1) {
		foreach my $m2 (@$c2) {
			my $weight = getMyWeight($m1, $m2, $rc, \%greatWeights);
			if($maxWeight < $weight) {
				$maxWeight = $weight;
			}
		}
	}
	return $maxWeight;
}

sub getLinkSubclustersWeight
{
	my ($cluster, $subclusters, $rc) = @_;
	my $root = findTheRoot($cluster);
	my $betweenLinks = 0;
	my $size = scalar(@$subclusters);
	my @links;	
	for (my $i=0;$i<$size;$i++) {
		my $sc1 = $subclusters->[$i];
		for (my $j=$i+1;$j<$size;$j++) {
			my $sc2 = $subclusters->[$j];
			my $weight = getMaximumWeight($sc1, $sc2, $rc);
			
			#insert sort
			my $insert = scalar(@links);
			for (my $j=0;$j<scalar(@links);$j++) {
				my $temp = @links[$j];
				if($weight > $temp) {
					$insert = $j;
					last;
				}			
			}	
			splice(@links, $insert, 0, $weight);
		}
	}
	for(my $i=0;$i<$size-1;$i++) {
		$betweenLinks += @links[$i];
	}
#	print "between : $betweenLinks \n";	
	return $betweenLinks;
}


sub BCUBEDCD
{
	my ($keys, $keysPrecision, $response, $responseRecall, $rc) = @_;

	my $kIndex = Indexa($keys);
	my $kIndexPrec = Indexa($keysPrecision);
	my $rIndex = Indexa($response);
	my $rIndexRec = Indexa($responseRecall);

	my $acumP = 0;
	my $acumR = 0;

	my %goldMapsRec;
	foreach my $kEntity (@$keys) {
		next if (!defined($kEntity));
		formGoldGraphs($kEntity, $rc, \%goldMapsRec);
	}	
	
	my %goldMapsPrec;
	foreach my $kEntity (@$keysPrecision) {
		next if (!defined($kEntity));
		formGoldGraphs($kEntity, $rc, \%goldMapsPrec);
	}	

	# first compute precision: use keysPrecision instead of keys
	foreach my $rChain (@$response) {
		foreach my $m (@$rChain) {
			my $kChain = (defined($kIndexPrec->{$m})) ? $keysPrecision->[$kIndexPrec->{$m}] : [];
			my $root = findTheRoot($rChain);
			# common mentions in rChain and kChain => Ci
			my @common;
			foreach my $mr (@$rChain) {
				foreach my $mk (@$kChain) {
					if ($mr == $mk) {
						push(@common, $mr);
						last;
					}
				}
			}
			my $ci = getSubResponseChainWeight(\@common, $rc, \%goldMapsPrec);
			# calculate response overall weight 
			my $ri = 0;

			if(scalar(@$rChain)==1) {
				$ri += $non_anaphor_weight;
				$ci += $non_anaphor_weight if(scalar(@$kChain)==1);
			}
			my $subChains = splitSystemChain($rChain, $kIndexPrec);
			if ($Sj eq 'a') {
				foreach my $subChain (@$subChains) {
					my $info = getSubResponseChainWeight($subChain, $rc, \%goldMapsPrec);
					$ri += $info;
				}
				$ri += getLinkSubclustersWeight($rChain, $subChains, $rc);
			} else {
				$ri += getGoldChainWeight($rChain, $rc);	
			}
			print "Prec $m: $ci/$ri \n";
			$acumP += $ci / $ri if ($ri);
		}
	}

	# for recall we use responseRecall instead of response
	foreach my $rChain (@$responseRecall) {
		foreach my $m (@$rChain) {
			my $kChain = (defined($kIndex->{$m})) ? $keys->[$kIndex->{$m}] : [];
			# common mentions in rChain and kChain => Ci
			my @common;
			foreach my $mr (@$rChain) {
				foreach my $mk (@$kChain) {
					if ($mr == $mk) {
						push(@common, $mr);
						last;
					}
				}
			}
			my $ci = getSubResponseChainWeight(\@common, $rc, \%goldMapsRec);
			my $ki = getGoldChainWeight($kChain, $rc);
			if(scalar(@$kChain)==1) {
				$ki += $non_anaphor_weight;
				$ci += $non_anaphor_weight if (scalar(@$rChain)==1);
			}
			print "Rec $m: $ci/$ki \n";
			$acumR += $ci / $ki if ($ki);
		}
	}

	# Mentions in key
	my $keymentions = 0;
	foreach my $kEntity (@$keys) {
		$keymentions += scalar(@$kEntity);
	}

	# Mentions in response
	my $resmentions = 0;
	foreach my $rEntity (@$response) {
		$resmentions += scalar(@$rEntity);
	}

	ShowRPF($acumR, $keymentions, $acumP, $resmentions) if ($VERBOSE);
	return($acumR, $keymentions, $acumP, $resmentions);
}

sub SIMEntityBased
{
	my ($a, $b) = @_;
	my $intersection = 0;

	# Common elements in A and B
	foreach my $ma (@$a) {
		next if (!defined($ma));
		foreach my $mb (@$b) {
			next if (!defined($mb));
			if ($ma == $mb) {
				$intersection++;
				last;
			}
		}
	}

	my $r = 0;
	my $d = scalar(@$a) + scalar(@$b);
	if ($d != 0) {
		$r = 2 * $intersection / $d;
	}

	return $r;
}

sub SIMMentionBased
{
	my ($a, $b) = @_;
	my $intersection = 0;

	# Common elements in A and B
	foreach my $ma (@$a) {
		next if (!defined($ma));
		foreach my $mb (@$b) {
			next if (!defined($mb));
			if ($ma == $mb) {
				$intersection++;
				last;
			}
		}
	}

	return $intersection;
}

sub CEAFE
{
	my ($keys, $keysWithSingletonsFromResponse, $response, $responseWithoutMentionsNotInKey, $type) = @_;
	my @sim;

	for (my $i = 0; $i < scalar(@$keysWithSingletonsFromResponse); $i++) {
		for (my $j = 0; $j < scalar(@$response); $j++) {
			if (defined($keysWithSingletonsFromResponse->[$i]) && defined($response->[$j])) {
				$sim[$i][$j] = 1 - SIMEntityBased($keysWithSingletonsFromResponse->[$i], $response->[$j]);
				# 1 - X => the library searches minima not maxima
			}
			else {
				$sim[$i][$j] = 1;
			}
		}

		# fill the matrix when response chains are less than key ones
		for (my $j = scalar(@$response); $j < scalar(@$keysWithSingletonsFromResponse); $j++) {
			$sim[$i][$j] = 1;
		}
		#$denrec += SIMEntityBased($kChain->[$i], $kChain->[$i]);
	}

	my @out;

	# Munkres algorithm
	assign(\@sim, \@out);

	my $numerador = 0;
	my $denpre = 0;
	my $denrec = 0;

	foreach my $c (@$response) {
		$denpre++ if (defined($c) && scalar(@$c) > 0);
	}
	foreach my $c (@$keysWithSingletonsFromResponse) {
		$denrec++ if (defined($c) && scalar(@$c) > 0);
	}

	for (my $i = 0; $i < scalar(@$keysWithSingletonsFromResponse); $i++) {
		$numerador += 1 - $sim[$i][$out[$i]];
	}

	ShowRPF($numerador, $denrec, $numerador, $denpre) if ($VERBOSE);

	return ($numerador, $denrec, $numerador, $denpre);
}

sub SIMEntityBasedCD
{
	my ($goldChain, $systemChain, $maps, $rc, $kIndex) = @_;
	my @common;
	foreach my $m1 (@$systemChain) {
		foreach my $m2 (@$goldChain) {
			if ($m1==$m2) {
				push(@common, $m1);
				last;
			}
		}
	}
	my $goldWeight = getGoldChainWeight($goldChain, $rc);
	if(scalar(@$goldChain)==1) {
		$goldWeight += $non_anaphor_weight;
	}
	my $size = scalar(@$goldChain);
		
	my $matchWeight = getSubResponseChainWeight(\@common, $rc, $maps);
	if(scalar(@$goldChain)==1 && scalar(@$systemChain)==1 && $goldChain->[0]==$systemChain->[0]) {
		$matchWeight += $non_anaphor_weight;
	}
	my $size = scalar(@common);
	my $systemWeight = 0;
	if(scalar(@$systemChain)==1) {
		$systemWeight += $non_anaphor_weight;
	}
	if($Sj eq 'a') {
		my $subChains = splitSystemChain($systemChain, $kIndex);
		foreach my $subChain (@$subChains)	{
			$systemWeight += getSubResponseChainWeight($subChain, $rc, $maps);
		}
		$systemWeight += getLinkSubclustersWeight($systemChain, $subChains, $rc);
	} else {
		$systemWeight += getGoldChainWeight($systemChain, $rc);	
	}
	my $denom = $goldWeight + $systemWeight;
	my $sim = 0;
	if($denom !=0) {	
		$sim = 2*$matchWeight/$denom;
	}
	print "2 * $matchWeight / ( $goldWeight + $systemWeight ) \n";
	print "Sim: $sim \n";
	return $sim;
}

sub CEAFECD
{
	my ($keys, $keysPrecision, $response, $responseRecall, $rc) = @_;
	my @sim;
	my $kIndexPrec = Indexa($keysPrecision);

	my %goldMaps;
	foreach my $kEntity (@$keysPrecision) {
		next if (!defined($kEntity));
		formGoldGraphs($kEntity, $rc, \%goldMaps);
	}

	for (my $i = 0; $i < scalar(@$keysPrecision); $i++) {
		for (my $j = 0; $j < scalar(@$response); $j++) {
			if (defined($keysPrecision->[$i]) && defined($response->[$j])) {
				my $sim = SIMEntityBasedCD($keysPrecision->[$i], $response->[$j], \%goldMaps, $rc, $kIndexPrec);
				print "SIM [$i][$j] : $sim \n====\n";
				$sim[$i][$j] = 1 - SIMEntityBasedCD($keysPrecision->[$i], $response->[$j], \%goldMaps, $rc, $kIndexPrec);
				# 1 - X => the library searches minima not maxima
			}
			else {
				$sim[$i][$j] = 1;
			}
		}

		# fill the matrix when response chains are less than key ones
		for (my $j = scalar(@$response); $j < scalar(@$keysPrecision); $j++) {
			$sim[$i][$j] = 1;
		}
		#$denrec += SIMEntityBased($kChain->[$i], $kChain->[$i]);
	}

	my @out;

	# Munkres algorithm
	assign(\@sim, \@out);

	my $numerador = 0;
	my $denpre = 0;
	my $denrec = 0;

	foreach my $c (@$response) {
		$denpre++ if (defined($c) && scalar(@$c) > 0);
	}
	foreach my $c (@$keysPrecision) {
		$denrec++ if (defined($c) && scalar(@$c) > 0);
	}

	for (my $i = 0; $i < scalar(@$keysPrecision); $i++) {
		$numerador += 1 - $sim[$i][$out[$i]];
	}

	ShowRPF($numerador, $denrec, $numerador, $denpre) if ($VERBOSE);

	return ($numerador, $denrec, $numerador, $denpre);
}


sub SIMMentionBasedCD
{
	my ($goldChain, $systemChain, $maps, $rc, $kIndex) = @_;
	my @common;
	foreach my $m1 (@$systemChain) {
		foreach my $m2 (@$goldChain) {
			if ($m1==$m2) {
				push(@common, $m1);
				last;
			}
		}
	}
	my $matchWeight = getSubResponseChainWeight(\@common, $rc, $maps);
	if( scalar(@$goldChain)==1 && scalar(@$systemChain)==1 && $goldChain->[0]==$systemChain->[0] ) {
		$matchWeight += $non_anaphor_weight;
	}
	return $matchWeight;	
}


sub printChains
{
	my ($chains) = @_;
	foreach my $chain (@$chains) {
		foreach my $m (@$chain) {
			print "$m, ";
		}
		print "\n";
	}
	print "=====\n";	
}

sub CEAFMCD
{
	my ($keys, $keysPrecision, $response, $responseRecall, $rc) = @_;
	my @simPrecision;
	my @simRecall;

	my %goldMapsRec;
	foreach my $kEntity (@$keys) {
		next if (!defined($kEntity));
		formGoldGraphs($kEntity, $rc, \%goldMapsRec);
	}	
	
	my %goldMapsPrec;
	foreach my $kEntity (@$keysPrecision) {
		next if (!defined($kEntity));
		formGoldGraphs($kEntity, $rc, \%goldMapsPrec);
	}
	
	my $kIndex = Indexa($keys);
	my $kIndexPrec = Indexa($keysPrecision);

	# similarity for precision
	for (my $i = 0; $i < scalar(@$keysPrecision); $i++) {
		for (my $j = 0; $j < scalar(@$response); $j++) {
			if (defined($keysPrecision->[$i]) && defined($response->[$j])) {
	#			my $sim = SIMMentionBasedCD($keysPrecision->[$i], $response->[$j], \%goldMapsPrec, $rc, $kIndexPrec);
#				print "SIM PREC [$i][$j] : $sim \n";
				$simPrecision[$i][$j] = 1 - SIMMentionBasedCD($keysPrecision->[$i], $response->[$j], \%goldMapsPrec, $rc, $kIndexPrec);
			}
			else {
				$simPrecision[$i][$j] = 1;
			}
		}
		# fill the matrix when response chains are less than key ones
		for (my $j = scalar(@$response); $j < scalar(@$keysPrecision); $j++) {
			$simPrecision[$i][$j] = 1;
		}
	}	

	# similarity for recall
	for (my $i = 0; $i < scalar(@$keys); $i++) {
		for (my $j = 0; $j < scalar(@$responseRecall); $j++) {
			if (defined($keys->[$i]) && defined($responseRecall->[$j])) {
	#			my $sim = SIMMentionBasedCD($keys->[$i], $responseRecall->[$j], \%goldMapsRec, $rc, $kIndex);
         #                       print "SIM REC [$i][$j] : $sim \n";
				$simRecall[$i][$j] = 1 - SIMMentionBasedCD($keys->[$i], $responseRecall->[$j], \%goldMapsRec, $rc, $kIndex);
			}
			else {
				$simRecall[$i][$j] = 1;								
			}
		}
		# fill the matrix when response chains are less than key ones
		for (my $j = scalar(@$responseRecall); $j < scalar(@$keys); $j++) {
			$simRecall[$i][$j] = 1;
		}
	}

	my @outPrecision;
	my @outRecall;

	# Munkres algorithm

	assign(\@simPrecision, \@outPrecision);
	assign(\@simRecall, \@outRecall);	

	my $numeradorpre = 0;
	my $numeradorrec = 0;
	my $denpre = 0;
	my $denrec = 0;

	foreach my $c (@$response) {
		my $subChains = splitSystemChain($c, $kIndex);
		my $systemWeight = 0;
		if ($Sj eq 'a') {
			foreach my $subChain (@$subChains)	{
				$systemWeight += getSubResponseChainWeight($subChain, $rc, \%goldMapsPrec);
			}
			$systemWeight += getLinkSubclustersWeight($c, $subChains, $rc);	
#			$denpre += scalar(@$c) if (defined($c));
			$denpre += $systemWeight;
		} else {
			$denpre += getGoldChainWeight($c, $rc);
		}
		$denpre += $non_anaphor_weight if (scalar(@$c)==1);
	}
	foreach my $c (@$keys) {
		$denrec += getGoldChainWeight($c, $rc);
		$denrec += $non_anaphor_weight if (scalar(@$c)==1);
#		$denrec += scalar(@$c) if (defined($c));
	}

	for (my $i = 0; $i < scalar(@$keysPrecision); $i++) {
	#	my $sim = 1 - $simPrecision[$i][$outPrecision[$i]];
	#	print "Sim Prec[$i][$outPrecision[$i]] : $sim \n";
		$numeradorpre += 1 - $simPrecision[$i][$outPrecision[$i]];
	}

	for (my $i = 0; $i < scalar(@$keys); $i++) {
	#	my $sim = 1 - $simRecall[$i][$outRecall[$i]];
	#	print "Sim Rec[$i][$outRecall[$i]] : $sim \n";
		$numeradorrec += 1 - $simRecall[$i][$outRecall[$i]];
	}

	ShowRPF($numeradorrec, $denrec, $numeradorpre, $denpre) if ($VERBOSE);
	$kIndexPrec = undef;
	$kIndex = undef;
	%goldMapsRec = undef;
	%goldMapsPrec = undef;
	return ($numeradorrec, $denrec, $numeradorpre, $denpre);
}

sub CEAFM
{
	my ($keys, $keysWithSingletonsFromResponse, $response, $responseWithoutMentionsNotInKey, $type) = @_;

	my @simPrecision;
	my @simRecall;

	# similarity for precision
	for (my $i = 0; $i < scalar(@$keysWithSingletonsFromResponse); $i++) {
		for (my $j = 0; $j < scalar(@$response); $j++) {
			if (defined($keysWithSingletonsFromResponse->[$i]) && defined($response->[$j])) {
				$simPrecision[$i][$j] = 1 - SIMMentionBased($keysWithSingletonsFromResponse->[$i], $response->[$j]);
			}
			else {
				$simPrecision[$i][$j] = 1;
			}
		}

		# fill the matrix when response chains are less than key ones
		for (my $j = scalar(@$response); $j < scalar(@$keysWithSingletonsFromResponse); $j++) {
			$simPrecision[$i][$j] = 1;
		}
	}	

	# similarity for recall
	for (my $i = 0; $i < scalar(@$keys); $i++) {
		for (my $j = 0; $j < scalar(@$responseWithoutMentionsNotInKey); $j++) {
			if (defined($keys->[$i]) && defined($responseWithoutMentionsNotInKey->[$j])) {
				$simRecall[$i][$j] = 1 - SIMMentionBased($keys->[$i], $responseWithoutMentionsNotInKey->[$j]);
			}
			else {
				$simRecall[$i][$j] = 1;								
			}
		}

		# fill the matrix when response chains are less than key ones
		for (my $j = scalar(@$responseWithoutMentionsNotInKey); $j < scalar(@$keys); $j++) {
			$simRecall[$i][$j] = 1;
		}
	}

	my @outPrecision;
	my @outRecall;

	# Munkres algorithm

	assign(\@simPrecision, \@outPrecision);
	assign(\@simRecall, \@outRecall);	

	my $numeradorpre = 0;
	my $numeradorrec = 0;
	my $denpre = 0;
	my $denrec = 0;

	foreach my $c (@$response) {
		$denpre += scalar(@$c) if (defined($c));
	}
	foreach my $c (@$keys) {
		$denrec += scalar(@$c) if (defined($c));
	}

	for (my $i = 0; $i < scalar(@$keysWithSingletonsFromResponse); $i++) {
		$numeradorpre += 1 - $simPrecision[$i][$outPrecision[$i]];
	}

	for (my $i = 0; $i < scalar(@$keys); $i++) {
		$numeradorrec += 1 - $simRecall[$i][$outRecall[$i]];
	}

	ShowRPF($numeradorrec, $denrec, $numeradorpre, $denpre) if ($VERBOSE);

	return ($numeradorrec, $denrec, $numeradorpre, $denpre);
}

sub BLANC
{
	my ($keys, $response) = @_;
	my ($ga, $gr, $ba, $br) = (0, 0, 0, 0);

	# Each possible pair of mentions
	my $kIndex = Indexa($keys);
	my $rIndex = Indexa($response);

	my @ri = keys(%{$rIndex});
	for (my $i = 0; $i < @ri - 1; $i++) {
		my $m_i = $ri[$i];
		for (my $j = $i+1; $j < @ri; $j++) {
			my $m_j = $ri[$j];
			# atraction
			if ($rIndex->{$m_i} == $rIndex->{$m_j}) {
				if ($kIndex->{$m_i} == $kIndex->{$m_j}) {
					$ga++;
				}
				else {
					$ba++;
				}
			}
			# repulsion
			else {
				if ($kIndex->{$m_i} != $kIndex->{$m_j}) {
					$gr++;
				}
				else {
					$br++;
				}
			}
		}
	}

	if ($VERBOSE) {
		print "Coreference links: ";
		ShowRPF($ga, ($ga + $br), $ga, ($ga + $ba));
		print "Non-coreference links: ";
		ShowRPF($gr, ($gr + $ba), $gr, ($gr + $br));
		print "Mean: ";

		my $Pa = ($ga + $ba) ? $ga / ($ga + $ba) : 0;
		my $Pr = ($gr + $br) ? $gr / ($gr + $br) : 0;
		my $Ra = ($ga + $br) ? $ga / ($ga + $br) : -1;
		my $Rr = ($gr + $ba) ? $gr / ($gr + $ba) : -1;

		my $R = ($Ra + $Rr) / 2;
		my $P = ($Pa + $Pr) / 2;
		my $Fa = ($Pa + $Ra) ? 2 * $Pa * $Ra / ($Pa + $Ra) : 0;
		my $Fr = ($Pr + $Rr) ? 2 * $Pr * $Rr / ($Pr + $Rr) : 0;
		my $f1 = ($Fa + $Fr) / 2;

		if ($Ra == -1 && $Rr == -1) {
			$R = 0;
			$P = 0;
			$f1 = 0;
		}
		elsif ($Ra == -1) {
			$R = $Rr;
			$P = $Pr;
			$f1 = $Fr;
		}
		elsif ($Rr == -1) {
			$R = $Ra;
			$P = $Pa;
			$f1 = $Fa;
		}
		ShowRPF($R, 1, $P, 1, $f1);
	}

	return ($ga, ($ga + $br), $ga, ($ga + $ba), $gr, ($gr + $ba), $gr, ($gr + $br));
}


sub ShowRPF
{
	my ($numrec, $denrec, $numpre, $denpre, $f1) = @_;

	my $precisio = $denpre ? $numpre / $denpre : 0;
	my $recall = $denrec ? $numrec / $denrec : 0;
	if (!defined($f1)) {
		$f1 = 0;
		if ($recall + $precisio) {
			$f1 = 2 * $precisio * $recall / ($precisio + $recall);
		}
	}

	print "Recall: ($numrec / $denrec) " . int($recall*10000)/100 . '%';
	print "\tPrecision: ($numpre / $denpre) " . int($precisio*10000)/100 . '%';
	print "\tF1: " . int($f1*10000)/100 . "\%\n";
  print "--------------------------------------------------------------------------\n";
}

1;
