# RCD.pl = an implementation of Tesar & Smolensky's (1996) Constraint Demotion
#   algorithm.  This is the simplest version, presented pp. 14-22 of the "short version"
# (The actual approach uses the "Comparative tableau" format of Prince 2000, 2002)

print "Enter name of input file to rank: ";
$inputfile = <STDIN>;
chomp($inputfile);

open (INPUT, $inputfile) or die "Warning! Can't open input file: $!\n";
$line_number = 0;

# First read the constraint names
$line = <INPUT>;
$line_number++;
chomp($line);
(undef, undef, undef, @constraintnames) = split(/\t/, $line);


# And then read the "short" constraint names
$line = <INPUT>;
$line_number++;
chomp($line);
(undef, undef, undef, @shortconstraintnames) = split(/\t/, $line);

if (scalar (@constraintnames) != scalar (@shortconstraintnames)) {
    print "Warning! Unequal number of full and short constraint names\n\t(Perhaps there is a formatting error in the file?)\n";
}
if ($line eq undef) {
    print "Too few lines in file to be a valid input file ($line_number).  Goodbye.\n";        
}
# Let's store the number of constraints, for later reference
$number_of_constraints = $#constraintnames;
# Now read in the constraint violations
while ($line = <INPUT>) {
    $line_number++;
    chomp($line);
    ($UR, $candidate, $winner, @candidate_violations) = split( /\t/, $line);
    if ($UR ne "") {
	$number_of_inputs++;
	$current_input++;  # redundant but easier to read
	# Remember this input
	$inputs[$current_input] = $UR;    
	$current_candidate = 1;
	$number_of_candidates[$current_input]++;		
    } else {
        $current_candidate++;
	$number_of_candidates[$current_input]++;		
    }
    if ($winner > 0) {
        if ($winners[$current_input] eq undef) {
	    $winners[$current_input] = $current_candidate;	    	    
	} else {
	    print "Warning: two winners listed for input $current_input ($inputs[$current_input])\n";	    
	    exit;	    	    
	}
    }
    
    if (scalar (@candidate_violations) > scalar (@constraintnames) ) {
        print "Warning! Line $line_number of file has too many constraint violations.\nPlease check the format of your input file, and try again.\n";        
	exit;
    }
    
    # Record the current candidate and its violations
    $candidates[$current_input][$current_candidate] = $candidate;    
    for (my $v = 0; $v <= $#candidate_violations; $v++) {
	$violations[$current_input][$current_candidate][$v] = $candidate_violations[$v];
    }    
}

# Now we are done reading in the candidates and violations
# As a check, let's print them out.
# print_tableau();

# And, let's convert the original data into comparative tableaus (Prince 2000 et seq)
# In order to do this, we convert the rows into mark-data pairs (mdp's)
# Each input form has a MDP for each loser
for (my $i = 1; $i <= $number_of_inputs; $i++) {
    $winner = $winners[$i];    
    print "Constructing mdp's for input $i: /$inputs[$i], winning output [$candidates[$i][$winner]]\n";        
     for (my $cand = 1; $cand <= $number_of_candidates[$i]; $cand++ ) {
	next if ($cand == $winner);
	$number_of_mdps++;
	print "\tMDP: $candidates[$i][$winners[$i]] ~ $candidates[$i][$cand]\n";
	$mdp_winners[$number_of_mdps] = $candidates[$i][$winners[$i]];	
	$mdp_losers[$number_of_mdps] = $candidates[$i][$cand];
	$mdp_inputs[$number_of_mdps] = $i;		
	for (my $con = 0; $con <= $number_of_constraints; $con++) {
	    # For each constraint, check whether it favors the winner, the loser, or neither
	    if ($violations[$i][$winner][$con] > $violations[$i][$cand][$con]) {
		# This one favors the winner
		$mdps[$number_of_mdps][$con] = "L";		
	    } elsif ($violations[$i][$winner][$con] < $violations[$i][$cand][$con]) {
		$mdps[$number_of_mdps][$con] = "W";
	    } # if neither, then blank (no value)
	    print "\t\t$shortconstraintnames[$con]:\t$mdps[$number_of_mdps][$con]\n";	    
	}
    }
}

# Now start ranking.
# At first, we start with all constraints unranked, in the same stratum,
# and all mdp's are unexplained.
# Strata are numbered from 0 (highest) to, in theory, C = number of constraints (lowest)
for (my $con = 0; $con <= $number_of_constraints; $con++) {
    $stratum[$con] = 0;        
}
$current_stratum = 0;
$number_explained = 0;

# Now, it's time to rank.  Since the procedure is recursive, it makes sense
#    to put it into a subroutine
$successful_ranking = apply_rcd();
if ($successful_ranking) {
    print "\n************************************************\n";
    print "       Constraint ranking";
    for (my $s = 0; $s <= $current_stratum; $s++) {
	print "\nStratum ". ($s + 1) ."\n";    
	for (my $con = 0; $con <= $number_of_constraints; $con++) {
	    if ($stratum[$con] == $s) {
		print "\t$constraintnames[$con]\n";	    
	    }
	}    
    }
    print "************************************************\n";
} else {
    print "****************************************************\nIt appears that there is no ranking of the given\nconstraints that will generate the observed data.\n****************************************************\n";	
}

sub apply_rcd {
    # The strategy is to place in the current stratum all constraints that prefer no losers
    # If a constraint ever prefers a loser for an active mdp, it can't be in the current stratum
    # So, go through and demote all constraints that ever prefer a loser
    $current_stratum++;
    $previous_number_explained = $number_explained;    
    
    print "\n************ Constructing stratum $current_stratum ************\n";        
    CHECK_CONSTRAINT:
    for (my $con = 0; $con <= $number_of_constraints; $con++) {
        # Obviously if a constraint has already been placed in a higher stratum, leave it alone
        next if ($stratum[$con] < ($current_stratum-1));        
	
	# scan the mdps, seeing if this constraint is ever an L
	for (my $p = 1; $p <= $number_of_mdps; $p++) {
	    next if $explained[$p];	    	    
	    if ($mdps[$p][$con] eq "L") {
		# This constraint favors a loser; demote it.
		print "$shortconstraintnames[$con] incorrectly favors $mdp_losers[$p] over $mdp_winners[$p] for input /$inputs[$mdp_inputs[$p]]/.\n\t***Demoting $shortconstraintnames[$con] to stratum ".($current_stratum+1)."\n";
		$stratum[$con] = $current_stratum;
		# Don't need to keep looking; favoring 1 loser is enough
		next CHECK_CONSTRAINT;
	    }
	}
    }
    
    # Now we need to check how far this got us; in particular, we need to see which mdps
    #  are now explained, and which still need work
#    print "\nNow checking which mdps are explained\n";        
    for (my $mdp = 1; $mdp <= $number_of_mdps; $mdp++) {
	# An mdp is unexplained if it has an L that lacks a higher-ranked W
	# This can be computed by "linearizing" the MDP, and checking to make sure there
	#  are no L's without higher ranked W's
	#  
	next if ($explained[$mdp]);
	$mdp_row = undef;		
	for (my $s = 0; $s <= $current_stratum; $s++) {
	    for (my $con = 0; $con <= $number_of_constraints; $con++) {
		if ($stratum[$con] eq $s) {
		    $mdp_row .= $mdps[$mdp][$con];
		}
	    }
	    # We'll place a marker between strata, to check for dominance
	    $mdp_row .= ">";	    
	}
	# print "MDP $mdp: $mdp_row\n";	
	
	# Now if the mdp row contains an L without an W...>, it's still not explained
	if ($mdp_row =~ /^([^W]*>)*W*L/) {
	  print "\tMDP $mdp ($inputs[$mdp_inputs[$mdp]] -> $mdp_winners[$mdp], not $mdp_losers[$mdp]) still unexplained\n";
	  # don't modify value of $explained[$mdp] yet
	} else {
	    $explained[$mdp] = 1;
	    $number_explained++;	    
	}
    }
    
    # If there are still unexplained mdp's, keep going
    if ($number_explained == $previous_number_explained) {
	# We're getting nowhere
	return 0;	      
    } elsif ($number_explained < $number_of_mdps) {
	print "\n". ($number_of_mdps - $number_explained) . " MDP(s) still left to explain.\n";		
	apply_rcd();	
    } else {
	print "\nAll MDP's successfully explained.\n";	
	return 1;        
    }
}


sub print_tableau {
    print "No.\tInput\tCand No.\tWinner\tCandidate";
    for (my $con = 0; $con <= $number_of_constraints; $con++) {
	print "\t$shortconstraintnames[$con]"
    }
    print "\n";

    for (my $i = 1; $i <= $number_of_inputs; $i++) {
	for (my $cand = 1; $cand <= $number_of_candidates[$i]; $cand++) {
	    print "$i\t$inputs[$i]\t$cand\t";	
	    if ($winners[$current_input] == $cand) {
		print "->";	    	    
	    }
	    
	    print "\t$candidates[$i][$cand]";	
	    for (my $con = 0; $con <= $number_of_constraints; $con++) {
		    print "\t$violations[$i][$cand][$con]";				
	    }
	    print "\n";	    
	}
    }
    print "\n";    
}
