<?php
/* Copyright (C) 2024 MDW
 *
 * A class containing a diff implementation
 *
 * Created by Stephen Morley - http://stephenmorley.org/ - and released under the
 * terms of the CC0 1.0 Universal legal code:
 *
 * http://creativecommons.org/publicdomain/zero/1.0/legalcode
 */

/**
 * A class containing functions for computing diffs and formatting the output.
 * We can compare 2 strings or 2 files (as one string or line by line)
 */
class Diff
{
    // Markers stored at index 1 of every diff entry.
    const UNMODIFIED = 0;
    const DELETED = 1;
    const INSERTED = 2;

    /**
     * Returns the diff for two strings. The return value is an array, each of
     * whose values is an array containing two values: a line (or character, if
     * $compareCharacters is true), and one of the constants DIFF::UNMODIFIED (the
     * line or character is in both strings), DIFF::DELETED (the line or character
     * is only in the first string), and DIFF::INSERTED (the line or character is
     * only in the second string).
     *
     * Items are compared with strict string identity, so numeric-looking lines
     * such as "1e3" and "1000" are reported as different. Character mode works
     * on bytes (strlen() and string offsets), not on multibyte characters.
     *
     * @param   string  $string1            First string
     * @param   string  $string2            Second string
     * @param   boolean $compareCharacters  true to compare characters, and false to compare lines; this optional parameter defaults to false
     * @return  array<array{0:string,1:int<0,2>}>  Array of diff
     */
    public static function compare($string1, $string2, $compareCharacters = false)
    {
        // initialise the sequences and comparison start and end positions
        $start = 0;
        if ($compareCharacters) {
            $sequence1 = $string1;
            $sequence2 = $string2;
            $end1 = strlen($string1) - 1;
            $end2 = strlen($string2) - 1;
        } else {
            // \R matches any line-break sequence (\n, \r\n, \r, ...)
            $sequence1 = preg_split('/\R/', $string1);
            $sequence2 = preg_split('/\R/', $string2);
            $end1 = count($sequence1) - 1;
            $end2 = count($sequence2) - 1;
        }

        // skip any common prefix (strict comparison: "1e3" must not equal "1000")
        while ($start <= $end1 && $start <= $end2 && $sequence1[$start] === $sequence2[$start]) {
            $start++;
        }

        // skip any common suffix
        while ($end1 >= $start && $end2 >= $start && $sequence1[$end1] === $sequence2[$end2]) {
            $end1--;
            $end2--;
        }

        // compute the table of longest common subsequence lengths for the middle part
        $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);

        // generate the partial diff (returned in reverse order)
        $partialDiff = self::generatePartialDiff($table, $sequence1, $sequence2, $start);

        // generate the full diff: common prefix, middle part, common suffix
        $diff = array();
        for ($index = 0; $index < $start; $index++) {
            $diff[] = array($sequence1[$index], self::UNMODIFIED);
        }
        foreach (array_reverse($partialDiff) as $entry) {
            $diff[] = $entry;
        }
        $length1 = ($compareCharacters ? strlen($sequence1) : count($sequence1));
        for ($index = $end1 + 1; $index < $length1; $index++) {
            $diff[] = array($sequence1[$index], self::UNMODIFIED);
        }

        // return the diff
        return $diff;
    }

    /**
     * Returns the diff for two files. The contents returned by
     * file_get_contents() are handed to compare() as they are; no check for a
     * failed read is made here.
     *
     * @param   string  $file1              Path to the first file
     * @param   string  $file2              Path to the second file
     * @param   boolean $compareCharacters  true to compare characters, and false to compare lines; this optional parameter defaults to false
     * @return  array<array{0:string,1:int<0,2>}>  Array of diff
     */
    public static function compareFiles(
        $file1,
        $file2,
        $compareCharacters = false
    ) {
        // return the diff of the files
        return self::compare(
            file_get_contents($file1),
            file_get_contents($file2),
            $compareCharacters
        );
    }

    /**
     * Returns the table of longest common subsequence lengths for the specified
     * sequences. Row i / column j holds the LCS length of the first i items of
     * sequence 1 and the first j items of sequence 2, both counted from $start.
     *
     * @param   string|string[] $sequence1  the first sequence
     * @param   string|string[] $sequence2  the second sequence
     * @param   int             $start      the starting index
     * @param   int             $end1       the ending index for the first sequence
     * @param   int             $end2       the ending index for the second sequence
     * @return  array<array<int>>           table of LCS lengths
     */
    private static function computeTable($sequence1, $sequence2, $start, $end1, $end2)
    {
        // determine the lengths to be compared
        $length1 = $end1 - $start + 1;
        $length2 = $end2 - $start + 1;

        // initialise the table: row 0 is all zeros
        $table = array(array_fill(0, $length2 + 1, 0));

        // loop over the rows
        for ($index1 = 1; $index1 <= $length1; $index1++) {
            // create the new row; column 0 is always zero
            $table[$index1] = array(0);

            // loop over the columns
            for ($index2 = 1; $index2 <= $length2; $index2++) {
                // store the longest common subsequence length
                if ($sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]) {
                    $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
                } else {
                    $table[$index1][$index2] = max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
                }
            }
        }

        // return the table
        return $table;
    }

    /**
     * Returns the partial diff for the specified sequences, in reverse order.
     * The table is walked back from its bottom-right corner to the origin.
     *
     * @param   array<array<int>>   $table      the table returned by the computeTable function
     * @param   string|string[]     $sequence1  the first sequence
     * @param   string|string[]     $sequence2  the second sequence
     * @param   int                 $start      the starting index
     * @return  array<array{0:string,1:int<0,2>}>  array of diff, last entry first
     */
    private static function generatePartialDiff($table, $sequence1, $sequence2, $start)
    {
        // initialise the diff
        $diff = array();

        // initialise the indices
        $index1 = count($table) - 1;
        $index2 = count($table[0]) - 1;

        // loop until there are no items remaining in either sequence
        while ($index1 > 0 || $index2 > 0) {
            // check what has happened to the items at these indices;
            // the comparison must match the one used in computeTable()
            if ($index1 > 0 && $index2 > 0
                && $sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]
            ) {
                // item present in both sequences
                $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
                $index1--;
                $index2--;
            } elseif ($index2 > 0
                && $table[$index1][$index2] == $table[$index1][$index2 - 1]
            ) {
                // item only present in the second sequence
                $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
                $index2--;
            } else {
                // item only present in the first sequence
                $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
                $index1--;
            }
        }

        // return the diff
        return $diff;
    }

    /**
     * Returns a diff as a string, where unmodified lines are prefixed by '  ',
     * deletions are prefixed by '- ', and insertions are prefixed by '+ '.
     * Every prefix is two characters wide so that the content stays aligned.
     * Entries whose type is none of the three constants get no text, only the
     * separator.
     *
     * @param   array<array{0:string,1:int<0,2>}>  $diff       the diff array
     * @param   string                             $separator  the separator between lines; this optional parameter defaults to "\n"
     * @return  string                                         String
     */
    public static function toString($diff, $separator = "\n")
    {
        // initialise the string
        $string = '';

        // loop over the lines in the diff
        foreach ($diff as $line) {
            // extend the string with the line
            switch ($line[1]) {
                case self::UNMODIFIED:
                    $string .= '  '.$line[0];
                    break;
                case self::DELETED:
                    $string .= '- '.$line[0];
                    break;
                case self::INSERTED:
                    $string .= '+ '.$line[0];
                    break;
            }

            // extend the string with the separator
            $string .= $separator;
        }

        // return the string
        return $string;
    }

    /**
     * Returns a diff as an HTML string, where unmodified lines are contained
     * within 'span' elements, deletions are contained within 'del' elements, and
     * insertions are contained within 'ins' elements. The line content is passed
     * through dol_escape_htmltag() before being emitted.
     *
     * @param   array<array{0:string,1:int<0,2>}>  $diff       the diff array
     * @param   string                             $separator  the separator between lines; this optional parameter defaults to '<br>'
     * @return  string                                         HTML string
     */
    public static function toHTML($diff, $separator = '<br>')
    {
        // initialise the HTML
        $html = '';

        // loop over the lines in the diff; an entry with an unrecognised type
        // reuses the element of the previous entry ('unknown' for the first one)
        $element = 'unknown';
        foreach ($diff as $line) {
            // extend the HTML with the line
            switch ($line[1]) {
                case self::UNMODIFIED:
                    $element = 'span';
                    break;
                case self::DELETED:
                    $element = 'del';
                    break;
                case self::INSERTED:
                    $element = 'ins';
                    break;
            }
            $html .= '<'.$element.'>'.dol_escape_htmltag($line[0]).'</'.$element.'>';

            // extend the HTML with the separator
            $html .= $separator;
        }

        // return the HTML
        return $html;
    }

    /**
     * Returns a diff as an HTML table with two columns: the first sequence on
     * the left and the second one on the right. A run of deleted lines that is
     * directly followed by a run of inserted lines shares a single row.
     *
     * @param   array<array{0:string,1:int<0,2>}>  $diff         the diff array
     * @param   string                             $indentation  indentation to add to every line of the generated HTML; this optional parameter defaults to ''
     * @param   string                             $separator    the separator between lines; this optional parameter defaults to '<br>'
     * @return  string                                           HTML string
     */
    public static function toTable($diff, $indentation = '', $separator = '<br>')
    {
        // initialise the HTML
        $html = $indentation."<table class=\"diff\">\n";

        $rightCell = $leftCell = '';

        // loop over the lines in the diff; getCellContent() advances $index
        $index = 0;
        $nbdiff = count($diff);
        while ($index < $nbdiff) {
            // determine the line type
            switch ($diff[$index][1]) {
                // display the content on the left and right
                case self::UNMODIFIED:
                    $leftCell = self::getCellContent($diff, $indentation, $separator, $index, self::UNMODIFIED);
                    $rightCell = $leftCell;
                    break;

                // display the deleted on the left and inserted content on the right
                case self::DELETED:
                    $leftCell = self::getCellContent($diff, $indentation, $separator, $index, self::DELETED);
                    $rightCell = self::getCellContent($diff, $indentation, $separator, $index, self::INSERTED);
                    break;

                // display the inserted content on the right
                case self::INSERTED:
                    $leftCell = '';
                    $rightCell = self::getCellContent($diff, $indentation, $separator, $index, self::INSERTED);
                    break;

                // unknown type: skip the entry, otherwise $index would never advance
                default:
                    $index++;
                    continue 2;
            }

            // pick the CSS class suffix of each cell
            if ($leftCell === $rightCell) {
                $leftClass = 'Unmodified';
                $rightClass = 'Unmodified';
            } else {
                $leftClass = ($leftCell === '' ? 'Blank' : 'Deleted');
                $rightClass = ($rightCell === '' ? 'Blank' : 'Inserted');
            }

            // extend the HTML with the new row
            $html .= $indentation."  <tr>\n"
                .$indentation.'    <td class="diff'.$leftClass.'">'.$leftCell."</td>\n"
                .$indentation.'    <td class="diff'.$rightClass.'">'.$rightCell."</td>\n"
                .$indentation."  </tr>\n";
        }

        // return the HTML
        return $html.$indentation."</table>\n";
    }

    /**
     * Returns the content of the cell, for use in the toTable function. Starting
     * at $index, every consecutive entry of the given type is wrapped in a
     * 'span' element (content escaped with htmlspecialchars()) and followed by
     * the separator. On return $index points to the first entry not consumed.
     *
     * @param   array<array{0:string,1:int<0,2>}>  $diff         the diff array
     * @param   string                             $indentation  indentation to add to every line of the generated HTML (not used by this method)
     * @param   string                             $separator    the separator between lines
     * @param   int                                $index        the current index, passed by reference
     * @param   int<0,2>                           $type         the type of line
     * @return  string                                           HTML string
     */
    private static function getCellContent($diff, $indentation, $separator, &$index, $type)
    {
        // initialise the HTML
        $html = '';

        // loop over the matching lines, adding them to the HTML
        $nbdiff = count($diff);
        while ($index < $nbdiff && $diff[$index][1] == $type) {
            $html .= '<span>'.htmlspecialchars($diff[$index][0]).'</span>'.$separator;
            $index++;
        }

        // return the HTML
        return $html;
    }
}