first commit

This commit is contained in:
2025-06-17 11:53:18 +02:00
commit 9f0f7ba12b
8804 changed files with 1369176 additions and 0 deletions

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018-2021 Jack Cherng <jfcherng@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,4 @@
#!/usr/bin/env php
<?php
// Launcher script: exposes this script's directory as RMT_ROOT_DIR and then
// hands control to the liip/rmt command script installed under vendor/.
define('RMT_ROOT_DIR', __DIR__);
// Forward slashes work on every platform (backslashes are only a directory
// separator on Windows), and anchoring at __DIR__ makes the lookup
// independent of the current working directory and include_path.
require __DIR__ . '/vendor/liip/rmt/command.php';

View File

@ -0,0 +1,224 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Utility;
/**
 * Make your PHP command-line application colorful.
 *
 * Text is decorated with ANSI escape sequences of the form "\033[<codes>m".
 *
 * @see https://en.wikipedia.org/wiki/ANSI_escape_code
 *
 * @author Jack Cherng <jfcherng@gmail.com>
 */
final class CliColor
{
    const COLOR_BEGIN = "\033[";
    const COLOR_END = 'm';
    const COLOR_BEGIN_REGEX = "\033\\[";
    const COLOR_END_REGEX = 'm';

    /**
     * @var array the color map (name => code, or alias => name)
     */
    private static $colorMap = [
        // background
        'b_black' => '40',
        'b_blue' => '44',
        'b_cyan' => '46',
        'b_green' => '42',
        'b_light_gray' => '47',
        'b_magenta' => '45',
        'b_red' => '41',
        'b_yellow' => '43',
        // foreground
        'f_black' => '30',
        'f_blue' => '34',
        'f_brown' => '33',
        'f_cyan' => '36',
        'f_green' => '32',
        'f_light_gray' => '37',
        'f_normal' => '39',
        'f_purple' => '35',
        'f_red' => '31',
        // compound
        'f_dark_gray' => '1;30',
        'f_light_blue' => '1;34',
        'f_light_cyan' => '1;36',
        'f_light_green' => '1;32',
        'f_light_purple' => '1;35',
        'f_light_red' => '1;31',
        'f_white' => '1;37',
        'f_yellow' => '1;33',
        // special
        'blink' => '5',
        'bold' => '1',
        'dim' => '2',
        'hidden' => '8',
        'reset' => '0',
        'reverse' => '7',
        'underline' => '4',
        // alias
        'b' => 'bold',
        'blk' => 'blink',
        'h' => 'hidden',
        'rev' => 'reverse',
        'rst' => 'reset',
        'u' => 'underline',
        // regex for color codes
        'regex_any' => '(?:[0-9]++;?)++',
    ];

    /**
     * Get the color map.
     *
     * @return array the color map
     */
    public static function getColorMap(): array
    {
        return self::$colorMap;
    }

    /**
     * Make a string colorful.
     *
     * @param string          $str    the string
     * @param string|string[] $colors the colors (a comma-separated string or an array of names)
     * @param bool            $reset  reset color at the end of the string?
     *
     * @return string the colored string
     */
    public static function color(string $str, $colors = [], bool $reset = true): string
    {
        // always convert $colors into an array
        if (\is_string($colors)) {
            $colors = \explode(',', $colors);
        }

        $colored = self::getColorCode($colors) . $str;

        if ($reset) {
            $colored .= self::getColorCode(['reset']);
        }

        return self::simplifyColoredString($colored);
    }

    /**
     * Remove all colors from a string.
     *
     * @param string $str the string
     *
     * @return string the string without colors
     */
    public static function noColor(string $str): string
    {
        return self::replace(
            '~' . self::getColorCode(['regex_any'], true) . '~S',
            '',
            $str
        );
    }

    /**
     * Get the color code from given colors.
     *
     * @param array $colors      the colors
     * @param bool  $returnRegex return as an regex segment
     *
     * @return string the color code, or an empty string if no known color is given
     */
    private static function getColorCode(array $colors, bool $returnRegex = false): string
    {
        $colors = self::sanitizeColors($colors);

        if (empty($colors)) {
            return '';
        }

        // convert color into color code
        $colorCodes = \array_map(
            function (string $color): string {
                // resolve color alias: keep looking up until the value is no longer a key
                while (isset(self::$colorMap[$color])) {
                    $color = self::$colorMap[$color];
                }

                return $color;
            },
            $colors
        );

        $closures = $returnRegex
            ? [self::COLOR_BEGIN_REGEX, self::COLOR_END_REGEX]
            : [self::COLOR_BEGIN, self::COLOR_END];

        return $closures[0] . \implode(';', $colorCodes) . $closures[1];
    }

    /**
     * Sanitize colors: trim the names, drop unknown ones and remove duplicates.
     *
     * @param array $colors the colors
     *
     * @return array the sanitized colors
     */
    private static function sanitizeColors(array $colors): array
    {
        return self::listUnique(\array_filter(
            \array_map('trim', $colors),
            function (string $color): bool {
                return isset(self::$colorMap[$color]);
            }
        ));
    }

    /**
     * Simplify the colored string.
     *
     * @param string $str the colored string
     *
     * @return string the simplified colored string
     */
    private static function simplifyColoredString(string $str): string
    {
        // replace multiple consecutive resets with a single reset
        $str = self::replace(
            '~(' . self::getColorCode(['reset'], true) . '){2,}~S',
            '$1',
            $str
        );

        // remove colors for an empty string
        $str = self::replace(
            (
                '~' .
                '(' . self::getColorCode(['regex_any'], true) . ')' .
                '(' . self::getColorCode(['reset'], true) . ')' .
                '~S'
            ),
            '$2',
            $str
        );

        return $str;
    }

    /**
     * A preg_replace() wrapper which always returns a string.
     *
     * The patterns used by this class consist of ASCII bytes only, so they are
     * deliberately run without the "u" modifier: with it, preg_replace() returns
     * null for any subject that is not valid UTF-8. Should preg_replace() still
     * fail for another reason, the subject is returned unchanged.
     *
     * @param string $pattern     the regex pattern
     * @param string $replacement the replacement
     * @param string $subject     the subject
     *
     * @return string the replaced string
     */
    private static function replace(string $pattern, string $replacement, string $subject): string
    {
        return \preg_replace($pattern, $replacement, $subject) ?? $subject;
    }

    /**
     * The fastest array_unique() implementation for a non-associative array AFAIK.
     *
     * @see https://stackoverflow.com/questions/8321620/array-unique-vs-array-flip
     *
     * @param array $array the array
     *
     * @return array the unique values
     */
    private static function listUnique(array $array): array
    {
        return \array_keys(\array_count_values($array));
    }
}

View File

@ -0,0 +1,43 @@
<?php
namespace PHPSTORM_META;
// IDE metadata in the PHPSTORM_META namespace: each override() call below
// maps the value of argument 0 of a factory method to the class it yields,
// so that the IDE can infer the concrete return type.
// NOTE(review): the '@' in the LineRenderer maps is presumably replaced by
// the argument literal by the IDE - confirm against the PhpStorm docs.

// line renderer factory: singleton accessor
override(
\Jfcherng\Diff\Factory\LineRendererFactory::getInstance(0),
map(['' => 'Jfcherng\Diff\Renderer\Html\LineRenderer\@'])
);
// line renderer factory: fresh instance
override(
\Jfcherng\Diff\Factory\LineRendererFactory::make(0),
map(['' => 'Jfcherng\Diff\Renderer\Html\LineRenderer\@'])
);
// renderer factory: singleton accessor
override(
\Jfcherng\Diff\Factory\RendererFactory::getInstance(0),
map([
// html
'Combined' => \Jfcherng\Diff\Renderer\Html\Combined::class,
'Inline' => \Jfcherng\Diff\Renderer\Html\Inline::class,
'Json' => \Jfcherng\Diff\Renderer\Html\Json::class,
'JsonHtml' => \Jfcherng\Diff\Renderer\Html\JsonHtml::class,
'SideBySide' => \Jfcherng\Diff\Renderer\Html\SideBySide::class,
// text
'Context' => \Jfcherng\Diff\Renderer\Text\Context::class,
'JsonText' => \Jfcherng\Diff\Renderer\Text\JsonText::class,
'Unified' => \Jfcherng\Diff\Renderer\Text\Unified::class,
])
);
// renderer factory: fresh instance (same map as getInstance above)
override(
\Jfcherng\Diff\Factory\RendererFactory::make(0),
map([
// html
'Combined' => \Jfcherng\Diff\Renderer\Html\Combined::class,
'Inline' => \Jfcherng\Diff\Renderer\Html\Inline::class,
'Json' => \Jfcherng\Diff\Renderer\Html\Json::class,
'JsonHtml' => \Jfcherng\Diff\Renderer\Html\JsonHtml::class,
'SideBySide' => \Jfcherng\Diff\Renderer\Html\SideBySide::class,
// text
'Context' => \Jfcherng\Diff\Renderer\Text\Context::class,
'JsonText' => \Jfcherng\Diff\Renderer\Text\JsonText::class,
'Unified' => \Jfcherng\Diff\Renderer\Text\Unified::class,
])
);

View File

@ -0,0 +1,31 @@
BSD 3-Clause License
Copyright (c) 2018-2022 Jack Cherng <jfcherng@gmail.com>
Copyright (c) 2009 Chris Boulton <chris.boulton@interspire.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,182 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff;
use Jfcherng\Diff\Factory\RendererFactory;
use Jfcherng\Diff\Renderer\RendererConstant;
/**
 * Static convenience helpers around Differ and the renderers.
 */
final class DiffHelper
{
    /**
     * The constructor. Private: this class only exposes static methods.
     */
    private function __construct()
    {
    }

    /**
     * Get the absolute path of the project root directory.
     *
     * The result is memoized for the lifetime of the process.
     */
    public static function getProjectDirectory(): string
    {
        static $path;

        // realpath() returns false when the path cannot be resolved (for example
        // inside a phar archive); fall back to the unresolved parent directory
        // instead of violating the string return type
        return $path ??= (realpath(__DIR__ . '/..') ?: \dirname(__DIR__));
    }

    /**
     * Get the information about available renderers.
     *
     * Renderers are discovered by listing the PHP files in each
     * "src/Renderer/<type>" directory of the project; the result is memoized.
     *
     * @return array the INFO constant of every renderer class, keyed by renderer name
     */
    public static function getRenderersInfo(): array
    {
        static $info;

        if (isset($info)) {
            return $info;
        }

        // list each renderer type directory separately rather than using a
        // "{A,B}" pattern: GLOB_BRACE does not exist on some platforms
        $files = [];
        foreach (RendererConstant::RENDERER_TYPES as $type) {
            $pattern = implode(\DIRECTORY_SEPARATOR, [
                self::getProjectDirectory(),
                'src',
                'Renderer',
                $type,
                '*.php',
            ]);

            // glob() returns false on error
            $files = array_merge($files, glob($pattern) ?: []);
        }

        $info = [];
        foreach ($files as $file) {
            // get basename without file extension
            $renderer = pathinfo($file, \PATHINFO_FILENAME);

            // only normal class files are wanted
            if (
                substr($renderer, 0, 8) === 'Abstract'
                || substr($renderer, -9) === 'Interface'
                || substr($renderer, -5) === 'Trait'
            ) {
                continue;
            }

            $className = RendererFactory::resolveRenderer($renderer);

            // a file which does not map to a loadable renderer class is skipped
            if ($className === null) {
                continue;
            }

            $info[$renderer] = $className::INFO;
        }

        return $info;
    }

    /**
     * Get the available renderers.
     *
     * @return string[] the available renderers
     */
    public static function getAvailableRenderers(): array
    {
        return array_keys(self::getRenderersInfo());
    }

    /**
     * Get the content of the CSS style sheet for HTML renderers.
     *
     * The content is read from "example/diff-table.css" under the project
     * directory and memoized.
     *
     * @throws \LogicException   path is a directory
     * @throws \RuntimeException path cannot be opened
     */
    public static function getStyleSheet(): string
    {
        static $fileContent;

        if (isset($fileContent)) {
            return $fileContent;
        }

        $filePath = self::getProjectDirectory() . '/example/diff-table.css';

        $file = new \SplFileObject($filePath, 'r');

        // fread() requires the length > 0 hence we plus 1 for an empty file
        // (the same approach as in calculateFiles())
        return $fileContent = $file->fread($file->getSize() + 1);
    }

    /**
     * Gets the diff statistics such as inserted and deleted etc...
     *
     * The statistics are taken from the Differ singleton.
     *
     * @return array<string,float> the statistics
     */
    public static function getStatistics(): array
    {
        return Differ::getInstance()->getStatistics();
    }

    /**
     * All-in-one static method to calculate the diff between two strings (or arrays of strings).
     *
     * @param string|string[] $old             the old string (or array of lines)
     * @param string|string[] $new             the new string (or array of lines)
     * @param string          $renderer        the renderer name
     * @param array           $differOptions   the options for Differ object
     * @param array           $rendererOptions the options for renderer object
     *
     * @return string the rendered differences
     */
    public static function calculate(
        $old,
        $new,
        string $renderer = 'Unified',
        array $differOptions = [],
        array $rendererOptions = []
    ): string {
        // always convert into array form
        if (\is_string($old)) {
            $old = explode("\n", $old);
        }
        if (\is_string($new)) {
            $new = explode("\n", $new);
        }

        return RendererFactory::getInstance($renderer)
            ->setOptions($rendererOptions)
            ->render(
                Differ::getInstance()
                    ->setOldNew($old, $new)
                    ->setOptions($differOptions),
            )
        ;
    }

    /**
     * All-in-one static method to calculate the diff between two files.
     *
     * @param string $old             the path of the old file
     * @param string $new             the path of the new file
     * @param string $renderer        the renderer name
     * @param array  $differOptions   the options for Differ object
     * @param array  $rendererOptions the options for renderer object
     *
     * @throws \LogicException   path is a directory
     * @throws \RuntimeException path cannot be opened
     *
     * @return string the rendered differences
     */
    public static function calculateFiles(
        string $old,
        string $new,
        string $renderer = 'Unified',
        array $differOptions = [],
        array $rendererOptions = []
    ): string {
        // we want to leave the line-ending problem to self::calculate()
        // so do not set SplFileObject::DROP_NEW_LINE flag
        // otherwise, we will lose \r if the line-ending is \r\n
        $oldFile = new \SplFileObject($old, 'r');
        $newFile = new \SplFileObject($new, 'r');

        return self::calculate(
            // fread() requires the length > 0 hence we plus 1 for empty files
            $oldFile->fread($oldFile->getSize() + 1),
            $newFile->fread($newFile->getSize() + 1),
            $renderer,
            $differOptions,
            $rendererOptions,
        );
    }
}

View File

@ -0,0 +1,502 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff;
use Jfcherng\Diff\Utility\Arr;
/**
 * A comprehensive library for generating differences between two strings
 * in multiple formats (unified, side by side HTML etc).
 *
 * @author Jack Cherng <jfcherng@gmail.com>
 * @author Chris Boulton <chris.boulton@interspire.com>
 *
 * @see http://github.com/chrisboulton/php-diff
 */
final class Differ
{
    /**
     * @var int a safe number for indicating showing all contexts
     */
    public const CONTEXT_ALL = \PHP_INT_MAX >> 3;

    /**
     * @var string used to indicate a line has no EOL
     *
     * Arbitrary chars from the 15-16th Unicode reserved areas
     * and hopefully, they won't appear in source texts
     */
    public const LINE_NO_EOL = "\u{fcf28}\u{fc231}";

    /**
     * @var array cached properties and their default values
     */
    private const CACHED_PROPERTIES = [
        'groupedOpcodes' => [],
        'groupedOpcodesGnu' => [],
        'oldNoEolAtEofIdx' => -1,
        'newNoEolAtEofIdx' => -1,
        'oldNewComparison' => 0,
    ];

    /**
     * @var array array of the options that have been applied for generating the diff
     */
    public array $options = [];

    /**
     * @var string[] the old sequence
     */
    private array $old = [];

    /**
     * @var string[] the new sequence
     */
    private array $new = [];

    /**
     * @var bool is any of cached properties dirty?
     */
    private bool $isCacheDirty = true;

    /**
     * @var SequenceMatcher the sequence matcher
     */
    private SequenceMatcher $sequenceMatcher;

    /**
     * @var int the line count of the old as of the latest opcode generation
     */
    private int $oldSrcLength = 0;

    /**
     * @var int the line count of the new as of the latest opcode generation
     */
    private int $newSrcLength = 0;

    /**
     * @var int the end index for the old if the old has no EOL at EOF
     *          -1 means the old has an EOL at EOF
     */
    private int $oldNoEolAtEofIdx = -1;

    /**
     * @var int the end index for the new if the new has no EOL at EOF
     *          -1 means the new has an EOL at EOF
     */
    private int $newNoEolAtEofIdx = -1;

    /**
     * @var int the result of comparing the old and the new with the spaceship operator
     *          -1 means old < new, 0 means old == new, 1 means old > new
     */
    private int $oldNewComparison = 0;

    /**
     * @var int[][][] array containing the generated opcodes for the differences between the two items
     */
    private array $groupedOpcodes = [];

    /**
     * @var int[][][] array containing the generated opcodes for the differences between the two items (GNU version)
     */
    private array $groupedOpcodesGnu = [];

    /**
     * @var array associative array of the default options available for the Differ class and their default value
     */
    private static array $defaultOptions = [
        // show how many neighbor lines
        // Differ::CONTEXT_ALL can be used to show the whole file
        'context' => 3,
        // ignore case difference
        'ignoreCase' => false,
        // ignore line ending difference
        'ignoreLineEnding' => false,
        // ignore whitespace difference
        'ignoreWhitespace' => false,
        // if the input sequence is too long, it will just give up (especially for char-level diff)
        'lengthLimit' => 2000,
    ];

    /**
     * The constructor.
     *
     * @param string[] $old     array containing the lines of the old string to compare
     * @param string[] $new     array containing the lines for the new string to compare
     * @param array    $options the options
     */
    public function __construct(array $old, array $new, array $options = [])
    {
        $this->sequenceMatcher = new SequenceMatcher([], []);

        $this->setOldNew($old, $new)->setOptions($options);
    }

    /**
     * Set old and new.
     *
     * @param string[] $old the old
     * @param string[] $new the new
     */
    public function setOldNew(array $old, array $new): self
    {
        return $this->setOld($old)->setNew($new);
    }

    /**
     * Set old. The cache is only invalidated when the value really changes.
     *
     * @param string[] $old the old
     */
    public function setOld(array $old): self
    {
        if ($this->old !== $old) {
            $this->old = $old;
            $this->isCacheDirty = true;
        }

        return $this;
    }

    /**
     * Set new. The cache is only invalidated when the value really changes.
     *
     * @param string[] $new the new
     */
    public function setNew(array $new): self
    {
        if ($this->new !== $new) {
            $this->new = $new;
            $this->isCacheDirty = true;
        }

        return $this;
    }

    /**
     * Set the options. Missing options are filled in with their default values.
     *
     * @param array $options the options
     */
    public function setOptions(array $options): self
    {
        $mergedOptions = $options + self::$defaultOptions;

        if ($this->options !== $mergedOptions) {
            $this->options = $mergedOptions;
            $this->isCacheDirty = true;
        }

        return $this;
    }

    /**
     * Get a range of lines from $start to $end from the old.
     *
     * @param int      $start the starting index (negative = count from backward)
     * @param null|int $end   the ending index (negative = count from backward)
     *                        if is null, it returns a slice from $start to the end
     *
     * @return string[] array of all of the lines between the specified range
     */
    public function getOld(int $start = 0, ?int $end = null): array
    {
        return Arr::getPartialByIndex($this->old, $start, $end);
    }

    /**
     * Get a range of lines from $start to $end from the new.
     *
     * @param int      $start the starting index (negative = count from backward)
     * @param null|int $end   the ending index (negative = count from backward)
     *                        if is null, it returns a slice from $start to the end
     *
     * @return string[] array of all of the lines between the specified range
     */
    public function getNew(int $start = 0, ?int $end = null): array
    {
        return Arr::getPartialByIndex($this->new, $start, $end);
    }

    /**
     * Get the options.
     *
     * @return array the options
     */
    public function getOptions(): array
    {
        return $this->options;
    }

    /**
     * Get the old no EOL at EOF index.
     *
     * @return int the old no EOL at EOF index
     */
    public function getOldNoEolAtEofIdx(): int
    {
        return $this->finalize()->oldNoEolAtEofIdx;
    }

    /**
     * Get the new no EOL at EOF index.
     *
     * @return int the new no EOL at EOF index
     */
    public function getNewNoEolAtEofIdx(): int
    {
        return $this->finalize()->newNoEolAtEofIdx;
    }

    /**
     * Compare the old and the new with the spaceship operator.
     */
    public function getOldNewComparison(): int
    {
        return $this->finalize()->oldNewComparison;
    }

    /**
     * Get the singleton. It is created with empty old and new sequences.
     */
    public static function getInstance(): self
    {
        static $singleton;

        return $singleton ??= new self([], []);
    }

    /**
     * Gets the diff statistics such as inserted and deleted etc...
     *
     * "changedRatio" is the fraction of the old lines which were deleted or
     * replaced. It is 0.0 when the old has no lines at all.
     *
     * @return array<string,float> the statistics
     */
    public function getStatistics(): array
    {
        $ret = [
            'inserted' => 0,
            'deleted' => 0,
            'unmodified' => 0,
            'changedRatio' => 0.0,
        ];

        foreach ($this->getGroupedOpcodes() as $hunk) {
            foreach ($hunk as [$op, $i1, $i2, $j1, $j2]) {
                if ($op & (SequenceMatcher::OP_INS | SequenceMatcher::OP_REP)) {
                    $ret['inserted'] += $j2 - $j1;
                }
                if ($op & (SequenceMatcher::OP_DEL | SequenceMatcher::OP_REP)) {
                    $ret['deleted'] += $i2 - $i1;
                }
            }
        }

        // count the old lines directly: $this->oldSrcLength is also written by
        // the GNU variant (which may drop the trailing empty line), so it can
        // be stale when getGroupedOpcodes() has returned its cached result
        $oldLength = \count($this->old);

        $ret['unmodified'] = $oldLength - $ret['deleted'];
        // guard against a division by zero for an empty old
        $ret['changedRatio'] = $oldLength > 0
            ? 1 - ($ret['unmodified'] / $oldLength)
            : 0.0;

        return $ret;
    }

    /**
     * Generate a list of the compiled and grouped opcodes for the differences between the
     * two strings. Generally called by the renderer, this class instantiates the sequence
     * matcher and performs the actual diff generation and return an array of the opcodes
     * for it. Once generated, the results are cached in the Differ class instance.
     *
     * @return int[][][] array of the grouped opcodes for the generated diff
     */
    public function getGroupedOpcodes(): array
    {
        $this->finalize();

        if (!empty($this->groupedOpcodes)) {
            return $this->groupedOpcodes;
        }

        // work on copies since the pre-processing appends helper lines
        $old = $this->old;
        $new = $this->new;
        $this->getGroupedOpcodesPre($old, $new);

        $opcodes = $this->sequenceMatcher
            ->setSequences($old, $new)
            ->getGroupedOpcodes($this->options['context'])
        ;

        $this->getGroupedOpcodesPost($opcodes);

        return $this->groupedOpcodes = $opcodes;
    }

    /**
     * A EOL-at-EOF-sensitive version of getGroupedOpcodes().
     *
     * @return int[][][] array of the grouped opcodes for the generated diff (GNU version)
     */
    public function getGroupedOpcodesGnu(): array
    {
        $this->finalize();

        if (!empty($this->groupedOpcodesGnu)) {
            return $this->groupedOpcodesGnu;
        }

        // work on copies since the pre-processing modifies the lines
        $old = $this->old;
        $new = $this->new;
        $this->getGroupedOpcodesGnuPre($old, $new);

        $opcodes = $this->sequenceMatcher
            ->setSequences($old, $new)
            ->getGroupedOpcodes($this->options['context'])
        ;

        $this->getGroupedOpcodesGnuPost($opcodes);

        return $this->groupedOpcodesGnu = $opcodes;
    }

    /**
     * Triggered before getGroupedOpcodes(). May modify the $old and $new.
     *
     * @param string[] $old the old
     * @param string[] $new the new
     */
    private function getGroupedOpcodesPre(array &$old, array &$new): void
    {
        // append these lines to make sure the last block of the diff result is OP_EQ
        static $eolAtEofHelperLines = [
            SequenceMatcher::APPENDED_HELPER_LINE,
            SequenceMatcher::APPENDED_HELPER_LINE,
            SequenceMatcher::APPENDED_HELPER_LINE,
            SequenceMatcher::APPENDED_HELPER_LINE,
        ];

        // remember the source lengths before the helper lines are appended
        $this->oldSrcLength = \count($old);
        array_push($old, ...$eolAtEofHelperLines);

        $this->newSrcLength = \count($new);
        array_push($new, ...$eolAtEofHelperLines);
    }

    /**
     * Triggered after getGroupedOpcodes(). May modify the $opcodes.
     *
     * @param int[][][] $opcodes the opcodes
     */
    private function getGroupedOpcodesPost(array &$opcodes): void
    {
        // remove those extra lines caused by adding extra SequenceMatcher::APPENDED_HELPER_LINE lines
        foreach ($opcodes as $hunkIdx => &$hunk) {
            foreach ($hunk as $blockIdx => &$block) {
                // range overflow: clamp the indexes to the real source lengths
                $block[1] = min($block[1], $this->oldSrcLength);
                $block[2] = min($block[2], $this->oldSrcLength);
                $block[3] = min($block[3], $this->newSrcLength);
                $block[4] = min($block[4], $this->newSrcLength);

                // useless extra block?
                /** @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset */
                if ($block[1] === $block[2] && $block[3] === $block[4]) {
                    unset($hunk[$blockIdx]);
                }
            }
            // break the reference so that later writes cannot hit the last block
            unset($block);

            if (empty($hunk)) {
                unset($opcodes[$hunkIdx]);
            }
        }
        // break the reference so that later writes cannot hit the last hunk
        unset($hunk);
    }

    /**
     * Triggered before getGroupedOpcodesGnu(). May modify the $old and $new.
     *
     * @param string[] $old the old
     * @param string[] $new the new
     */
    private function getGroupedOpcodesGnuPre(array &$old, array &$new): void
    {
        /**
         * Make the lines to be prepared for GNU-style diff.
         *
         * This method checks whether $lines has no EOL at EOF and append a special
         * indicator to the last line.
         *
         * @param string[] $lines the lines created by simply explode("\n", $string)
         */
        $createGnuCompatibleLines = static function (array $lines): array {
            // lines created by explode("\n", $string) have at least one element
            // because explode("\n", "") === [""], but an empty array may still
            // be given directly; there is no last line to inspect in that case
            if ($lines === []) {
                return $lines;
            }

            $lastLineIdx = \count($lines) - 1;
            $lastLine = &$lines[$lastLineIdx];

            if ($lastLine === '') {
                // remove the last plain "" line since we don't need it anymore
                // use array_slice() to also reset the array index
                $lines = \array_slice($lines, 0, -1);
            } else {
                // this means the original source has no EOL at EOF
                // we append a special indicator to that line so it no longer matches
                $lastLine .= self::LINE_NO_EOL;
            }

            return $lines;
        };

        $old = $createGnuCompatibleLines($old);
        $new = $createGnuCompatibleLines($new);

        $this->getGroupedOpcodesPre($old, $new);
    }

    /**
     * Triggered after getGroupedOpcodesGnu(). May modify the $opcodes.
     *
     * @param int[][][] $opcodes the opcodes
     */
    private function getGroupedOpcodesGnuPost(array &$opcodes): void
    {
        $this->getGroupedOpcodesPost($opcodes);
    }

    /**
     * Claim this class has settled down and we could calculate cached
     * properties by current properties.
     *
     * This method must be called before accessing cached properties to
     * make sure that you will not get an outdated cached value.
     *
     * @internal
     */
    private function finalize(): self
    {
        if ($this->isCacheDirty) {
            $this->resetCachedResults();

            $this->oldNoEolAtEofIdx = $this->getOld(-1) === [''] ? -1 : \count($this->old);
            $this->newNoEolAtEofIdx = $this->getNew(-1) === [''] ? -1 : \count($this->new);
            $this->oldNewComparison = $this->old <=> $this->new;

            $this->sequenceMatcher->setOptions($this->options);
        }

        return $this;
    }

    /**
     * Reset cached results to their default values and mark the cache as clean.
     */
    private function resetCachedResults(): self
    {
        foreach (self::CACHED_PROPERTIES as $property => $value) {
            $this->{$property} = $value;
        }

        $this->isCacheDirty = false;

        return $this;
    }
}

View File

@ -0,0 +1,13 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Exception;
/**
 * Thrown when a file cannot be found. The message is "File not found: <path>".
 */
final class FileNotFoundException extends \Exception
{
    /**
     * @param string          $filepath the path of the missing file
     * @param int             $code     the exception code
     * @param null|\Throwable $previous the previous throwable, if any
     */
    public function __construct(string $filepath = '', int $code = 0, ?\Throwable $previous = null)
    {
        // the nullable type is spelled out because relying on "= null" to make
        // a typed parameter nullable is deprecated as of PHP 8.4
        parent::__construct("File not found: {$filepath}", $code, $previous);
    }
}

View File

@ -0,0 +1,13 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Exception;
/**
 * Thrown when a function is not supported. The message is "Unsupported function: <name>".
 */
final class UnsupportedFunctionException extends \Exception
{
    /**
     * @param string          $funcName the name of the unsupported function
     * @param int             $code     the exception code
     * @param null|\Throwable $previous the previous throwable, if any
     */
    public function __construct(string $funcName = '', int $code = 0, ?\Throwable $previous = null)
    {
        // the nullable type is spelled out because relying on "= null" to make
        // a typed parameter nullable is deprecated as of PHP 8.4
        parent::__construct("Unsupported function: {$funcName}", $code, $previous);
    }
}

View File

@ -0,0 +1,55 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Factory;
use Jfcherng\Diff\Renderer\Html\LineRenderer\AbstractLineRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
/**
 * Creates the HTML line renderers and keeps one shared instance per type.
 */
final class LineRendererFactory
{
    /**
     * Shared line renderer instances, keyed by the requested type.
     *
     * @var AbstractLineRenderer[]
     */
    private static array $singletons = [];

    /**
     * Not instantiable: the factory only has static members.
     */
    private function __construct()
    {
    }

    /**
     * Fetch the shared line renderer of the given type.
     *
     * The instance is built on the first request for a type; the constructor
     * arguments of any later request for the same type are not used.
     *
     * @param string $type        the type
     * @param mixed  ...$ctorArgs the constructor arguments
     */
    public static function getInstance(string $type, ...$ctorArgs): AbstractLineRenderer
    {
        if (!isset(self::$singletons[$type])) {
            self::$singletons[$type] = self::make($type, ...$ctorArgs);
        }

        return self::$singletons[$type];
    }

    /**
     * Build a brand-new line renderer of the given type.
     *
     * @param string $type        the type (its first character is upper-cased to form the class name)
     * @param mixed  ...$ctorArgs the constructor arguments
     *
     * @throws \InvalidArgumentException when no such line renderer class exists
     */
    public static function make(string $type, ...$ctorArgs): AbstractLineRenderer
    {
        $fqcn = RendererConstant::RENDERER_NAMESPACE . '\\Html\\LineRenderer\\' . ucfirst($type);

        if (class_exists($fqcn)) {
            return new $fqcn(...$ctorArgs);
        }

        throw new \InvalidArgumentException("LineRenderer not found: {$type}");
    }
}

View File

@ -0,0 +1,79 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Factory;
use Jfcherng\Diff\Renderer\AbstractRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
/**
 * Creates the renderers and keeps one shared instance per renderer name.
 */
final class RendererFactory
{
    /**
     * Shared renderer instances, keyed by the requested renderer name.
     *
     * @var AbstractRenderer[]
     */
    private static array $singletons = [];

    /**
     * Not instantiable: the factory only has static members.
     */
    private function __construct()
    {
    }

    /**
     * Fetch the shared renderer of the given name.
     *
     * The instance is built on the first request for a name; the constructor
     * arguments of any later request for the same name are not used.
     *
     * @param string $renderer    the renderer
     * @param mixed  ...$ctorArgs the constructor arguments
     */
    public static function getInstance(string $renderer, ...$ctorArgs): AbstractRenderer
    {
        if (!isset(self::$singletons[$renderer])) {
            self::$singletons[$renderer] = self::make($renderer, ...$ctorArgs);
        }

        return self::$singletons[$renderer];
    }

    /**
     * Build a brand-new renderer of the given name.
     *
     * @param string $renderer    the renderer
     * @param mixed  ...$ctorArgs the constructor arguments
     *
     * @throws \InvalidArgumentException when the name cannot be resolved to a class
     */
    public static function make(string $renderer, ...$ctorArgs): AbstractRenderer
    {
        $fqcn = self::resolveRenderer($renderer);

        if ($fqcn === null) {
            throw new \InvalidArgumentException("Renderer not found: {$renderer}");
        }

        return new $fqcn(...$ctorArgs);
    }

    /**
     * Resolve the renderer name into a FQCN.
     *
     * Every renderer type namespace is probed in turn and the first existing
     * class wins. Successful lookups are memoized; failed ones are not.
     *
     * @param string $renderer the renderer
     *
     * @return null|string the FQCN, or null if no class matches
     */
    public static function resolveRenderer(string $renderer): ?string
    {
        static $cache = [];

        if (!isset($cache[$renderer])) {
            foreach (RendererConstant::RENDERER_TYPES as $type) {
                $candidate = RendererConstant::RENDERER_NAMESPACE . "\\{$type}\\{$renderer}";

                if (class_exists($candidate)) {
                    $cache[$renderer] = $candidate;

                    break;
                }
            }
        }

        return $cache[$renderer] ?? null;
    }
}

View File

@ -0,0 +1,248 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Diff\Utility\Language;
/**
* Base class for diff renderers.
*
* @todo use typed properties (BC breaking for public interface) in v7
*/
abstract class AbstractRenderer implements RendererInterface
{
/**
* @var array information about this renderer
*/
public const INFO = [
'desc' => 'default_desc',
'type' => 'default_type',
];
/**
* @var bool Is this renderer pure text?
*/
public const IS_TEXT_RENDERER = true;
/**
* @var string[] array of the opcodes and their corresponding symbols
*/
public const SYMBOL_MAP = [
SequenceMatcher::OP_DEL => '-',
SequenceMatcher::OP_EQ => ' ',
SequenceMatcher::OP_INS => '+',
SequenceMatcher::OP_REP => '!',
];
/**
* @var Language the language translation object
*/
protected $t;
/**
* If the input "changes" have `<ins>...</ins>` or `<del>...</del>`,
* which means they have been processed, then `false`. Otherwise, `true`.
*
* @var bool
*/
protected $changesAreRaw = true;
/**
* @var array array of the default options that apply to this renderer
*/
protected static $defaultOptions = [
// how detailed the rendered HTML in-line diff is? (none, line, word, char)
'detailLevel' => 'line',
// renderer language: eng, cht, chs, jpn, ...
// or an array which has the same keys with a language file
// check the "Custom Language" section in the readme for more advanced usage
'language' => 'eng',
// show line numbers in HTML renderers
'lineNumbers' => true,
// show a separator between different diff hunks in HTML renderers
'separateBlock' => true,
// show the (table) header
'showHeader' => true,
// convert spaces/tabs into HTML codes like `<span class="ch sp"> </span>`
// and the frontend is responsible for rendering them with CSS.
// when using this, "spacesToNbsp" should be false and "tabSize" is not respected.
'spaceToHtmlTag' => false,
// the frontend HTML could use CSS "white-space: pre;" to visualize consecutive whitespaces
// but if you want to visualize them in the backend with "&nbsp;", you can set this to true
'spacesToNbsp' => false,
// HTML renderer tab width (negative = do not convert into spaces)
'tabSize' => 4,
// this option is currently only for the Combined renderer.
// it determines whether a replace-type block should be merged or not
// depending on the content changed ratio, which values between 0 and 1.
'mergeThreshold' => 0.8,
// this option is currently only for the Unified and the Context renderers.
// RendererConstant::CLI_COLOR_AUTO = colorize the output if possible (default)
// RendererConstant::CLI_COLOR_ENABLE = force to colorize the output
// RendererConstant::CLI_COLOR_DISABLE = force not to colorize the output
'cliColorization' => RendererConstant::CLI_COLOR_AUTO,
// this option is currently only for the Json renderer.
// internally, ops (tags) are all int type but this is not good for human reading.
// set this to "true" to convert them into string form before outputting.
'outputTagAsString' => false,
// this option is currently only for the Json renderer.
// it controls how the output JSON is formatted.
// see available options on https://www.php.net/manual/en/function.json-encode.php
'jsonEncodeFlags' => \JSON_UNESCAPED_SLASHES | \JSON_UNESCAPED_UNICODE,
// this option is currently effective when the "detailLevel" is "word"
// characters listed in this array can be used to make diff segments into a whole
// for example, making "<del>good</del>-<del>looking</del>" into "<del>good-looking</del>"
// this should bring better readability but set this to empty array if you do not want it
'wordGlues' => ['-', ' '],
// change this value to a string as the returned diff if the two input strings are identical
'resultForIdenticals' => null,
// extra HTML classes added to the DOM of the diff container
'wrapperClasses' => ['diff-wrapper'],
];
/**
* @var array array containing the user applied and merged default options for the renderer
*/
protected $options = [];
/**
 * The constructor.
 *
 * The options are always routed through setOptions(), even when the array is
 * empty, so that the defaults are merged in and the Language object is set up.
 *
 * @param array $options optionally, an array of the options for the renderer
 */
public function __construct(array $options = [])
{
$this->setOptions($options);
}
/**
 * Replace the renderer options with the given ones, completed by the defaults.
 *
 * Keys present in $options win; every key missing from it is taken from
 * static::$defaultOptions. The Language object is refreshed through
 * updateLanguage() before the new options are stored.
 *
 * @param array $options the options
 *
 * @return static
 */
public function setOptions(array $options): self
{
    // array union: left operand has priority, defaults only fill the gaps
    $merged = $options + static::$defaultOptions;
    $previousLanguage = $this->options['language'] ?? '';

    $this->updateLanguage($previousLanguage, $merged['language']);
    $this->options = $merged;

    return $this;
}
/**
 * Get the options currently applied to the renderer.
 *
 * @return array the options (user supplied values merged with the defaults)
 */
public function getOptions(): array
{
return $this->options;
}
/**
 * {@inheritdoc}
 *
 * Returns the "resultForIdenticals" option when it is a string and falls
 * back to getResultForIdenticalsDefault() when the option is null.
 *
 * @final
 *
 * @todo mark this method with "final" in the next major release
 *
 * @throws \InvalidArgumentException if the option is neither null nor a string
 */
public function getResultForIdenticals(): string
{
    $custom = $this->options['resultForIdenticals'];

    // no custom value configured: let the concrete renderer decide
    if ($custom === null) {
        return $this->getResultForIdenticalsDefault();
    }

    if (!\is_string($custom)) {
        throw new \InvalidArgumentException('renderer option `resultForIdenticals` must be null or string.');
    }

    return $custom;
}
/**
 * Get the renderer default result when the old and the new are the same.
 *
 * Used by getResultForIdenticals() when the "resultForIdenticals" option is null.
 */
abstract public function getResultForIdenticalsDefault(): string;
/**
 * {@inheritdoc}
 *
 * Flags the changes as raw, then either short-circuits with the
 * "identical" result or delegates to renderWorker().
 */
final public function render(Differ $differ): string
{
    $this->changesAreRaw = true;

    // the "no difference" situation may happen frequently
    if ($differ->getOldNewComparison() === 0) {
        return $this->getResultForIdenticals();
    }

    return $this->renderWorker($differ);
}
/**
 * {@inheritdoc}
 *
 * Flags the changes as not raw before delegating to renderArrayWorker().
 */
final public function renderArray(array $differArray): string
{
$this->changesAreRaw = false;
return $this->renderArrayWorker($differArray);
}
/**
 * The real worker for self::render().
 *
 * Only reached from render() when the differ reports a difference.
 *
 * @param Differ $differ the differ object
 */
abstract protected function renderWorker(Differ $differ): string;
/**
 * The real worker for self::renderArray().
 *
 * @param array[][] $differArray the differ array
 */
abstract protected function renderArrayWorker(array $differArray): string;
/**
 * Update the Language object.
 *
 * A new Language object is created when none exists yet or when the
 * requested language differs from the previous one.
 *
 * @param string|string[] $old the old language
 * @param string|string[] $new the new language
 *
 * @return static
 */
protected function updateLanguage($old, $new): self
{
    $canReuseTranslator = isset($this->t) && $old === $new;

    if (!$canReuseTranslator) {
        $this->t = new Language($new);
    }

    return $this;
}
/**
 * A shorthand to do translation.
 *
 * @param string $text       the text to translate
 * @param bool   $escapeHtml escape the translated text for HTML?
 *
 * @return string the translated (and optionally HTML-escaped) text
 */
protected function _(string $text, bool $escapeHtml = true): string
{
    $translated = $this->t->translate($text);

    if (!$escapeHtml) {
        return $translated;
    }

    return htmlspecialchars($translated);
}
}

View File

@ -0,0 +1,377 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\Factory\LineRendererFactory;
use Jfcherng\Diff\Renderer\AbstractRenderer;
use Jfcherng\Diff\Renderer\Html\LineRenderer\AbstractLineRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Utility\MbString;
/**
* Base renderer for rendering HTML-based diffs.
*/
abstract class AbstractHtml extends AbstractRenderer
{
/**
* @var bool is this renderer pure text?
*/
public const IS_TEXT_RENDERER = false;
/**
* @var string[] array of the different opcodes and how they are mapped to HTML classes
*
* @todo rename to OP_CLASS_MAP in v7
*/
public const TAG_CLASS_MAP = [
SequenceMatcher::OP_DEL => 'del',
SequenceMatcher::OP_EQ => 'eq',
SequenceMatcher::OP_INS => 'ins',
SequenceMatcher::OP_REP => 'rep',
];
/**
* Auto format the content in "changes" to be suitable for HTML output.
*
* This may not be a wanted behavior for some (custom) renderers
* if they want to do this by themselves in a later stage.
*
* @var bool
*/
public const AUTO_FORMAT_CHANGES = true;
/**
 * {@inheritdoc}
 *
 * This base HTML renderer returns an empty string.
 */
public function getResultForIdenticalsDefault(): string
{
return '';
}
/**
 * Build the "changes" array structure used to generate HTML based diffs.
 *
 * Every hunk of grouped opcodes becomes a list of blocks (see
 * getDefaultBlock()) holding the affected old and new lines. The result is
 * passed through formatChanges() when static::AUTO_FORMAT_CHANGES is enabled.
 *
 * @param Differ $differ the differ object
 *
 * @return array[][] generated changes, suitable for presentation in HTML
 */
public function getChanges(Differ $differ): array
{
    $lineRenderer = LineRendererFactory::make(
        $this->options['detailLevel'],
        $differ->getOptions(),
        $this->options,
    );

    $old = $differ->getOld();
    $new = $differ->getNew();

    $changes = [];

    foreach ($differ->getGroupedOpcodes() as $hunk) {
        $change = [];

        foreach ($hunk as [$op, $i1, $i2, $j1, $j2]) {
            $block = $this->getDefaultBlock($op, $i1, $j1);

            $oldCount = $i2 - $i1;
            $newCount = $j2 - $j1;

            // if there are same amount of lines replaced
            // we can render the inner detailed changes with corresponding lines
            // @todo or use LineRenderer to do the job regardless different line counts?
            if ($op === SequenceMatcher::OP_REP && $oldCount === $newCount) {
                // the lines are passed by reference and get marked in place
                for ($k = $oldCount - 1; $k >= 0; --$k) {
                    $this->renderChangedExtent($lineRenderer, $old[$i1 + $k], $new[$j1 + $k]);
                }
            }

            $block['old']['lines'] = \array_slice($old, $i1, $oldCount);
            $block['new']['lines'] = \array_slice($new, $j1, $newCount);

            $change[] = $block;
        }

        $changes[] = $change;
    }

    if (static::AUTO_FORMAT_CHANGES) {
        $this->formatChanges($changes);
    }

    return $changes;
}
/**
 * {@inheritdoc}
 *
 * Computes the changes, renders them and strips empty HTML closures.
 */
protected function renderWorker(Differ $differ): string
{
    $changes = $this->getChanges($differ);

    return $this->cleanUpDummyHtmlClosures(
        $this->redererChanges($changes),
    );
}
/**
 * {@inheritdoc}
 *
 * Normalizes the tags to their int form, renders the changes and strips
 * empty HTML closures.
 */
protected function renderArrayWorker(array $differArray): string
{
    $this->ensureChangesUseIntTag($differArray);

    return $this->cleanUpDummyHtmlClosures(
        $this->redererChanges($differArray),
    );
}
/**
 * Render the array of changes into the final output string.
 *
 * @param array[][] $changes the changes
 *
 * @todo rename typo to renderChanges() in v7
 */
abstract protected function redererChanges(array $changes): string;
/**
 * Render the changed extent of a pair of lines.
 *
 * Both lines are passed by reference and are overwritten with whatever the
 * line renderer leaves in the MbString buffers.
 *
 * @param AbstractLineRenderer $lineRenderer the line renderer
 * @param string               $old          the old line
 * @param string               $new          the new line
 */
protected function renderChangedExtent(AbstractLineRenderer $lineRenderer, string &$old, string &$new): void
{
// the two MbString buffers are created once and reused on every call
static $mbOld, $mbNew;
$mbOld ??= new MbString();
$mbNew ??= new MbString();
$mbOld->set($old);
$mbNew->set($new);
$lineRenderer->render($mbOld, $mbNew);
// write the rendered content back to the caller's variables
$old = $mbOld->get();
$new = $mbNew->get();
}
/**
 * Get the default (line-less) block for an operation.
 *
 * @param int $op the operation
 * @param int $i1 begin index of the diff of the old array
 * @param int $j1 begin index of the diff of the new array
 *
 * @return array the default block with keys "tag", "old" and "new"
 *
 * @todo rename tag to op in v7
 */
protected function getDefaultBlock(int $op, int $i1, int $j1): array
{
    $oldSide = ['offset' => $i1, 'lines' => []];
    $newSide = ['offset' => $j1, 'lines' => []];

    return [
        'tag' => $op,
        'old' => $oldSide,
        'new' => $newSide,
    ];
}
/**
 * Make the content in "changes" suitable for HTML output.
 *
 * All lines are run through formatLines(). Afterwards the generic HTML
 * closures are swapped for the "del" variant on the old side of
 * replace/delete blocks and for the "ins" variant on the new side of
 * replace/insert blocks.
 *
 * @param array[][] $changes the changes (modified in place)
 */
final protected function formatChanges(array &$changes): void
{
    foreach ($changes as $hunkKey => $hunk) {
        foreach ($hunk as $blockKey => $block) {
            $oldLines = $this->formatLines($block['old']['lines']);
            $newLines = $this->formatLines($block['new']['lines']);

            /** @phan-suppress-next-line PhanTypeInvalidLeftOperandOfBitwiseOp */
            if ($block['tag'] & (SequenceMatcher::OP_REP | SequenceMatcher::OP_DEL)) {
                $oldLines = str_replace(
                    RendererConstant::HTML_CLOSURES,
                    RendererConstant::HTML_CLOSURES_DEL,
                    $oldLines,
                );
            }

            /** @phan-suppress-next-line PhanTypeInvalidLeftOperandOfBitwiseOp */
            if ($block['tag'] & (SequenceMatcher::OP_REP | SequenceMatcher::OP_INS)) {
                $newLines = str_replace(
                    RendererConstant::HTML_CLOSURES,
                    RendererConstant::HTML_CLOSURES_INS,
                    $newLines,
                );
            }

            $changes[$hunkKey][$blockKey]['old']['lines'] = $oldLines;
            $changes[$hunkKey][$blockKey]['new']['lines'] = $newLines;
        }
    }
}
/**
 * Make a series of lines suitable for outputting in a HTML rendered diff.
 *
 * To avoid invoking the same formatting functions once per line, the lines
 * are glued into one string, formatted in a single pass and split again.
 *
 * @param string[] $lines array of lines to format
 *
 * @return string[] array of the formatted lines
 */
protected function formatLines(array $lines): array
{
    $glued = implode(RendererConstant::IMPLODE_DELIMITER, $lines);
    $formatted = $this->formatStringFromLines($glued);

    return explode(RendererConstant::IMPLODE_DELIMITER, $formatted);
}
/**
 * Make a string suitable for outputting in a HTML rendered diff.
 *
 * This may involve replacing tab characters with spaces, making the HTML safe
 * for output, replacing spaces with "&nbsp;" or wrapping spaces/tabs in
 * HTML tags, depending on the renderer options.
 *
 * @param string $string the string of imploded lines
 *
 * @return string the formatted string
 */
protected function formatStringFromLines(string $string): string
{
    $wrapWhitespace = $this->options['spaceToHtmlTag'];

    // tabs are only expanded when they are not going to be wrapped in tags
    if (!$wrapWhitespace) {
        $string = $this->expandTabs($string, $this->options['tabSize']);
    }

    $string = $this->htmlSafe($string);

    if ($this->options['spacesToNbsp']) {
        $string = $this->htmlFixSpaces($string);
    }

    if ($wrapWhitespace) {
        $string = $this->htmlReplaceSpacesToHtmlTag($string);
    }

    return $string;
}
/**
 * Replace tabs in a string with a number of spaces.
 *
 * @param string $string          the input string which may contain tabs
 * @param int    $tabSize         one tab = how many spaces, a negative does nothing
 * @param bool   $onlyLeadingTabs only expand tabs in the leading whitespace of each line
 *
 * @return string the string with the tabs converted to spaces
 */
protected function expandTabs(string $string, int $tabSize = 4, bool $onlyLeadingTabs = false): string
{
    if ($tabSize < 0) {
        return $string;
    }

    $spaces = str_repeat(' ', $tabSize);

    if (!$onlyLeadingTabs) {
        return str_replace("\t", $spaces, $string);
    }

    return preg_replace_callback(
        "/^[ \t]{1,}/mS", // tabs and spaces may be mixed
        static fn (array $matches): string => str_replace("\t", $spaces, $matches[0]),
        $string,
    );
}
/**
 * Make a string containing HTML safe for output on a page.
 *
 * Quotes are left untouched (ENT_NOQUOTES), so the result is meant for
 * element content rather than attribute values.
 *
 * @param string $string the string
 *
 * @return string the string with the HTML characters replaced by entities
 */
protected function htmlSafe(string $string): string
{
return htmlspecialchars($string, \ENT_NOQUOTES, 'UTF-8');
}
/**
 * Replace every space in a string with "&nbsp;".
 *
 * @param string $string the string which may contain spaces
 *
 * @return string the HTML representation of the string
 */
protected function htmlFixSpaces(string $string): string
{
return str_replace(' ', '&nbsp;', $string);
}
/**
 * Wrap spaces/tabs in HTML tags, which may be styled in frontend with CSS.
 *
 * Each space becomes a "ch sp" span and each tab a "ch tab" span; the
 * whitespace character itself is kept inside the span.
 *
 * @param string $string the string which may contain spaces/tabs
 *
 * @return string the HTML representation of the string
 */
protected function htmlReplaceSpacesToHtmlTag(string $string): string
{
return strtr($string, [
' ' => '<span class="ch sp"> </span>',
"\t" => "<span class=\"ch tab\">\t</span>",
]);
}
/**
 * Make sure the "changes" array uses int "tag".
 *
 * Internally, we would like always int form for better performance.
 * Only the tag of the very first block is inspected to decide whether a
 * conversion is needed.
 *
 * @param array[][] $changes the changes (modified in place)
 */
protected function ensureChangesUseIntTag(array &$changes): void
{
    $firstTag = $changes[0][0]['tag'] ?? null;

    // the tag is already int type, nothing to do
    if (\is_int($firstTag)) {
        return;
    }

    foreach ($changes as $hunkKey => $hunk) {
        foreach ($hunk as $blockKey => $block) {
            $changes[$hunkKey][$blockKey]['tag'] = SequenceMatcher::opStrToInt($block['tag']);
        }
    }
}
/**
 * Clean up empty HTML closures in the given string.
 *
 * An empty closure is an opening "del"/"ins" closure immediately followed
 * by its closing counterpart.
 *
 * @param string $string the string
 */
protected function cleanUpDummyHtmlClosures(string $string): string
{
    $emptyDel = RendererConstant::HTML_CLOSURES_DEL[0] . RendererConstant::HTML_CLOSURES_DEL[1];
    $emptyIns = RendererConstant::HTML_CLOSURES_INS[0] . RendererConstant::HTML_CLOSURES_INS[1];

    return str_replace([$emptyDel, $emptyIns], '', $string);
}
}

View File

@ -0,0 +1,521 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
use Jfcherng\Diff\Factory\LineRendererFactory;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Diff\Utility\ReverseIterator;
use Jfcherng\Utility\MbString;
/**
* Combined HTML diff generator.
*
* Note that this renderer always has no line number.
*/
final class Combined extends AbstractHtml
{
/**
* {@inheritdoc}
*/
public const INFO = [
'desc' => 'Combined',
'type' => 'Html',
];
/**
* {@inheritdoc}
*/
public const AUTO_FORMAT_CHANGES = false;
/**
 * {@inheritdoc}
 *
 * Produces a single-column table, or the "identical" result when there
 * are no changes at all.
 */
protected function redererChanges(array $changes): string
{
    if (empty($changes)) {
        return $this->getResultForIdenticals();
    }

    $classAttribute = implode(' ', [
        ...$this->options['wrapperClasses'],
        'diff', 'diff-html', 'diff-combined',
    ]);

    $html = '<table class="' . $classAttribute . '">';
    $html .= $this->renderTableHeader();
    $html .= $this->renderTableHunks($changes);
    $html .= '</table>';

    return $html;
}
/**
 * Render the table header.
 *
 * @return string the "<thead>" markup, or an empty string if the "showHeader" option is off
 */
protected function renderTableHeader(): string
{
    return $this->options['showHeader']
        ? '<thead><tr><th>' . $this->_('differences') . '</th></tr></thead>'
        : '';
}
/**
 * Render the table separate block (the visual gap between two hunks).
 */
protected function renderTableSeparateBlock(): string
{
    return '<tbody class="skipped"><tr><td></td></tr></tbody>';
}
/**
 * Render table hunks.
 *
 * A separate block is emitted before every hunk but the first one when
 * the "separateBlock" option is enabled.
 *
 * @param array[][] $hunks each hunk has many blocks
 */
protected function renderTableHunks(array $hunks): string
{
    $pieces = [];

    foreach ($hunks as $index => $hunk) {
        if ($index > 0 && $this->options['separateBlock']) {
            $pieces[] = $this->renderTableSeparateBlock();
        }

        foreach ($hunk as $block) {
            $pieces[] = $this->renderTableBlock($block);
        }
    }

    return implode('', $pieces);
}
/**
 * Render the table block.
 *
 * Dispatches on the block's "tag" to the matching renderTableBlock*()
 * method and wraps the result in a "<tbody>" carrying the change class.
 *
 * @param array $block the block
 */
protected function renderTableBlock(array $block): string
{
switch ($block['tag']) {
case SequenceMatcher::OP_EQ:
$content = $this->renderTableBlockEqual($block);
break;
case SequenceMatcher::OP_INS:
$content = $this->renderTableBlockInsert($block);
break;
case SequenceMatcher::OP_DEL:
$content = $this->renderTableBlockDelete($block);
break;
case SequenceMatcher::OP_REP:
$content = $this->renderTableBlockReplace($block);
break;
default:
// unknown tag: render an empty body
$content = '';
}
// NOTE(review): the TAG_CLASS_MAP lookup is not guarded for unknown tags
return '<tbody class="change change-' . self::TAG_CLASS_MAP[$block['tag']] . '">' . $content . '</tbody>';
}
/**
 * Render the table block: equal.
 *
 * @param array $block the block
 */
protected function renderTableBlockEqual(array $block): string
{
    // note that although we are in a OP_EQ situation,
    // the old and the new may not be exactly the same
    // because of ignoreCase, ignoreWhitespace, etc
    // we could only pick either the old or the new to show
    // here we pick the new one to let the user know what it is now
    $lines = $this->customFormatLines(
        $block['new']['lines'],
        SequenceMatcher::OP_EQ,
    );

    $rows = [];
    foreach ($lines as $line) {
        $rows[] = $this->renderTableRow('new', SequenceMatcher::OP_EQ, $line);
    }

    return implode('', $rows);
}
/**
 * Render the table block: insert.
 *
 * Only the new side of the block is rendered.
 *
 * @param array $block the block
 */
protected function renderTableBlockInsert(array $block): string
{
    $lines = $this->customFormatLines(
        $block['new']['lines'],
        SequenceMatcher::OP_INS,
    );

    $rows = [];
    foreach ($lines as $line) {
        $rows[] = $this->renderTableRow('new', SequenceMatcher::OP_INS, $line);
    }

    return implode('', $rows);
}
/**
 * Render the table block: delete.
 *
 * Only the old side of the block is rendered.
 *
 * @param array $block the block
 */
protected function renderTableBlockDelete(array $block): string
{
    $lines = $this->customFormatLines(
        $block['old']['lines'],
        SequenceMatcher::OP_DEL,
    );

    $rows = [];
    foreach ($lines as $line) {
        $rows[] = $this->renderTableRow('old', SequenceMatcher::OP_DEL, $line);
    }

    return implode('', $rows);
}
/**
 * Render the table block: replace.
 *
 * With detail level "none" the block is rendered in separated form (all
 * deleted lines followed by all inserted lines). Otherwise each old/new
 * line pair is merged into a single row. As soon as one pair turns out not
 * to be merge-able, the whole block falls back to the separated form; rows
 * merged before that point are discarded so no line is rendered twice.
 *
 * @param array $block the block
 */
protected function renderTableBlockReplace(array $block): string
{
    if ($this->options['detailLevel'] === 'none') {
        return
            $this->renderTableBlockDelete($block) .
            $this->renderTableBlockInsert($block);
    }

    $oldLines = $block['old']['lines'];
    $newLines = $block['new']['lines'];

    $oldLinesCount = \count($oldLines);
    $newLinesCount = \count($newLines);

    // if the line counts changes, we treat the old and the new as
    // "a line with \n in it" and then do one-line-to-one-line diff
    if ($oldLinesCount !== $newLinesCount) {
        [$oldLines, $newLines] = $this->markReplaceBlockDiff($oldLines, $newLines);
        $oldLinesCount = $newLinesCount = 1;
    }

    $oldLines = $this->customFormatLines($oldLines, SequenceMatcher::OP_DEL);
    $newLines = $this->customFormatLines($newLines, SequenceMatcher::OP_INS);

    $ret = '';

    // now $oldLines must has the same line counts with $newlines
    for ($no = 0; $no < $newLinesCount; ++$no) {
        $mergedLine = $this->mergeReplaceLines($oldLines[$no], $newLines[$no]);

        // not merge-able, we fall back to separated form for the whole block;
        // appending it to the rows merged so far would duplicate those lines
        if (!isset($mergedLine)) {
            return
                $this->renderTableBlockDelete($block) .
                $this->renderTableBlockInsert($block);
        }

        $ret .= $this->renderTableRow('rep', SequenceMatcher::OP_REP, $mergedLine);
    }

    return $ret;
}
/**
 * Render a content row of the output table.
 *
 * @param string $tdClass the <td> class
 * @param int    $op      the operation
 * @param string $line    the line, already prepared for HTML output
 */
protected function renderTableRow(string $tdClass, int $op, string $line): string
{
    $symbol = self::SYMBOL_MAP[$op];

    $cell = '<td class="' . $tdClass . '">' . $line . '</td>';

    return '<tr data-type="' . $symbol . '">' . $cell . '</tr>';
}
/**
 * Merge two "replace"-type lines into a single line.
 *
 * The implementation concept is that if we remove all closure parts from
 * the old and the new, the rest of them (cleaned line) should be the same.
 * And then, we add back those removed closure parts in a correct order.
 *
 * Newlines in the merged result are converted into "<br>".
 *
 * @param string $oldLine the old line
 * @param string $newLine the new line
 *
 * @return null|string string if merge-able, null otherwise
 */
protected function mergeReplaceLines(string $oldLine, string $newLine): ?string
{
$delParts = $this->analyzeClosureParts(
$oldLine,
RendererConstant::HTML_CLOSURES_DEL,
SequenceMatcher::OP_DEL,
);
$insParts = $this->analyzeClosureParts(
$newLine,
RendererConstant::HTML_CLOSURES_INS,
SequenceMatcher::OP_INS,
);
// get the cleaned line by a non-regex way (should be faster)
// i.e., the new line with all "<ins>...</ins>" parts removed
$mergedLine = $newLine;
// each part's "offset" refers to the untouched $newLine; the parts are
// walked via ReverseIterator (presumably last to first) so that earlier
// offsets stay valid while cutting
foreach (ReverseIterator::fromArray($insParts) as $part) {
$mergedLine = substr_replace(
$mergedLine,
'', // deletion
$part['offset'],
\strlen($part['content']),
);
}
// note that $mergedLine is actually a clean line at this point
if (!$this->isLinesMergeable($oldLine, $newLine, $mergedLine)) {
return null;
}
// before building the $mergedParts, we do some adjustments
$this->revisePartsForBoundaryNewlines($delParts, RendererConstant::HTML_CLOSURES_DEL);
$this->revisePartsForBoundaryNewlines($insParts, RendererConstant::HTML_CLOSURES_INS);
// create a sorted merged parts array
$mergedParts = [...$delParts, ...$insParts];
usort(
$mergedParts,
// first sort by "offsetClean", "order" then by "type"
// (on a full tie, a "delete" part is placed before an "insert" part)
static fn (array $a, array $b): int => (
$a['offsetClean'] <=> $b['offsetClean']
?: $a['order'] <=> $b['order']
?: ($a['type'] === SequenceMatcher::OP_DEL ? -1 : 1)
),
);
// insert merged parts into the cleaned line
// "offsetClean" is a position in the cleaned line, hence the reverse walk again
foreach (ReverseIterator::fromArray($mergedParts) as $part) {
$mergedLine = substr_replace(
$mergedLine,
$part['content'],
$part['offsetClean'],
0, // insertion
);
}
return str_replace("\n", '<br>', $mergedLine);
}
/**
 * Analyze and get the closure parts information of the line.
 *
 * Such as
 * extract information for "<ins>part 1</ins>" and "<ins>part 2</ins>"
 * from "Hello <ins>part 1</ins>SOME OTHER TEXT<ins>part 2</ins> World"
 *
 * Scanning stops at the first left delimiter that has no right delimiter
 * after it.
 *
 * @param string   $line     the line
 * @param string[] $closures the closures, as [left delimiter, right delimiter]
 * @param int      $type     the type
 *
 * @return array[] the closure information
 */
protected function analyzeClosureParts(string $line, array $closures, int $type): array
{
    [$opening, $closing] = $closures;
    $openingLength = \strlen($opening);
    $closingLength = \strlen($closing);

    $parts = [];
    $cursor = 0; // where to continue searching from
    $removedLength = 0; // total length of the parts found so far

    while (true) {
        // find the next left delimiter
        $start = strpos($line, $opening, $cursor);
        if ($start === false) {
            break;
        }

        // find the corresponding right delimiter
        $closingAt = strpos($line, $closing, $start + $openingLength);
        if ($closingAt === false) {
            break;
        }

        $cursor = $closingAt + $closingLength;
        $length = $cursor - $start;

        $parts[] = [
            'type' => $type,
            // the sorting order used when both "offsetClean" are the same
            'order' => 0,
            // the offset in the line
            'offset' => $start,
            // the offset in the cleaned line (i.e., the line with closure parts removed)
            'offsetClean' => $start - $removedLength,
            // the content of the part
            'content' => substr($line, $start, $length),
        ];

        $removedLength += $length;
    }

    return $parts;
}
/**
 * Mark differences between two "replace" blocks.
 *
 * Both blocks are glued with "\n" into one pseudo-line each and handed to
 * the line renderer of the configured "detailLevel", so each of the
 * returned block (lines) is always only one line.
 *
 * @param string[] $oldBlock the old block
 * @param string[] $newBlock the new block
 *
 * @return string[][] the value of [[$oldLine], [$newLine]]
 */
protected function markReplaceBlockDiff(array $oldBlock, array $newBlock): array
{
    // static locals of a method are shared by all instances of the class, so
    // a single cached line renderer would keep serving the detail level and
    // options of whichever renderer called first; cache per detail level
    static $mbOld, $mbNew, $lineRenderers = [];

    $mbOld ??= new MbString();
    $mbNew ??= new MbString();

    $detailLevel = $this->options['detailLevel'];

    $lineRenderers[$detailLevel] ??= LineRendererFactory::make(
        $detailLevel,
        [], /** @todo is it possible to get the differOptions here? */
        $this->options,
    );

    // keep the cached line renderer in sync with the current renderer options
    $lineRenderer = $lineRenderers[$detailLevel];
    $lineRenderer->setRendererOptions($this->options);

    $mbOld->set(implode("\n", $oldBlock));
    $mbNew->set(implode("\n", $newBlock));

    $lineRenderer->render($mbOld, $mbNew);

    return [
        [$mbOld->get()], // one-line block for the old
        [$mbNew->get()], // one-line block for the new
    ];
}
/**
 * Determine whether the "replace"-type lines are merge-able or not.
 *
 * The lines are merge-able when the changed ratio (computed from the byte
 * lengths of the lines without closures and of the clean line) does not
 * exceed the "mergeThreshold" option.
 *
 * @param string $oldLine   the old line
 * @param string $newLine   the new line
 * @param string $cleanLine the clean line
 */
protected function isLinesMergeable(string $oldLine, string $newLine, string $cleanLine): bool
{
    $oldPlain = str_replace(RendererConstant::HTML_CLOSURES_DEL, '', $oldLine);
    $newPlain = str_replace(RendererConstant::HTML_CLOSURES_INS, '', $newLine);

    $totalLength = \strlen($oldPlain) + \strlen($newPlain);
    $unchangedLength = \strlen($cleanLine) * 2;

    // the "+ 1" keeps the division defined when both lines are empty
    $changedRatio = ($totalLength - $unchangedLength) / ($totalLength + 1);

    return $changedRatio <= $this->options['mergeThreshold'];
}
/**
 * Extract boundary newlines from parts into new parts.
 *
 * Newlines directly after a left delimiter or directly before a right
 * delimiter are removed from the part's content and appended to $parts as
 * a separate part wrapped in the same closures. The extracted part gets
 * "order" -1 (leading) or 1 (trailing) and otherwise copies the original part.
 *
 * @param array[]  $parts    the parts (modified in place)
 * @param string[] $closures the closures, as [left delimiter, right delimiter]
 *
 * @see https://git.io/JvVXH
 */
protected function revisePartsForBoundaryNewlines(array &$parts, array $closures): void
{
[$ld, $rd] = $closures;
$ldRegex = preg_quote($ld, '/');
$rdRegex = preg_quote($rd, '/');
// the count is taken once, so parts appended by the callbacks are not revisited
for ($i = \count($parts) - 1; $i >= 0; --$i) {
// a reference, so assigning "content" below updates $parts[$i] itself
$part = &$parts[$i];
// deal with leading newlines
$part['content'] = preg_replace_callback(
"/(?P<closure>{$ldRegex})(?P<nl>[\r\n]++)/u",
// $part is captured by value: the callback works on a copy of the current part
static function (array $matches) use (&$parts, $part, $ld, $rd): string {
// add a new part for the extracted newlines
$part['order'] = -1;
$part['content'] = "{$ld}{$matches['nl']}{$rd}";
$parts[] = $part;
// keep only the delimiter, i.e. drop the newlines from the original content
return $matches['closure'];
},
$part['content'],
);
// deal with trailing newlines
$part['content'] = preg_replace_callback(
"/(?P<nl>[\r\n]++)(?P<closure>{$rdRegex})/u",
// $part is captured by value: the callback works on a copy of the current part
static function (array $matches) use (&$parts, $part, $ld, $rd): string {
// add a new part for the extracted newlines
$part['order'] = 1;
$part['content'] = "{$ld}{$matches['nl']}{$rd}";
$parts[] = $part;
// keep only the delimiter, i.e. drop the newlines from the original content
return $matches['closure'];
},
$part['content'],
);
}
}
/**
 * Make lines suitable for HTML output.
 *
 * Lines are returned untouched when the changes are not raw. Otherwise
 * they are formatted, the generic closures are replaced by the "del"/"ins"
 * variant matching the operation (if any) and "\r\n" is normalized to "\n".
 *
 * @param string[] $lines the lines
 * @param int      $op    the operation
 */
protected function customFormatLines(array $lines, int $op): array
{
    if (!$this->changesAreRaw) {
        return $lines;
    }

    static $closureMap = [
        SequenceMatcher::OP_DEL => RendererConstant::HTML_CLOSURES_DEL,
        SequenceMatcher::OP_INS => RendererConstant::HTML_CLOSURES_INS,
    ];

    $htmlClosures = $closureMap[$op] ?? null;

    return array_map(
        static function (string $line) use ($htmlClosures): string {
            if ($htmlClosures) {
                $line = str_replace(RendererConstant::HTML_CLOSURES, $htmlClosures, $line);
            }

            // fixes https://github.com/jfcherng/php-diff/issues/34
            return str_replace("\r\n", "\n", $line);
        },
        $this->formatLines($lines),
    );
}
}

View File

@ -0,0 +1,266 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
use Jfcherng\Diff\SequenceMatcher;
/**
* Inline HTML diff generator.
*/
final class Inline extends AbstractHtml
{
/**
* {@inheritdoc}
*/
public const INFO = [
'desc' => 'Inline',
'type' => 'Html',
];
/**
 * {@inheritdoc}
 *
 * Produces the inline diff table, or the "identical" result when there
 * are no changes at all.
 */
protected function redererChanges(array $changes): string
{
    if (empty($changes)) {
        return $this->getResultForIdenticals();
    }

    $classAttribute = implode(' ', [
        ...$this->options['wrapperClasses'],
        'diff', 'diff-html', 'diff-inline',
    ]);

    $html = '<table class="' . $classAttribute . '">';
    $html .= $this->renderTableHeader();
    $html .= $this->renderTableHunks($changes);
    $html .= '</table>';

    return $html;
}
/**
 * Render the table header.
 *
 * With line numbers there are four header cells (old number, new number,
 * diff symbol, differences); without them a single cell spans two columns.
 *
 * @return string the "<thead>" markup, or an empty string if the "showHeader" option is off
 */
protected function renderTableHeader(): string
{
    if (!$this->options['showHeader']) {
        return '';
    }

    if ($this->options['lineNumbers']) {
        $cells =
            '<th>' . $this->_('old_version') . '</th>' .
            '<th>' . $this->_('new_version') . '</th>' .
            '<th></th>' . // diff symbol column
            '<th>' . $this->_('differences') . '</th>';
    } else {
        $cells = '<th colspan="2">' . $this->_('differences') . '</th>';
    }

    return '<thead><tr>' . $cells . '</tr></thead>';
}
/**
 * Render the table separate block (the visual gap between two hunks).
 *
 * The single cell spans all columns: four with line numbers, two without.
 */
protected function renderTableSeparateBlock(): string
{
    $columnCount = $this->options['lineNumbers'] ? '4' : '2';

    return '<tbody class="skipped"><tr><td colspan="' . $columnCount . '"></td></tr></tbody>';
}
/**
 * Render table hunks.
 *
 * A separate block is emitted before every hunk but the first one when
 * the "separateBlock" option is enabled.
 *
 * @param array[][] $hunks each hunk has many blocks
 */
protected function renderTableHunks(array $hunks): string
{
    $pieces = [];

    foreach ($hunks as $index => $hunk) {
        if ($index > 0 && $this->options['separateBlock']) {
            $pieces[] = $this->renderTableSeparateBlock();
        }

        foreach ($hunk as $block) {
            $pieces[] = $this->renderTableBlock($block);
        }
    }

    return implode('', $pieces);
}
/**
 * Render the table block.
 *
 * Dispatches on the block's "tag" to the matching renderTableBlock*()
 * method and wraps the result in a "<tbody>" carrying the change class.
 *
 * @param array $block the block
 */
protected function renderTableBlock(array $block): string
{
switch ($block['tag']) {
case SequenceMatcher::OP_EQ:
$content = $this->renderTableBlockEqual($block);
break;
case SequenceMatcher::OP_INS:
$content = $this->renderTableBlockInsert($block);
break;
case SequenceMatcher::OP_DEL:
$content = $this->renderTableBlockDelete($block);
break;
case SequenceMatcher::OP_REP:
$content = $this->renderTableBlockReplace($block);
break;
default:
// unknown tag: render an empty body
$content = '';
}
// NOTE(review): the TAG_CLASS_MAP lookup is not guarded for unknown tags
return '<tbody class="change change-' . self::TAG_CLASS_MAP[$block['tag']] . '">' . $content . '</tbody>';
}
/**
 * Render the table block: equal.
 *
 * Line numbers are 1-based: block offset + index within the block + 1.
 *
 * @param array $block the block
 */
protected function renderTableBlockEqual(array $block): string
{
    $rows = [];

    // note that although we are in a OP_EQ situation,
    // the old and the new may not be exactly the same
    // because of ignoreCase, ignoreWhitespace, etc
    // we could only pick either the old or the new to show
    // here we pick the new one to let the user know what it is now
    foreach ($block['new']['lines'] as $no => $newLine) {
        $oldLineNum = $block['old']['offset'] + $no + 1;
        $newLineNum = $block['new']['offset'] + $no + 1;

        $rows[] = $this->renderTableRow('new', SequenceMatcher::OP_EQ, $newLine, $oldLineNum, $newLineNum);
    }

    return implode('', $rows);
}
/**
 * Render the table block: insert.
 *
 * Inserted lines only carry a new line number; the old one is null.
 *
 * @param array $block the block
 */
protected function renderTableBlockInsert(array $block): string
{
    $rows = [];

    foreach ($block['new']['lines'] as $no => $newLine) {
        $newLineNum = $block['new']['offset'] + $no + 1;

        $rows[] = $this->renderTableRow('new', SequenceMatcher::OP_INS, $newLine, null, $newLineNum);
    }

    return implode('', $rows);
}
/**
 * Render the table block: delete.
 *
 * Deleted lines only carry an old line number; the new one is null.
 *
 * @param array $block the block
 */
protected function renderTableBlockDelete(array $block): string
{
    $rows = [];

    foreach ($block['old']['lines'] as $no => $oldLine) {
        $oldLineNum = $block['old']['offset'] + $no + 1;

        $rows[] = $this->renderTableRow('old', SequenceMatcher::OP_DEL, $oldLine, $oldLineNum, null);
    }

    return implode('', $rows);
}
/**
 * Render the table block: replace.
 *
 * A replacement is shown as the deleted lines followed by the inserted lines.
 *
 * @param array $block the block
 */
protected function renderTableBlockReplace(array $block): string
{
    $deletedRows = $this->renderTableBlockDelete($block);
    $insertedRows = $this->renderTableBlockInsert($block);

    return $deletedRows . $insertedRows;
}
/**
 * Render a content row of the output table.
 *
 * The row consists of the optional line number columns, the diff symbol
 * column and the content column.
 *
 * @param string   $tdClass    the <td> class
 * @param int      $op         the operation
 * @param string   $line       the line
 * @param null|int $oldLineNum the old line number
 * @param null|int $newLineNum the new line number
 */
protected function renderTableRow(
    string $tdClass,
    int $op,
    string $line,
    ?int $oldLineNum,
    ?int $newLineNum
): string {
    $symbol = self::SYMBOL_MAP[$op];

    $lineNumberCells = $this->options['lineNumbers']
        ? $this->renderLineNumberColumns($oldLineNum, $newLineNum)
        : '';

    $signCell = '<th class="sign ' . self::TAG_CLASS_MAP[$op] . '">' . $symbol . '</th>';
    $contentCell = '<td class="' . $tdClass . '">' . $line . '</td>';

    return '<tr data-type="' . $symbol . '">' . $lineNumberCells . $signCell . $contentCell . '</tr>';
}
/**
 * Render the line number columns.
 *
 * A null line number yields an empty header cell.
 *
 * @param null|int $oldLineNum the old line number
 * @param null|int $newLineNum the new line number
 */
protected function renderLineNumberColumns(?int $oldLineNum, ?int $newLineNum): string
{
    $oldCell = $oldLineNum === null
        ? '<th></th>'
        : '<th class="n-old">' . $oldLineNum . '</th>';

    $newCell = $newLineNum === null
        ? '<th></th>'
        : '<th class="n-new">' . $newLineNum . '</th>';

    return $oldCell . $newCell;
}
}

View File

@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
/**
 * HTML Json diff generator.
 *
 * An empty subclass of JsonHtml that adds no behavior of its own.
 *
 * @deprecated 6.8.0 Use the "JsonHtml" renderer instead.
 */
final class Json extends JsonHtml
{
}

View File

@ -0,0 +1,68 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
use Jfcherng\Diff\SequenceMatcher;
/**
 * HTML Json diff generator.
 *
 * Outputs the "changes" structure as a JSON string whose lines are HTML-escaped.
 */
class JsonHtml extends AbstractHtml
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'HTML Json',
        'type' => 'Html',
    ];

    /**
     * {@inheritdoc}
     */
    public const IS_TEXT_RENDERER = true;

    /**
     * {@inheritdoc}
     *
     * An empty JSON array is used for identical inputs.
     */
    public function getResultForIdenticalsDefault(): string
    {
        return '[]';
    }

    /**
     * {@inheritdoc}
     *
     * Encodes the changes with the "jsonEncodeFlags" option, after optionally
     * converting the tags into their string form ("outputTagAsString").
     */
    protected function redererChanges(array $changes): string
    {
        $flags = $this->options['jsonEncodeFlags'];

        if ($this->options['outputTagAsString']) {
            $this->convertTagToString($changes);
        }

        return json_encode($changes, $flags);
    }

    /**
     * Convert tags of changes to their string form for better readability.
     *
     * @param array[][] $changes the changes (modified in place)
     */
    protected function convertTagToString(array &$changes): void
    {
        foreach ($changes as $hunkKey => $hunk) {
            foreach ($hunk as $blockKey => $block) {
                $changes[$hunkKey][$blockKey]['tag'] = SequenceMatcher::opIntToStr($block['tag']);
            }
        }
    }

    /**
     * {@inheritdoc}
     *
     * Only HTML-escapes the string; no tab or space conversion is applied.
     */
    protected function formatStringFromLines(string $string): string
    {
        $escaped = $this->htmlSafe($string);

        return $escaped;
    }
}

View File

@ -0,0 +1,108 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Diff\SequenceMatcher;
/**
 * Base renderer for rendering HTML-based line diffs.
 *
 * Holds the differ/renderer options and a SequenceMatcher that subclasses
 * use to compute the changed segments of a line pair.
 *
 * @todo use typed properties (BC breaking for public interface) in v7
 */
abstract class AbstractLineRenderer implements LineRendererInterface
{
    /**
     * @var SequenceMatcher the sequence matcher
     */
    protected $sequenceMatcher;

    /**
     * @var array the differ options
     */
    protected $differOptions = [];

    /**
     * @var array the renderer options
     */
    protected $rendererOptions = [];

    /**
     * The constructor.
     *
     * @param array $differOptions   the differ options
     * @param array $rendererOptions the renderer options
     */
    public function __construct(array $differOptions, array $rendererOptions)
    {
        // the matcher must exist before setDifferOptions() forwards options to it
        $this->sequenceMatcher = new SequenceMatcher([], []);

        $this->setDifferOptions($differOptions);
        $this->setRendererOptions($rendererOptions);
    }

    /**
     * Set the differ options and forward them to the sequence matcher.
     *
     * @param array $differOptions the differ options
     *
     * @return static
     */
    public function setDifferOptions(array $differOptions): self
    {
        $this->sequenceMatcher->setOptions($differOptions);
        $this->differOptions = $differOptions;

        return $this;
    }

    /**
     * Set the renderer options.
     *
     * @param array $rendererOptions the renderer options
     *
     * @return static
     */
    public function setRendererOptions(array $rendererOptions): self
    {
        $this->rendererOptions = $rendererOptions;

        return $this;
    }

    /**
     * Gets the differ options.
     *
     * @return array the differ options
     */
    public function getDifferOptions(): array
    {
        return $this->differOptions;
    }

    /**
     * Gets the renderer options.
     *
     * @return array the renderer options
     */
    public function getRendererOptions(): array
    {
        return $this->rendererOptions;
    }

    /**
     * Get the changed extent segments, i.e. the opcodes between two sequences.
     *
     * @param string[] $old the old array
     * @param string[] $new the new array
     *
     * @return int[][] the changed extent segments
     */
    protected function getChangedExtentSegments(array $old, array $new): array
    {
        $matcher = $this->sequenceMatcher->setSequences($old, $new);

        return $matcher->getOpcodes();
    }
}

View File

@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Diff\Utility\ReverseIterator;
use Jfcherng\Utility\MbString;
/**
 * Line renderer that marks changes with character granularity.
 */
final class Char extends AbstractLineRenderer
{
    /**
     * {@inheritdoc}
     *
     * Both lines are compared character by character and every changed
     * range gets enclosed with RendererConstant::HTML_CLOSURES.
     *
     * @return static
     */
    public function render(MbString $mbOld, MbString $mbNew): LineRendererInterface
    {
        $opcodes = $this->getChangedExtentSegments($mbOld->toArray(), $mbNew->toArray());

        $affectsOld = SequenceMatcher::OP_REP | SequenceMatcher::OP_DEL;
        $affectsNew = SequenceMatcher::OP_REP | SequenceMatcher::OP_INS;

        // the opcodes are processed from the last one to the first one
        foreach (ReverseIterator::fromArray($opcodes) as $opcode) {
            [$op, $oldBegin, $oldEnd, $newBegin, $newEnd] = $opcode;

            if (($op & $affectsOld) !== 0) {
                $mbOld->str_enclose_i(RendererConstant::HTML_CLOSURES, $oldBegin, $oldEnd - $oldBegin);
            }

            if (($op & $affectsNew) !== 0) {
                $mbNew->str_enclose_i(RendererConstant::HTML_CLOSURES, $newBegin, $newEnd - $newBegin);
            }
        }

        return $this;
    }
}

View File

@ -0,0 +1,81 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Utility\MbString;
/**
 * Line renderer that marks one single region per line: everything between
 * the common prefix and the common suffix of the old and the new line.
 */
final class Line extends AbstractLineRenderer
{
    /**
     * {@inheritdoc}
     *
     * @return static
     */
    public function render(MbString $mbOld, MbString $mbNew): LineRendererInterface
    {
        [$start, $end] = $this->getChangedExtentRegion($mbOld, $mbNew);

        // an ending position of 0 means the two strings are the same
        if ($end === 0) {
            return $this;
        }

        // $end is negative (counted from the tail), hence the length
        // of the changed region is "strlen + $end + 1 - $start"
        foreach ([$mbOld, $mbNew] as $mbLine) {
            $mbLine->str_enclose_i(
                RendererConstant::HTML_CLOSURES,
                $start,
                $end + $mbLine->strlen() - $start + 1,
            );
        }

        return $this;
    }

    /**
     * Locate where the two strings start to differ and where they stop differing.
     *
     * @param MbString $mbOld the old multibyte line
     * @param MbString $mbNew the new multibyte line
     *
     * @return int[] Array containing the starting position (non-negative) and the ending position (negative)
     *               [0, 0] if two strings are the same
     */
    protected function getChangedExtentRegion(MbString $mbOld, MbString $mbNew): array
    {
        // identical strings: bail out early without scanning them
        if ($mbOld->getRaw() === $mbNew->getRaw()) {
            return [0, 0];
        }

        $shorterLength = min($mbOld->strlen(), $mbNew->strlen());

        // length of the common prefix
        for ($start = 0; $start < $shorterLength; ++$start) {
            if ($mbOld->getAtRaw($start) !== $mbNew->getAtRaw($start)) {
                break;
            }
        }

        // walk backward from the tail (-1, -2, ...) but never into the common prefix
        $remaining = $shorterLength - $start;
        for ($end = -1; -$end <= $remaining; --$end) {
            if ($mbOld->getAtRaw($end) !== $mbNew->getAtRaw($end)) {
                break;
            }
        }

        return [$start, $end];
    }
}

View File

@ -0,0 +1,20 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Utility\MbString;
/**
 * Contract of the renderers which mark the in-line changed extent of a pair of lines.
 */
interface LineRendererInterface
{
/**
 * Render the in-line changed extent.
 *
 * @param MbString $mbOld the old multibyte line
 * @param MbString $mbNew the new multibyte line
 *
 * @return static
 */
public function render(MbString $mbOld, MbString $mbNew): self;
}

View File

@ -0,0 +1,20 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Utility\MbString;
/**
 * Line renderer that leaves both lines untouched.
 */
final class None extends AbstractLineRenderer
{
/**
 * {@inheritdoc}
 *
 * Does nothing with the given lines.
 *
 * @return static
 */
public function render(MbString $mbOld, MbString $mbNew): LineRendererInterface
{
return $this;
}
}

View File

@ -0,0 +1,108 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Diff\Utility\ReverseIterator;
use Jfcherng\Diff\Utility\Str;
use Jfcherng\Utility\MbString;
/**
 * Line renderer that marks changes with word granularity.
 */
final class Word extends AbstractLineRenderer
{
/**
 * {@inheritdoc}
 *
 * Both lines are split on RendererConstant::PUNCTUATIONS_RANGE (the delimiters
 * are kept as words, empty pieces are dropped), the word lists are compared and
 * every changed run of words is enclosed with RendererConstant::HTML_CLOSURES.
 * The resulting words are joined and written back into $mbOld / $mbNew.
 *
 * @return static
 */
public function render(MbString $mbOld, MbString $mbNew): LineRendererInterface
{
static $splitRegex = '/([' . RendererConstant::PUNCTUATIONS_RANGE . '])/uS';
// an opening closure immediately followed by an ending closure
static $dummyHtmlClosure = RendererConstant::HTML_CLOSURES[0] . RendererConstant::HTML_CLOSURES[1];
$pregFlag = \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY;
$oldWords = $mbOld->toArraySplit($splitRegex, -1, $pregFlag);
$newWords = $mbNew->toArraySplit($splitRegex, -1, $pregFlag);
$hunk = $this->getChangedExtentSegments($oldWords, $newWords);
// reversely iterate hunk
// NOTE(review): presumably so that the array_splice() calls below do not
// shift the indexes of the opcodes which are still to be processed
foreach (ReverseIterator::fromArray($hunk) as [$op, $i1, $i2, $j1, $j2]) {
if ($op & (SequenceMatcher::OP_REP | SequenceMatcher::OP_DEL)) {
// open the closure on the first changed old word and close it on the last one
$oldWords[$i1] = RendererConstant::HTML_CLOSURES[0] . $oldWords[$i1];
$oldWords[$i2 - 1] .= RendererConstant::HTML_CLOSURES[1];
// insert dummy HTML closure to ensure there are always
// the same amounts of HTML closures in $oldWords and $newWords
// thus, this should make that "wordGlues" work correctly
// @see https://github.com/jfcherng/php-diff/pull/25
if ($op === SequenceMatcher::OP_DEL) {
array_splice($newWords, $j1, 0, [$dummyHtmlClosure]);
}
}
if ($op & (SequenceMatcher::OP_REP | SequenceMatcher::OP_INS)) {
// open the closure on the first changed new word and close it on the last one
$newWords[$j1] = RendererConstant::HTML_CLOSURES[0] . $newWords[$j1];
$newWords[$j2 - 1] .= RendererConstant::HTML_CLOSURES[1];
// same dummy closure trick as above, mirrored for pure insertions
if ($op === SequenceMatcher::OP_INS) {
array_splice($oldWords, $i1, 0, [$dummyHtmlClosure]);
}
}
}
// gluing only happens when there are opcodes and "wordGlues" is a non-empty option
if (!empty($hunk) && !empty($this->rendererOptions['wordGlues'])) {
$regexGlues = array_map(
static fn (string $glue): string => preg_quote($glue, '/'),
$this->rendererOptions['wordGlues'],
);
// matches a string which consists of glues only
$gluePattern = '/^(?:' . implode('|', $regexGlues) . ')+$/uS';
$this->glueWordsResult($oldWords, $gluePattern);
$this->glueWordsResult($newWords, $gluePattern);
}
$mbOld->set(implode('', $oldWords));
$mbNew->set(implode('', $newWords));
return $this;
}
/**
 * Beautify diff result by glueing words.
 *
 * What this function does is basically making
 * ["<diff_begin>good<diff_end>", "-", "<diff_begin>looking<diff_end>"]
 * into
 * ["<diff_begin>good", "-", "looking<diff_end>"].
 *
 * The words are modified in place.
 *
 * @param array $words the words
 * @param string $gluePattern the regex to determine a string is purely glue or not
 */
protected function glueWordsResult(array &$words, string $gluePattern): void
{
/** @var int index of the word which has the trailing closure */
$endClosureIdx = -1;
foreach ($words as $idx => &$word) {
if ($word === '') {
continue;
}
if ($endClosureIdx < 0) {
// not tracking anything yet: look for a word which ends a closure
if (Str::endsWith($word, RendererConstant::HTML_CLOSURES[1])) {
$endClosureIdx = $idx;
}
} elseif (Str::startsWith($word, RendererConstant::HTML_CLOSURES[0])) {
// merge: drop the ending closure of the tracked word and
// the opening closure of the current word
$words[$endClosureIdx] = substr($words[$endClosureIdx], 0, -\strlen(RendererConstant::HTML_CLOSURES[1]));
$word = substr($word, \strlen(RendererConstant::HTML_CLOSURES[0]));
$endClosureIdx = $idx;
} elseif (!preg_match($gluePattern, $word)) {
// a word which is not purely glue stops the tracking
$endClosureIdx = -1;
}
}
}
}

View File

@ -0,0 +1,281 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
use Jfcherng\Diff\SequenceMatcher;
/**
 * Side by Side HTML diff generator.
 *
 * The changes are rendered as one table where the old version
 * occupies the left columns and the new version the right ones.
 */
final class SideBySide extends AbstractHtml
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'Side by side',
        'type' => 'Html',
    ];

    /**
     * {@inheritdoc}
     */
    protected function redererChanges(array $changes): string
    {
        if (empty($changes)) {
            return $this->getResultForIdenticals();
        }

        $classAttribute = implode(' ', [
            ...$this->options['wrapperClasses'],
            'diff', 'diff-html', 'diff-side-by-side',
        ]);

        $html = '<table class="' . $classAttribute . '">';
        $html .= $this->renderTableHeader();
        $html .= $this->renderTableHunks($changes);
        $html .= '</table>';

        return $html;
    }

    /**
     * Render the table header, or an empty string if "showHeader" is off.
     */
    protected function renderTableHeader(): string
    {
        if (!$this->options['showHeader']) {
            return '';
        }

        // a title spans the line number column too when line numbers are shown
        $colspan = $this->options['lineNumbers'] ? ' colspan="2"' : '';

        $oldTitle = '<th' . $colspan . '>' . $this->_('old_version') . '</th>';
        $newTitle = '<th' . $colspan . '>' . $this->_('new_version') . '</th>';

        return '<thead><tr>' . $oldTitle . $newTitle . '</tr></thead>';
    }

    /**
     * Render the block which separates two hunks.
     */
    protected function renderTableSeparateBlock(): string
    {
        $columnCount = $this->options['lineNumbers'] ? '4' : '2';

        return sprintf(
            '<tbody class="skipped"><tr><td colspan="%s"></td></tr></tbody>',
            $columnCount,
        );
    }

    /**
     * Render all hunks of the table.
     *
     * @param array[][] $hunks each hunk has many blocks
     */
    protected function renderTableHunks(array $hunks): string
    {
        $html = '';

        foreach ($hunks as $hunkIdx => $blocks) {
            // hunks are separated from each other only if "separateBlock" is on
            if ($hunkIdx > 0 && $this->options['separateBlock']) {
                $html .= $this->renderTableSeparateBlock();
            }

            foreach ($blocks as $block) {
                $html .= $this->renderTableBlock($block);
            }
        }

        return $html;
    }

    /**
     * Render one block by dispatching on its tag.
     *
     * @param array $block the block
     */
    protected function renderTableBlock(array $block): string
    {
        $tag = $block['tag'];

        switch ($tag) {
            case SequenceMatcher::OP_EQ:
                $rows = $this->renderTableBlockEqual($block);
                break;

            case SequenceMatcher::OP_INS:
                $rows = $this->renderTableBlockInsert($block);
                break;

            case SequenceMatcher::OP_DEL:
                $rows = $this->renderTableBlockDelete($block);
                break;

            case SequenceMatcher::OP_REP:
                $rows = $this->renderTableBlockReplace($block);
                break;

            default:
                $rows = '';
                break;
        }

        $cssClasses = 'change change-' . self::TAG_CLASS_MAP[$tag];

        return '<tbody class="' . $cssClasses . '">' . $rows . '</tbody>';
    }

    /**
     * Render the rows of an "equal" block.
     *
     * @param array $block the block
     */
    protected function renderTableBlockEqual(array $block): string
    {
        $html = '';
        $new = $block['new'];
        $rowCount = \count($new['lines']);
        $no = 0;

        while ($no < $rowCount) {
            $html .= $this->renderTableRow(
                $block['old']['lines'][$no],
                $new['lines'][$no],
                $block['old']['offset'] + $no + 1,
                $new['offset'] + $no + 1,
            );
            ++$no;
        }

        return $html;
    }

    /**
     * Render the rows of an "insert" block: the old side stays empty.
     *
     * @param array $block the block
     */
    protected function renderTableBlockInsert(array $block): string
    {
        $html = '';
        $new = $block['new'];

        foreach ($new['lines'] as $no => $line) {
            $lineNum = $new['offset'] + $no + 1;
            $html .= $this->renderTableRow(null, $line, null, $lineNum);
        }

        return $html;
    }

    /**
     * Render the rows of a "delete" block: the new side stays empty.
     *
     * @param array $block the block
     */
    protected function renderTableBlockDelete(array $block): string
    {
        $html = '';
        $old = $block['old'];

        foreach ($old['lines'] as $no => $line) {
            $lineNum = $old['offset'] + $no + 1;
            $html .= $this->renderTableRow($line, null, $lineNum, null);
        }

        return $html;
    }

    /**
     * Render the rows of a "replace" block.
     *
     * The two sides may have different line counts,
     * the shorter side is padded with empty cells.
     *
     * @param array $block the block
     */
    protected function renderTableBlockReplace(array $block): string
    {
        $html = '';
        $rowCount = max(\count($block['old']['lines']), \count($block['new']['lines']));

        for ($no = 0; $no < $rowCount; ++$no) {
            $hasOld = isset($block['old']['lines'][$no]);
            $oldLineNum = $hasOld ? $block['old']['offset'] + $no + 1 : null;
            $oldLine = $hasOld ? $block['old']['lines'][$no] : null;

            $hasNew = isset($block['new']['lines'][$no]);
            $newLineNum = $hasNew ? $block['new']['offset'] + $no + 1 : null;
            $newLine = $hasNew ? $block['new']['lines'][$no] : null;

            $html .= $this->renderTableRow($oldLine, $newLine, $oldLineNum, $newLineNum);
        }

        return $html;
    }

    /**
     * Render one content row of the output table.
     *
     * @param null|string $oldLine    the old line
     * @param null|string $newLine    the new line
     * @param null|int    $oldLineNum the old line number
     * @param null|int    $newLineNum the new line number
     */
    protected function renderTableRow(
        ?string $oldLine,
        ?string $newLine,
        ?int $oldLineNum,
        ?int $newLineNum
    ): string {
        $cells = '';

        if ($this->options['lineNumbers']) {
            $cells .= $this->renderLineNumberColumn('old', $oldLineNum);
        }
        $cells .= $this->renderLineContentColumn('old', $oldLine);

        if ($this->options['lineNumbers']) {
            $cells .= $this->renderLineNumberColumn('new', $newLineNum);
        }
        $cells .= $this->renderLineContentColumn('new', $newLine);

        return '<tr>' . $cells . '</tr>';
    }

    /**
     * Render the line number cell.
     *
     * @param string   $type    the diff type
     * @param null|int $lineNum the line number, null produces an empty cell
     */
    protected function renderLineNumberColumn(string $type, ?int $lineNum): string
    {
        if ($lineNum === null) {
            return '<th></th>';
        }

        return '<th class="n-' . $type . '">' . $lineNum . '</th>';
    }

    /**
     * Render the line content cell.
     *
     * @param string      $type    the diff type
     * @param null|string $content the line content, null adds the "none" class
     */
    protected function renderLineContentColumn(string $type, ?string $content): string
    {
        $noneClass = $content === null ? ' none' : '';

        return '<td class="' . $type . $noneClass . '">' . $content . '</td>';
    }
}

View File

@ -0,0 +1,116 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer;
/**
 * Constants shared by the renderers.
 */
final class RendererConstant
{
/**
 * The base namespace of renderers.
 *
 * @var string
 */
public const RENDERER_NAMESPACE = __NAMESPACE__;
/**
 * Available renderer types.
 *
 * @var string[]
 */
public const RENDERER_TYPES = ['Html', 'Text'];
/**
 * Closures that are used to enclose different parts in string.
 *
 * The first item is the opening closure and the second one is the ending closure.
 *
 * Arbitrary chars from the 15-16th Unicode reserved areas
 * and hopefully, they won't appear in source texts.
 *
 * @var string[]
 */
public const HTML_CLOSURES = ["\u{fcffc}\u{ff2fb}", "\u{fff41}\u{fcffc}"];
/**
 * Closures that are used to enclose deleted chars in output HTML.
 *
 * @var string[]
 */
public const HTML_CLOSURES_DEL = ['<del>', '</del>'];
/**
 * Closures that are used to enclose inserted chars in output HTML.
 *
 * @var string[]
 */
public const HTML_CLOSURES_INS = ['<ins>', '</ins>'];
/**
 * The delimiter to be used as the glue in string/array functions.
 *
 * Arbitrary chars from the 15-16th Unicode reserved areas
 * and hopefully, it won't appear in source texts.
 *
 * @var string
 */
public const IMPLODE_DELIMITER = "\u{ff2fa}\u{fcffc}\u{fff42}";
/**
 * Regex range for punctuations.
 *
 * This is the content of a regex character class (without the brackets).
 * Presuming the regex delimiter is "/".
 *
 * @var string
 */
public const PUNCTUATIONS_RANGE = (
// Latin-1 Supplement
// @see https://unicode-table.com/en/blocks/latin-1-supplement/
"\u{0080}-\u{00BB}" .
// Spacing Modifier Letters
// @see https://unicode-table.com/en/blocks/spacing-modifier-letters/
"\u{02B0}-\u{02FF}" .
// Combining Diacritical Marks
// @see https://unicode-table.com/en/blocks/combining-diacritical-marks/
"\u{0300}-\u{036F}" .
// Small Form Variants
// @see https://unicode-table.com/en/blocks/small-form-variants/
"\u{FE50}-\u{FE6F}" .
// General Punctuation
// @see https://unicode-table.com/en/blocks/general-punctuation/
"\u{2000}-\u{206F}" .
// Supplemental Punctuation
// @see https://unicode-table.com/en/blocks/supplemental-punctuation/
"\u{2E00}-\u{2E7F}" .
// CJK Symbols and Punctuation
// @see https://unicode-table.com/en/blocks/cjk-symbols-and-punctuation/
"\u{3000}-\u{303F}" .
// Ideographic Symbols and Punctuation
// @see https://unicode-table.com/en/blocks/ideographic-symbols-and-punctuation/
"\u{16FE0}-\u{16FFF}" .
// individual characters which do not follow a rule
" \t\r\n$,.:;!?'\"()\\[\\]{}%@<=>_+\\-*\\/~\\\\|" .
' _' .
'「」『』〈〉《》【】()()‘’“”' .
'.‧・・•·¿'
);
/**
 * Colorize the CLI output if possible.
 *
 * @var int
 */
public const CLI_COLOR_AUTO = -1;
/**
 * Force not to colorize the CLI output.
 *
 * @var int
 */
public const CLI_COLOR_DISABLE = 0;
/**
 * Force to colorize the CLI output if possible.
 *
 * @var int
 */
public const CLI_COLOR_ENABLE = 1;
}

View File

@ -0,0 +1,35 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\Exception\UnsupportedFunctionException;
/**
 * Renderer Interface.
 *
 * Every renderer turns either a Differ object or a differ array into a string.
 */
interface RendererInterface
{
/**
 * Get the renderer result when the old and the new are the same.
 *
 * @return string the result for identical inputs
 */
public function getResultForIdenticals(): string;
/**
 * Render the differ and return the result.
 *
 * @param Differ $differ the Differ object to be rendered
 *
 * @return string the rendered result
 */
public function render(Differ $differ): string;
/**
 * Render the differ array and return the result.
 *
 * @param array[][] $differArray the Differ array to be rendered
 *
 * @throws UnsupportedFunctionException if the renderer does not support this method
 *
 * @return string the rendered result
 */
public function renderArray(array $differArray): string;
}

View File

@ -0,0 +1,145 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Text;
use Jfcherng\Diff\Exception\UnsupportedFunctionException;
use Jfcherng\Diff\Renderer\AbstractRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Utility\CliColor;
/**
 * Common base of the text-based diff renderers.
 *
 * It decides whether the CLI output gets colorized and
 * provides the helper which applies the colors.
 */
abstract class AbstractText extends AbstractRenderer
{
    /**
     * @var bool is this renderer pure text?
     */
    public const IS_TEXT_RENDERER = true;

    /**
     * @var string the diff output representing there is no EOL at EOF in the GNU diff tool
     */
    public const GNU_OUTPUT_NO_EOL_AT_EOF = '\ No newline at end of file';

    /**
     * @var bool controls whether cliColoredString() is enabled or not
     */
    protected $isCliColorEnabled = false;

    /**
     * {@inheritdoc}
     */
    public function setOptions(array $options): AbstractRenderer
    {
        parent::setOptions($options);

        $colorization = $this->options['cliColorization'];

        if ($colorization === RendererConstant::CLI_COLOR_DISABLE) {
            $this->isCliColorEnabled = false;
        } elseif ($colorization === RendererConstant::CLI_COLOR_ENABLE) {
            $this->isCliColorEnabled = true;
        } else {
            // any other value: colorize only on the CLI when STDOUT supports colors
            $this->isCliColorEnabled = \PHP_SAPI === 'cli' && $this->hasColorSupport(\STDOUT);
        }

        return $this;
    }

    /**
     * {@inheritdoc}
     */
    public function getResultForIdenticalsDefault(): string
    {
        return '';
    }

    /**
     * {@inheritdoc}
     *
     * Always throws here.
     */
    protected function renderArrayWorker(array $differArray): string
    {
        throw new UnsupportedFunctionException(__METHOD__);

        return ''; // make IDE not complain
    }

    /**
     * Colorize the string for CLI output.
     *
     * The string is returned as is when the colorization is
     * disabled or when there is no style for the symbol.
     *
     * @param string      $str    the string
     * @param null|string $symbol the symbol
     *
     * @return string the (maybe) colorized string
     */
    protected function cliColoredString(string $str, ?string $symbol): string
    {
        static $symbolToStyles = [
            '@' => ['f_purple', 'bold'], // header
            '-' => ['f_red', 'bold'], // deleted
            '+' => ['f_green', 'bold'], // inserted
            '!' => ['f_yellow', 'bold'], // replaced
        ];

        $styles = $symbolToStyles[$symbol] ?? [];

        if ($this->isCliColorEnabled && !empty($styles)) {
            return CliColor::color($str, $styles);
        }

        return $str;
    }

    /**
     * Returns true if the stream supports colorization.
     *
     * Colorization is disabled if not supported by the stream:
     *
     * This is tricky on Windows, because Cygwin, Msys2 etc emulate pseudo
     * terminals via named pipes, so we can only check the environment.
     *
     * Reference: Composer\XdebugHandler\Process::supportsColor
     * https://github.com/composer/xdebug-handler
     *
     * @see https://github.com/symfony/console/blob/647c51ff073300a432a4a504e29323cf0d5e0571/Output/StreamOutput.php#L81-L124
     *
     * @param resource $stream
     *
     * @return bool true if the stream supports colorization, false otherwise
     *
     * @suppress PhanUndeclaredFunction
     */
    protected function hasColorSupport($stream): bool
    {
        // Follow https://no-color.org/
        if (isset($_SERVER['NO_COLOR']) || false !== getenv('NO_COLOR')) {
            return false;
        }

        if ('Hyper' === getenv('TERM_PROGRAM')) {
            return true;
        }

        // Windows: rely on the VT100 support and on the environment
        if (\DIRECTORY_SEPARATOR === '\\') {
            if (\function_exists('sapi_windows_vt100_support') && @sapi_windows_vt100_support($stream)) {
                return true;
            }

            if (false !== getenv('ANSICON')) {
                return true;
            }

            if ('ON' === getenv('ConEmuANSI')) {
                return true;
            }

            return 'xterm' === getenv('TERM');
        }

        if (\function_exists('stream_isatty')) {
            return @stream_isatty($stream);
        }

        if (\function_exists('posix_isatty')) {
            return @posix_isatty($stream);
        }

        $stat = @fstat($stream);
        if (!$stat) {
            return false;
        }

        // Check if formatted mode is S_IFCHR
        return ($stat['mode'] & 0170000) === 0020000;
    }
}

View File

@ -0,0 +1,163 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Text;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\SequenceMatcher;
/**
 * Context diff generator.
 *
 * @see https://en.wikipedia.org/wiki/Diff#Context_format
 */
final class Context extends AbstractText
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'Context',
        'type' => 'Text',
    ];

    /**
     * @var int the union of OPs that indicate there is a change
     */
    public const OP_BLOCK_CHANGED =
        SequenceMatcher::OP_DEL |
        SequenceMatcher::OP_INS |
        SequenceMatcher::OP_REP;

    /**
     * {@inheritdoc}
     */
    protected function renderWorker(Differ $differ): string
    {
        $output = '';

        foreach ($differ->getGroupedOpcodesGnu() as $hunk) {
            $firstBlock = $hunk[0];
            $lastBlock = $hunk[\count($hunk) - 1];

            // the line numbers taken from the opcodes are 0-based
            $output .= $this->cliColoredString("***************\n", '@');
            $output .= $this->renderHunkHeader('*', $firstBlock[1], $lastBlock[2]);
            $output .= $this->renderHunkOld($differ, $hunk);
            $output .= $this->renderHunkHeader('-', $firstBlock[3], $lastBlock[4]);
            $output .= $this->renderHunkNew($differ, $hunk);
        }

        return $output;
    }

    /**
     * Render the hunk header, such as "*** 1,5 ****".
     *
     * @param string $symbol the symbol
     * @param int    $a1     the begin index
     * @param int    $a2     the end index
     */
    protected function renderHunkHeader(string $symbol, int $a1, int $a2): string
    {
        // 1-based begin line number
        $beginLine = $a1 + 1;
        $range = $beginLine < $a2 ? "{$beginLine},{$a2}" : (string) $a2;

        $leading = str_repeat($symbol, 3);
        $trailing = str_repeat($symbol, 4);

        return $this->cliColoredString("{$leading} {$range} {$trailing}\n", '@');
    }

    /**
     * Render the old part of the hunk.
     *
     * @param Differ  $differ the differ object
     * @param int[][] $hunk   the hunk
     */
    protected function renderHunkOld(Differ $differ, array $hunk): string
    {
        $rendered = '';
        $seenOps = 0;
        $noEolAtEofIdx = $differ->getOldNoEolAtEofIdx();

        foreach ($hunk as $block) {
            [$op, $i1, $i2] = $block;

            // an inserted block has nothing to show on the old side
            if ($op === SequenceMatcher::OP_INS) {
                continue;
            }

            $seenOps |= $op;
            $rendered .= $this->renderContext(
                self::SYMBOL_MAP[$op],
                $differ->getOld($i1, $i2),
                $i2 === $noEolAtEofIdx,
            );
        }

        // without any changed block, the hunk context is omitted
        if (($seenOps & self::OP_BLOCK_CHANGED) === 0) {
            return '';
        }

        return $rendered;
    }

    /**
     * Render the new part of the hunk.
     *
     * @param Differ  $differ the differ object
     * @param int[][] $hunk   the hunk
     */
    protected function renderHunkNew(Differ $differ, array $hunk): string
    {
        $rendered = '';
        $seenOps = 0;
        $noEolAtEofIdx = $differ->getNewNoEolAtEofIdx();

        foreach ($hunk as $block) {
            [$op, , , $j1, $j2] = $block;

            // a deleted block has nothing to show on the new side
            if ($op === SequenceMatcher::OP_DEL) {
                continue;
            }

            $seenOps |= $op;
            $rendered .= $this->renderContext(
                self::SYMBOL_MAP[$op],
                $differ->getNew($j1, $j2),
                $j2 === $noEolAtEofIdx,
            );
        }

        // without any changed block, the hunk context is omitted
        if (($seenOps & self::OP_BLOCK_CHANGED) === 0) {
            return '';
        }

        return $rendered;
    }

    /**
     * Prefix every context line with the symbol followed by a space.
     *
     * @param string   $symbol     the symbol
     * @param string[] $context    the context
     * @param bool     $noEolAtEof there is no EOL at EOF in this block
     */
    protected function renderContext(string $symbol, array $context, bool $noEolAtEof = false): string
    {
        if (empty($context)) {
            return '';
        }

        $prefix = "{$symbol} ";
        $text = $prefix . implode("\n" . $prefix, $context) . "\n";
        $rendered = $this->cliColoredString($text, $symbol);

        // the "no EOL" marker is appended after the colorized text
        if ($noEolAtEof) {
            $rendered .= self::GNU_OUTPUT_NO_EOL_AT_EOF . "\n";
        }

        return $rendered;
    }
}

View File

@ -0,0 +1,81 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Text;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\SequenceMatcher;
/**
 * Plain text Json diff generator.
 *
 * The output is the JSON encoded list of hunks, each hunk being a list of blocks.
 */
final class JsonText extends AbstractText
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'Text JSON',
        'type' => 'Text',
    ];

    /**
     * {@inheritdoc}
     */
    protected function renderWorker(Differ $differ): string
    {
        $changes = [];

        foreach ($differ->getGroupedOpcodes() as $opcodes) {
            $changes[] = $this->renderHunk($differ, $opcodes);
        }

        if ($this->options['outputTagAsString']) {
            $this->convertTagToString($changes);
        }

        return json_encode($changes, $this->options['jsonEncodeFlags']);
    }

    /**
     * Turn the opcodes of a hunk into blocks holding the tag and the old/new lines.
     *
     * @param Differ  $differ the differ object
     * @param int[][] $hunk   the hunk
     */
    protected function renderHunk(Differ $differ, array $hunk): array
    {
        $blocks = [];

        foreach ($hunk as $opcode) {
            [$op, $i1, $i2, $j1, $j2] = $opcode;

            $oldPart = [
                'offset' => $i1,
                'lines' => $differ->getOld($i1, $i2),
            ];
            $newPart = [
                'offset' => $j1,
                'lines' => $differ->getNew($j1, $j2),
            ];

            $blocks[] = [
                'tag' => $op,
                'old' => $oldPart,
                'new' => $newPart,
            ];
        }

        return $blocks;
    }

    /**
     * Rewrite the "tag" entry of every block through SequenceMatcher::opIntToStr().
     *
     * The given array is modified in place.
     *
     * @param array[][] $changes the changes
     */
    protected function convertTagToString(array &$changes): void
    {
        foreach ($changes as $hunkIdx => $hunk) {
            foreach ($hunk as $blockIdx => $block) {
                $changes[$hunkIdx][$blockIdx]['tag'] = SequenceMatcher::opIntToStr($block['tag']);
            }
        }
    }
}

View File

@ -0,0 +1,147 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Text;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\SequenceMatcher;
/**
 * Unified diff generator.
 *
 * @see https://en.wikipedia.org/wiki/Diff#Unified_format
 */
final class Unified extends AbstractText
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'Unified',
        'type' => 'Text',
    ];

    /**
     * {@inheritdoc}
     */
    protected function renderWorker(Differ $differ): string
    {
        $output = '';

        foreach ($differ->getGroupedOpcodesGnu() as $hunk) {
            $output .= $this->renderHunkHeader($differ, $hunk) . $this->renderHunkBlocks($differ, $hunk);
        }

        return $output;
    }

    /**
     * Render the hunk header, such as "@@ -1,5 +1,6 @@".
     *
     * @param Differ  $differ the differ
     * @param int[][] $hunk   the hunk
     */
    protected function renderHunkHeader(Differ $differ, array $hunk): string
    {
        $firstBlock = $hunk[0];
        $lastBlock = $hunk[\count($hunk) - 1];

        // note that these line number variables are 0-based
        $i1 = $firstBlock[1];
        $j1 = $firstBlock[3];
        $i2 = $lastBlock[2];
        $j2 = $lastBlock[4];

        $oldLinesCount = $i2 - $i1;
        $newLinesCount = $j2 - $j1;

        // the line number in GNU diff is 1-based, so we add 1
        // a special case is when a hunk has only changed blocks,
        // i.e., context is set to 0, we do not need the adding
        $oldBegin = $i1 === $i2 ? $i1 : $i1 + 1;
        $newBegin = $j1 === $j2 ? $j1 : $j1 + 1;

        // if the line counts is 1, it can (and mostly) be omitted
        $oldCount = $oldLinesCount === 1 ? '' : ",{$oldLinesCount}";
        $newCount = $newLinesCount === 1 ? '' : ",{$newLinesCount}";

        return $this->cliColoredString(
            "@@ -{$oldBegin}{$oldCount} +{$newBegin}{$newCount} @@\n",
            '@', // symbol
        );
    }

    /**
     * Render the hunk content.
     *
     * @param Differ  $differ the differ
     * @param int[][] $hunk   the hunk
     */
    protected function renderHunkBlocks(Differ $differ, array $hunk): string
    {
        $output = '';
        $oldNoEolAtEofIdx = $differ->getOldNoEolAtEofIdx();
        $newNoEolAtEofIdx = $differ->getNewNoEolAtEofIdx();

        $showsOld = SequenceMatcher::OP_REP | SequenceMatcher::OP_DEL;
        $showsNew = SequenceMatcher::OP_REP | SequenceMatcher::OP_INS;

        foreach ($hunk as $opcode) {
            [$op, $i1, $i2, $j1, $j2] = $opcode;

            // note that although we are in a OP_EQ situation,
            // the old and the new may not be exactly the same
            // because of ignoreCase, ignoreWhitespace, etc
            if ($op === SequenceMatcher::OP_EQ) {
                // we could only pick either the old or the new to show
                // note that the GNU diff will use the old one because it creates a patch
                $output .= $this->renderContext(
                    ' ',
                    $differ->getOld($i1, $i2),
                    $i2 === $oldNoEolAtEofIdx,
                );

                continue;
            }

            if (($op & $showsOld) !== 0) {
                $output .= $this->renderContext(
                    '-',
                    $differ->getOld($i1, $i2),
                    $i2 === $oldNoEolAtEofIdx,
                );
            }

            if (($op & $showsNew) !== 0) {
                $output .= $this->renderContext(
                    '+',
                    $differ->getNew($j1, $j2),
                    $j2 === $newNoEolAtEofIdx,
                );
            }
        }

        return $output;
    }

    /**
     * Prefix every context line with the symbol.
     *
     * @param string   $symbol     the symbol
     * @param string[] $context    the context
     * @param bool     $noEolAtEof there is no EOL at EOF in this block
     */
    protected function renderContext(string $symbol, array $context, bool $noEolAtEof = false): string
    {
        if (empty($context)) {
            return '';
        }

        $text = $symbol . implode("\n" . $symbol, $context) . "\n";
        $rendered = $this->cliColoredString($text, $symbol);

        // the "no EOL" marker is appended after the colorized text
        if ($noEolAtEof) {
            $rendered .= self::GNU_OUTPUT_NO_EOL_AT_EOF . "\n";
        }

        return $rendered;
    }
}

View File

@ -0,0 +1,65 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Utility;
/**
 * Array helpers.
 */
final class Arr
{
    /**
     * Slice an array by a start index and an end index (exclusive).
     *
     * @param array    $array the array
     * @param int      $start the starting index (negative = count from backward)
     * @param null|int $end   the ending index (negative = count from backward)
     *                        if is null, it returns a slice from $start to the end
     *
     * @return array array of all of the lines between the specified range
     */
    public static function getPartialByIndex(array $array, int $start = 0, ?int $end = null): array
    {
        $total = \count($array);

        if ($end === null) {
            $end = $total;
        }

        // negative indexes count from the end and are clamped at 0
        if ($start < 0) {
            $start = max(0, $start + $total);
        }

        if ($end < 0) {
            $end = max(0, $end + $total);
        }

        // an end before the start gives an empty slice
        $length = $end - $start;

        return \array_slice($array, $start, $length > 0 ? $length : 0);
    }

    /**
     * Determines whether the array is associative, i.e., has at least one string key.
     *
     * @param array $arr the array
     *
     * @return bool `true` if the array is associative, `false` otherwise
     */
    public static function isAssociative($arr): bool
    {
        $hasStringKey = false;

        foreach ($arr as $key => $unused) {
            if (\is_string($key)) {
                $hasStringKey = true;

                break;
            }
        }

        return $hasStringKey;
    }
}

View File

@ -0,0 +1,127 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Utility;
/**
 * Holds the translations used by the renderers.
 *
 * The translations come either from a language file
 * (by language ID) or from a user-given translations dict.
 */
final class Language
{
    /**
     * @var string[] the translation dict
     */
    private array $translations = [];

    /**
     * @var string the language name
     */
    private string $language = '_custom_';

    /**
     * The constructor.
     *
     * @param array<int,string|string[]>|string|string[] $target the language ID or translations dict
     */
    public function __construct($target = 'eng')
    {
        $this->load($target);
    }

    /**
     * Gets the language.
     *
     * @return string the language ID, or "_custom_" if the target was not a string
     */
    public function getLanguage(): string
    {
        return $this->language;
    }

    /**
     * Gets the translations.
     *
     * @return array the translations
     */
    public function getTranslations(): array
    {
        return $this->translations;
    }

    /**
     * Loads the target language.
     *
     * @param array<int,string|string[]>|string|string[] $target the language ID or translations dict
     */
    public function load($target): void
    {
        $this->translations = $this->resolve($target);
        $this->language = \is_string($target) ? $target : '_custom_';
    }

    /**
     * Translates the text.
     *
     * @param string $text the text
     *
     * @return string the translation, or "![{$text}]" if there is none
     */
    public function translate(string $text): string
    {
        return $this->translations[$text] ?? "![{$text}]";
    }

    /**
     * Get the translations from the language file.
     *
     * @param string $language the language
     *
     * @throws \Exception        fail to decode the JSON file
     * @throws \LogicException   path is a directory
     * @throws \RuntimeException path cannot be opened
     *
     * @return string[]
     */
    private static function getTranslationsByLanguage(string $language): array
    {
        $filePath = __DIR__ . "/../languages/{$language}.json";
        $file = new \SplFileObject($filePath, 'r');

        // fread() does not accept a non-positive length, so an empty file is
        // treated as an empty string and reported as a JSON decoding failure below
        $fileSize = $file->getSize();
        $fileContent = $fileSize > 0 ? (string) $file->fread($fileSize) : '';

        try {
            $decoded = json_decode($fileContent, true, 512, \JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            // keep the original exception reachable via getPrevious()
            throw new \Exception(
                sprintf('Fail to decode JSON file (%s): %s', realpath($filePath), (string) $e),
                0,
                $e,
            );
        }

        return (array) $decoded;
    }

    /**
     * Resolves the target language.
     *
     * A string is treated as a language ID, an associative array as a translations
     * dict, and a list is resolved item by item with later items overriding earlier ones.
     *
     * @param array<int,string|string[]>|string|string[] $target the language ID or translations array
     *
     * @throws \InvalidArgumentException
     *
     * @return string[] the resolved translations
     */
    private function resolve($target): array
    {
        if (\is_string($target)) {
            return self::getTranslationsByLanguage($target);
        }

        if (\is_array($target)) {
            // $target is an associative array
            if (Arr::isAssociative($target)) {
                return $target;
            }

            // $target is a list of "key-value pairs or language ID"
            return array_reduce(
                $target,
                fn (array $carry, $translation): array => array_merge($carry, $this->resolve($translation)),
                [],
            );
        }

        throw new \InvalidArgumentException('$target is not in valid form');
    }
}

View File

@ -0,0 +1,51 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Utility;
/**
 * Static helper which iterates an array from its last item to its first one.
 */
final class ReverseIterator
{
    public const ITERATOR_GET_VALUE = 0;
    public const ITERATOR_GET_KEY = 1 << 0;
    public const ITERATOR_GET_BOTH = 1 << 1;

    /**
     * The constructor. It is private: this class is not meant to be instantiated.
     */
    private function __construct()
    {
    }

    /**
     * Iterate the array reversely.
     *
     * Depending on $flags, the generator yields "key => value" pairs
     * (ITERATOR_GET_BOTH), keys only (ITERATOR_GET_KEY) or values only.
     * ITERATOR_GET_BOTH wins when both flags are set.
     *
     * @param array $array the array
     * @param int   $flags the flags
     */
    public static function fromArray(array $array, int $flags = self::ITERATOR_GET_VALUE): \Generator
    {
        $yieldBoth = ($flags & self::ITERATOR_GET_BOTH) !== 0;
        $yieldKey = ($flags & self::ITERATOR_GET_KEY) !== 0;

        // start from the last item and move the internal pointer backward
        end($array);

        while (($key = key($array)) !== null) {
            if ($yieldBoth) {
                yield $key => current($array);
            } elseif ($yieldKey) {
                yield $key;
            } else {
                yield current($array);
            }

            prev($array);
        }
    }
}

View File

@ -0,0 +1,30 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Utility;
final class Str
{
    /**
     * Determine if a given string starts with a given substring.
     *
     * An empty needle is considered to be a prefix of every string.
     *
     * @param string $haystack the haystack
     * @param string $needle   the needle
     */
    public static function startsWith(string $haystack, string $needle): bool
    {
        return substr($haystack, 0, \strlen($needle)) === $needle;
    }

    /**
     * Determine if a given string ends with a given substring.
     *
     * An empty needle is considered to be a suffix of every string,
     * which mirrors the behavior of startsWith().
     *
     * @param string $haystack the haystack
     * @param string $needle   the needle
     */
    public static function endsWith(string $haystack, string $needle): bool
    {
        $length = \strlen($needle);

        // substr($haystack, -0) would return the whole haystack rather than
        // its empty tail, so the empty needle has to be handled explicitly
        if ($length === 0) {
            return true;
        }

        return substr($haystack, -$length) === $needle;
    }
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Стара версия",
"new_version": "Нова версия",
"differences": "Разлики"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "旧版本",
"new_version": "新版本",
"differences": "差异"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "舊版本",
"new_version": "新版本",
"differences": "差異"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Původní",
"new_version": "Nové",
"differences": "Rozdíly"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Alt",
"new_version": "Neu",
"differences": "Unterschiede"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Old",
"new_version": "New",
"differences": "Differences"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Avant",
"new_version": "Après",
"differences": "Différences"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Vecchio",
"new_version": "Nuovo",
"differences": "Differenze"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "古い",
"new_version": "新しい",
"differences": "差異"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Tidligere versjon",
"new_version": "Ny versjon",
"differences": "Differanse"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Original",
"new_version": "Nova",
"differences": "Diferenças"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Старая версия",
"new_version": "Новая версия",
"differences": "Различия"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Anterior",
"new_version": "Nuevo",
"differences": "Diferencias"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Eski",
"new_version": "Yeni",
"differences": "Değişiklikler"
}

View File

@ -0,0 +1,5 @@
{
"old_version": "Було",
"new_version": "Стало",
"differences": "Відмінності"
}

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018-2023 Jack Cherng <jfcherng@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,400 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Utility;
/**
 * An internal UTF-32 multi-bytes string class.
 *
 * Because UTF-8 is varied-width, mb_*() is kinda O(n) when doing decoding.
 * Using iconv() to make it UTF-32 and work with str*() can be possibly faster.
 *
 * UTF-32 is a fix-width encoding (1 char = 4 bytes).
 * Note that the first 4 bytes in a UTF-32 string is the header (endian bytes).
 *
 * Character indices are translated into byte offsets with "<< 2" and byte
 * offsets back into character indices with ">> 2".
 *
 * @author Jack Cherng <jfcherng@gmail.com>
 */
class MbString extends \ArrayObject implements \Stringable
{
    public const MBSTRING_CONVMETHOD_ICONV = 1;
    public const MBSTRING_CONVMETHOD_MBSTRING = 2;

    /**
     * The way to convert text encoding.
     *
     * @var int
     */
    public static $convMethod;

    /**
     * UTF-32 string without endian bytes.
     *
     * @var string
     */
    protected $str;

    /**
     * The original encoding.
     *
     * @var string
     */
    protected $encoding;

    /**
     * The endian bytes for UTF-32.
     *
     * @var string
     */
    protected static $utf32Header;

    /**
     * The constructor.
     *
     * The conversion method and the UTF-32 header are detected once and
     * then cached in static properties.
     *
     * @param string $str      the string
     * @param string $encoding the encoding
     */
    public function __construct(string $str = '', string $encoding = 'UTF-8')
    {
        static::$convMethod ??= static::detectConvEncoding();
        static::$utf32Header ??= static::getUtf32Header();

        $this->encoding = $encoding;
        $this->set($str);
    }

    /**
     * Returns a string representation of the object.
     *
     * @return string string representation of the object
     */
    public function __toString(): string
    {
        return $this->get();
    }

    /**
     * The string setter.
     *
     * @param string $str the string (in the original encoding)
     */
    public function set(string $str): self
    {
        $this->str = $this->inputConv($str);

        return $this;
    }

    /**
     * Set the character at the given index.
     *
     * Only the first character of $char is used. When $idx is beyond the
     * end of the string, the gap is filled with spaces.
     *
     * @param int    $idx  the character index
     * @param string $char the character (in the original encoding)
     */
    public function setAt(int $idx, string $char): self
    {
        $char = $this->inputConv($char);
        if (\strlen($char) > 4) {
            $char = substr($char, 0, 4);
        }

        $spacesPrepend = $idx - $this->strlen();

        // set index (out of bound)
        if ($spacesPrepend > 0) {
            $this->str .= $this->inputConv(str_repeat(' ', $spacesPrepend)) . $char;
        }
        // set index (in bound)
        else {
            $this->str = substr_replace($this->str, $char, $idx << 2, 4);
        }

        return $this;
    }

    /**
     * The string getter.
     *
     * @return string the string in the original encoding
     */
    public function get(): string
    {
        return $this->outputConv($this->str);
    }

    /**
     * The raw string getter.
     *
     * @return string the UTF-32-encoded raw string
     */
    public function getRaw(): string
    {
        return $this->str;
    }

    /**
     * Get the character at the given index in the original encoding.
     *
     * @param int $idx the character index
     */
    public function getAt(int $idx): string
    {
        return $this->outputConv(substr($this->str, $idx << 2, 4));
    }

    /**
     * Get the character at the given index as raw UTF-32 bytes.
     *
     * @param int $idx the character index
     */
    public function getAtRaw(int $idx): string
    {
        return substr($this->str, $idx << 2, 4);
    }

    /**
     * Split the string into an array of characters (original encoding).
     *
     * @return string[]
     */
    public function toArray(): array
    {
        return self::strToChars($this->get());
    }

    /**
     * Split the string (original encoding) by a regular expression.
     *
     * The arguments are forwarded to preg_split(). An empty string always
     * results in an empty array.
     *
     * @param string $regex the regular expression
     * @param int    $limit the limit
     * @param int    $flags the PREG_SPLIT_* flags
     */
    public function toArraySplit(string $regex, int $limit = -1, $flags = 0): array
    {
        if ($this->str === '') {
            return [];
        }

        return preg_split($regex, $this->get(), $limit, $flags);
    }

    /**
     * Split the string into an array of raw UTF-32 characters (4 bytes each).
     *
     * @return string[]
     */
    public function toArrayRaw(): array
    {
        if ($this->str === '') {
            return [];
        }

        return str_split($this->str, 4);
    }

    /**
     * Split a UTF-8 string into an array of characters.
     *
     * @param string $str the UTF-8 string
     *
     * @return string[]
     */
    public static function strToChars(string $str): array
    {
        return preg_split('//uS', $str, -1, \PREG_SPLIT_NO_EMPTY) ?: [];
    }

    // /////////////////////////////////
    // string manipulation functions //
    // /////////////////////////////////

    /**
     * Find the character index of the first occurrence of $needle,
     * comparing the raw bytes with stripos().
     *
     * @param string $needle the needle (in the original encoding)
     * @param int    $offset the character offset to start searching from
     *
     * @return false|int the character index, or false if not found
     */
    public function stripos(string $needle, int $offset = 0)
    {
        $pos = $this->findAligned($this->inputConv($needle), $offset << 2, true);

        return \is_bool($pos) ? $pos : $pos >> 2;
    }

    /**
     * Get the number of characters.
     */
    public function strlen(): int
    {
        return \strlen($this->str) >> 2;
    }

    /**
     * Find the character index of the first occurrence of $needle.
     *
     * @param string $needle the needle (in the original encoding)
     * @param int    $offset the character offset to start searching from
     *
     * @return false|int the character index, or false if not found
     */
    public function strpos(string $needle, int $offset = 0)
    {
        $pos = $this->findAligned($this->inputConv($needle), $offset << 2, false);

        return \is_bool($pos) ? $pos : $pos >> 2;
    }

    /**
     * Get a part of the string in the original encoding.
     *
     * @param int      $start  the starting character index
     * @param null|int $length the number of characters, null means "until the end"
     */
    public function substr(int $start = 0, ?int $length = null): string
    {
        return $this->outputConv(
            isset($length)
                ? substr($this->str, $start << 2, $length << 2)
                : substr($this->str, $start << 2),
        );
    }

    /**
     * Get a copy of the string with a part of it replaced.
     * The object itself is not modified.
     *
     * @param string   $replacement the replacement (in the original encoding)
     * @param int      $start       the starting character index
     * @param null|int $length      the number of characters, null means "until the end"
     */
    public function substr_replace(string $replacement, int $start = 0, ?int $length = null): string
    {
        $replacement = $this->inputConv($replacement);

        return $this->outputConv(
            isset($length)
                ? substr_replace($this->str, $replacement, $start << 2, $length << 2)
                : substr_replace($this->str, $replacement, $start << 2),
        );
    }

    /**
     * Apply the native strtolower() to the string in the original encoding.
     */
    public function strtolower(): string
    {
        return strtolower($this->get());
    }

    /**
     * Apply the native strtoupper() to the string in the original encoding.
     */
    public function strtoupper(): string
    {
        return strtoupper($this->get());
    }

    // //////////////////////////////
    // non-manipulative functions //
    // //////////////////////////////

    /**
     * Determine if the string contains $needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function has(string $needle): bool
    {
        return $this->findAligned($this->inputConv($needle), 0, false) !== false;
    }

    /**
     * Determine if the string starts with $needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function startsWith(string $needle): bool
    {
        $needle = $this->inputConv($needle);

        return $needle === substr($this->str, 0, \strlen($needle));
    }

    /**
     * Determine if the string ends with $needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function endsWith(string $needle): bool
    {
        $needle = $this->inputConv($needle);
        $length = \strlen($needle);

        return $length === 0 ? true : $needle === substr($this->str, -$length);
    }

    // ///////////////////////////////////////////
    // those functions will not return a value //
    // ///////////////////////////////////////////

    /**
     * Insert a string at the given character position (in place).
     *
     * @param string $insert   the string to insert (in the original encoding)
     * @param int    $position the character position
     */
    public function str_insert_i(string $insert, int $position): self
    {
        $insert = $this->inputConv($insert);
        $this->str = substr_replace($this->str, $insert, $position << 2, 0);

        return $this;
    }

    /**
     * Enclose a part of the string with the given closures (in place).
     *
     * @param string[] $closures the opening and the closing strings, ex: ['{', '}'];
     *                           a single item is used for both sides
     * @param int      $start    the starting character index
     * @param null|int $length   the number of characters, null means "until the end"
     */
    public function str_enclose_i(array $closures, int $start = 0, ?int $length = null): self
    {
        // ex: $closures = array('{', '}');
        foreach ($closures as &$closure) {
            $closure = $this->inputConv($closure);
        }
        unset($closure);

        if (\count($closures) < 2) {
            $closures[0] = $closures[1] = reset($closures);
        }

        if (isset($length)) {
            $replacement = $closures[0] . substr($this->str, $start << 2, $length << 2) . $closures[1];
            $this->str = substr_replace($this->str, $replacement, $start << 2, $length << 2);
        } else {
            $replacement = $closures[0] . substr($this->str, $start << 2) . $closures[1];
            $this->str = substr_replace($this->str, $replacement, $start << 2);
        }

        return $this;
    }

    /**
     * Replace all occurrences of $search with $replace (in place).
     *
     * Occurrences are looked up from left to right, never overlap and
     * always start at a character boundary.
     *
     * @param string $search  the string to search for (in the original encoding)
     * @param string $replace the replacement (in the original encoding)
     */
    public function str_replace_i(string $search, string $replace): self
    {
        $search = $this->inputConv($search);
        $replace = $this->inputConv($replace);
        $searchLength = \strlen($search);

        // like str_replace(), an empty search string changes nothing
        if ($searchLength === 0) {
            return $this;
        }

        $result = '';
        $cursor = 0;
        while (($pos = $this->findAligned($search, $cursor, false)) !== false) {
            $result .= substr($this->str, $cursor, $pos - $cursor) . $replace;
            $cursor = $pos + $searchLength;
        }
        $this->str = $result . substr($this->str, $cursor);

        return $this;
    }

    /**
     * Replace a part of the string (in place).
     *
     * @param string   $replacement the replacement (in the original encoding)
     * @param int      $start       the starting character index
     * @param null|int $length      the number of characters, null means "until the end"
     */
    public function substr_replace_i(string $replacement, int $start = 0, ?int $length = null): self
    {
        $replacement = $this->inputConv($replacement);
        $this->str = (
            isset($length)
                ? substr_replace($this->str, $replacement, $start << 2, $length << 2)
                : substr_replace($this->str, $replacement, $start << 2)
        );

        return $this;
    }

    // ///////////////
    // ArrayObject //
    // ///////////////

    /**
     * Array-access form of setAt().
     */
    public function offsetSet(mixed $idx, mixed $char): void
    {
        $this->setAt($idx, $char);
    }

    /**
     * Array-access form of getAt().
     */
    public function offsetGet(mixed $idx): string
    {
        return $this->getAt($idx);
    }

    /**
     * Determine if an integer index is smaller than the string length.
     * Non-integer indices never exist.
     */
    public function offsetExists(mixed $idx): bool
    {
        return \is_int($idx) ? $this->strlen() > $idx : false;
    }

    /**
     * Append a string (in the original encoding) to the end.
     */
    public function append(mixed $str): void
    {
        $this->str .= $this->inputConv($str);
    }

    /**
     * Get the number of characters.
     */
    public function count(): int
    {
        return $this->strlen();
    }

    // //////////////////
    // misc functions //
    // //////////////////

    /**
     * Gets the utf 32 header.
     *
     * @return string the UTF-32 header or empty string
     */
    protected static function getUtf32Header(): string
    {
        // just use any string to get the endian header, here we use "A"
        $tmp = self::convEncoding('A', 'UTF-8', 'UTF-32');

        // some distributions like "php alpine" docker image won't generate the header
        return $tmp && \strlen($tmp) > 4 ? substr($tmp, 0, 4) : '';
    }

    /**
     * Detect which extension can be used to convert text encoding.
     *
     * @throws \RuntimeException when neither "iconv" nor "mbstring" is usable
     *
     * @return int one of the MBSTRING_CONVMETHOD_* constants
     */
    protected static function detectConvEncoding(): int
    {
        if (\function_exists('iconv') && iconv('UTF-8', 'UTF-32', 'A') !== false) {
            return static::MBSTRING_CONVMETHOD_ICONV;
        }

        if (\function_exists('mb_convert_encoding') && mb_convert_encoding('A', 'UTF-32', 'UTF-8') !== false) {
            return static::MBSTRING_CONVMETHOD_MBSTRING;
        }

        throw new \RuntimeException('Either "iconv" or "mbstring" extension is required.');
    }

    /**
     * Convert a string from an encoding to another one with the detected method.
     *
     * @param string $str  the string
     * @param string $from the source encoding
     * @param string $to   the target encoding
     *
     * @throws \RuntimeException when the conversion method is unknown or the conversion fails
     */
    protected static function convEncoding(string $str, string $from, string $to): string
    {
        if (static::$convMethod === static::MBSTRING_CONVMETHOD_ICONV) {
            $converted = iconv($from, $to, $str);
        } elseif (static::$convMethod === static::MBSTRING_CONVMETHOD_MBSTRING) {
            $converted = mb_convert_encoding($str, $to, $from);
        } else {
            throw new \RuntimeException('Unknown conversion method.');
        }

        // both functions return false on failure, which must not leak
        // through the "string" return type
        if (!\is_string($converted)) {
            throw new \RuntimeException("Failed to convert the string from {$from} to {$to}.");
        }

        return $converted;
    }

    /**
     * Convert the output string to its original encoding.
     *
     * @param string $str The string
     */
    protected function outputConv(string $str): string
    {
        if ($str === '') {
            return '';
        }

        return static::convEncoding(static::$utf32Header . $str, 'UTF-32', $this->encoding);
    }

    /**
     * Convert the input string to UTF-32 without header.
     *
     * @param string $str The string
     */
    protected function inputConv(string $str): string
    {
        if ($str === '') {
            return '';
        }

        return substr(static::convEncoding($str, $this->encoding, 'UTF-32'), \strlen(static::$utf32Header));
    }

    /**
     * Find the byte position of the first occurrence of a raw needle
     * which starts at a character boundary (a multiple of 4 bytes).
     *
     * A plain byte search may report a match which straddles two adjacent
     * characters. Such matches are skipped here.
     *
     * @param string $needle     the raw UTF-32 needle
     * @param int    $byteOffset the byte offset to start searching from
     * @param bool   $ignoreCase use stripos() rather than strpos()
     *
     * @return false|int the byte position, or false if not found
     */
    private function findAligned(string $needle, int $byteOffset, bool $ignoreCase): int|false
    {
        while (true) {
            $pos = $ignoreCase
                ? stripos($this->str, $needle, $byteOffset)
                : strpos($this->str, $needle, $byteOffset);

            if ($pos === false || ($pos & 3) === 0) {
                return $pos;
            }

            // continue from the next character boundary
            $byteOffset = ($pos | 3) + 1;
        }
    }
}

View File

@ -0,0 +1,31 @@
BSD 3-Clause License
Copyright (c) 2019-2023 Jack Cherng <jfcherng@gmail.com>
Copyright (c) 2009 Chris Boulton <chris.boulton@interspire.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,722 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff;
/**
 * A longest sequence matcher.
 *
 * The logic is primarily based on the Python difflib package.
 *
 * @see https://docs.python.org/3/library/difflib.html
 */
final class SequenceMatcher
{
    /** @var int 0, opcode: no operation */
    public const OP_NOP = 0;

    /** @var int 1, opcode: equal */
    public const OP_EQ = 1 << 0;

    /** @var int 2, opcode: delete */
    public const OP_DEL = 1 << 1;

    /** @var int 4, opcode: insert */
    public const OP_INS = 1 << 2;

    /** @var int 8, opcode: replace */
    public const OP_REP = 1 << 3;

    public const OP_INT_TO_STR_MAP = [
        self::OP_NOP => 'nop',
        self::OP_EQ => 'eq',
        self::OP_DEL => 'del',
        self::OP_INS => 'ins',
        self::OP_REP => 'rep',
    ];

    public const OP_STR_TO_INT_MAP = [
        'nop' => self::OP_NOP,
        'eq' => self::OP_EQ,
        'del' => self::OP_DEL,
        'ins' => self::OP_INS,
        'rep' => self::OP_REP,
    ];

    /**
     * The helper line which may be used to append to the source inputs to help
     * it easier to handle EOL at EOF problem. This line shouldn't be counted into diff.
     *
     * @var string
     */
    public const APPENDED_HELPER_LINE = "\u{fcf28}\u{fc232}";

    /**
     * @var null|\Closure a callback which receives a (transformed) line and tells whether it is "junk" or not
     */
    private ?\Closure $junkCallback;

    /**
     * @var array the first sequence to compare against
     */
    private array $a = [];

    /**
     * @var array the second sequence
     */
    private array $b = [];

    /**
     * @var array the first sequence to compare against (transformed)
     */
    private array $at = [];

    /**
     * @var array the second sequence (transformed)
     */
    private array $bt = [];

    /**
     * @var array array of characters that are considered junk from the second sequence. Characters are the array key.
     */
    private array $junkDict = [];

    /**
     * @var array array of indices that do not contain junk elements
     */
    private array $b2j = [];

    /**
     * @var array the effective options (the given options merged with the defaults)
     */
    private array $options = [];

    private static array $defaultOptions = [
        'ignoreCase' => false,
        'ignoreLineEnding' => false,
        'ignoreWhitespace' => false,
        'lengthLimit' => 2000,
    ];

    /**
     * @var array the cached result of getMatchingBlocks()
     */
    private array $matchingBlocks = [];

    /**
     * @var array generated opcodes which manipulates seq1 to seq2
     */
    private array $opcodes = [];

    /**
     * The constructor. With the sequences being passed, they'll be set
     * for the sequence matcher and it will perform a basic cleanup &
     * calculate junk elements.
     *
     * @param string[]      $a            an array containing the lines to compare against
     * @param string[]      $b            an array containing the lines to compare
     * @param null|\Closure $junkCallback a callback (if there is one) to determine 'junk' lines
     * @param array         $options      the options
     */
    public function __construct(array $a, array $b, ?\Closure $junkCallback = null, array $options = [])
    {
        $this->junkCallback = $junkCallback;
        $this->setOptions($options);
        $this->setSequences($a, $b);
    }

    /**
     * Set the options.
     *
     * The given options replace the current ones. Options which are not
     * given fall back to their default values.
     *
     * @param array $options The options
     */
    public function setOptions(array $options): static
    {
        $newOptions = $options + self::$defaultOptions;

        // compare the effective options so that an option which silently
        // falls back to its default is detected as a change as well
        $needRerunChainB = $this->isAnyOptionChanged(
            $this->options,
            $newOptions,
            ['ignoreCase', 'ignoreLineEnding', 'ignoreWhitespace', 'lengthLimit'],
        );

        $this->options = $newOptions;

        if ($needRerunChainB) {
            $this->chainB();
        }

        $this->resetCachedResults();

        return $this;
    }

    /**
     * Get the options.
     */
    public function getOptions(): array
    {
        return $this->options;
    }

    /**
     * Reset cached results.
     */
    public function resetCachedResults(): static
    {
        $this->matchingBlocks = [];
        $this->opcodes = [];

        return $this;
    }

    /**
     * Set the first and second sequences to use with the sequence matcher.
     *
     * This method is more efficient than "->setSeq1($old)->setSeq2($new)"
     * because it only runs the routine once.
     *
     * @param string[] $a an array containing the lines to compare against
     * @param string[] $b an array containing the lines to compare
     */
    public function setSequences(array $a, array $b): static
    {
        $need_routine = false;

        if ($this->a !== $a) {
            $need_routine = true;
            $this->a = $a;
        }

        if ($this->b !== $b) {
            $need_routine = true;
            $this->b = $b;
        }

        if ($need_routine) {
            $this->chainB();
            $this->resetCachedResults();
        }

        return $this;
    }

    /**
     * Set the first sequence ($a) and reset any internal caches to indicate that
     * when calling the calculation methods, we need to recalculate them.
     *
     * @param string[] $a the sequence to set as the first sequence
     */
    public function setSeq1(array $a): static
    {
        if ($this->a !== $a) {
            $this->a = $a;
            $this->chainB();
            $this->resetCachedResults();
        }

        return $this;
    }

    /**
     * Set the second sequence ($b) and reset any internal caches to indicate that
     * when calling the calculation methods, we need to recalculate them.
     *
     * @param string[] $b the sequence to set as the second sequence
     */
    public function setSeq2(array $b): static
    {
        if ($this->b !== $b) {
            $this->b = $b;
            $this->chainB();
            $this->resetCachedResults();
        }

        return $this;
    }

    /**
     * Find the longest matching block in the two sequences, as defined by the
     * lower and upper constraints for each sequence. (for the first sequence,
     * $alo - $ahi and for the second sequence, $blo - $bhi).
     *
     * Essentially, of all of the maximal matching blocks, return the one that
     * starts earliest in $a, and of all of those maximal matching blocks that
     * start earliest in $a, return the one that starts earliest in $b.
     *
     * If the junk callback is defined, do the above but with the restriction
     * that no junk element appears in the block. Then extend it as far as
     * possible by matching only junk elements in both $a and $b.
     *
     * @param int $alo the lower constraint for the first sequence
     * @param int $ahi the upper constraint for the first sequence
     * @param int $blo the lower constraint for the second sequence
     * @param int $bhi the upper constraint for the second sequence
     *
     * @return int[] an array containing the longest match that includes the starting position in $a, start in $b and the length/size
     */
    public function findLongestMatch(int $alo, int $ahi, int $blo, int $bhi): array
    {
        $bestI = $alo;
        $bestJ = $blo;
        $bestSize = 0;

        // $j2Len[$j] is the length of the longest match ending with $a[$i - 1] and $b[$j]
        $j2Len = [];

        for ($i = $alo; $i < $ahi; ++$i) {
            if (null === ($element = $this->at[$i] ?? null)) {
                continue;
            }

            $newJ2Len = [];
            $jDict = $this->b2j[$element] ?? [];

            foreach ($jDict as $j) {
                if ($j < $blo) {
                    continue;
                }

                // indices are stored in ascending order, nothing more to find
                if ($j >= $bhi) {
                    break;
                }

                $k = ($j2Len[$j - 1] ?? 0) + 1;
                $newJ2Len[$j] = $k;

                if ($k > $bestSize) {
                    $bestI = $i - $k + 1;
                    $bestJ = $j - $k + 1;
                    $bestSize = $k;
                }
            }

            $j2Len = $newJ2Len;
        }

        // extend the match backward with non-junk elements
        while (
            $bestI > $alo
            && $bestJ > $blo
            && $this->at[$bestI - 1] === $this->bt[$bestJ - 1]
            && !$this->isBJunk($this->bt[$bestJ - 1])
        ) {
            --$bestI;
            --$bestJ;
            ++$bestSize;
        }

        // extend the match forward with non-junk elements
        while (
            $bestI + $bestSize < $ahi
            && $bestJ + $bestSize < $bhi
            && $this->at[$bestI + $bestSize] === $this->bt[$bestJ + $bestSize]
            && !$this->isBJunk($this->bt[$bestJ + $bestSize])
        ) {
            ++$bestSize;
        }

        // extend the match backward with junk elements
        while (
            $bestI > $alo
            && $bestJ > $blo
            && $this->at[$bestI - 1] === $this->bt[$bestJ - 1]
            && $this->isBJunk($this->bt[$bestJ - 1])
        ) {
            --$bestI;
            --$bestJ;
            ++$bestSize;
        }

        // extend the match forward with junk elements
        while (
            $bestI + $bestSize < $ahi
            && $bestJ + $bestSize < $bhi
            && $this->at[$bestI + $bestSize] === $this->bt[$bestJ + $bestSize]
            && $this->isBJunk($this->bt[$bestJ + $bestSize])
        ) {
            ++$bestSize;
        }

        return [$bestI, $bestJ, $bestSize];
    }

    /**
     * Return a nested set of arrays for all of the matching sub-sequences
     * in the sequences $a and $b.
     *
     * Each block contains the lower constraint of the block in $a, the lower
     * constraint of the block in $b and finally the number of lines that the
     * block continues for. The last block is always a zero-length sentinel
     * placed at the end of both sequences.
     *
     * @return int[][] a nested array of the matching blocks, as described by the function
     */
    public function getMatchingBlocks(): array
    {
        if (!empty($this->matchingBlocks)) {
            return $this->matchingBlocks;
        }

        $aCount = \count($this->a);
        $bCount = \count($this->b);

        $queue = [
            [0, $aCount, 0, $bCount],
        ];

        $matchingBlocks = [];
        while (!empty($queue)) {
            [$alo, $ahi, $blo, $bhi] = array_pop($queue);
            [$i, $j, $k] = $x = $this->findLongestMatch($alo, $ahi, $blo, $bhi);

            if ($k) {
                $matchingBlocks[] = $x;

                // look for further matches on the left side of this one
                if ($alo < $i && $blo < $j) {
                    $queue[] = [$alo, $i, $blo, $j];
                }

                // look for further matches on the right side of this one
                if ($i + $k < $ahi && $j + $k < $bhi) {
                    $queue[] = [$i + $k, $ahi, $j + $k, $bhi];
                }
            }
        }

        // sort the blocks element-wise
        usort($matchingBlocks, function (array $a, array $b): int {
            $aCount = \count($a);
            $bCount = \count($b);
            $min = min($aCount, $bCount);

            for ($i = 0; $i < $min; ++$i) {
                if ($a[$i] !== $b[$i]) {
                    return $a[$i] <=> $b[$i];
                }
            }

            return $aCount <=> $bCount;
        });

        // merge the blocks which are adjacent in both sequences
        $i1 = $j1 = $k1 = 0;
        $nonAdjacent = [];
        foreach ($matchingBlocks as [$i2, $j2, $k2]) {
            if ($i1 + $k1 === $i2 && $j1 + $k1 === $j2) {
                $k1 += $k2;

                continue;
            }

            if ($k1) {
                $nonAdjacent[] = [$i1, $j1, $k1];
            }

            $i1 = $i2;
            $j1 = $j2;
            $k1 = $k2;
        }

        if ($k1) {
            $nonAdjacent[] = [$i1, $j1, $k1];
        }

        $nonAdjacent[] = [$aCount, $bCount, 0];

        $this->matchingBlocks = $nonAdjacent;

        return $this->matchingBlocks;
    }

    /**
     * Return a list of all of the opcodes for the differences between the
     * two sequences.
     *
     * The nested array returned contains an array describing the opcode
     * which includes:
     * 0 - The type of op (as described below) for the opcode.
     * 1 - The beginning line in the first sequence.
     * 2 - The end line in the first sequence.
     * 3 - The beginning line in the second sequence.
     * 4 - The end line in the second sequence.
     *
     * The different types of ops include:
     * replace - The string from $i1 to $i2 in $a should be replaced by
     *           the string in $b from $j1 to $j2.
     * delete -  The string in $a from $i1 to $i2 should be deleted.
     * insert -  The string in $b from $j1 to $j2 should be inserted at
     *           $i1 in $a.
     * equal  -  The two strings with the specified ranges are equal.
     *
     * @return int[][] array of the opcodes describing the differences between the strings
     */
    public function getOpcodes(): array
    {
        if (!empty($this->opcodes)) {
            return $this->opcodes;
        }

        $i = $j = 0;
        $this->opcodes = [];

        foreach ($this->getMatchingBlocks() as [$ai, $bj, $size]) {
            // classify the gap between the previous block and this one
            if ($i < $ai && $j < $bj) {
                $op = self::OP_REP;
            } elseif ($i < $ai) {
                $op = self::OP_DEL;
            } elseif ($j < $bj) {
                $op = self::OP_INS;
            } else {
                $op = self::OP_NOP;
            }

            if ($op) {
                $this->opcodes[] = [$op, $i, $ai, $j, $bj];
            }

            $i = $ai + $size;
            $j = $bj + $size;

            if ($size) {
                $this->opcodes[] = [self::OP_EQ, $ai, $i, $bj, $j];
            }
        }

        return $this->opcodes;
    }

    /**
     * Return a series of nested arrays containing different groups of generated
     * opcodes for the differences between the strings with up to $context lines
     * of surrounding content.
     *
     * Essentially what happens here is any big equal blocks of strings are stripped
     * out, the smaller subsets of changes are then arranged in to their groups.
     * This means that the sequence matcher and diffs do not need to include the full
     * content of the different files but can still provide context as to where the
     * changes are.
     *
     * @param int $context the number of lines of context to provide around the groups
     *
     * @return int[][][] nested array of all of the grouped opcodes
     */
    public function getGroupedOpcodes(int $context = 3): array
    {
        $opcodes = $this->getOpcodes();

        if (empty($opcodes)) {
            $opcodes = [
                [self::OP_EQ, 0, 1, 0, 1],
            ];
        }

        if ($opcodes[0][0] === self::OP_EQ) {
            // fix the leading sequence which is out of context.
            $opcodes[0] = [
                $opcodes[0][0],
                max($opcodes[0][1], $opcodes[0][2] - $context),
                $opcodes[0][2],
                max($opcodes[0][3], $opcodes[0][4] - $context),
                $opcodes[0][4],
            ];
        }

        $lastItem = \count($opcodes) - 1;
        if ($opcodes[$lastItem][0] === self::OP_EQ) {
            [$op, $i1, $i2, $j1, $j2] = $opcodes[$lastItem];

            // fix the trailing sequence which is out of context.
            $opcodes[$lastItem] = [
                $op,
                $i1,
                min($i2, $i1 + $context),
                $j1,
                min($j2, $j1 + $context),
            ];
        }

        $maxRange = $context << 1;
        $groups = $group = [];

        foreach ($opcodes as [$op, $i1, $i2, $j1, $j2]) {
            // a long equal block ends the current group and starts a new one
            if ($op === self::OP_EQ && $i2 - $i1 > $maxRange) {
                $group[] = [
                    $op,
                    $i1,
                    min($i2, $i1 + $context),
                    $j1,
                    min($j2, $j1 + $context),
                ];
                $groups[] = $group;
                $group = [];

                $i1 = max($i1, $i2 - $context);
                $j1 = max($j1, $j2 - $context);
            }

            $group[] = [$op, $i1, $i2, $j1, $j2];
        }

        if (
            !empty($group)
            && (
                \count($group) !== 1
                || $group[0][0] !== self::OP_EQ
            )
        ) {
            $groups[] = $group;
        }

        // there will be at least leading/trailing OP_EQ blocks
        // if we want really zero-context, we keep only non-equal blocks
        if ($context <= 0) {
            $groupsNew = [];

            foreach ($groups as $group) {
                $groupNew = [];

                foreach ($group as $block) {
                    if ($block[0] !== self::OP_EQ) {
                        $groupNew[] = $block;
                    }
                }

                if (!empty($groupNew)) {
                    $groupsNew[] = $groupNew;
                }
            }

            return $groupsNew;
        }

        return $groups;
    }

    /**
     * Convert an operation code from int into its string form.
     *
     * @param int $op the operation code
     *
     * @throws \InvalidArgumentException
     *
     * @return string the string representation of the operation code
     */
    public static function opIntToStr(int $op): string
    {
        if (!isset(self::OP_INT_TO_STR_MAP[$op])) {
            throw new \InvalidArgumentException("Invalid OP: {$op}");
        }

        return self::OP_INT_TO_STR_MAP[$op];
    }

    /**
     * Convert an operation code from string into its int form.
     *
     * @param string $op the operation code
     *
     * @throws \InvalidArgumentException
     *
     * @return int the int representation of the operation code
     */
    public static function opStrToInt(string $op): int
    {
        if (!isset(self::OP_STR_TO_INT_MAP[$op])) {
            throw new \InvalidArgumentException("Invalid OP: {$op}");
        }

        return self::OP_STR_TO_INT_MAP[$op];
    }

    /**
     * Determine if any option under test changed.
     *
     * A key which is missing from either side is treated as null.
     *
     * @param array $old  the old options
     * @param array $new  the new options
     * @param array $keys the option keys under test
     */
    private function isAnyOptionChanged(array $old, array $new, array $keys): bool
    {
        foreach ($keys as $key) {
            if (($new[$key] ?? null) !== ($old[$key] ?? null)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Get the processed line with the initialized options.
     *
     * @param string $line the line
     *
     * @return string the line after being processed
     */
    private function processLineWithOptions(string $line): string
    {
        if ($this->options['ignoreWhitespace']) {
            static $whitespaces = [' ', "\t", "\r", "\n"];

            $line = str_replace($whitespaces, '', $line);
        }

        if ($this->options['ignoreCase']) {
            $line = strtolower($line);
        }

        if ($this->options['ignoreLineEnding']) {
            $line = rtrim($line, "\r\n");
        }

        return $line;
    }

    /**
     * Generate the internal arrays containing the list of junk and non-junk
     * characters for the second ($b) sequence.
     */
    private function chainB(): static
    {
        $this->at = array_map([$this, 'processLineWithOptions'], $this->a);
        $this->bt = array_map([$this, 'processLineWithOptions'], $this->b);

        $length = \count($this->bt);
        $this->b2j = [];
        $popularDict = [];

        for ($i = 0; $i < $length; ++$i) {
            $char = $this->bt[$i];
            $this->b2j[$char] = $this->b2j[$char] ?? [];

            if (
                $length >= $this->options['lengthLimit']
                && \count($this->b2j[$char]) * 100 > $length
                && $char !== self::APPENDED_HELPER_LINE
            ) {
                $popularDict[$char] = 1;

                unset($this->b2j[$char]);
            } else {
                $this->b2j[$char][] = $i;
            }
        }

        // remove leftovers
        foreach (array_keys($popularDict) as $char) {
            unset($this->b2j[$char]);
        }

        $this->junkDict = [];
        if (\is_callable($this->junkCallback)) {
            // PHP turns numeric-string array keys (such as the line "1") into
            // integers, so they are cast back before reaching the callback
            foreach (array_keys($popularDict) as $char) {
                if (($this->junkCallback)((string) $char)) {
                    $this->junkDict[$char] = 1;

                    unset($popularDict[$char]);
                }
            }

            foreach (array_keys($this->b2j) as $char) {
                if (($this->junkCallback)((string) $char)) {
                    $this->junkDict[$char] = 1;

                    unset($this->b2j[$char]);
                }
            }
        }

        return $this;
    }

    /**
     * Checks if a particular character is in the junk dictionary
     * for the list of junk characters.
     *
     * @param string $b the (transformed) element from the second sequence
     *
     * @return bool True if the character is considered junk. False if not.
     */
    private function isBJunk(string $b): bool
    {
        return isset($this->junkDict[$b]);
    }
}