primo commit

This commit is contained in:
2024-12-17 17:34:10 +01:00
commit e650f8df99
16435 changed files with 2451012 additions and 0 deletions

View File

@ -0,0 +1,241 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Diff\Utility\Language;
/**
* Base class for diff renderers.
*
* @todo use typed properties (BC breaking for public interface) in v7
*/
abstract class AbstractRenderer implements RendererInterface
{
/**
 * @var array information about this renderer: a description ("desc") and a category ("type").
 *            The values declared here are placeholders.
 */
public const INFO = [
'desc' => 'default_desc',
'type' => 'default_type',
];
/**
 * @var bool Is this renderer pure text?
 */
public const IS_TEXT_RENDERER = true;
/**
 * @var string[] array of the opcodes and their corresponding one-character symbols
 *               (keys are the SequenceMatcher::OP_* constants)
 */
public const SYMBOL_MAP = [
SequenceMatcher::OP_DEL => '-',
SequenceMatcher::OP_EQ => ' ',
SequenceMatcher::OP_INS => '+',
SequenceMatcher::OP_REP => '!',
];
/**
 * @var Language the language translation object, (re)created by updateLanguage()
 */
protected $t;
/**
 * If the input "changes" have `<ins>...</ins>` or `<del>...</del>`,
 * which means they have been processed, then `false`. Otherwise, `true`.
 *
 * render() sets this to `true` and renderArray() sets it to `false`.
 *
 * @var bool
 */
protected $changesAreRaw = true;
/**
 * Fallback values for every option the renderers understand.
 *
 * setOptions() uses these for any key the caller did not supply.
 *
 * @var array array of the default options that apply to this renderer
 */
protected static $defaultOptions = [
// how detailed the rendered HTML in-line diff is? (none, line, word, char)
'detailLevel' => 'line',
// renderer language: eng, cht, chs, jpn, ...
// or an array which has the same keys with a language file
// check the "Custom Language" section in the readme for more advanced usage
'language' => 'eng',
// show line numbers in HTML renderers
'lineNumbers' => true,
// show a separator between different diff hunks in HTML renderers
'separateBlock' => true,
// show the (table) header
'showHeader' => true,
// convert spaces/tabs into HTML codes like `<span class="ch sp"> </span>`
// and the frontend is responsible for rendering them with CSS.
// when using this, "spacesToNbsp" should be false and "tabSize" is not respected.
'spaceToHtmlTag' => false,
// the frontend HTML could use CSS "white-space: pre;" to visualize consecutive whitespaces
// but if you want to visualize them in the backend with "&nbsp;", you can set this to true
'spacesToNbsp' => false,
// HTML renderer tab width (negative = do not convert into spaces)
'tabSize' => 4,
// this option is currently only for the Combined renderer.
// it determines whether a replace-type block should be merged or not
// depending on the content changed ratio, which values between 0 and 1.
'mergeThreshold' => 0.8,
// this option is currently only for the Unified and the Context renderers.
// RendererConstant::CLI_COLOR_AUTO = colorize the output if possible (default)
// RendererConstant::CLI_COLOR_ENABLE = force to colorize the output
// RendererConstant::CLI_COLOR_DISABLE = force not to colorize the output
'cliColorization' => RendererConstant::CLI_COLOR_AUTO,
// this option is currently only for the Json renderer.
// internally, ops (tags) are all int type but this is not good for human reading.
// set this to "true" to convert them into string form before outputting.
'outputTagAsString' => false,
// this option is currently only for the Json renderer.
// it controls how the output JSON is formatted.
// see available options on https://www.php.net/manual/en/function.json-encode.php
'jsonEncodeFlags' => \JSON_UNESCAPED_SLASHES | \JSON_UNESCAPED_UNICODE,
// this option is currently effective when the "detailLevel" is "word"
// characters listed in this array can be used to make diff segments into a whole
// for example, making "<del>good</del>-<del>looking</del>" into "<del>good-looking</del>"
// this should bring better readability but set this to empty array if you do not want it
'wordGlues' => ['-', ' '],
// change this value to a string as the returned diff if the two input strings are identical
// (null means "use the renderer's own default", see getResultForIdenticals())
'resultForIdenticals' => null,
// extra HTML classes added to the DOM of the diff container
'wrapperClasses' => ['diff-wrapper'],
];
/**
 * Populated by setOptions(): the user-supplied options with the defaults filled in.
 *
 * @var array array containing the user applied and merged default options for the renderer
 */
protected $options = [];
/**
 * Create the renderer and apply the given options on top of the defaults.
 *
 * @param array $options renderer options; omitted keys fall back to their default values
 */
public function __construct(array $options = [])
{
    // delegating to the setter also initializes the translation object
    $this->setOptions($options);
}
/**
 * Replace the renderer options with the supplied ones.
 *
 * Keys missing from $options are filled in from the default options,
 * and the translation object is refreshed when the language changes.
 *
 * @param array $options the options
 *
 * @return static
 */
public function setOptions(array $options): self
{
    // array union: user-supplied keys win, defaults only fill the gaps
    $merged = $options + static::$defaultOptions;
    $previousLanguage = $this->options['language'] ?? '';

    $this->updateLanguage($previousLanguage, $merged['language']);
    $this->options = $merged;

    return $this;
}
/**
 * Get the currently effective options (user options merged with the defaults).
 *
 * @return array the options
 */
public function getOptions(): array
{
    $current = $this->options;

    return $current;
}
/**
 * Get the output to use when the old and the new are identical.
 *
 * Returns the `resultForIdenticals` option when it is a string,
 * or the renderer default when the option is null.
 *
 * @final
 *
 * @todo mark this method with "final" in the next major release
 *
 * @throws \InvalidArgumentException when the option is neither null nor a string
 */
public function getResultForIdenticals(): string
{
    $custom = $this->options['resultForIdenticals'];

    // no user override: let the concrete renderer decide
    if ($custom === null) {
        return $this->getResultForIdenticalsDefault();
    }

    if (!\is_string($custom)) {
        throw new \InvalidArgumentException('renderer option `resultForIdenticals` must be null or string.');
    }

    return $custom;
}
/**
 * Get the renderer default result when the old and the new are the same.
 *
 * getResultForIdenticals() falls back to this value when the
 * `resultForIdenticals` option is null.
 *
 * @return string the default output for identical inputs
 */
abstract public function getResultForIdenticalsDefault(): string;
/**
 * Render the diff held by a Differ object.
 *
 * @param Differ $differ the differ object
 *
 * @return string the rendered diff
 */
final public function render(Differ $differ): string
{
    // changes coming from a Differ have not been formatted yet
    $this->changesAreRaw = true;

    // the "no difference" situation may happen frequently
    if ($differ->getOldNewComparison() === 0 && !$differ->options['fullContextIfIdentical']) {
        return $this->getResultForIdenticals();
    }

    return $this->renderWorker($differ);
}
/**
 * Render a "changes" array that has been produced beforehand.
 *
 * @param array[][] $differArray the differ array
 *
 * @return string the rendered diff
 */
final public function renderArray(array $differArray): string
{
    // flag the input as already processed before handing it to the worker
    $this->changesAreRaw = false;
    $rendered = $this->renderArrayWorker($differArray);

    return $rendered;
}
/**
 * The real worker for self::render().
 *
 * Called by render() unless the inputs are identical and the differ's
 * "fullContextIfIdentical" option is off.
 *
 * @param Differ $differ the differ object
 *
 * @return string the rendered diff
 */
abstract protected function renderWorker(Differ $differ): string;
/**
 * The real worker for self::renderArray().
 *
 * @param array[][] $differArray the differ array
 *
 * @return string the rendered diff
 */
abstract protected function renderArrayWorker(array $differArray): string;
/**
 * Make sure the Language object matches the requested language.
 *
 * A new Language object is created when none exists yet
 * or when the new language differs from the old one.
 *
 * @param string|string[] $old the old language
 * @param string|string[] $new the new language
 *
 * @return static
 */
protected function updateLanguage($old, $new): self
{
    $isUpToDate = isset($this->t) && $old === $new;

    if (!$isUpToDate) {
        $this->t = new Language($new);
    }

    return $this;
}
/**
 * Translate a text with the current Language object.
 *
 * @param string $text       the text to translate
 * @param bool   $escapeHtml whether to HTML-escape the translated text
 *
 * @return string the translated (and optionally escaped) text
 */
protected function _(string $text, bool $escapeHtml = true): string
{
    $translated = $this->t->translate($text);

    if (!$escapeHtml) {
        return $translated;
    }

    return htmlspecialchars($translated);
}
}

View File

@ -0,0 +1,368 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\Factory\LineRendererFactory;
use Jfcherng\Diff\Renderer\AbstractRenderer;
use Jfcherng\Diff\Renderer\Html\LineRenderer\AbstractLineRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Utility\MbString;
/**
* Base renderer for rendering HTML-based diffs.
*/
abstract class AbstractHtml extends AbstractRenderer
{
/**
 * @var bool is this renderer pure text?
 */
public const IS_TEXT_RENDERER = false;
/**
 * @var string[] array of the different opcodes and how they are mapped to HTML classes
 *               (keys are the SequenceMatcher::OP_* constants)
 *
 * @todo rename to OP_CLASS_MAP in v7
 */
public const TAG_CLASS_MAP = [
SequenceMatcher::OP_DEL => 'del',
SequenceMatcher::OP_EQ => 'eq',
SequenceMatcher::OP_INS => 'ins',
SequenceMatcher::OP_REP => 'rep',
];
/**
 * Auto format the content in "changes" to be suitable for HTML output.
 *
 * When true, getChanges() passes its result through formatChanges().
 *
 * This may not be a wanted behavior for some (custom) renderers
 * if they want to do this by themselves in a later stage.
 *
 * @var bool
 */
public const AUTO_FORMAT_CHANGES = true;
/**
 * HTML renderers output nothing by default when the old and the new are the same.
 *
 * @return string always an empty string
 */
public function getResultForIdenticalsDefault(): string
{
    $nothing = '';

    return $nothing;
}
/**
 * Build the "changes" array structure used for generating HTML-based diffs.
 *
 * Each grouped opcode hunk of the differ becomes a list of blocks holding the
 * affected old/new lines. For replace blocks with the same number of lines on
 * both sides, the inner per-line differences are marked by the line renderer.
 *
 * @param Differ $differ the differ object
 *
 * @return array[][] generated changes, suitable for presentation in HTML
 */
public function getChanges(Differ $differ): array
{
    $lineRenderer = LineRendererFactory::make(
        $this->options['detailLevel'],
        $differ->getOptions(),
        $this->options,
    );

    $old = $differ->getOld();
    $new = $differ->getNew();

    $changes = [];

    foreach ($differ->getGroupedOpcodes() as $hunk) {
        $blocks = [];

        foreach ($hunk as [$op, $i1, $i2, $j1, $j2]) {
            $oldCount = $i2 - $i1;
            $newCount = $j2 - $j1;

            $current = $this->getDefaultBlock($op, $i1, $j1);

            // if there are same amount of lines replaced
            // we can render the inner detailed changes with corresponding lines
            // @todo or use LineRenderer to do the job regardless different line counts?
            if ($op === SequenceMatcher::OP_REP && $oldCount === $newCount) {
                // the lines are marked in place, so this must happen before slicing
                for ($k = $oldCount - 1; $k >= 0; --$k) {
                    $this->renderChangedExtent($lineRenderer, $old[$i1 + $k], $new[$j1 + $k]);
                }
            }

            $current['old']['lines'] = \array_slice($old, $i1, $oldCount);
            $current['new']['lines'] = \array_slice($new, $j1, $newCount);

            $blocks[] = $current;
        }

        $changes[] = $blocks;
    }

    if (static::AUTO_FORMAT_CHANGES) {
        $this->formatChanges($changes);
    }

    return $changes;
}
/**
 * Render the differ: compute the changes, render them, then strip empty closures.
 *
 * @param Differ $differ the differ object
 *
 * @return string the rendered diff
 */
protected function renderWorker(Differ $differ): string
{
    $changes = $this->getChanges($differ);

    return $this->cleanUpDummyHtmlClosures($this->redererChanges($changes));
}
/**
 * Render an already computed "changes" array.
 *
 * Tags are normalized to their int form first, and empty closures
 * are stripped from the rendered output.
 *
 * @param array[][] $differArray the differ array
 *
 * @return string the rendered diff
 */
protected function renderArrayWorker(array $differArray): string
{
    $this->ensureChangesUseIntTag($differArray);

    return $this->cleanUpDummyHtmlClosures($this->redererChanges($differArray));
}
/**
 * Render the array of changes.
 *
 * Called by both renderWorker() and renderArrayWorker().
 *
 * @param array[][] $changes the changes
 *
 * @return string the rendered output
 *
 * @todo fix the typo in the method name (rename to renderChanges()) in v7
 */
abstract protected function redererChanges(array $changes): string;
/**
 * Render the changed extent of a pair of lines, in place.
 *
 * Both lines are handed to the line renderer through reusable MbString
 * objects and then overwritten with whatever the line renderer produced.
 *
 * @param AbstractLineRenderer $lineRenderer the line renderer
 * @param string               $old          the old line
 * @param string               $new          the new line
 */
protected function renderChangedExtent(AbstractLineRenderer $lineRenderer, string &$old, string &$new): void
{
    // the MbString objects are created once and reused across calls
    static $mbOld, $mbNew;

    if (!isset($mbOld)) {
        $mbOld = new MbString();
    }
    if (!isset($mbNew)) {
        $mbNew = new MbString();
    }

    $mbOld->set($old);
    $mbNew->set($new);

    $lineRenderer->render($mbOld, $mbNew);

    [$old, $new] = [$mbOld->get(), $mbNew->get()];
}
/**
 * Create a block skeleton: the operation plus empty old/new line lists.
 *
 * @param int $op the operation
 * @param int $i1 begin index of the diff of the old array
 * @param int $j1 begin index of the diff of the new array
 *
 * @return array the default block
 *
 * @todo rename tag to op in v7
 */
protected function getDefaultBlock(int $op, int $i1, int $j1): array
{
    // both sides share the same shape: a start offset and no lines yet
    $makeSide = static fn (int $offset): array => [
        'offset' => $offset,
        'lines' => [],
    ];

    return [
        'tag' => $op,
        'old' => $makeSide($i1),
        'new' => $makeSide($j1),
    ];
}
/**
 * Make the content in "changes" suitable for HTML output, in place.
 *
 * Every block's lines are formatted, and the generic closure markers are
 * replaced with the delete closures (old side of replace/delete blocks)
 * or the insert closures (new side of replace/insert blocks).
 *
 * @param array[][] $changes the changes
 */
final protected function formatChanges(array &$changes): void
{
    $opsWithDeletion = SequenceMatcher::OP_REP | SequenceMatcher::OP_DEL;
    $opsWithInsertion = SequenceMatcher::OP_REP | SequenceMatcher::OP_INS;

    foreach ($changes as $hunkKey => $hunk) {
        foreach ($hunk as $blockKey => $block) {
            $oldLines = $this->formatLines($block['old']['lines']);
            $newLines = $this->formatLines($block['new']['lines']);

            /** @phan-suppress-next-line PhanTypeInvalidLeftOperandOfBitwiseOp */
            if ($block['tag'] & $opsWithDeletion) {
                $oldLines = str_replace(
                    RendererConstant::HTML_CLOSURES,
                    RendererConstant::HTML_CLOSURES_DEL,
                    $oldLines,
                );
            }

            /** @phan-suppress-next-line PhanTypeInvalidLeftOperandOfBitwiseOp */
            if ($block['tag'] & $opsWithInsertion) {
                $newLines = str_replace(
                    RendererConstant::HTML_CLOSURES,
                    RendererConstant::HTML_CLOSURES_INS,
                    $newLines,
                );
            }

            // write back through the keys rather than through references
            $changes[$hunkKey][$blockKey]['old']['lines'] = $oldLines;
            $changes[$hunkKey][$blockKey]['new']['lines'] = $newLines;
        }
    }
}
/**
 * Make a series of lines suitable for outputting in a HTML rendered diff.
 *
 * The number of returned lines equals the number of input lines,
 * including for an empty input.
 *
 * @param string[] $lines array of lines to format
 *
 * @return string[] array of the formatted lines
 */
protected function formatLines(array $lines): array
{
    // explode() on an empty string yields [''], which would turn
    // "no lines" into "one empty line"; keep an empty list empty
    if ($lines === []) {
        return [];
    }

    /**
     * To prevent from invoking the same function calls for several times,
     * we can glue lines into a string and call functions for one time.
     * After that, we split the string back into lines.
     */
    $glued = implode(RendererConstant::IMPLODE_DELIMITER, $lines);

    return explode(
        RendererConstant::IMPLODE_DELIMITER,
        $this->formatStringFromLines($glued),
    );
}
/**
 * Make a string suitable for outputting in a HTML rendered diff.
 *
 * Depending on the options, this may involve replacing tab characters with
 * spaces, escaping HTML special characters, replacing spaces with "&nbsp;"
 * or wrapping spaces/tabs in HTML tags.
 *
 * @param string $string the string of imploded lines
 *
 * @return string the formatted string
 */
protected function formatStringFromLines(string $string): string
{
    $wrapWhitespaceInTags = (bool) $this->options['spaceToHtmlTag'];

    // tabs are only expanded when they are not going to be wrapped in tags
    if (!$wrapWhitespaceInTags) {
        $string = $this->expandTabs($string, $this->options['tabSize']);
    }

    $string = $this->htmlSafe($string);

    if ($this->options['spacesToNbsp']) {
        $string = $this->htmlFixSpaces($string);
    }

    return $wrapWhitespaceInTags
        ? $this->htmlReplaceSpacesToHtmlTag($string)
        : $string;
}
/**
 * Replace tabs in a string with a number of spaces.
 *
 * @param string $string          the input string which may contain tabs
 * @param int    $tabSize         one tab = how many spaces, a negative does nothing
 * @param bool   $onlyLeadingTabs only expand tabs in the leading whitespace of each line
 *
 * @return string the string with the tabs converted to spaces
 */
protected function expandTabs(string $string, int $tabSize = 4, bool $onlyLeadingTabs = false): string
{
    if ($tabSize < 0) {
        return $string;
    }

    $spaces = str_repeat(' ', $tabSize);

    if (!$onlyLeadingTabs) {
        return str_replace("\t", $spaces, $string);
    }

    return preg_replace_callback(
        "/^[ \t]{1,}/mS", // tabs and spaces may be mixed
        static function (array $matches) use ($spaces): string {
            return str_replace("\t", $spaces, $matches[0]);
        },
        $string,
    );
}
/**
 * Make a string containing HTML safe for output on a page.
 *
 * Quotes are left untouched (ENT_NOQUOTES). Invalid UTF-8 sequences are
 * replaced with U+FFFD (ENT_SUBSTITUTE); without that flag, htmlspecialchars()
 * returns an empty string for the whole input.
 *
 * @param string $string the string
 *
 * @return string the string with the HTML characters replaced by entities
 */
protected function htmlSafe(string $string): string
{
    return htmlspecialchars($string, \ENT_NOQUOTES | \ENT_SUBSTITUTE, 'UTF-8');
}
/**
 * Replace every space in a string with "&nbsp;".
 *
 * @param string $string the string of spaces
 *
 * @return string the HTML representation of the string
 */
protected function htmlFixSpaces(string $string): string
{
    return strtr($string, [' ' => '&nbsp;']);
}
/**
 * Wrap spaces/tabs in HTML tags, which may be styled in frontend with CSS.
 *
 * @param string $string the string of spaces
 *
 * @return string the HTML representation of the string
 */
protected function htmlReplaceSpacesToHtmlTag(string $string): string
{
    // strtr() never re-scans replaced text, so the spaces
    // inside the inserted tags are left alone
    $wrappers = [
        ' ' => '<span class="ch sp"> </span>',
        "\t" => "<span class=\"ch tab\">\t</span>",
    ];

    return strtr($string, $wrappers);
}
/**
 * Make sure the "changes" array uses int "tag", converting in place if needed.
 *
 * Internally, we would like always int form for better performance.
 * Only the tag of the very first block is inspected to decide
 * whether a conversion is needed.
 *
 * @param array[][] $changes the changes
 */
protected function ensureChangesUseIntTag(array &$changes): void
{
    $firstTag = $changes[0][0]['tag'] ?? null;

    // check if the tag is already int type
    if (\is_int($firstTag)) {
        return;
    }

    foreach ($changes as $hunkKey => $hunk) {
        foreach ($hunk as $blockKey => $block) {
            $changes[$hunkKey][$blockKey]['tag'] = SequenceMatcher::opStrToInt($block['tag']);
        }
    }
}
/**
 * Remove empty delete/insert closures (an opening marker directly
 * followed by its closing marker) from the given string.
 *
 * @param string $string the string
 *
 * @return string the string without empty closures
 */
protected function cleanUpDummyHtmlClosures(string $string): string
{
    [$delOpen, $delClose] = RendererConstant::HTML_CLOSURES_DEL;
    [$insOpen, $insClose] = RendererConstant::HTML_CLOSURES_INS;

    $emptyClosures = [
        $delOpen . $delClose,
        $insOpen . $insClose,
    ];

    return str_replace($emptyClosures, '', $string);
}
}

View File

@ -0,0 +1,518 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
use Jfcherng\Diff\Factory\LineRendererFactory;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Diff\Utility\ReverseIterator;
use Jfcherng\Utility\MbString;
/**
* Combined HTML diff generator.
*
* Note that this renderer always has no line number.
*/
final class Combined extends AbstractHtml
{
/**
 * {@inheritdoc}
 */
public const INFO = [
'desc' => 'Combined',
'type' => 'Html',
];
/**
 * {@inheritdoc}
 *
 * Disabled here: this renderer formats lines itself, see customFormatLines().
 */
public const AUTO_FORMAT_CHANGES = false;
/**
 * Render the changes as a single HTML table.
 *
 * @param array[][] $changes the changes
 *
 * @return string the HTML table, or the result for identicals when there are no changes
 */
protected function redererChanges(array $changes): string
{
    if ($changes === []) {
        return $this->getResultForIdenticals();
    }

    $classAttribute = implode(' ', [
        ...$this->options['wrapperClasses'],
        'diff', 'diff-html', 'diff-combined',
    ]);

    $header = $this->renderTableHeader();
    $body = $this->renderTableHunks($changes);

    return '<table class="' . $classAttribute . '">' . $header . $body . '</table>';
}
/**
 * Render the table header, or nothing when the "showHeader" option is off.
 *
 * @return string the <thead> HTML or an empty string
 */
protected function renderTableHeader(): string
{
    if ($this->options['showHeader']) {
        $title = $this->_('differences');

        return '<thead><tr><th>' . $title . '</th></tr></thead>';
    }

    return '';
}
/**
 * Render the separator shown between two hunks.
 *
 * @return string the <tbody> HTML of the separator
 */
protected function renderTableSeparateBlock(): string
{
    return '<tbody class="skipped"><tr><td></td></tr></tbody>';
}
/**
 * Render all hunks, optionally separated by a separator block.
 *
 * @param array[][] $hunks each hunk has many blocks
 *
 * @return string the concatenated HTML of all hunks
 */
protected function renderTableHunks(array $hunks): string
{
    $pieces = [];

    foreach ($hunks as $i => $hunk) {
        // no separator in front of the first hunk
        if ($i > 0 && $this->options['separateBlock']) {
            $pieces[] = $this->renderTableSeparateBlock();
        }

        foreach ($hunk as $block) {
            $pieces[] = $this->renderTableBlock($block);
        }
    }

    return implode('', $pieces);
}
/**
 * Render one block as a <tbody>, dispatching on the block's operation.
 *
 * @param array $block the block
 *
 * @return string the <tbody> HTML of the block
 */
protected function renderTableBlock(array $block): string
{
    $tag = $block['tag'];

    // loose comparisons on purpose: they match operations the same way a switch statement does
    if ($tag == SequenceMatcher::OP_EQ) {
        $content = $this->renderTableBlockEqual($block);
    } elseif ($tag == SequenceMatcher::OP_INS) {
        $content = $this->renderTableBlockInsert($block);
    } elseif ($tag == SequenceMatcher::OP_DEL) {
        $content = $this->renderTableBlockDelete($block);
    } elseif ($tag == SequenceMatcher::OP_REP) {
        $content = $this->renderTableBlockReplace($block);
    } else {
        $content = '';
    }

    $cssClass = 'change change-' . self::TAG_CLASS_MAP[$block['tag']];

    return '<tbody class="' . $cssClass . '">' . $content . '</tbody>';
}
/**
 * Render the table block: equal.
 *
 * @param array $block the block
 *
 * @return string the rows of the block
 */
protected function renderTableBlockEqual(array $block): string
{
    // note that although we are in a OP_EQ situation,
    // the old and the new may not be exactly the same
    // because of ignoreCase, ignoreWhitespace, etc
    //
    // we could only pick either the old or the new to show
    // here we pick the new one to let the user know what it is now
    $lines = $this->customFormatLines($block['new']['lines'], SequenceMatcher::OP_EQ);

    $rows = '';
    foreach ($lines as $line) {
        $rows .= $this->renderTableRow('new', SequenceMatcher::OP_EQ, $line);
    }

    return $rows;
}
/**
 * Render the table block: insert. One row per new line.
 *
 * @param array $block the block
 *
 * @return string the rows of the block
 */
protected function renderTableBlockInsert(array $block): string
{
    $lines = $this->customFormatLines($block['new']['lines'], SequenceMatcher::OP_INS);

    $rows = '';
    foreach ($lines as $line) {
        $rows .= $this->renderTableRow('new', SequenceMatcher::OP_INS, $line);
    }

    return $rows;
}
/**
 * Render the table block: delete. One row per old line.
 *
 * @param array $block the block
 *
 * @return string the rows of the block
 */
protected function renderTableBlockDelete(array $block): string
{
    $lines = $this->customFormatLines($block['old']['lines'], SequenceMatcher::OP_DEL);

    $rows = '';
    foreach ($lines as $line) {
        $rows .= $this->renderTableRow('old', SequenceMatcher::OP_DEL, $line);
    }

    return $rows;
}
/**
 * Render the table block: replace.
 *
 * Old and new lines are merged pairwise into single "rep" rows. If any pair
 * cannot be merged, the whole block is rendered in the separated form
 * (all deleted rows followed by all inserted rows) instead, exactly once.
 *
 * @param array $block the block
 *
 * @return string the rows of the block
 */
protected function renderTableBlockReplace(array $block): string
{
    if ($this->options['detailLevel'] === 'none') {
        return
            $this->renderTableBlockDelete($block) .
            $this->renderTableBlockInsert($block);
    }

    $oldLines = $block['old']['lines'];
    $newLines = $block['new']['lines'];

    $oldLinesCount = \count($oldLines);
    $newLinesCount = \count($newLines);

    // if the line counts changes, we treat the old and the new as
    // "a line with \n in it" and then do one-line-to-one-line diff
    if ($oldLinesCount !== $newLinesCount) {
        [$oldLines, $newLines] = $this->markReplaceBlockDiff($oldLines, $newLines);
        $newLinesCount = 1;
    }

    $oldLines = $this->customFormatLines($oldLines, SequenceMatcher::OP_DEL);
    $newLines = $this->customFormatLines($newLines, SequenceMatcher::OP_INS);

    $ret = '';

    // now $oldLines must has the same line counts with $newlines
    for ($no = 0; $no < $newLinesCount; ++$no) {
        $mergedLine = $this->mergeReplaceLines($oldLines[$no], $newLines[$no]);

        // not merge-able, we fall back to separated form for the whole block.
        // rows merged so far are dropped: the separated form already covers
        // every line, so keeping them would render those lines twice
        if ($mergedLine === null) {
            return
                $this->renderTableBlockDelete($block) .
                $this->renderTableBlockInsert($block);
        }

        $ret .= $this->renderTableRow('rep', SequenceMatcher::OP_REP, $mergedLine);
    }

    return $ret;
}
/**
 * Render a content row of the output table.
 *
 * @param string $tdClass the <td> class
 * @param int    $op      the operation
 * @param string $line    the line (already formatted HTML, inserted as is)
 *
 * @return string the <tr> HTML
 */
protected function renderTableRow(string $tdClass, int $op, string $line): string
{
    $symbol = self::SYMBOL_MAP[$op];
    $cell = '<td class="' . $tdClass . '">' . $line . '</td>';

    return '<tr data-type="' . $symbol . '">' . $cell . '</tr>';
}
/**
 * Merge two "replace"-type lines into a single line.
 *
 * The implementation concept is that if we remove all closure parts from
 * the old and the new, the rest of them (cleaned line) should be the same.
 * And then, we add back those removed closure parts in a correct order.
 *
 * Newline characters in the merged result are converted to "<br>".
 *
 * @param string $oldLine the old line
 * @param string $newLine the new line
 *
 * @return null|string string if merge-able, null otherwise
 */
protected function mergeReplaceLines(string $oldLine, string $newLine): ?string
{
// locate every delete closure in the old line ...
$delParts = $this->analyzeClosureParts(
$oldLine,
RendererConstant::HTML_CLOSURES_DEL,
SequenceMatcher::OP_DEL,
);
// ... and every insert closure in the new line
$insParts = $this->analyzeClosureParts(
$newLine,
RendererConstant::HTML_CLOSURES_INS,
SequenceMatcher::OP_INS,
);
// get the cleaned line by a non-regex way (should be faster)
// i.e., the new line with all "<ins>...</ins>" parts removed
$mergedLine = $newLine;
// NOTE(review): presumably ReverseIterator walks the parts last-to-first so that
// removing one part does not shift the offsets of the ones still to go; confirm
foreach (ReverseIterator::fromArray($insParts) as $part) {
$mergedLine = substr_replace(
$mergedLine,
'', // deletion
$part['offset'],
\strlen($part['content']),
);
}
// note that $mergedLine is actually a clean line at this point
if (!$this->isLinesMergeable($oldLine, $newLine, $mergedLine)) {
return null;
}
// before building the $mergedParts, we do some adjustments
$this->revisePartsForBoundaryNewlines($delParts, RendererConstant::HTML_CLOSURES_DEL);
$this->revisePartsForBoundaryNewlines($insParts, RendererConstant::HTML_CLOSURES_INS);
// create a sorted merged parts array
$mergedParts = [...$delParts, ...$insParts];
usort(
$mergedParts,
// first sort by "offsetClean", "order" then by "type"
// (on a full tie, a delete part is placed before the part it is compared with)
static fn (array $a, array $b): int => (
$a['offsetClean'] <=> $b['offsetClean']
?: $a['order'] <=> $b['order']
?: ($a['type'] === SequenceMatcher::OP_DEL ? -1 : 1)
),
);
// insert merged parts into the cleaned line, at their offsets in the cleaned line
foreach (ReverseIterator::fromArray($mergedParts) as $part) {
$mergedLine = substr_replace(
$mergedLine,
$part['content'],
$part['offsetClean'],
0, // insertion
);
}
return str_replace("\n", '<br>', $mergedLine);
}
/**
 * Analyze and get the closure parts information of the line.
 *
 * Such as
 * extract information for "<ins>part 1</ins>" and "<ins>part 2</ins>"
 * from "Hello <ins>part 1</ins>SOME OTHER TEXT<ins>part 2</ins> World"
 *
 * Scanning stops at the first left delimiter that has no right delimiter after it.
 *
 * @param string   $line     the line
 * @param string[] $closures the closures, as [left delimiter, right delimiter]
 * @param int      $type     the type
 *
 * @return array[] the closure information
 */
protected function analyzeClosureParts(string $line, array $closures, int $type): array
{
    [$ld, $rd] = $closures;

    $ldLength = \strlen($ld);
    $rdLength = \strlen($rd);

    $parts = [];
    $cursor = 0;
    // total length of the parts found so far
    $removedLength = 0;

    while (true) {
        // find the next left delimiter
        $start = strpos($line, $ld, $cursor);
        if ($start === false) {
            break;
        }

        // find the corresponding right delimiter
        $end = strpos($line, $rd, $start + $ldLength);
        if ($end === false) {
            break;
        }

        $end += $rdLength;
        $length = $end - $start;

        $parts[] = [
            'type' => $type,
            // the sorting order used when both "offsetClean" are the same
            'order' => 0,
            // the offset in the line
            'offset' => $start,
            // the offset in the cleaned line (i.e., the line with closure parts removed)
            'offsetClean' => $start - $removedLength,
            // the content of the part
            'content' => substr($line, $start, $length),
        ];

        $removedLength += $length;
        $cursor = $end;
    }

    return $parts;
}
/**
 * Mark differences between two "replace" blocks.
 *
 * The lines of each block are joined with "\n" and diffed as a single line,
 * so each of the returned block (lines) is always only one line.
 *
 * @param string[] $oldBlock The old block
 * @param string[] $newBlock The new block
 *
 * @return string[][] the value of [[$oldLine], [$newLine]]
 */
protected function markReplaceBlockDiff(array $oldBlock, array $newBlock): array
{
    // the MbString objects carry no configuration and are reset by set(),
    // so they can safely be shared between calls
    static $mbOld, $mbNew;

    $mbOld ??= new MbString();
    $mbNew ??= new MbString();

    // the line renderer depends on the options of this very instance, so it must not be
    // cached in a static variable: a static local is shared by every instance and would
    // keep using the options of whoever called first, even after setOptions()
    $lineRenderer = LineRendererFactory::make(
        $this->options['detailLevel'],
        [], /** @todo is it possible to get the differOptions here? */
        $this->options,
    );

    $mbOld->set(implode("\n", $oldBlock));
    $mbNew->set(implode("\n", $newBlock));

    $lineRenderer->render($mbOld, $mbNew);

    return [
        [$mbOld->get()], // one-line block for the old
        [$mbNew->get()], // one-line block for the new
    ];
}
/**
 * Determine whether the "replace"-type lines are merge-able or not.
 *
 * The lines are merge-able when the ratio of changed content
 * does not exceed the "mergeThreshold" option.
 *
 * @param string $oldLine   the old line
 * @param string $newLine   the new line
 * @param string $cleanLine the clean line
 *
 * @return bool true if the lines may be merged
 */
protected function isLinesMergeable(string $oldLine, string $newLine, string $cleanLine): bool
{
    // lengths are measured without the closure markers themselves
    $oldPlain = str_replace(RendererConstant::HTML_CLOSURES_DEL, '', $oldLine);
    $newPlain = str_replace(RendererConstant::HTML_CLOSURES_INS, '', $newLine);

    $totalLength = \strlen($oldPlain) + \strlen($newPlain);
    $changedLength = $totalLength - 2 * \strlen($cleanLine);

    /** @var float the changed ratio, 0 <= value < 1 */
    $changedRatio = $changedLength / ($totalLength + 1);

    return $changedRatio <= $this->options['mergeThreshold'];
}
/**
 * Extract boundary newlines from parts into new parts.
 *
 * Newlines directly after a left delimiter or directly before a right delimiter
 * are removed from the part's content and appended to $parts as a new part of
 * their own, wrapped in the same closures. The new part copies the other fields
 * of the original part and gets "order" -1 (leading) or 1 (trailing).
 *
 * @param array[]  $parts    the parts, modified in place
 * @param string[] $closures the closures, as [left delimiter, right delimiter]
 *
 * @see https://git.io/JvVXH
 */
protected function revisePartsForBoundaryNewlines(array &$parts, array $closures): void
{
[$ld, $rd] = $closures;
$ldRegex = preg_quote($ld, '/');
$rdRegex = preg_quote($rd, '/');
// the start index is computed once, so parts appended by the callbacks below are not revisited
for ($i = \count($parts) - 1; $i >= 0; --$i) {
$part = &$parts[$i];
// deal with leading newlines
$part['content'] = preg_replace_callback(
"/(?P<closure>{$ldRegex})(?P<nl>[\r\n]++)/u",
// $part is captured by value: the callback edits a copy and appends it as a new part
static function (array $matches) use (&$parts, $part, $ld, $rd): string {
// add a new part for the extracted newlines
$part['order'] = -1;
$part['content'] = "{$ld}{$matches['nl']}{$rd}";
$parts[] = $part;
return $matches['closure'];
},
$part['content'],
);
// deal with trailing newlines
$part['content'] = preg_replace_callback(
"/(?P<nl>[\r\n]++)(?P<closure>{$rdRegex})/u",
// $part is captured by value here as well
static function (array $matches) use (&$parts, $part, $ld, $rd): string {
// add a new part for the extracted newlines
$part['order'] = 1;
$part['content'] = "{$ld}{$matches['nl']}{$rd}";
$parts[] = $part;
return $matches['closure'];
},
$part['content'],
);
}
}
/**
 * Make lines suitable for HTML output.
 *
 * Lines that have already been processed (i.e., the changes are not raw) are
 * returned untouched. Otherwise the lines are formatted, the generic closure
 * markers are replaced with the delete/insert closures matching the operation,
 * and "\r\n" is normalized to "\n".
 *
 * @param string[] $lines the lines
 * @param int      $op    the operation
 *
 * @return string[] the formatted lines
 */
protected function customFormatLines(array $lines, int $op): array
{
    if (!$this->changesAreRaw) {
        return $lines;
    }

    // only delete and insert operations have dedicated closures
    static $closureMap = [
        SequenceMatcher::OP_DEL => RendererConstant::HTML_CLOSURES_DEL,
        SequenceMatcher::OP_INS => RendererConstant::HTML_CLOSURES_INS,
    ];

    $htmlClosures = $closureMap[$op] ?? null;
    $lines = $this->formatLines($lines);

    foreach ($lines as $index => $line) {
        if ($htmlClosures) {
            $line = str_replace(RendererConstant::HTML_CLOSURES, $htmlClosures, $line);
        }

        // fixes https://github.com/jfcherng/php-diff/issues/34
        $lines[$index] = str_replace("\r\n", "\n", $line);
    }

    return $lines;
}
}

View File

@ -0,0 +1,263 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
use Jfcherng\Diff\SequenceMatcher;
/**
* Inline HTML diff generator.
*/
final class Inline extends AbstractHtml
{
/**
 * {@inheritdoc}
 *
 * Description and category of the Inline renderer.
 */
public const INFO = [
'desc' => 'Inline',
'type' => 'Html',
];
/**
 * Render the changes as a single HTML table.
 *
 * @param array[][] $changes the changes
 *
 * @return string the HTML table, or the result for identicals when there are no changes
 */
protected function redererChanges(array $changes): string
{
    if ($changes === []) {
        return $this->getResultForIdenticals();
    }

    $classAttribute = implode(' ', [
        ...$this->options['wrapperClasses'],
        'diff', 'diff-html', 'diff-inline',
    ]);

    $header = $this->renderTableHeader();
    $body = $this->renderTableHunks($changes);

    return '<table class="' . $classAttribute . '">' . $header . $body . '</table>';
}
/**
 * Render the table header, or nothing when the "showHeader" option is off.
 *
 * With line numbers there are four header cells (old, new, diff symbol,
 * differences); without them a single cell spans the two remaining columns.
 *
 * @return string the <thead> HTML or an empty string
 */
protected function renderTableHeader(): string
{
    if (!$this->options['showHeader']) {
        return '';
    }

    if ($this->options['lineNumbers']) {
        $cells =
            '<th>' . $this->_('old_version') . '</th>' .
            '<th>' . $this->_('new_version') . '</th>' .
            '<th></th>' . // diff symbol column
            '<th>' . $this->_('differences') . '</th>';
    } else {
        $cells = '<th colspan="2">' . $this->_('differences') . '</th>';
    }

    return '<thead><tr>' . $cells . '</tr></thead>';
}
/**
 * Render the separator shown between two hunks.
 *
 * The single cell spans all columns: four with line numbers, two without.
 *
 * @return string the <tbody> HTML of the separator
 */
protected function renderTableSeparateBlock(): string
{
    if ($this->options['lineNumbers']) {
        $colspan = '4';
    } else {
        $colspan = '2';
    }

    return '<tbody class="skipped"><tr><td colspan="' . $colspan . '"></td></tr></tbody>';
}
/**
 * Render all hunks, optionally separated by a separator block.
 *
 * @param array[][] $hunks each hunk has many blocks
 *
 * @return string the concatenated HTML of all hunks
 */
protected function renderTableHunks(array $hunks): string
{
    $pieces = [];

    foreach ($hunks as $i => $hunk) {
        // no separator in front of the first hunk
        if ($i > 0 && $this->options['separateBlock']) {
            $pieces[] = $this->renderTableSeparateBlock();
        }

        foreach ($hunk as $block) {
            $pieces[] = $this->renderTableBlock($block);
        }
    }

    return implode('', $pieces);
}
/**
 * Render one block as a <tbody>, dispatching on the block's operation.
 *
 * @param array $block the block
 *
 * @return string the <tbody> HTML of the block
 */
protected function renderTableBlock(array $block): string
{
    $tag = $block['tag'];

    // loose comparisons on purpose: they match operations the same way a switch statement does
    if ($tag == SequenceMatcher::OP_EQ) {
        $content = $this->renderTableBlockEqual($block);
    } elseif ($tag == SequenceMatcher::OP_INS) {
        $content = $this->renderTableBlockInsert($block);
    } elseif ($tag == SequenceMatcher::OP_DEL) {
        $content = $this->renderTableBlockDelete($block);
    } elseif ($tag == SequenceMatcher::OP_REP) {
        $content = $this->renderTableBlockReplace($block);
    } else {
        $content = '';
    }

    $cssClass = 'change change-' . self::TAG_CLASS_MAP[$block['tag']];

    return '<tbody class="' . $cssClass . '">' . $content . '</tbody>';
}
/**
 * Render the table block: equal. One row per line, numbered on both sides.
 *
 * @param array $block the block
 *
 * @return string the rows of the block
 */
protected function renderTableBlockEqual(array $block): string
{
    // displayed line numbers are 1-based while offsets are 0-based
    $firstOldLineNum = $block['old']['offset'] + 1;
    $firstNewLineNum = $block['new']['offset'] + 1;

    $rows = '';

    // note that although we are in a OP_EQ situation,
    // the old and the new may not be exactly the same
    // because of ignoreCase, ignoreWhitespace, etc
    //
    // we could only pick either the old or the new to show
    // here we pick the new one to let the user know what it is now
    foreach ($block['new']['lines'] as $no => $newLine) {
        $rows .= $this->renderTableRow(
            'new',
            SequenceMatcher::OP_EQ,
            $newLine,
            $firstOldLineNum + $no,
            $firstNewLineNum + $no,
        );
    }

    return $rows;
}
/**
 * Render the table block: insert. One row per new line, without an old line number.
 *
 * @param array $block the block
 *
 * @return string the rows of the block
 */
protected function renderTableBlockInsert(array $block): string
{
    $rows = '';

    foreach ($block['new']['lines'] as $no => $newLine) {
        // displayed line numbers are 1-based while offsets are 0-based
        $newLineNum = $block['new']['offset'] + $no + 1;

        $rows .= $this->renderTableRow('new', SequenceMatcher::OP_INS, $newLine, null, $newLineNum);
    }

    return $rows;
}
/**
 * Render the table block: delete. One row per old line, without a new line number.
 *
 * @param array $block the block
 *
 * @return string the rows of the block
 */
protected function renderTableBlockDelete(array $block): string
{
    $rows = '';

    foreach ($block['old']['lines'] as $no => $oldLine) {
        // displayed line numbers are 1-based while offsets are 0-based
        $oldLineNum = $block['old']['offset'] + $no + 1;

        $rows .= $this->renderTableRow('old', SequenceMatcher::OP_DEL, $oldLine, $oldLineNum, null);
    }

    return $rows;
}
/**
* Renderer the table block: replace.
*
* @param array $block the block
*/
protected function renderTableBlockReplace(array $block): string
{
    // a replacement is shown as all the deleted (old) rows
    // followed by all the inserted (new) rows
    $deletedRows = $this->renderTableBlockDelete($block);
    $insertedRows = $this->renderTableBlockInsert($block);

    return $deletedRows . $insertedRows;
}
/**
* Render a content row of the output table.
*
* @param string $tdClass the <td> class
* @param int $op the operation
* @param string $line the line
* @param null|int $oldLineNum the old line number
* @param null|int $newLineNum the new line number
*/
protected function renderTableRow(
    string $tdClass,
    int $op,
    string $line,
    ?int $oldLineNum,
    ?int $newLineNum
): string {
    // the sign of the operation is used both as the row's data-type
    // attribute and as the visible content of the sign cell
    $symbol = self::SYMBOL_MAP[$op];

    // the line number cells are only emitted when the option asks for them
    $lineNumberCells = '';
    if ($this->options['lineNumbers']) {
        $lineNumberCells = $this->renderLineNumberColumns($oldLineNum, $newLineNum);
    }

    $signCell = '<th class="sign ' . self::TAG_CLASS_MAP[$op] . '">' . $symbol . '</th>';

    // $line is written as-is, no escaping happens in this method
    $contentCell = '<td class="' . $tdClass . '">' . $line . '</td>';

    return '<tr data-type="' . $symbol . '">' . $lineNumberCells . $signCell . $contentCell . '</tr>';
}
/**
* Render the line number columns.
*
* @param null|int $oldLineNum The old line number
* @param null|int $newLineNum The new line number
*/
protected function renderLineNumberColumns(?int $oldLineNum, ?int $newLineNum): string
{
    // a missing (null) line number is rendered as an empty header cell
    $oldCell = $oldLineNum === null
        ? '<th></th>'
        : '<th class="n-old">' . $oldLineNum . '</th>';

    $newCell = $newLineNum === null
        ? '<th></th>'
        : '<th class="n-new">' . $newLineNum . '</th>';

    return $oldCell . $newCell;
}
}

View File

@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
/**
* HTML Json diff generator.
*
* @deprecated 6.8.0 Use the "JsonHtml" renderer instead.
*/
final class Json extends JsonHtml
{
// no members of its own: everything is inherited from JsonHtml
}

View File

@ -0,0 +1,59 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
use Jfcherng\Diff\SequenceMatcher;
/**
* HTML Json diff generator.
*/
class JsonHtml extends AbstractHtml
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'HTML Json',
        'type' => 'Html',
    ];

    /**
     * {@inheritdoc}
     */
    public const IS_TEXT_RENDERER = true;

    /**
     * The default result for two identical inputs: an empty JSON array.
     */
    public function getResultForIdenticalsDefault(): string
    {
        return '[]';
    }

    /**
     * Encode the changes as a JSON string.
     *
     * @param array[][] $changes the changes
     */
    protected function redererChanges(array $changes): string
    {
        $wantsStringTags = $this->options['outputTagAsString'];

        if ($wantsStringTags) {
            $this->convertTagToString($changes);
        }

        $encodeFlags = $this->options['jsonEncodeFlags'];

        return json_encode($changes, $encodeFlags);
    }

    /**
     * Replace the integer tag of every block with its string form
     * for better readability. The given array is modified in place.
     *
     * @param array[][] $changes the changes
     */
    protected function convertTagToString(array &$changes): void
    {
        foreach ($changes as $hunkIdx => $hunk) {
            foreach ($hunk as $blockIdx => $block) {
                $changes[$hunkIdx][$blockIdx]['tag'] = SequenceMatcher::opIntToStr($block['tag']);
            }
        }
    }

    /**
     * Make the string safe for HTML output via htmlSafe().
     *
     * @param string $string the string
     */
    protected function formatStringFromLines(string $string): string
    {
        $safe = $this->htmlSafe($string);

        return $safe;
    }
}

View File

@ -0,0 +1,108 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Diff\SequenceMatcher;
/**
* Base renderer for rendering HTML-based line diffs.
*
* @todo use typed properties (BC breaking for public interface) in v7
*/
abstract class AbstractLineRenderer implements LineRendererInterface
{
    /**
     * @var SequenceMatcher the matcher used to compare the parts of two lines
     */
    protected $sequenceMatcher;

    /**
     * @var array the options of the differ
     */
    protected $differOptions = [];

    /**
     * @var array the options of the renderer
     */
    protected $rendererOptions = [];

    /**
     * Create the line renderer with its own, initially empty, sequence matcher.
     *
     * @param array $differOptions   the differ options
     * @param array $rendererOptions the renderer options
     */
    public function __construct(array $differOptions, array $rendererOptions)
    {
        // the matcher must exist before the differ options are set
        // because setDifferOptions() forwards them to it
        $this->sequenceMatcher = new SequenceMatcher([], []);

        $this->setDifferOptions($differOptions);
        $this->setRendererOptions($rendererOptions);
    }

    /**
     * Store the differ options and pass them on to the sequence matcher.
     *
     * @param array $differOptions the differ options
     *
     * @return static
     */
    public function setDifferOptions(array $differOptions): self
    {
        $this->differOptions = $differOptions;
        $this->sequenceMatcher->setOptions($this->differOptions);

        return $this;
    }

    /**
     * Store the renderer options.
     *
     * @param array $rendererOptions the renderer options
     *
     * @return static
     */
    public function setRendererOptions(array $rendererOptions): self
    {
        $this->rendererOptions = $rendererOptions;

        return $this;
    }

    /**
     * Get the currently stored differ options.
     *
     * @return array the differ options
     */
    public function getDifferOptions(): array
    {
        return $this->differOptions;
    }

    /**
     * Get the currently stored renderer options.
     *
     * @return array the renderer options
     */
    public function getRendererOptions(): array
    {
        return $this->rendererOptions;
    }

    /**
     * Compare two sequences and get the opcodes describing their differences.
     *
     * @param string[] $old the old array
     * @param string[] $new the new array
     *
     * @return int[][] the changed extent segments
     */
    protected function getChangedExtentSegments(array $old, array $new): array
    {
        $matcher = $this->sequenceMatcher->setSequences($old, $new);

        return $matcher->getOpcodes();
    }
}

View File

@ -0,0 +1,34 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Diff\Utility\ReverseIterator;
use Jfcherng\Utility\MbString;
final class Char extends AbstractLineRenderer
{
    /**
     * Enclose every changed run of characters of both lines with the closure markers.
     *
     * @param MbString $mbOld the old line
     * @param MbString $mbNew the new line
     *
     * @return static
     */
    public function render(MbString $mbOld, MbString $mbNew): LineRendererInterface
    {
        // operations that touch the old side / the new side
        $oldSideOps = SequenceMatcher::OP_REP | SequenceMatcher::OP_DEL;
        $newSideOps = SequenceMatcher::OP_REP | SequenceMatcher::OP_INS;

        $segments = $this->getChangedExtentSegments($mbOld->toArray(), $mbNew->toArray());

        // the segments are walked from the last one to the first one
        // NOTE(review): presumably so that markers inserted at later positions
        // do not shift the offsets of the earlier segments
        foreach (ReverseIterator::fromArray($segments) as [$op, $oldBegin, $oldEnd, $newBegin, $newEnd]) {
            if (($op & $oldSideOps) !== 0) {
                $mbOld->str_enclose_i(RendererConstant::HTML_CLOSURES, $oldBegin, $oldEnd - $oldBegin);
            }

            if (($op & $newSideOps) !== 0) {
                $mbNew->str_enclose_i(RendererConstant::HTML_CLOSURES, $newBegin, $newEnd - $newBegin);
            }
        }

        return $this;
    }
}

View File

@ -0,0 +1,79 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Utility\MbString;
final class Line extends AbstractLineRenderer
{
    /**
     * Enclose the single changed region of both lines with the closure markers.
     *
     * The region spans from the first differing character to the last
     * differing character, so the common prefix and suffix stay unmarked.
     *
     * @param MbString $mbOld the old line
     * @param MbString $mbNew the new line
     *
     * @return static
     */
    public function render(MbString $mbOld, MbString $mbNew): LineRendererInterface
    {
        [$start, $end] = $this->getChangedExtentRegion($mbOld, $mbNew);

        // $end is 0 only when both strings are the same: nothing to mark then
        if ($end !== 0) {
            // the old line is processed first, then the new line
            foreach ([$mbOld, $mbNew] as $mbLine) {
                $mbLine->str_enclose_i(
                    RendererConstant::HTML_CLOSURES,
                    $start,
                    // $end is negative (counted from the end of the string),
                    // this turns it into the length of the changed region
                    $end + $mbLine->strlen() - $start + 1,
                );
            }
        }

        return $this;
    }

    /**
     * Determine where the changes between two strings begin and where they end.
     *
     * @param MbString $mbOld the old multibyte line
     * @param MbString $mbNew the new multibyte line
     *
     * @return int[] [start, end] where "start" is a non-negative position counted
     *               from the beginning and "end" is a negative position counted
     *               from the end; [0, 0] if two strings are the same
     */
    protected function getChangedExtentRegion(MbString $mbOld, MbString $mbNew): array
    {
        // most lines are expected to be identical,
        // returning early here saves many function calls
        if ($mbOld->getRaw() === $mbNew->getRaw()) {
            return [0, 0];
        }

        // no position beyond the shorter string can be compared
        $commonLength = min($mbOld->strlen(), $mbNew->strlen());

        // skip the common prefix: $start stops at the first differing character
        for ($start = 0; $start < $commonLength; ++$start) {
            if ($mbOld->getAtRaw($start) !== $mbNew->getAtRaw($start)) {
                break;
            }
        }

        // skip the common suffix, which must not overlap with the common prefix;
        // $end begins at -1 (the last character) and moves towards the front
        $suffixLimit = $commonLength - $start;
        for ($end = -1; -$end <= $suffixLimit; --$end) {
            if ($mbOld->getAtRaw($end) !== $mbNew->getAtRaw($end)) {
                break;
            }
        }

        return [$start, $end];
    }
}

View File

@ -0,0 +1,20 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Utility\MbString;
interface LineRendererInterface
{
/**
* Render the in-line changed extent of a pair of lines.
*
* @param MbString $mbOld the old multibyte line
* @param MbString $mbNew the new multibyte line
*
* @return static
*/
public function render(MbString $mbOld, MbString $mbNew): self;
}

View File

@ -0,0 +1,18 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Utility\MbString;
final class None extends AbstractLineRenderer
{
/**
* Do nothing: neither of the two lines is touched by this renderer.
*
* @param MbString $mbOld the old line (left as it is)
* @param MbString $mbNew the new line (left as it is)
*
* @return static
*/
public function render(MbString $mbOld, MbString $mbNew): LineRendererInterface
{
return $this;
}
}

View File

@ -0,0 +1,106 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html\LineRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Diff\Utility\ReverseIterator;
use Jfcherng\Diff\Utility\Str;
use Jfcherng\Utility\MbString;
final class Word extends AbstractLineRenderer
{
    /**
     * Enclose every changed run of words of both lines with the closure markers.
     *
     * @param MbString $mbOld the old line
     * @param MbString $mbNew the new line
     *
     * @return static
     */
    public function render(MbString $mbOld, MbString $mbNew): LineRendererInterface
    {
        [$openMarker, $closeMarker] = RendererConstant::HTML_CLOSURES;

        // a marker pair which encloses nothing
        $emptyMarkerPair = $openMarker . $closeMarker;

        // the punctuations are captured, so they become "words" on their own
        $splitPattern = '/([' . RendererConstant::PUNCTUATIONS_RANGE . '])/uS';
        $splitFlags = \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY;

        $oldWords = $mbOld->toArraySplit($splitPattern, -1, $splitFlags);
        $newWords = $mbNew->toArraySplit($splitPattern, -1, $splitFlags);

        $segments = $this->getChangedExtentSegments($oldWords, $newWords);

        // operations that touch the old side / the new side
        $oldSideOps = SequenceMatcher::OP_REP | SequenceMatcher::OP_DEL;
        $newSideOps = SequenceMatcher::OP_REP | SequenceMatcher::OP_INS;

        // the segments are walked from the last one to the first one
        foreach (ReverseIterator::fromArray($segments) as [$op, $oldBegin, $oldEnd, $newBegin, $newEnd]) {
            if (($op & $oldSideOps) !== 0) {
                $oldWords[$oldBegin] = $openMarker . $oldWords[$oldBegin];
                $oldWords[$oldEnd - 1] .= $closeMarker;

                // insert an empty marker pair on the other side to ensure there
                // are always the same amounts of markers in both word lists,
                // thus, this should make that "wordGlues" work correctly
                // @see https://github.com/jfcherng/php-diff/pull/25
                if ($op === SequenceMatcher::OP_DEL) {
                    array_splice($newWords, $newBegin, 0, [$emptyMarkerPair]);
                }
            }

            if (($op & $newSideOps) !== 0) {
                $newWords[$newBegin] = $openMarker . $newWords[$newBegin];
                $newWords[$newEnd - 1] .= $closeMarker;

                if ($op === SequenceMatcher::OP_INS) {
                    array_splice($oldWords, $oldBegin, 0, [$emptyMarkerPair]);
                }
            }
        }

        $glues = $this->rendererOptions['wordGlues'] ?? [];

        if ($segments !== [] && !empty($glues)) {
            $quotedGlues = array_map(
                static fn (string $glue): string => preg_quote($glue, '/'),
                $glues,
            );

            // matches a string which consists of nothing but glues
            $gluePattern = '/^(?:' . implode('|', $quotedGlues) . ')+$/uS';

            $this->glueWordsResult($oldWords, $gluePattern);
            $this->glueWordsResult($newWords, $gluePattern);
        }

        $mbOld->set(implode('', $oldWords));
        $mbNew->set(implode('', $newWords));

        return $this;
    }

    /**
     * Beautify the diff result by merging marked words which are only
     * separated by glues into one marked region.
     *
     * For example, this turns
     * ["<diff_begin>good<diff_end>", "-", "<diff_begin>looking<diff_end>"]
     * into
     * ["<diff_begin>good", "-", "looking<diff_end>"].
     *
     * @param array  $words       the words, modified in place
     * @param string $gluePattern the regex to determine a string is purely glue or not
     */
    protected function glueWordsResult(array &$words, string $gluePattern): void
    {
        $openMarker = RendererConstant::HTML_CLOSURES[0];
        $closeMarker = RendererConstant::HTML_CLOSURES[1];
        $openMarkerLength = \strlen($openMarker);
        $closeMarkerLength = \strlen($closeMarker);

        /** @var int index of the word which has the trailing marker, negative if there is none */
        $pendingIdx = -1;

        // $word is the value at the beginning of the iteration, which is still
        // up-to-date because only the current and the earlier words get changed
        foreach ($words as $idx => $word) {
            if ($word === '') {
                continue;
            }

            // no candidate yet: look for a word which closes a marked region
            if ($pendingIdx < 0) {
                if (Str::endsWith($word, $closeMarker)) {
                    $pendingIdx = $idx;
                }

                continue;
            }

            // a new marked region begins and only glues were in between:
            // drop the closing marker of the candidate and the opening marker
            // of this word so that both regions become one
            if (Str::startsWith($word, $openMarker)) {
                $words[$pendingIdx] = substr($words[$pendingIdx], 0, -$closeMarkerLength);
                $words[$idx] = substr($word, $openMarkerLength);
                $pendingIdx = $idx;

                continue;
            }

            // anything which is not purely glue breaks the chain
            if (!preg_match($gluePattern, $word)) {
                $pendingIdx = -1;
            }
        }
    }
}

View File

@ -0,0 +1,278 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Html;
use Jfcherng\Diff\SequenceMatcher;
/**
* Side by Side HTML diff generator.
*/
final class SideBySide extends AbstractHtml
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'Side by side',
        'type' => 'Html',
    ];

    /**
     * Render the changes as a HTML table with the old and the new side next to each other.
     *
     * @param array[][] $changes the changes
     */
    protected function redererChanges(array $changes): string
    {
        if ($changes === []) {
            return $this->getResultForIdenticals();
        }

        // user-defined classes come first, followed by the built-in ones
        $wrapperClasses = [
            ...$this->options['wrapperClasses'],
            'diff', 'diff-html', 'diff-side-by-side',
        ];
        $classAttribute = implode(' ', $wrapperClasses);

        $header = $this->renderTableHeader();
        $body = $this->renderTableHunks($changes);

        return '<table class="' . $classAttribute . '">' . $header . $body . '</table>';
    }

    /**
     * Render the table header, or nothing if the header is disabled.
     */
    protected function renderTableHeader(): string
    {
        if (!$this->options['showHeader']) {
            return '';
        }

        // each side occupies two columns when the line numbers are shown
        $colspan = '';
        if ($this->options['lineNumbers']) {
            $colspan = ' colspan="2"';
        }

        $oldHeading = '<th' . $colspan . '>' . $this->_('old_version') . '</th>';
        $newHeading = '<th' . $colspan . '>' . $this->_('new_version') . '</th>';

        return '<thead>' . '<tr>' . $oldHeading . $newHeading . '</tr>' . '</thead>';
    }

    /**
     * Render the separator which is put between two hunks.
     */
    protected function renderTableSeparateBlock(): string
    {
        // the separator spans all columns of the table
        $columnCount = '2';
        if ($this->options['lineNumbers']) {
            $columnCount = '4';
        }

        $separatorRow = '<tr>' . '<td colspan="' . $columnCount . '"></td>' . '</tr>';

        return '<tbody class="skipped">' . $separatorRow . '</tbody>';
    }

    /**
     * Render all the hunks of the table.
     *
     * @param array[][] $hunks each hunk has many blocks
     */
    protected function renderTableHunks(array $hunks): string
    {
        $html = '';

        foreach ($hunks as $i => $hunk) {
            // a separator goes before every hunk but the first one
            $needsSeparator = $i > 0 && $this->options['separateBlock'];
            if ($needsSeparator) {
                $html .= $this->renderTableSeparateBlock();
            }

            foreach ($hunk as $block) {
                $html .= $this->renderTableBlock($block);
            }
        }

        return $html;
    }

    /**
     * Render one block as a <tbody> according to its tag.
     *
     * @param array $block the block
     */
    protected function renderTableBlock(array $block): string
    {
        $tag = $block['tag'];

        // a tag that matches none of the known opcodes still gets a <tbody>,
        // it just has no rows in it
        $rows = '';

        switch ($tag) {
            case SequenceMatcher::OP_REP:
                $rows = $this->renderTableBlockReplace($block);
                break;
            case SequenceMatcher::OP_DEL:
                $rows = $this->renderTableBlockDelete($block);
                break;
            case SequenceMatcher::OP_INS:
                $rows = $this->renderTableBlockInsert($block);
                break;
            case SequenceMatcher::OP_EQ:
                $rows = $this->renderTableBlockEqual($block);
                break;
        }

        $cssClass = 'change change-' . self::TAG_CLASS_MAP[$tag];

        return '<tbody class="' . $cssClass . '">' . $rows . '</tbody>';
    }

    /**
     * Render the rows of an "equal" block: both sides are shown in every row.
     *
     * @param array $block the block
     */
    protected function renderTableBlockEqual(array $block): string
    {
        $html = '';

        // the number of rows is taken from the new side
        $totalRows = \count($block['new']['lines']);

        for ($row = 0; $row < $totalRows; ++$row) {
            // offsets are 0-based while the displayed line numbers are 1-based
            $html .= $this->renderTableRow(
                $block['old']['lines'][$row],
                $block['new']['lines'][$row],
                $block['old']['offset'] + $row + 1,
                $block['new']['offset'] + $row + 1,
            );
        }

        return $html;
    }

    /**
     * Render the rows of an "insert" block: the old side is left empty.
     *
     * @param array $block the block
     */
    protected function renderTableBlockInsert(array $block): string
    {
        $rows = [];

        foreach ($block['new']['lines'] as $no => $newLine) {
            $newLineNum = $block['new']['offset'] + $no + 1;

            $rows[] = $this->renderTableRow(null, $newLine, null, $newLineNum);
        }

        return implode('', $rows);
    }

    /**
     * Render the rows of a "delete" block: the new side is left empty.
     *
     * @param array $block the block
     */
    protected function renderTableBlockDelete(array $block): string
    {
        $rows = [];

        foreach ($block['old']['lines'] as $no => $oldLine) {
            $oldLineNum = $block['old']['offset'] + $no + 1;

            $rows[] = $this->renderTableRow($oldLine, null, $oldLineNum, null);
        }

        return implode('', $rows);
    }

    /**
     * Render the rows of a "replace" block.
     *
     * The old and the new lines are paired row by row. When one side has
     * less lines than the other, its remaining cells are left empty.
     *
     * @param array $block the block
     */
    protected function renderTableBlockReplace(array $block): string
    {
        $html = '';
        $totalRows = max(\count($block['old']['lines']), \count($block['new']['lines']));

        for ($row = 0; $row < $totalRows; ++$row) {
            // a side which has run out of lines yields null for both
            // its content and its line number
            $oldLine = $block['old']['lines'][$row] ?? null;
            $oldLineNum = $oldLine === null ? null : $block['old']['offset'] + $row + 1;

            $newLine = $block['new']['lines'][$row] ?? null;
            $newLineNum = $newLine === null ? null : $block['new']['offset'] + $row + 1;

            $html .= $this->renderTableRow($oldLine, $newLine, $oldLineNum, $newLineNum);
        }

        return $html;
    }

    /**
     * Render a content row of the output table.
     *
     * @param null|string $oldLine    the old line
     * @param null|string $newLine    the new line
     * @param null|int    $oldLineNum the old line number
     * @param null|int    $newLineNum the new line number
     */
    protected function renderTableRow(
        ?string $oldLine,
        ?string $newLine,
        ?int $oldLineNum,
        ?int $newLineNum
    ): string {
        $withLineNumbers = $this->options['lineNumbers'];

        // cells of a row: [old number] old content [new number] new content
        $oldNumberCell = $withLineNumbers ? $this->renderLineNumberColumn('old', $oldLineNum) : '';
        $oldContentCell = $this->renderLineContentColumn('old', $oldLine);
        $newNumberCell = $withLineNumbers ? $this->renderLineNumberColumn('new', $newLineNum) : '';
        $newContentCell = $this->renderLineContentColumn('new', $newLine);

        return '<tr>' . $oldNumberCell . $oldContentCell . $newNumberCell . $newContentCell . '</tr>';
    }

    /**
     * Render the line number column.
     *
     * @param string   $type    the diff type
     * @param null|int $lineNum the line number
     */
    protected function renderLineNumberColumn(string $type, ?int $lineNum): string
    {
        // a missing (null) line number is rendered as an empty header cell
        if ($lineNum === null) {
            return '<th></th>';
        }

        return '<th class="n-' . $type . '">' . $lineNum . '</th>';
    }

    /**
     * Render the line content column.
     *
     * @param string      $type    the diff type
     * @param null|string $content the line content
     */
    protected function renderLineContentColumn(string $type, ?string $content): string
    {
        // a cell without any content gets the extra "none" class
        $extraClass = $content === null ? ' none' : '';

        return '<td class="' . $type . $extraClass . '">' . $content . '</td>';
    }
}

View File

@ -0,0 +1,116 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer;
final class RendererConstant
{
/**
* The base namespace of renderers (the namespace this class lives in).
*
* @var string
*/
public const RENDERER_NAMESPACE = __NAMESPACE__;
/**
* Available renderer types.
*
* @var string[]
*/
public const RENDERER_TYPES = ['Html', 'Text'];
/**
* The [opening, closing] marker pair that is used to enclose
* different parts in a string.
*
* The markers are arbitrary code points from Unicode plane 15
* and hopefully, they won't appear in source texts.
*
* @var string[]
*/
public const HTML_CLOSURES = ["\u{fcffc}\u{ff2fb}", "\u{fff41}\u{fcffc}"];
/**
* The [opening, closing] tag pair that is used to enclose
* deleted chars in output HTML.
*
* @var string[]
*/
public const HTML_CLOSURES_DEL = ['<del>', '</del>'];
/**
* The [opening, closing] tag pair that is used to enclose
* inserted chars in output HTML.
*
* @var string[]
*/
public const HTML_CLOSURES_INS = ['<ins>', '</ins>'];
/**
* The delimiter to be used as the glue in string/array functions.
*
* It consists of arbitrary code points from Unicode plane 15
* and hopefully, it won't appear in source texts.
*
* @var string
*/
public const IMPLODE_DELIMITER = "\u{ff2fa}\u{fcffc}\u{fff42}";
/**
* Regex range for punctuations.
*
* This is the content of a regex character class (without the enclosing
* square brackets), presuming the regex delimiter is "/".
*
* @var string
*/
public const PUNCTUATIONS_RANGE = (
// Latin-1 Supplement (only up to U+00BB)
// @see https://unicode-table.com/en/blocks/latin-1-supplement/
"\u{0080}-\u{00BB}" .
// Spacing Modifier Letters
// @see https://unicode-table.com/en/blocks/spacing-modifier-letters/
"\u{02B0}-\u{02FF}" .
// Combining Diacritical Marks
// @see https://unicode-table.com/en/blocks/combining-diacritical-marks/
"\u{0300}-\u{036F}" .
// Small Form Variants
// @see https://unicode-table.com/en/blocks/small-form-variants/
"\u{FE50}-\u{FE6F}" .
// General Punctuation
// @see https://unicode-table.com/en/blocks/general-punctuation/
"\u{2000}-\u{206F}" .
// Supplemental Punctuation
// @see https://unicode-table.com/en/blocks/supplemental-punctuation/
"\u{2E00}-\u{2E7F}" .
// CJK Symbols and Punctuation
// @see https://unicode-table.com/en/blocks/cjk-symbols-and-punctuation/
"\u{3000}-\u{303F}" .
// Ideographic Symbols and Punctuation
// @see https://unicode-table.com/en/blocks/ideographic-symbols-and-punctuation/
"\u{16FE0}-\u{16FFF}" .
// miscellaneous characters which do not follow a single rule:
// whitespaces and punctuations, escaped for a regex character class
" \t\r\n$,.:;!?'\"()\\[\\]{}%@<=>_+\\-*\\/~\\\\|" .
' _' .
'「」『』〈〉《》【】()()‘’“”' .
'.‧・・•·¿'
);
/**
* Colorize the CLI output if possible.
*
* @var int
*/
public const CLI_COLOR_AUTO = -1;
/**
* Force not to colorize the CLI output.
*
* @var int
*/
public const CLI_COLOR_DISABLE = 0;
/**
* Force to colorize the CLI output if possible.
*
* @var int
*/
public const CLI_COLOR_ENABLE = 1;
}

View File

@ -0,0 +1,35 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\Exception\UnsupportedFunctionException;
/**
* Renderer Interface.
*/
interface RendererInterface
{
/**
* Get the renderer result when the old and the new are the same.
*
* @return string the result for two identical inputs
*/
public function getResultForIdenticals(): string;
/**
* Render the differ and return the result.
*
* @param Differ $differ the Differ object to be rendered
*
* @return string the rendered result
*/
public function render(Differ $differ): string;
/**
* Render the differ array and return the result.
*
* @param array[][] $differArray the Differ array to be rendered
*
* @return string the rendered result
*
* @throws UnsupportedFunctionException if the renderer does not support this method
*/
public function renderArray(array $differArray): string;
}

View File

@ -0,0 +1,137 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Text;
use Jfcherng\Diff\Exception\UnsupportedFunctionException;
use Jfcherng\Diff\Renderer\AbstractRenderer;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Utility\CliColor;
/**
* Base renderer for rendering text-based diffs.
*/
abstract class AbstractText extends AbstractRenderer
{
    /**
     * @var bool is this renderer pure text?
     */
    public const IS_TEXT_RENDERER = true;

    /**
     * @var string the diff output representing there is no EOL at EOF in the GNU diff tool
     */
    public const GNU_OUTPUT_NO_EOL_AT_EOF = '\ No newline at end of file';

    /**
     * @var bool controls whether cliColoredString() is enabled or not
     */
    protected $isCliColorEnabled = false;

    /**
     * Set the options and decide whether the CLI output gets colorized.
     *
     * @param array $options the options
     *
     * @return static
     */
    public function setOptions(array $options): AbstractRenderer
    {
        parent::setOptions($options);

        // determine $this->isCliColorEnabled
        if ($this->options['cliColorization'] === RendererConstant::CLI_COLOR_ENABLE) {
            $this->isCliColorEnabled = true;
        } elseif ($this->options['cliColorization'] === RendererConstant::CLI_COLOR_DISABLE) {
            $this->isCliColorEnabled = false;
        } else {
            // any other value: colorize only under CLI and only
            // if the standard output is detected to support colors
            $this->isCliColorEnabled = \PHP_SAPI === 'cli' && $this->hasColorSupport(\STDOUT);
        }

        return $this;
    }

    /**
     * The default result for two identical inputs: an empty string.
     */
    public function getResultForIdenticalsDefault(): string
    {
        return '';
    }

    /**
     * Rendering from a differ array is not supported by this base class.
     *
     * @param array[][] $differArray the Differ array to be rendered
     *
     * @throws UnsupportedFunctionException always
     */
    protected function renderArrayWorker(array $differArray): string
    {
        throw new UnsupportedFunctionException(__METHOD__);
    }

    /**
     * Colorize the string for CLI output.
     *
     * The string is returned unchanged when the colorization is disabled
     * or when there is no style defined for the symbol.
     *
     * @param string      $str    the string
     * @param null|string $symbol the symbol
     *
     * @return string the (maybe) colorized string
     */
    protected function cliColoredString(string $str, ?string $symbol): string
    {
        static $symbolToStyles = [
            '@' => ['f_purple', 'bold'], // header
            '-' => ['f_red', 'bold'], // deleted
            '+' => ['f_green', 'bold'], // inserted
            '!' => ['f_yellow', 'bold'], // replaced
        ];

        $styles = $symbolToStyles[$symbol] ?? [];

        if (!$this->isCliColorEnabled || empty($styles)) {
            return $str;
        }

        return CliColor::color($str, $styles);
    }

    /**
     * Returns true if the stream supports colorization.
     *
     * Colorization is disabled if not supported by the stream:
     *
     * This is tricky on Windows, because Cygwin, Msys2 etc emulate pseudo
     * terminals via named pipes, so we can only check the environment.
     *
     * Reference: Composer\XdebugHandler\Process::supportsColor
     * https://github.com/composer/xdebug-handler
     *
     * @see https://github.com/symfony/console/blob/647c51ff073300a432a4a504e29323cf0d5e0571/Output/StreamOutput.php#L81-L124
     *
     * @param resource $stream
     *
     * @return bool true if the stream supports colorization, false otherwise
     *
     * @suppress PhanUndeclaredFunction
     */
    protected function hasColorSupport($stream): bool
    {
        // Follow https://no-color.org/
        // NO_COLOR only takes effect when it is present AND not an empty
        // string, its actual value does not matter
        $noColor = $_SERVER['NO_COLOR'] ?? getenv('NO_COLOR');
        if (\is_string($noColor) && $noColor !== '') {
            return false;
        }

        if ('Hyper' === getenv('TERM_PROGRAM')) {
            return true;
        }

        // on Windows, rely on the VT100 support of the stream
        // or on the hints from the environment variables
        if (\DIRECTORY_SEPARATOR === '\\') {
            return (\function_exists('sapi_windows_vt100_support')
                && @sapi_windows_vt100_support($stream))
                || false !== getenv('ANSICON')
                || 'ON' === getenv('ConEmuANSI')
                || 'xterm' === getenv('TERM');
        }

        // elsewhere, colors are supported if the stream is a TTY
        if (\function_exists('stream_isatty')) {
            return @stream_isatty($stream);
        }

        if (\function_exists('posix_isatty')) {
            return @posix_isatty($stream);
        }

        $stat = @fstat($stream);

        // Check if formatted mode is S_IFCHR
        return $stat ? 0020000 === ($stat['mode'] & 0170000) : false;
    }
}

View File

@ -0,0 +1,160 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Text;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\SequenceMatcher;
/**
* Context diff generator.
*
* @see https://en.wikipedia.org/wiki/Diff#Context_format
*/
final class Context extends AbstractText
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'Context',
        'type' => 'Text',
    ];

    /**
     * @var int the union of OPs that indicate there is a change
     */
    public const OP_BLOCK_CHANGED =
        SequenceMatcher::OP_DEL |
        SequenceMatcher::OP_INS |
        SequenceMatcher::OP_REP;

    /**
     * Render every hunk as: separator, old header, old lines, new header, new lines.
     *
     * @param Differ $differ the differ object
     */
    protected function renderWorker(Differ $differ): string
    {
        $output = '';

        foreach ($differ->getGroupedOpcodesGnu() as $hunk) {
            $firstBlock = $hunk[0];
            $lastBlock = $hunk[\count($hunk) - 1];

            // a block is [op, i1, i2, j1, j2], the hunk ranges from the begin
            // of its first block to the end of its last block
            // note that these line number variables are 0-based
            $oldBegin = $firstBlock[1];
            $oldEnd = $lastBlock[2];
            $newBegin = $firstBlock[3];
            $newEnd = $lastBlock[4];

            $output .= implode('', [
                $this->cliColoredString("***************\n", '@'),
                $this->renderHunkHeader('*', $oldBegin, $oldEnd),
                $this->renderHunkOld($differ, $hunk),
                $this->renderHunkHeader('-', $newBegin, $newEnd),
                $this->renderHunkNew($differ, $hunk),
            ]);
        }

        return $output;
    }

    /**
     * Render the hunk header.
     *
     * @param string $symbol the symbol
     * @param int    $a1     the begin index
     * @param int    $a2     the end index
     */
    protected function renderHunkHeader(string $symbol, int $a1, int $a2): string
    {
        $beginLineNum = $a1 + 1; // 1-based begin line number

        // "begin,end" is only used when the range has more than one line,
        // otherwise the end index alone is shown
        $range = $beginLineNum < $a2 ? "{$beginLineNum},{$a2}" : (string) $a2;

        $marker = str_repeat($symbol, 3);

        // the right marker is one symbol longer than the left one
        return $this->cliColoredString("{$marker} {$range} {$marker}{$symbol}\n", '@');
    }

    /**
     * Render the old hunk.
     *
     * @param Differ  $differ the differ object
     * @param int[][] $hunk   the hunk
     */
    protected function renderHunkOld(Differ $differ, array $hunk): string
    {
        $output = '';
        $seenOps = 0;
        $noEolAtEofIdx = $differ->getOldNoEolAtEofIdx();

        foreach ($hunk as [$op, $i1, $i2, $j1, $j2]) {
            // OP_INS does not belong to an old hunk
            if ($op !== SequenceMatcher::OP_INS) {
                $seenOps |= $op;

                $output .= $this->renderContext(
                    self::SYMBOL_MAP[$op],
                    $differ->getOld($i1, $i2),
                    $i2 === $noEolAtEofIdx,
                );
            }
        }

        // if there is no content changed, the hunk context should be omitted
        return ($seenOps & self::OP_BLOCK_CHANGED) === 0 ? '' : $output;
    }

    /**
     * Render the new hunk.
     *
     * @param Differ  $differ the differ object
     * @param int[][] $hunk   the hunk
     */
    protected function renderHunkNew(Differ $differ, array $hunk): string
    {
        $output = '';
        $seenOps = 0;
        $noEolAtEofIdx = $differ->getNewNoEolAtEofIdx();

        foreach ($hunk as [$op, $i1, $i2, $j1, $j2]) {
            // OP_DEL does not belong to a new hunk
            if ($op !== SequenceMatcher::OP_DEL) {
                $seenOps |= $op;

                $output .= $this->renderContext(
                    self::SYMBOL_MAP[$op],
                    $differ->getNew($j1, $j2),
                    $j2 === $noEolAtEofIdx,
                );
            }
        }

        // if there is no content changed, the hunk context should be omitted
        return ($seenOps & self::OP_BLOCK_CHANGED) === 0 ? '' : $output;
    }

    /**
     * Render the context array with the symbol.
     *
     * Every line is prefixed with the symbol and a space.
     *
     * @param string   $symbol     the symbol
     * @param string[] $context    the context
     * @param bool     $noEolAtEof there is no EOL at EOF in this block
     */
    protected function renderContext(string $symbol, array $context, bool $noEolAtEof = false): string
    {
        if ($context === []) {
            return '';
        }

        $prefix = $symbol . ' ';
        $lines = $prefix . implode("\n" . $prefix, $context) . "\n";
        $output = $this->cliColoredString($lines, $symbol);

        // the "no EOL" note is appended after the colorization
        if ($noEolAtEof) {
            $output .= self::GNU_OUTPUT_NO_EOL_AT_EOF . "\n";
        }

        return $output;
    }
}

View File

@ -0,0 +1,78 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Text;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\SequenceMatcher;
/**
* Plain text Json diff generator.
*/
final class JsonText extends AbstractText
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'Text JSON',
        'type' => 'Text',
    ];

    /**
     * Render the grouped opcodes of the differ as a JSON string.
     *
     * @param Differ $differ the differ object
     */
    protected function renderWorker(Differ $differ): string
    {
        $hunks = [];

        foreach ($differ->getGroupedOpcodes() as $hunk) {
            $hunks[] = $this->renderHunk($differ, $hunk);
        }

        $wantsStringTags = $this->options['outputTagAsString'];
        if ($wantsStringTags) {
            $this->convertTagToString($hunks);
        }

        $encodeFlags = $this->options['jsonEncodeFlags'];

        return json_encode($hunks, $encodeFlags);
    }

    /**
     * Render the hunk as a list of blocks.
     *
     * Each block holds its tag plus the offset and the lines of both sides.
     *
     * @param Differ  $differ the differ object
     * @param int[][] $hunk   the hunk
     */
    protected function renderHunk(Differ $differ, array $hunk): array
    {
        $blocks = [];

        foreach ($hunk as [$op, $i1, $i2, $j1, $j2]) {
            $oldSide = [
                'offset' => $i1,
                'lines' => $differ->getOld($i1, $i2),
            ];
            $newSide = [
                'offset' => $j1,
                'lines' => $differ->getNew($j1, $j2),
            ];

            $blocks[] = [
                'tag' => $op,
                'old' => $oldSide,
                'new' => $newSide,
            ];
        }

        return $blocks;
    }

    /**
     * Replace the integer tag of every block with its string form
     * for better readability. The given array is modified in place.
     *
     * @param array[][] $changes the changes
     */
    protected function convertTagToString(array &$changes): void
    {
        foreach ($changes as $hunkIdx => $hunk) {
            foreach ($hunk as $blockIdx => $block) {
                $changes[$hunkIdx][$blockIdx]['tag'] = SequenceMatcher::opIntToStr($block['tag']);
            }
        }
    }
}

View File

@ -0,0 +1,144 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Diff\Renderer\Text;
use Jfcherng\Diff\Differ;
use Jfcherng\Diff\SequenceMatcher;
/**
* Unified diff generator.
*
* @see https://en.wikipedia.org/wiki/Diff#Unified_format
*/
final class Unified extends AbstractText
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'Unified',
        'type' => 'Text',
    ];

    /**
     * Render every hunk as its header followed by its blocks.
     *
     * @param Differ $differ the differ
     */
    protected function renderWorker(Differ $differ): string
    {
        $output = '';

        foreach ($differ->getGroupedOpcodesGnu() as $hunk) {
            $header = $this->renderHunkHeader($differ, $hunk);
            $blocks = $this->renderHunkBlocks($differ, $hunk);

            $output .= $header . $blocks;
        }

        return $output;
    }

    /**
     * Render the hunk header.
     *
     * @param Differ  $differ the differ
     * @param int[][] $hunk   the hunk
     */
    protected function renderHunkHeader(Differ $differ, array $hunk): string
    {
        $firstBlock = $hunk[0];
        $lastBlock = $hunk[\count($hunk) - 1];

        // a block is [op, i1, i2, j1, j2], the hunk ranges from the begin
        // of its first block to the end of its last block
        // note that these line number variables are 0-based
        $oldBegin = $firstBlock[1];
        $oldEnd = $lastBlock[2];
        $newBegin = $firstBlock[3];
        $newEnd = $lastBlock[4];

        $oldLinesCount = $oldEnd - $oldBegin;
        $newLinesCount = $newEnd - $newBegin;

        // the line number in GNU diff is 1-based, so we add 1
        // a special case is when the range is empty, which happens when
        // a hunk has only changed blocks, i.e., context is set to 0:
        // we do not need the adding then
        $oldStart = $oldBegin === $oldEnd ? $oldBegin : $oldBegin + 1;
        $newStart = $newBegin === $newEnd ? $newBegin : $newBegin + 1;

        // if the line counts is 1, it can (and mostly) be omitted
        $oldRange = '-' . $oldStart . ($oldLinesCount === 1 ? '' : ",{$oldLinesCount}");
        $newRange = '+' . $newStart . ($newLinesCount === 1 ? '' : ",{$newLinesCount}");

        return $this->cliColoredString("@@ {$oldRange} {$newRange} @@\n", '@');
    }

    /**
     * Render the hunk content.
     *
     * @param Differ  $differ the differ
     * @param int[][] $hunk   the hunk
     */
    protected function renderHunkBlocks(Differ $differ, array $hunk): string
    {
        // operations that touch the old side / the new side
        $oldSideOps = SequenceMatcher::OP_REP | SequenceMatcher::OP_DEL;
        $newSideOps = SequenceMatcher::OP_REP | SequenceMatcher::OP_INS;

        $oldNoEolAtEofIdx = $differ->getOldNoEolAtEofIdx();
        $newNoEolAtEofIdx = $differ->getNewNoEolAtEofIdx();

        $output = '';

        foreach ($hunk as [$op, $i1, $i2, $j1, $j2]) {
            if ($op === SequenceMatcher::OP_EQ) {
                // note that although we are in a OP_EQ situation,
                // the old and the new may not be exactly the same
                // because of ignoreCase, ignoreWhitespace, etc
                // we could only pick either the old or the new to show
                // note that the GNU diff will use the old one because it creates a patch
                $output .= $this->renderContext(
                    ' ',
                    $differ->getOld($i1, $i2),
                    $i2 === $oldNoEolAtEofIdx,
                );
            } else {
                // the removed lines always go before the added lines
                if (($op & $oldSideOps) !== 0) {
                    $output .= $this->renderContext(
                        '-',
                        $differ->getOld($i1, $i2),
                        $i2 === $oldNoEolAtEofIdx,
                    );
                }

                if (($op & $newSideOps) !== 0) {
                    $output .= $this->renderContext(
                        '+',
                        $differ->getNew($j1, $j2),
                        $j2 === $newNoEolAtEofIdx,
                    );
                }
            }
        }

        return $output;
    }

    /**
     * Render the context array with the symbol.
     *
     * Every line is prefixed with the symbol (without any space in between).
     *
     * @param string   $symbol     the symbol
     * @param string[] $context    the context
     * @param bool     $noEolAtEof there is no EOL at EOF in this block
     */
    protected function renderContext(string $symbol, array $context, bool $noEolAtEof = false): string
    {
        if ($context === []) {
            return '';
        }

        $lines = '';
        foreach ($context as $line) {
            $lines .= $symbol . $line . "\n";
        }

        $output = $this->cliColoredString($lines, $symbol);

        // the "no EOL" note is appended after the colorization
        if ($noEolAtEof) {
            $output .= self::GNU_OUTPUT_NO_EOL_AT_EOF . "\n";
        }

        return $output;
    }
}