primo commit

2024-12-17 17:34:10 +01:00
commit e650f8df99
16435 changed files with 2451012 additions and 0 deletions
--- a/libraries/vendor/algo26-matthias/idna-convert/src/TranscodeUnicode/TranscodeUnicode.php
+++ b/libraries/vendor/algo26-matthias/idna-convert/src/TranscodeUnicode/TranscodeUnicode.php
@@ -0,0 +1,437 @@
+<?php
+/**
+ * Converts between various flavours of Unicode representations like UCS-4 or UTF-8
+ * Supported schemes:
+ * - UCS-4 Little Endian / Big Endian / Array (partially)
+ * - UTF-16 Little Endian / Big Endian (not yet)
+ * - UTF-8
+ * - UTF-7
+ * - UTF-7 IMAP (modified UTF-7)
+ *
+ * @package IdnaConvert
+ * @author Matthias Sommerfeld  <matthias.sommerfeld@algo26.de>
+ * @copyright 2003-2019 algo26 Beratungs GmbH, Berlin, https://www.algo26.de
+ */
+
+namespace Algo26\IdnaConvert\TranscodeUnicode;
+
+use Algo26\IdnaConvert\Exception\InvalidCharacterException;
+use InvalidArgumentException;
+
+/**
+ * Converts text between UTF-8, UTF-7, IMAP modified UTF-7, a big-endian UCS-4
+ * byte string and a plain array of code points.
+ *
+ * Every conversion is routed through the code point array: the input is first
+ * decoded into an array of integers and that array is then encoded into the
+ * requested output format.
+ */
+class TranscodeUnicode implements TranscodeUnicodeInterface
+{
+    public const FORMAT_UCS4       = 'ucs4';
+    public const FORMAT_UCS4_ARRAY = 'ucs4array';
+    public const FORMAT_UTF8       = 'utf8';
+    public const FORMAT_UTF7       = 'utf7';
+    public const FORMAT_UTF7_IMAP  = 'utf7imap';
+
+    private const encodings = [
+        self::FORMAT_UCS4,
+        self::FORMAT_UCS4_ARRAY,
+        self::FORMAT_UTF8,
+        self::FORMAT_UTF7,
+        self::FORMAT_UTF7_IMAP
+    ];
+
+    /** Replacement code point used when the caller does not supply one (U+FFFC). */
+    private const DEFAULT_SAFE_CODEPOINT = 0xFFFC;
+
+    /** Standard base64 alphabet; IMAP's "," is mapped to "/" before lookup. */
+    private const BASE64_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
+
+    /** @var bool When true, malformed input is replaced instead of raising an exception */
+    private $safeMode = false;
+
+    /** @var int Code point substituted for malformed input in safe mode */
+    private $safeCodepoint = self::DEFAULT_SAFE_CODEPOINT;
+
+    /**
+     * Convert $data from one representation to another.
+     *
+     * Format names are matched case-insensitively. When both formats are equal
+     * the data is returned untouched (and unvalidated).
+     *
+     * @param string|array $data          Input in the format named by $fromEncoding
+     * @param string       $fromEncoding  One of the FORMAT_* constants
+     * @param string       $toEncoding    One of the FORMAT_* constants
+     * @param bool         $safeMode      Replace malformed input instead of throwing
+     * @param int|null     $safeCodepoint Replacement code point; null selects U+FFFC
+     *
+     * @return string|array The converted data
+     * @throws InvalidArgumentException  If a format name is unknown (codes 300/301)
+     * @throws InvalidCharacterException If the input is malformed and safe mode is off
+     */
+    public function convert(
+        $data,
+        string $fromEncoding,
+        string $toEncoding,
+        bool $safeMode = false,
+        ?int $safeCodepoint = null
+    ) {
+        $this->safeMode = $safeMode;
+        // Reset on every call so a custom replacement from an earlier call cannot leak
+        // into a call that relies on the default.
+        $this->safeCodepoint = $safeCodepoint ?? self::DEFAULT_SAFE_CODEPOINT;
+
+        $fromEncoding = strtolower($fromEncoding);
+        $toEncoding   = strtolower($toEncoding);
+
+        if ($fromEncoding === $toEncoding) {
+            return $data;
+        }
+
+        if (!in_array($fromEncoding, self::encodings, true)) {
+            throw new InvalidArgumentException(sprintf('Invalid input format %s', $fromEncoding), 300);
+        }
+        if (!in_array($toEncoding, self::encodings, true)) {
+            throw new InvalidArgumentException(sprintf('Invalid output format %s', $toEncoding), 301);
+        }
+
+        // Converter methods are named "<from>_<to>"; the code point array is the pivot format.
+        if ($fromEncoding !== self::FORMAT_UCS4_ARRAY) {
+            $methodName = sprintf('%s_%s', $fromEncoding, self::FORMAT_UCS4_ARRAY);
+            $data = $this->$methodName($data);
+        }
+        if ($toEncoding !== self::FORMAT_UCS4_ARRAY) {
+            $methodName = sprintf('%s_%s', self::FORMAT_UCS4_ARRAY, $toEncoding);
+            $data = $this->$methodName($data);
+        }
+
+        return $data;
+    }
+
+    /**
+     * Decode a UTF-8 string into an array of code points.
+     *
+     * The decoder is a small state machine: in mode "next" it expects an ASCII
+     * byte or a start byte, in mode "add" it expects continuation bytes for the
+     * code point stored last in the output, and in mode "no" (entered after a
+     * sequence that is cut off by the end of the input) it ignores every
+     * remaining non-ASCII byte.
+     *
+     * @param string $input The UTF-8 string to convert
+     *
+     * @return array Array of integer code points
+     * @throws InvalidCharacterException On malformed input when safe mode is off
+     */
+    private function utf8_ucs4array($input): array
+    {
+        $startByte = 0;
+        $nextByte = 0;
+
+        $output = [];
+        $outputLength = 0;
+        $inputLength = $this->byteLength($input);
+        $mode = 'next';
+        $test = 'none';
+        for ($k = 0; $k < $inputLength; ++$k) {
+            $v = ord($input[$k]); // Extract byte from input string
+
+            if ($v < 128) { // ASCII is copied as is
+                $output[$outputLength] = $v;
+                ++$outputLength;
+                if ('add' === $mode) {
+                    // An ASCII byte interrupted a multi-byte sequence
+                    if (!$this->safeMode) {
+                        throw new InvalidCharacterException(
+                            sprintf(
+                                'Conversion from UTF-8 to UCS-4 failed: malformed input at byte %d',
+                                $k
+                            ),
+                            302
+                        );
+                    }
+                    // The unfinished code point sits right before the ASCII value just stored
+                    $output[$outputLength - 2] = $this->safeCodepoint;
+                    $mode = 'next';
+                }
+
+                continue;
+            }
+
+            if ('next' === $mode) { // Expect a start byte; determine the width of the sequence
+                $startByte = $v;
+                $mode = 'add';
+                $test = 'range';
+                if ($v >> 5 === 6) { // 110xxxxx 10xxxxxx
+                    $nextByte = 0; // How many times subsequent payloads must be shifted left by 6 bits
+                    $v = ($v - 192) << 6;
+                } elseif ($v >> 4 === 14) { // 1110xxxx 10xxxxxx 10xxxxxx
+                    $nextByte = 1;
+                    $v = ($v - 224) << 12;
+                } elseif ($v >> 3 === 30) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                    $nextByte = 2;
+                    $v = ($v - 240) << 18;
+                } elseif ($this->safeMode) {
+                    $mode = 'next';
+                    $output[$outputLength] = $this->safeCodepoint;
+                    ++$outputLength;
+
+                    continue;
+                } else {
+                    throw new InvalidCharacterException(
+                        sprintf('This might be UTF-8, but I don\'t understand it at byte %d', $k),
+                        303
+                    );
+                }
+                if (($inputLength - $k - $nextByte) < 2) {
+                    // The input ends before the sequence is complete. The replacement is
+                    // stored regardless of safe mode (long-standing behaviour); the slot is
+                    // now consumed so that a following ASCII byte cannot overwrite it.
+                    $output[$outputLength] = $this->safeCodepoint;
+                    ++$outputLength;
+                    $mode = 'no';
+
+                    continue;
+                }
+
+                $output[$outputLength] = (int)$v;
+                ++$outputLength;
+
+                continue;
+            }
+            if ('add' === $mode) {
+                if (!$this->safeMode && $test === 'range') {
+                    // First continuation byte: reject overlong forms and values past U+10FFFF
+                    $test = 'none';
+                    if (($v < 0xA0 && $startByte === 0xE0)
+                        || ($v < 0x90 && $startByte === 0xF0)
+                        || ($v > 0x8F && $startByte === 0xF4)
+                    ) {
+                        throw new InvalidCharacterException(
+                            sprintf('Bogus UTF-8 character (out of legal range) at byte %d', $k),
+                            304
+                        );
+                    }
+                }
+                if ($v >> 6 === 2) { // Bit mask must be 10xxxxxx
+                    $output[$outputLength - 1] += ($v - 128) << ($nextByte * 6);
+                    --$nextByte;
+                } elseif ($this->safeMode) {
+                    // Replace the unfinished code point and re-read this byte as a start byte
+                    $output[$outputLength - 1] = $this->safeCodepoint;
+                    $k--;
+                    $mode = 'next';
+
+                    continue;
+                } else {
+                    throw new InvalidCharacterException(
+                        sprintf('Conversion from UTF-8 to UCS-4 failed: malformed input at byte %d', $k),
+                        302
+                    );
+                }
+                if ($nextByte < 0) {
+                    $mode = 'next';
+                }
+            }
+        } // for
+
+        return $output;
+    }
+
+    /**
+     * Encode an array of code points as a UTF-8 string.
+     *
+     * Values that are negative or need more than 21 bits cannot be encoded;
+     * they raise an exception, or are replaced by the safe code point when
+     * safe mode is on.
+     *
+     * @param array $input Array of integer code points
+     *
+     * @return string
+     * @throws InvalidCharacterException
+     */
+    private function ucs4array_utf8($input): string
+    {
+        $output = '';
+        foreach ($input as $k => $v) {
+            $encoded = $this->encodeUtf8Codepoint($v);
+            if (null === $encoded) {
+                if (!$this->safeMode) {
+                    throw new InvalidCharacterException(
+                        sprintf('Conversion from UCS-4 to UTF-8 failed: malformed input at byte %d', $k),
+                        305
+                    );
+                }
+                // Emit the replacement as UTF-8 bytes; fall back to U+FFFC if the
+                // configured replacement is itself not encodable.
+                $encoded = $this->encodeUtf8Codepoint($this->safeCodepoint) ?? "\xEF\xBF\xBC";
+            }
+            $output .= $encoded;
+        }
+
+        return $output;
+    }
+
+    /**
+     * Encode a single code point as UTF-8.
+     *
+     * @param int $codepoint
+     *
+     * @return string|null The UTF-8 bytes, or null if the value is negative or wider than 21 bits
+     */
+    private function encodeUtf8Codepoint($codepoint): ?string
+    {
+        if ($codepoint < 0) {
+            return null;
+        }
+        if ($codepoint < 128) { // 7bit are transferred literally
+            return chr($codepoint);
+        }
+        if ($codepoint < (1 << 11)) { // 2 bytes
+            return chr(192 + ($codepoint >> 6))
+                . chr(128 + ($codepoint & 63));
+        }
+        if ($codepoint < (1 << 16)) { // 3 bytes
+            return chr(224 + ($codepoint >> 12))
+                . chr(128 + (($codepoint >> 6) & 63))
+                . chr(128 + ($codepoint & 63));
+        }
+        if ($codepoint < (1 << 21)) { // 4 bytes
+            return chr(240 + ($codepoint >> 18))
+                . chr(128 + (($codepoint >> 12) & 63))
+                . chr(128 + (($codepoint >> 6) & 63))
+                . chr(128 + ($codepoint & 63));
+        }
+
+        return null;
+    }
+
+    /**
+     * Decode an IMAP modified UTF-7 string ("&" as shift character, "," in place
+     * of "/" inside base64 runs) into an array of code points.
+     *
+     * @param string $input
+     *
+     * @return array
+     */
+    private function utf7imap_ucs4array($input): array
+    {
+        return $this->utf7_ucs4array($input, '&', true);
+    }
+
+    /**
+     * Decode a UTF-7 string into an array of code points.
+     *
+     * A base64 run starts at the shift character and ends at the first byte
+     * outside the base64 alphabet or at the end of the input. A terminating "-"
+     * is absorbed, any other terminating byte is kept as direct text. The shift
+     * character immediately followed by "-" stands for the shift character
+     * itself. Zero bytes in the input are ignored.
+     *
+     * @param string $input The UTF-7 string
+     * @param string $sc    Shift character ("+" for UTF-7, "&" for IMAP)
+     * @param bool   $imap  Accept "," as the IMAP spelling of "/" inside base64 runs
+     *
+     * @return array
+     */
+    private function utf7_ucs4array($input, $sc = '+', $imap = false): array
+    {
+        $output = [];
+        $inputLength = $this->byteLength($input);
+        $inBase64 = false;
+        $b64 = '';
+
+        for ($k = 0; $k < $inputLength; ++$k) {
+            $c = $input[$k];
+
+            // Ignore zero bytes
+            if ("\0" === $c) {
+                continue;
+            }
+
+            if ($inBase64) {
+                // Only translate "," inside a base64 run; a comma in direct text is a comma
+                $b64Char = ($imap && ',' === $c) ? '/' : $c;
+                if (false !== strpos(self::BASE64_ALPHABET, $b64Char)) {
+                    $b64 .= $b64Char;
+
+                    continue;
+                }
+
+                // Sequence got terminated
+                $inBase64 = false;
+                if ('' === $b64 && '-' === $c) {
+                    // "+-" / "&-" is the escaped shift character
+                    $output[] = ord($sc);
+
+                    continue;
+                }
+                foreach ($this->decodeUtf16Base64($b64) as $codepoint) {
+                    $output[] = $codepoint;
+                }
+                $b64 = '';
+                if ('-' === $c) {
+                    continue;
+                }
+                // Any other terminator is ordinary text and is handled below
+            }
+
+            if ($sc === $c) {
+                $inBase64 = true;
+
+                continue;
+            }
+
+            $output[] = ord($c);
+        }
+
+        // The end of the input also terminates an open base64 run
+        if ($inBase64 && '' !== $b64) {
+            foreach ($this->decodeUtf16Base64($b64) as $codepoint) {
+                $output[] = $codepoint;
+            }
+        }
+
+        return $output;
+    }
+
+    /**
+     * Decode the base64 payload of a UTF-7 run (UTF-16BE code units) into code points.
+     *
+     * Surrogate pairs are combined into one code point; an unpaired surrogate is
+     * passed through unchanged. A dangling odd byte at the end is dropped.
+     *
+     * @param string $b64 Base64 text without the shift character and terminator
+     *
+     * @return array
+     */
+    private function decodeUtf16Base64($b64): array
+    {
+        $bytes = (string) base64_decode($b64);
+        $length = $this->byteLength($bytes);
+        $usable = $length - ($length % 2);
+
+        $codepoints = [];
+        $highSurrogate = null;
+        for ($i = 0; $i < $usable; $i += 2) {
+            $unit = (ord($bytes[$i]) << 8) | ord($bytes[$i + 1]);
+
+            if (null !== $highSurrogate) {
+                if ($unit >= 0xDC00 && $unit <= 0xDFFF) {
+                    $codepoints[] = 0x10000 + (($highSurrogate - 0xD800) << 10) + ($unit - 0xDC00);
+                    $highSurrogate = null;
+
+                    continue;
+                }
+                // High surrogate without a partner: keep the raw unit
+                $codepoints[] = $highSurrogate;
+                $highSurrogate = null;
+            }
+
+            if ($unit >= 0xD800 && $unit <= 0xDBFF) {
+                $highSurrogate = $unit;
+
+                continue;
+            }
+
+            $codepoints[] = $unit;
+        }
+        if (null !== $highSurrogate) {
+            $codepoints[] = $highSurrogate;
+        }
+
+        return $codepoints;
+    }
+
+    /**
+     * Encode an array of code points as IMAP modified UTF-7.
+     *
+     * @param array $input
+     *
+     * @return string
+     * @throws InvalidCharacterException
+     */
+    private function ucs4array_utf7imap($input): string
+    {
+        return $this->ucs4array_utf7($input, '&', true);
+    }
+
+    /**
+     * Encode an array of code points as UTF-7.
+     *
+     * Code points 0x20..0x7E other than the shift character are written
+     * directly; every other run of code points is written as the shift
+     * character, the base64 of its UTF-16BE form without padding, and "-".
+     * A lone shift character is written as the shift character followed by "-".
+     *
+     * @param array  $input Array of integer code points
+     * @param string $sc    Shift character ("+" for UTF-7, "&" for IMAP)
+     * @param bool   $imap  Write "," instead of "/" inside base64 runs
+     *
+     * @return string
+     * @throws InvalidCharacterException If a value has no UTF-16 form and safe mode is off
+     */
+    private function ucs4array_utf7($input, $sc = '+', $imap = false): string
+    {
+        $output = '';
+        $pending = ''; // UTF-16BE bytes waiting to be base64 encoded
+        $shiftCode = ord($sc);
+
+        foreach ($input as $v) {
+            $isDirect = (0x20 <= $v && $v <= 0x7e && $v !== $shiftCode);
+            if (!$isDirect) {
+                $pending .= $this->utf16Bytes($v);
+
+                continue;
+            }
+            if ('' !== $pending) {
+                $output .= $this->encodeUtf7Run($pending, $sc, $imap);
+                $pending = '';
+            }
+            $output .= chr($v);
+        }
+        if ('' !== $pending) {
+            $output .= $this->encodeUtf7Run($pending, $sc, $imap);
+        }
+
+        return $output;
+    }
+
+    /**
+     * Wrap a run of UTF-16BE bytes into a shifted UTF-7 sequence.
+     *
+     * @param string $utf16 UTF-16BE bytes of the run
+     * @param string $sc    Shift character
+     * @param bool   $imap  Write "," instead of "/" in the base64 text
+     *
+     * @return string
+     */
+    private function encodeUtf7Run($utf16, $sc, $imap): string
+    {
+        if ($utf16 === chr(0).$sc) {
+            // A lone shift character has a short escaped form
+            return $sc.'-';
+        }
+
+        $b64 = rtrim(base64_encode($utf16), '=');
+        if ($imap) {
+            // Restricted to the base64 text so that a literal "/" in direct text survives
+            $b64 = strtr($b64, '/', ',');
+        }
+
+        return $sc.$b64.'-';
+    }
+
+    /**
+     * Return the UTF-16BE bytes of one code point, using a surrogate pair above U+FFFF.
+     *
+     * @param int $codepoint
+     *
+     * @return string Two or four bytes
+     * @throws InvalidCharacterException If the value is outside 0..0x10FFFF and safe mode is off
+     */
+    private function utf16Bytes($codepoint): string
+    {
+        if ($codepoint < 0 || $codepoint > 0x10FFFF) {
+            if (!$this->safeMode) {
+                throw new InvalidCharacterException(
+                    sprintf(
+                        'Conversion from UCS-4 to UTF-7 failed: code point %d cannot be represented in UTF-16',
+                        $codepoint
+                    ),
+                    307
+                );
+            }
+            $codepoint = $this->safeCodepoint;
+            if ($codepoint < 0 || $codepoint > 0x10FFFF) {
+                $codepoint = self::DEFAULT_SAFE_CODEPOINT;
+            }
+        }
+
+        if ($codepoint > 0xFFFF) {
+            $codepoint -= 0x10000;
+            $units = [0xD800 | ($codepoint >> 10), 0xDC00 | ($codepoint & 0x3FF)];
+        } else {
+            $units = [$codepoint];
+        }
+
+        $bytes = '';
+        foreach ($units as $unit) {
+            $bytes .= chr($unit >> 8).chr($unit & 255);
+        }
+
+        return $bytes;
+    }
+
+    /**
+     * Encode an array of code points as a UCS-4 byte string.
+     *
+     * Each code point becomes four bytes, most significant byte first (big endian).
+     *
+     * @param array $input Array of integer code points
+     *
+     * @return string
+     */
+    private function ucs4array_ucs4($input): string
+    {
+        $output = '';
+        foreach ($input as $v) {
+            $output .= pack('N', $v);
+        }
+
+        return $output;
+    }
+
+    /**
+     * Decode a UCS-4 byte string (four bytes per code point, big endian) into
+     * an array of code points.
+     *
+     * @param string $input UCS-4 big-endian string
+     *
+     * @return array
+     * @throws InvalidCharacterException If the length is not a multiple of four
+     */
+    private function ucs4_ucs4array($input): array
+    {
+        $input = (string) $input;
+        $inputLength = $this->byteLength($input);
+        // Input length must be dividable by 4
+        if ($inputLength % 4) {
+            throw new InvalidCharacterException('Input UCS4 string is broken', 306);
+        }
+        // Empty input - return empty output
+        if (!$inputLength) {
+            return [];
+        }
+
+        // unpack() numbers its results from 1; callers expect a zero-based list
+        return array_values(unpack('N*', $input));
+    }
+
+    /**
+     * Gets the length of a string in bytes even if mbstring function
+     * overloading is turned on
+     *
+     * @param string $string the string for which to get the length.
+     * @return integer the length of the string in bytes.
+     */
+    protected function byteLength($string)
+    {
+        // ini_get() yields false when the setting does not exist; cast before the bit test
+        if (extension_loaded('mbstring')
+            && ((int) ini_get('mbstring.func_overload') & 0x02) === 0x02
+        ) {
+            return mb_strlen($string, '8bit');
+        }
+
+        return strlen((string) $string);
+    }
+}
--- a/libraries/vendor/algo26-matthias/idna-convert/src/TranscodeUnicode/TranscodeUnicodeInterface.php
+++ b/libraries/vendor/algo26-matthias/idna-convert/src/TranscodeUnicode/TranscodeUnicodeInterface.php
@@ -0,0 +1,13 @@
+<?php
+namespace Algo26\IdnaConvert\TranscodeUnicode;
+
+/**
+ * Contract for converters that translate data between Unicode representations.
+ */
+interface TranscodeUnicodeInterface
+{
+    /**
+     * Convert $data from the representation named by $fromEncoding into the one
+     * named by $toEncoding.
+     *
+     * NOTE(review): the set of accepted format names and the exact meaning of
+     * $safeMode are defined by the implementing class, not by this interface.
+     *
+     * @param mixed  $data          The data to convert
+     * @param string $fromEncoding  Name of the input format
+     * @param string $toEncoding    Name of the output format
+     * @param bool   $safeMode      Safe-mode switch, off by default
+     * @param int    $safeCodepoint Code point used in safe mode, 0xFFFC by default
+     *
+     * @return mixed The converted data
+     */
+    public function convert(
+        $data,
+        string $fromEncoding,
+        string $toEncoding,
+        bool $safeMode = false,
+        int $safeCodepoint = 0xFFFC
+    );
+}