primo commit
This commit is contained in:
		
							
								
								
									
										437
									
								
								libraries/vendor/algo26-matthias/idna-convert/src/TranscodeUnicode/TranscodeUnicode.php
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										437
									
								
								libraries/vendor/algo26-matthias/idna-convert/src/TranscodeUnicode/TranscodeUnicode.php
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,437 @@ | ||||
| <?php | ||||
| /** | ||||
|  * Converts between various flavours of Unicode representations like UCS-4 or UTF-8 | ||||
|  * Supported schemes: | ||||
|  * - UCS-4 Little Endian / Big Endian / Array (partially) | ||||
|  * - UTF-16 Little Endian / Big Endian (not yet) | ||||
|  * - UTF-8 | ||||
|  * - UTF-7 | ||||
|  * - UTF-7 IMAP (modified UTF-7) | ||||
|  * | ||||
|  * @package IdnaConvert | ||||
|  * @author Matthias Sommerfeld  <matthias.sommerfeld@algo26.de> | ||||
|  * @copyright 2003-2019 algo26 Beratungs GmbH, Berlin, https://www.algo26.de | ||||
|  */ | ||||
|  | ||||
| namespace Algo26\IdnaConvert\TranscodeUnicode; | ||||
|  | ||||
| use Algo26\IdnaConvert\Exception\InvalidCharacterException; | ||||
| use InvalidArgumentException; | ||||
|  | ||||
class TranscodeUnicode implements TranscodeUnicodeInterface
{
    public const FORMAT_UCS4       = 'ucs4';
    public const FORMAT_UCS4_ARRAY = 'ucs4array';
    public const FORMAT_UTF8       = 'utf8';
    public const FORMAT_UTF7       = 'utf7';
    public const FORMAT_UTF7_IMAP  = 'utf7imap';

    /** Every format accepted as source or target of convert(). */
    private const ENCODINGS = [
        self::FORMAT_UCS4,
        self::FORMAT_UCS4_ARRAY,
        self::FORMAT_UTF8,
        self::FORMAT_UTF7,
        self::FORMAT_UTF7_IMAP
    ];

    /** Replacement code point used when the caller does not pass one (U+FFFC). */
    private const DEFAULT_SAFE_CODEPOINT = 0xFFFC;

    /** Largest code point that can be expressed as a UTF-16 surrogate pair. */
    private const MAX_CODEPOINT = 0x10FFFF;

    /** Characters allowed inside a shifted (base64) UTF-7 run. */
    private const BASE64_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';

    /** @var bool When true, malformed input is replaced instead of raising an exception */
    private $safeMode = false;

    /** @var int Code point substituted for malformed input while in safe mode */
    private $safeCodepoint = self::DEFAULT_SAFE_CODEPOINT;

    /**
     * Converts $data from one Unicode representation into another.
     *
     * Every conversion is routed through the intermediate "ucs4array" form
     * (a list of integer code points): first "<from>_ucs4array", then
     * "ucs4array_<to>". Encoding names are matched case-insensitively.
     * If both names are equal, $data is returned untouched without validating
     * the name.
     *
     * @param string|array $data          Input in $fromEncoding (array for "ucs4array", string otherwise)
     * @param string       $fromEncoding  One of the FORMAT_* constants
     * @param string       $toEncoding    One of the FORMAT_* constants
     * @param bool         $safeMode      Replace malformed input by $safeCodepoint instead of throwing
     * @param int|null     $safeCodepoint Replacement code point; null selects U+FFFC
     *
     * @return string|array The converted data (array for "ucs4array", string otherwise)
     * @throws InvalidArgumentException  On an unknown encoding name (codes 300 / 301)
     * @throws InvalidCharacterException On malformed input while safe mode is off
     */
    public function convert(
        $data,
        string $fromEncoding,
        string $toEncoding,
        bool $safeMode = false,
        ?int $safeCodepoint = null
    ) {
        $this->safeMode = $safeMode;
        // Always reset the replacement: a custom code point passed to an earlier
        // call must not silently leak into later calls that rely on the default.
        $this->safeCodepoint = $safeCodepoint ?? self::DEFAULT_SAFE_CODEPOINT;

        $fromEncoding = strtolower($fromEncoding);
        $toEncoding   = strtolower($toEncoding);

        if ($fromEncoding === $toEncoding) {
            return $data;
        }

        if (!in_array($fromEncoding, self::ENCODINGS, true)) {
            throw new InvalidArgumentException(sprintf('Invalid input format %s', $fromEncoding), 300);
        }
        if (!in_array($toEncoding, self::ENCODINGS, true)) {
            throw new InvalidArgumentException(sprintf('Invalid output format %s', $toEncoding), 301);
        }

        // The private converters follow the naming scheme "<source>_<target>".
        if ($fromEncoding !== self::FORMAT_UCS4_ARRAY) {
            $methodName = sprintf('%s_%s', $fromEncoding, self::FORMAT_UCS4_ARRAY);
            $data = $this->$methodName($data);
        }
        if ($toEncoding !== self::FORMAT_UCS4_ARRAY) {
            $methodName = sprintf('%s_%s', self::FORMAT_UCS4_ARRAY, $toEncoding);
            $data = $this->$methodName($data);
        }

        return $data;
    }

    /**
     * Decodes a UTF-8 byte string into a list of code points.
     *
     * The decoder is a small state machine driven by $mode:
     *  - 'next': a start byte (or ASCII byte) is expected
     *  - 'add' : continuation bytes of the current character are expected
     *  - 'no'  : the input ended inside a character; remaining high bytes are skipped
     *
     * In safe mode every malformed sequence yields the configured replacement
     * code point; otherwise an InvalidCharacterException is thrown. The range
     * checks for overlong / out-of-range forms are only applied when safe mode
     * is off.
     *
     * @param string $input The UTF-8 string to convert
     *
     * @return array Code points, one integer per decoded character
     * @throws InvalidCharacterException
     */
    private function utf8_ucs4array($input): array
    {
        $startByte = 0;
        $nextByte = 0; // Number of continuation bytes still expected AFTER the next one

        $output = [];
        $outputLength = 0;
        $inputLength = $this->byteLength($input);
        $mode = 'next';
        $test = 'none';
        for ($k = 0; $k < $inputLength; ++$k) {
            $v = ord($input[$k]);

            if ($v < 128) { // ASCII is copied literally
                $output[$outputLength] = $v;
                ++$outputLength;
                if ('add' === $mode) {
                    // An ASCII byte interrupted a multi-byte character.
                    if (!$this->safeMode) {
                        throw new InvalidCharacterException(
                            sprintf(
                                'Conversion from UTF-8 to UCS-4 failed: malformed input at byte %d',
                                $k
                            ),
                            302
                        );
                    }
                    // Replace the partial character that precedes the ASCII byte just stored.
                    $output[$outputLength - 2] = $this->safeCodepoint;
                    $mode = 'next';
                }

                continue;
            }

            if ('next' === $mode) { // Determine the width of the character from its start byte
                $startByte = $v;
                $mode = 'add';
                $test = 'range';
                if ($v >> 5 === 6) { // 110xxxxx 10xxxxxx
                    $nextByte = 0;
                    $v = ($v - 192) << 6;
                } elseif ($v >> 4 === 14) { // 1110xxxx 10xxxxxx 10xxxxxx
                    $nextByte = 1;
                    $v = ($v - 224) << 12;
                } elseif ($v >> 3 === 30) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    $nextByte = 2;
                    $v = ($v - 240) << 18;
                } elseif ($this->safeMode) {
                    $mode = 'next';
                    $output[$outputLength] = $this->safeCodepoint;
                    ++$outputLength;

                    continue;
                } else {
                    throw new InvalidCharacterException(
                        sprintf('This might be UTF-8, but I don\'t understand it at byte %d', $k),
                        303
                    );
                }

                if (($inputLength - $k - $nextByte) < 2) {
                    // The input ends before this character is complete.
                    if (!$this->safeMode) {
                        throw new InvalidCharacterException(
                            sprintf(
                                'Conversion from UTF-8 to UCS-4 failed: malformed input at byte %d',
                                $k
                            ),
                            302
                        );
                    }
                    $output[$outputLength] = $this->safeCodepoint;
                    // Advance the cursor so that a trailing ASCII byte cannot overwrite the replacement.
                    ++$outputLength;
                    $mode = 'no';

                    continue;
                }

                $output[$outputLength] = (int) $v;
                ++$outputLength;

                continue;
            }

            if ('add' === $mode) {
                if (!$this->safeMode && $test === 'range') {
                    // Checked once, on the first continuation byte (Unicode "well-formed UTF-8" table).
                    $test = 'none';
                    if ($startByte < 0xC2                      // C0/C1: overlong encoding of ASCII
                        || $startByte > 0xF4                   // would exceed U+10FFFF
                        || ($v < 0xA0 && $startByte === 0xE0)  // overlong three-byte form
                        || ($v > 0x9F && $startByte === 0xED)  // UTF-16 surrogate range
                        || ($v < 0x90 && $startByte === 0xF0)  // overlong four-byte form
                        || ($v > 0x8F && $startByte === 0xF4)  // above U+10FFFF
                    ) {
                        throw new InvalidCharacterException(
                            sprintf('Bogus UTF-8 character (out of legal range) at byte %d', $k),
                            304
                        );
                    }
                }
                if ($v >> 6 === 2) { // Continuation bytes look like 10xxxxxx
                    $v = ($v - 128) << ($nextByte * 6);
                    $output[$outputLength - 1] += $v;
                    --$nextByte;
                } elseif ($this->safeMode) {
                    // safeCodepoint already IS a code point; it must not be passed through ord().
                    $output[$outputLength - 1] = $this->safeCodepoint;
                    $k--; // Re-read the offending byte as a potential start byte
                    $mode = 'next';

                    continue;
                } else {
                    throw new InvalidCharacterException(
                        sprintf('Conversion from UTF-8 to UCS-4 failed: malformed input at byte %d', $k),
                        302
                    );
                }
                if ($nextByte < 0) {
                    $mode = 'next';
                }
            }
        } // for

        return $output;
    }

    /**
     * Encodes a list of code points as a UTF-8 string.
     *
     * Values that cannot be encoded (negative, or 2^21 and above) raise an
     * exception, or are replaced by the UTF-8 form of the replacement code
     * point when safe mode is on.
     *
     * @param array $input Code points
     *
     * @return string
     * @throws InvalidCharacterException
     */
    private function ucs4array_utf8($input): string
    {
        $output = '';
        foreach ($input as $k => $v) {
            $encoded = $this->encodeCodepointAsUtf8($v);
            if (null === $encoded) {
                if (!$this->safeMode) {
                    throw new InvalidCharacterException(
                        sprintf('Conversion from UCS-4 to UTF-8 failed: malformed input at byte %d', $k),
                        305
                    );
                }
                // Emit the replacement as UTF-8 bytes (not as its decimal digits);
                // fall back to U+FFFC if the configured replacement is itself unusable.
                $encoded = $this->encodeCodepointAsUtf8($this->safeCodepoint)
                    ?? $this->encodeCodepointAsUtf8(self::DEFAULT_SAFE_CODEPOINT);
            }
            $output .= $encoded;
        }

        return $output;
    }

    /**
     * Encodes one code point as UTF-8.
     *
     * @param int $codepoint
     *
     * @return string|null The byte sequence, or null if the value does not fit into four UTF-8 bytes
     */
    private function encodeCodepointAsUtf8($codepoint): ?string
    {
        if ($codepoint < 0 || $codepoint >= (1 << 21)) {
            return null;
        }
        if ($codepoint < 128) { // 7bit are transferred literally
            return chr($codepoint);
        }
        if ($codepoint < (1 << 11)) { // 2 bytes
            return chr(192 + ($codepoint >> 6))
                . chr(128 + ($codepoint & 63));
        }
        if ($codepoint < (1 << 16)) { // 3 bytes
            return chr(224 + ($codepoint >> 12))
                . chr(128 + (($codepoint >> 6) & 63))
                . chr(128 + ($codepoint & 63));
        }

        return chr(240 + ($codepoint >> 18)) // 4 bytes
            . chr(128 + (($codepoint >> 12) & 63))
            . chr(128 + (($codepoint >> 6) & 63))
            . chr(128 + ($codepoint & 63));
    }

    /**
     * Decodes an IMAP "modified UTF-7" string into a list of code points.
     *
     * Modified UTF-7 uses '&' as shift character and ',' in place of '/'
     * inside base64 runs. Only commas within such a run are mapped back to
     * '/', so literal commas in the directly encoded text are preserved.
     *
     * @param string $input
     *
     * @return array Code points
     */
    private function utf7imap_ucs4array($input): array
    {
        $input = (string) $input;
        $normalized = preg_replace_callback(
            '/&[A-Za-z0-9+,]*/',
            static function (array $match): string {
                return strtr($match[0], ',', '/');
            },
            $input
        );

        // preg_replace_callback() yields null on a PCRE error; decode the raw input then.
        return $this->utf7_ucs4array($normalized ?? $input, '&');
    }

    /**
     * Decodes a UTF-7 string into a list of code points.
     *
     * A shifted run starts at the shift character and ends at the first byte
     * outside the base64 alphabet. A terminating '-' is absorbed, any other
     * terminator is kept as a regular character. The shift character directly
     * followed by '-' stands for the shift character itself. A run that is
     * still open at the end of the input is decoded as well. Zero bytes are
     * ignored.
     *
     * @param string $input
     * @param string $sc    Shift character ('+' for UTF-7, '&' for the IMAP flavour)
     *
     * @return array Code points
     */
    private function utf7_ucs4array($input, $sc = '+'): array
    {
        $output = [];
        $inputLength = $this->byteLength($input);
        $inBase64 = false;
        $b64 = '';

        for ($k = 0; $k < $inputLength; ++$k) {
            $c = $input[$k];

            // Ignore zero bytes
            if ("\0" === $c) {
                continue;
            }

            if ($inBase64) {
                if (false !== strpos(self::BASE64_ALPHABET, $c)) {
                    $b64 .= $c;

                    continue;
                }

                // Any other byte terminates the shifted run.
                $inBase64 = false;
                if ('-' === $c && '' === $b64) {
                    // "<shift>-" encodes the shift character itself
                    $output[] = ord($sc);

                    continue;
                }
                foreach ($this->decodeUtf7Base64($b64) as $codepoint) {
                    $output[] = $codepoint;
                }
                $b64 = '';
                if ('-' === $c) {
                    continue; // The explicit terminator is absorbed
                }
                // Other terminators are ordinary characters: handle them below.
            }

            if ($sc === $c) {
                $inBase64 = true;

                continue;
            }

            $output[] = ord($c);
        }

        // The input may legitimately end inside a shifted run.
        if ($inBase64 && '' !== $b64) {
            foreach ($this->decodeUtf7Base64($b64) as $codepoint) {
                $output[] = $codepoint;
            }
        }

        return $output;
    }

    /**
     * Decodes the base64 payload of one shifted UTF-7 run.
     *
     * The payload is big-endian UTF-16; a high surrogate followed by a low
     * surrogate is combined into one code point, unpaired surrogates are
     * passed through unchanged. A trailing odd byte is discarded.
     *
     * @param string $b64 Base64 characters without padding
     *
     * @return array Code points
     */
    private function decodeUtf7Base64(string $b64): array
    {
        $bytes = base64_decode($b64);
        if (false === $bytes) {
            return [];
        }

        $length = $this->byteLength($bytes);
        $usable = $length - ($length % 2);

        $codepoints = [];
        for ($i = 0; $i < $usable; $i += 2) {
            $unit = (ord($bytes[$i]) << 8) | ord($bytes[$i + 1]);
            if ($unit >= 0xD800 && $unit <= 0xDBFF && ($i + 3) < $usable) {
                $low = (ord($bytes[$i + 2]) << 8) | ord($bytes[$i + 3]);
                if ($low >= 0xDC00 && $low <= 0xDFFF) {
                    $unit = 0x10000 + (($unit - 0xD800) << 10) + ($low - 0xDC00);
                    $i += 2; // The low surrogate has been consumed
                }
            }
            $codepoints[] = $unit;
        }

        return $codepoints;
    }

    /**
     * Encodes a list of code points as IMAP "modified UTF-7".
     *
     * Uses '&' as shift character and replaces '/' by ',' inside the encoded
     * runs only; a literal '/' in the directly encoded text stays untouched.
     *
     * @param array $input Code points
     *
     * @return string
     */
    private function ucs4array_utf7imap($input): string
    {
        $encoded = $this->ucs4array_utf7($input, '&');
        // Every run produced by ucs4array_utf7() has the shape "&...-" and base64 never contains '-'.
        $converted = preg_replace_callback(
            '/&[^-]*-/',
            static function (array $match): string {
                return strtr($match[0], '/', ',');
            },
            $encoded
        );

        // preg_replace_callback() yields null on a PCRE error; keep the unmodified encoding then.
        return $converted ?? $encoded;
    }

    /**
     * Encodes a list of code points as UTF-7.
     *
     * Code points 0x20..0x7E other than the shift character are written
     * literally. Everything else is collected as big-endian UTF-16 and written
     * as "<shift><base64 without padding>-". A lone shift character becomes
     * "<shift>-". Code points above U+FFFF are written as surrogate pairs;
     * values outside 0..0x10FFFF keep the legacy behaviour of being reduced to
     * their low 16 bits.
     *
     * @param array  $input Code points
     * @param string $sc    Shift character ('+' for UTF-7, '&' for the IMAP flavour)
     *
     * @return string
     */
    private function ucs4array_utf7($input, $sc = '+'): string
    {
        $output = '';
        $pending = ''; // UTF-16BE bytes waiting to be base64 encoded
        $shiftCode = ord($sc);

        foreach ($input as $v) {
            if (0x20 <= $v && $v <= 0x7e && $v !== $shiftCode) {
                $output .= $this->flushUtf7Run($pending, $sc) . chr($v);
                $pending = '';

                continue;
            }

            if ($v > 0xFFFF && $v <= self::MAX_CODEPOINT) {
                $offset = $v - 0x10000;
                $high = 0xD800 | ($offset >> 10);
                $low = 0xDC00 | ($offset & 0x3FF);
                $pending .= chr($high >> 8) . chr($high & 255) . chr($low >> 8) . chr($low & 255);
            } else {
                $pending .= chr(($v >> 8) & 255) . chr($v & 255);
            }
        }

        return $output . $this->flushUtf7Run($pending, $sc);
    }

    /**
     * Renders the collected UTF-16 bytes of one shifted UTF-7 run.
     *
     * @param string $pending UTF-16BE bytes; may be empty
     * @param string $sc      Shift character
     *
     * @return string The encoded run, or '' if nothing is pending
     */
    private function flushUtf7Run(string $pending, string $sc): string
    {
        if ('' === $pending) {
            return '';
        }
        if (chr(0) . $sc === $pending) {
            return $sc . '-'; // Short form for the shift character itself
        }

        return $sc . str_replace('=', '', base64_encode($pending)) . '-';
    }

    /**
     * Serialises a list of code points as a UCS-4 string.
     *
     * Each code point is written as four bytes, most significant byte first
     * (big endian).
     *
     * @param array $input Code points
     *
     * @return string
     */
    private function ucs4array_ucs4($input): string
    {
        $output = '';
        foreach ($input as $v) {
            $output .= chr(($v >> 24) & 255)
                . chr(($v >> 16) & 255)
                . chr(($v >> 8) & 255)
                . chr($v & 255);
        }

        return $output;
    }

    /**
     * Splits a UCS-4 string into a list of code points.
     *
     * Every group of four bytes is read most significant byte first
     * (big endian), mirroring ucs4array_ucs4().
     *
     * @param string $input UCS-4 string
     *
     * @return array Code points
     * @throws InvalidCharacterException If the byte length is not a multiple of four
     */
    private function ucs4_ucs4array($input): array
    {
        $inputLength = $this->byteLength($input);
        if ($inputLength % 4 !== 0) {
            throw new InvalidCharacterException('Input UCS4 string is broken', 306);
        }

        $output = [];
        for ($i = 0; $i < $inputLength; $i += 4) {
            $output[] = (ord($input[$i]) << 24)
                + (ord($input[$i + 1]) << 16)
                + (ord($input[$i + 2]) << 8)
                + ord($input[$i + 3]);
        }

        return $output;
    }

    /**
     * Gets the length of a string in bytes even if mbstring function
     * overloading is turned on
     *
     * The overloading check only matters on PHP 7; the mbstring.func_overload
     * setting was removed in PHP 8.0, where ini_get() returns false for it.
     *
     * @param string $string the string for which to get the length.
     * @return int the length of the string in bytes.
     */
    protected function byteLength($string)
    {
        if (extension_loaded('mbstring')
            && ((int) ini_get('mbstring.func_overload') & 0x02) === 0x02
        ) {
            return mb_strlen($string, '8bit');
        }

        // (string) replaces the non-canonical (binary) cast, which is deprecated as of PHP 8.5.
        return strlen((string) $string);
    }
}
							
								
								
									
										13
									
								
								libraries/vendor/algo26-matthias/idna-convert/src/TranscodeUnicode/TranscodeUnicodeInterface.php
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								libraries/vendor/algo26-matthias/idna-convert/src/TranscodeUnicode/TranscodeUnicodeInterface.php
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | ||||
| <?php | ||||
| namespace Algo26\IdnaConvert\TranscodeUnicode; | ||||
|  | ||||
/**
 * Contract for a converter between different representations of Unicode data.
 */
interface TranscodeUnicodeInterface
{
    /**
     * Converts $data from $fromEncoding to $toEncoding.
     *
     * NOTE(review): the set of valid encoding names and the exact effect of
     * the safe mode are defined by the implementing class, not by this
     * interface - confirm against the implementation in use.
     *
     * @param mixed  $data          The data to convert (untyped in the signature)
     * @param string $fromEncoding  Name of the encoding $data is currently in
     * @param string $toEncoding    Name of the encoding to convert to
     * @param bool   $safeMode      Presumably switches from failing on invalid input to substituting it
     * @param int    $safeCodepoint Presumably the code point used as substitute; defaults to 0xFFFC
     *
     * @return mixed The converted data (no return type is declared)
     */
    public function convert(
        $data,
        string $fromEncoding,
        string $toEncoding,
        bool $safeMode = false,
        int $safeCodepoint = 0xFFFC
    );
}
		Reference in New Issue
	
	Block a user