*/
class MbString extends \ArrayObject implements \Stringable
{
    public const MBSTRING_CONVMETHOD_ICONV = 1;
    public const MBSTRING_CONVMETHOD_MBSTRING = 2;

    /**
     * The way to convert text encoding.
     *
     * Holds one of the MBSTRING_CONVMETHOD_* constants; it is detected
     * lazily the first time an instance is constructed.
     *
     * @var int
     */
    public static $convMethod;

    /**
     * UTF-32 string without endian bytes.
     *
     * Every character occupies exactly 4 bytes, which is why character
     * indexes are translated into byte offsets with "<< 2" in this class.
     *
     * @var string
     */
    protected $str;

    /**
     * The original encoding.
     *
     * @var string
     */
    protected $encoding;

    /**
     * The endian bytes for UTF-32.
     *
     * An empty string when the converter does not emit such a header.
     *
     * @var string
     */
    protected static $utf32Header;

    /**
     * The constructor.
     *
     * @param string $str      the string
     * @param string $encoding the encoding
     */
    public function __construct(string $str = '', string $encoding = 'UTF-8')
    {
        // the conversion backend and the header are detected only once per class
        static::$convMethod ??= static::detectConvEncoding();
        static::$utf32Header ??= static::getUtf32Header();

        $this->encoding = $encoding;
        $this->set($str);
    }

    /**
     * Returns a string representation of the object.
     *
     * @return string string representation of the object
     */
    public function __toString(): string
    {
        return $this->get();
    }

    /**
     * The string setter.
     *
     * @param string $str the string (in the original encoding)
     */
    public function set(string $str): self
    {
        $this->str = $this->inputConv($str);

        return $this;
    }

    /**
     * Sets the character at the given index.
     *
     * Only the first character of $char is used. When $idx lies beyond the
     * end of the string, the gap is filled with spaces.
     *
     * @param int    $idx  the character index
     * @param string $char the character (in the original encoding)
     */
    public function setAt(int $idx, string $char): self
    {
        $char = $this->inputConv($char);

        // keep the first UTF-32 unit only
        if (\strlen($char) > 4) {
            $char = substr($char, 0, 4);
        }

        $paddingCount = $idx - $this->strlen();

        if ($paddingCount > 0) {
            // out of bound: the padding is built directly in UTF-32 so that it is
            // correct even when the original encoding is not ASCII-compatible
            $this->str .= str_repeat(static::rawFromUtf8(' '), $paddingCount) . $char;
        } else {
            // in bound (or exactly at the end, which appends)
            $this->str = substr_replace($this->str, $char, $idx << 2, 4);
        }

        return $this;
    }

    /**
     * The string getter.
     *
     * @return string the string in the original encoding
     */
    public function get(): string
    {
        return $this->outputConv($this->str);
    }

    /**
     * The raw string getter.
     *
     * @return string the UTF-32-encoded raw string
     */
    public function getRaw(): string
    {
        return $this->str;
    }

    /**
     * Gets the character at the given index in the original encoding.
     *
     * @param int $idx the character index
     *
     * @return string the character, or an empty string when nothing is there
     */
    public function getAt(int $idx): string
    {
        return $this->outputConv($this->getAtRaw($idx));
    }

    /**
     * Gets the character at the given index as a raw UTF-32 unit.
     *
     * @param int $idx the character index
     */
    public function getAtRaw(int $idx): string
    {
        return substr($this->str, $idx << 2, 4);
    }

    /**
     * Splits the string into an array of characters in the original encoding.
     *
     * @return string[]
     */
    public function toArray(): array
    {
        // fast path: a single UTF-8 aware regex split
        if (static::isUtf8Encoding($this->encoding)) {
            return self::strToChars($this->get());
        }

        // The "/u" regex in strToChars() only understands UTF-8, so for any
        // other encoding each UTF-32 unit is converted back individually.
        return array_map(
            fn (string $unit): string => $this->outputConv($unit),
            $this->toArrayRaw(),
        );
    }

    /**
     * Splits the string (in the original encoding) by a regular expression.
     *
     * @param string $regex the pattern handed to preg_split()
     * @param int    $limit the limit handed to preg_split()
     * @param int    $flags the flags handed to preg_split()
     *
     * @return array the pieces, or an empty array if the string is empty or the split fails
     */
    public function toArraySplit(string $regex, int $limit = -1, $flags = 0): array
    {
        if ($this->str === '') {
            return [];
        }

        // preg_split() returns false on failure, which must not leak out as a non-array
        return preg_split($regex, $this->get(), $limit, $flags) ?: [];
    }

    /**
     * Splits the string into an array of raw UTF-32 units.
     *
     * @return string[]
     */
    public function toArrayRaw(): array
    {
        return $this->str === '' ? [] : str_split($this->str, 4);
    }

    /**
     * Splits a UTF-8 string into an array of characters.
     *
     * @param string $str the UTF-8 string
     *
     * @return string[] the characters, or an empty array when the split fails
     */
    public static function strToChars(string $str): array
    {
        return preg_split('//uS', $str, -1, \PREG_SPLIT_NO_EMPTY) ?: [];
    }

    // /////////////////////////////////
    // string manipulation functions //
    // /////////////////////////////////

    /**
     * Finds the first occurrence of $needle, ignoring the case of ASCII letters.
     *
     * @param string $needle the needle (in the original encoding)
     * @param int    $offset the character offset to start searching from
     *
     * @return false|int the character index, or false if not found
     */
    public function stripos(string $needle, int $offset = 0)
    {
        $bytePos = static::findAligned($this->str, $this->inputConv($needle), $offset << 2, true);

        return $bytePos === false ? false : $bytePos >> 2;
    }

    /**
     * Gets the length of the string in characters.
     */
    public function strlen(): int
    {
        return \strlen($this->str) >> 2;
    }

    /**
     * Finds the first occurrence of $needle.
     *
     * @param string $needle the needle (in the original encoding)
     * @param int    $offset the character offset to start searching from
     *
     * @return false|int the character index, or false if not found
     */
    public function strpos(string $needle, int $offset = 0)
    {
        $bytePos = static::findAligned($this->str, $this->inputConv($needle), $offset << 2);

        return $bytePos === false ? false : $bytePos >> 2;
    }

    /**
     * Gets a part of the string in the original encoding.
     *
     * @param int      $start  the starting character index
     * @param null|int $length the number of characters, null means "until the end"
     */
    public function substr(int $start = 0, ?int $length = null): string
    {
        $byteLength = $length === null ? null : $length << 2;

        return $this->outputConv(substr($this->str, $start << 2, $byteLength));
    }

    /**
     * Returns the string with a part replaced, without modifying this object.
     *
     * @param string   $replacement the replacement (in the original encoding)
     * @param int      $start       the starting character index
     * @param null|int $length      the number of characters to replace, null means "until the end"
     */
    public function substr_replace(string $replacement, int $start = 0, ?int $length = null): string
    {
        $byteLength = $length === null ? null : $length << 2;

        return $this->outputConv(
            substr_replace($this->str, $this->inputConv($replacement), $start << 2, $byteLength),
        );
    }

    /**
     * Returns the string (in the original encoding) passed through PHP's strtolower().
     *
     * Note that strtolower() works on bytes, not on characters.
     */
    public function strtolower(): string
    {
        return strtolower($this->get());
    }

    /**
     * Returns the string (in the original encoding) passed through PHP's strtoupper().
     *
     * Note that strtoupper() works on bytes, not on characters.
     */
    public function strtoupper(): string
    {
        return strtoupper($this->get());
    }

    // //////////////////////////////
    // non-manipulative functions //
    // //////////////////////////////

    /**
     * Tells whether the string contains $needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function has(string $needle): bool
    {
        $needle = $this->inputConv($needle);

        // an empty needle is contained in every string
        return $needle === '' || static::findAligned($this->str, $needle) !== false;
    }

    /**
     * Tells whether the string starts with $needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function startsWith(string $needle): bool
    {
        $needle = $this->inputConv($needle);

        return substr($this->str, 0, \strlen($needle)) === $needle;
    }

    /**
     * Tells whether the string ends with $needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function endsWith(string $needle): bool
    {
        $needle = $this->inputConv($needle);
        $byteLength = \strlen($needle);

        // substr($str, -0) would return the whole string, hence the guard
        if ($byteLength === 0) {
            return true;
        }

        return substr($this->str, -$byteLength) === $needle;
    }

    // ///////////////////////////////////////////
    // those functions will not return a value //
    // ///////////////////////////////////////////

    /**
     * Inserts a string at the given position, in place.
     *
     * @param string $insert   the string to insert (in the original encoding)
     * @param int    $position the character index to insert at
     */
    public function str_insert_i(string $insert, int $position): self
    {
        $this->str = substr_replace($this->str, $this->inputConv($insert), $position << 2, 0);

        return $this;
    }

    /**
     * Encloses a part of the string with an opening and a closing string, in place.
     *
     * @param string[] $closures the opening and the closing string, such as ['{', '}'];
     *                           a single element is used for both sides
     * @param int      $start    the starting character index
     * @param null|int $length   the number of characters to enclose, null means "until the end"
     */
    public function str_enclose_i(array $closures, int $start = 0, ?int $length = null): self
    {
        // tolerate arrays that are not indexed by 0 and 1
        $closures = array_values($closures);

        // nothing to enclose with
        if ($closures === []) {
            return $this;
        }

        $opening = $this->inputConv($closures[0]);
        $closing = \array_key_exists(1, $closures) ? $this->inputConv($closures[1]) : $opening;

        $byteStart = $start << 2;
        $byteLength = $length === null ? null : $length << 2;

        $enclosed = $opening . substr($this->str, $byteStart, $byteLength) . $closing;
        $this->str = substr_replace($this->str, $enclosed, $byteStart, $byteLength);

        return $this;
    }

    /**
     * Replaces all occurrences of $search with $replace, in place.
     *
     * @param string $search  the string to look for (in the original encoding)
     * @param string $replace the replacement (in the original encoding)
     */
    public function str_replace_i(string $search, string $replace): self
    {
        $search = $this->inputConv($search);
        $replace = $this->inputConv($replace);

        // like str_replace(), an empty search string changes nothing
        if ($search === '') {
            return $this;
        }

        // A plain str_replace() on the UTF-32 bytes may hit a byte sequence that
        // straddles two characters, so only character-aligned hits are replaced.
        $searchLength = \strlen($search);
        $rebuilt = '';
        $cursor = 0;

        while (($hit = static::findAligned($this->str, $search, $cursor)) !== false) {
            $rebuilt .= substr($this->str, $cursor, $hit - $cursor) . $replace;
            $cursor = $hit + $searchLength;
        }

        $this->str = $rebuilt . substr($this->str, $cursor);

        return $this;
    }

    /**
     * Replaces a part of the string, in place.
     *
     * @param string   $replacement the replacement (in the original encoding)
     * @param int      $start       the starting character index
     * @param null|int $length      the number of characters to replace, null means "until the end"
     */
    public function substr_replace_i(string $replacement, int $start = 0, ?int $length = null): self
    {
        $byteLength = $length === null ? null : $length << 2;

        $this->str = substr_replace($this->str, $this->inputConv($replacement), $start << 2, $byteLength);

        return $this;
    }

    // ///////////////
    // ArrayObject //
    // ///////////////

    /**
     * Sets the character at $idx; "$obj[] = $char" appends.
     */
    public function offsetSet(mixed $idx, mixed $char): void
    {
        // "$obj[] = ..." arrives here with a null index
        if ($idx === null) {
            $this->append($char);

            return;
        }

        $this->setAt($idx, $char);
    }

    /**
     * Gets the character at $idx in the original encoding.
     */
    public function offsetGet(mixed $idx): string
    {
        return $this->getAt($idx);
    }

    /**
     * Tells whether $idx is an integer that addresses a character of the string.
     *
     * Negative indexes count from the end, the same way getAt() resolves them.
     */
    public function offsetExists(mixed $idx): bool
    {
        if (!\is_int($idx)) {
            return false;
        }

        $length = $this->strlen();

        return $idx < $length && $idx >= -$length;
    }

    /**
     * Appends a string (in the original encoding) to the end.
     */
    public function append(mixed $str): void
    {
        $this->str .= $this->inputConv($str);
    }

    /**
     * Gets the length of the string in characters.
     */
    public function count(): int
    {
        return $this->strlen();
    }

    // //////////////////
    // misc functions //
    // //////////////////

    /**
     * Gets the utf 32 header.
     *
     * @return string the UTF-32 header or empty string
     */
    protected static function getUtf32Header(): string
    {
        // just use any string to get the endian header, here we use "A"
        $probe = self::convEncoding('A', 'UTF-8', 'UTF-32');

        // some distributions like "php alpine" docker image won't generate the header
        return \strlen($probe) > 4 ? substr($probe, 0, 4) : '';
    }

    /**
     * Detects which extension can be used to convert text encoding.
     *
     * @return int one of the MBSTRING_CONVMETHOD_* constants
     *
     * @throws \RuntimeException if neither "iconv" nor "mbstring" is usable
     */
    protected static function detectConvEncoding(): int
    {
        if (\function_exists('iconv') && iconv('UTF-8', 'UTF-32', 'A') !== false) {
            return static::MBSTRING_CONVMETHOD_ICONV;
        }

        if (\function_exists('mb_convert_encoding') && mb_convert_encoding('A', 'UTF-32', 'UTF-8') !== false) {
            return static::MBSTRING_CONVMETHOD_MBSTRING;
        }

        throw new \RuntimeException('Either "iconv" or "mbstring" extension is required.');
    }

    /**
     * Converts a string from one encoding into another one.
     *
     * @param string $str  the string
     * @param string $from the source encoding
     * @param string $to   the target encoding
     *
     * @return string the converted string, or an empty string if the conversion fails
     *
     * @throws \RuntimeException if the conversion method is unknown
     */
    protected static function convEncoding(string $str, string $from, string $to): string
    {
        $converted = match (static::$convMethod) {
            static::MBSTRING_CONVMETHOD_ICONV => iconv($from, $to, $str),
            static::MBSTRING_CONVMETHOD_MBSTRING => mb_convert_encoding($str, $to, $from),
            default => throw new \RuntimeException('Unknown conversion method.'),
        };

        // Both converters may return false on failure; normalize it explicitly
        // so the declared return type also holds under strict types.
        return $converted === false ? '' : $converted;
    }

    /**
     * Converts a UTF-8 string into UTF-32 without header.
     *
     * Unlike inputConv(), this does not depend on the encoding of an instance.
     *
     * @param string $utf8 the UTF-8 string
     */
    protected static function rawFromUtf8(string $utf8): string
    {
        if ($utf8 === '') {
            return '';
        }

        return substr(static::convEncoding($utf8, 'UTF-8', 'UTF-32'), \strlen(static::$utf32Header));
    }

    /**
     * Tells whether an encoding name denotes UTF-8.
     *
     * @param string $encoding the encoding name
     */
    protected static function isUtf8Encoding(string $encoding): bool
    {
        return \in_array(strtoupper($encoding), ['UTF-8', 'UTF8'], true);
    }

    /**
     * Finds the first character-aligned occurrence of a raw UTF-32 needle.
     *
     * A byte-wise search on UTF-32 data may report a hit that begins in the
     * middle of a 4-byte unit. Such hits are skipped here.
     *
     * @param string $haystack   the raw UTF-32 haystack
     * @param string $needle     the raw UTF-32 needle
     * @param int    $offset     the byte offset to start searching from
     * @param bool   $ignoreCase whether the case of ASCII letters is ignored
     *
     * @return false|int the byte offset of the hit, or false if there is none
     */
    protected static function findAligned(
        string $haystack,
        string $needle,
        int $offset = 0,
        bool $ignoreCase = false,
    ): int|false {
        $haystackLength = \strlen($haystack);
        $needleLength = \strlen($needle);

        $bytePos = $ignoreCase
            ? stripos($haystack, $needle, $offset)
            : strpos($haystack, $needle, $offset);

        while ($bytePos !== false) {
            $isAligned = ($bytePos & 3) === 0;

            // The native stripos() folds every byte, so its candidates are
            // re-checked unit by unit to reject non-ASCII look-alikes.
            if (
                $isAligned
                && (
                    !$ignoreCase
                    || static::equalsIgnoringAsciiCase(substr($haystack, $bytePos, $needleLength), $needle)
                )
            ) {
                return $bytePos;
            }

            // resume from the next unit boundary
            $resumeAt = $isAligned ? $bytePos + 4 : ($bytePos | 3) + 1;

            if ($resumeAt > $haystackLength) {
                return false;
            }

            $bytePos = $ignoreCase
                ? stripos($haystack, $needle, $resumeAt)
                : strpos($haystack, $needle, $resumeAt);
        }

        return false;
    }

    /**
     * Compares two raw UTF-32 strings, ignoring the case of ASCII letters only.
     *
     * @param string $left  the raw UTF-32 string
     * @param string $right the raw UTF-32 string
     */
    protected static function equalsIgnoringAsciiCase(string $left, string $right): bool
    {
        if ($left === $right) {
            return true;
        }

        if (\strlen($left) !== \strlen($right)) {
            return false;
        }

        $rightUnits = str_split($right, 4);

        foreach (str_split($left, 4) as $i => $leftUnit) {
            if (
                $leftUnit !== $rightUnits[$i]
                && static::foldAsciiUnit($leftUnit) !== static::foldAsciiUnit($rightUnits[$i])
            ) {
                return false;
            }
        }

        return true;
    }

    /**
     * Lower-cases a raw UTF-32 unit if it is an ASCII upper-case letter.
     *
     * A unit is an ASCII letter when it is three NUL bytes plus the letter at
     * either end (the end depends on the byte order). The letter byte sitting
     * in the middle belongs to a different code point and is left untouched.
     *
     * @param string $unit the raw UTF-32 unit
     *
     * @return string the folded unit, or the unit itself if it is not an ASCII upper-case letter
     */
    protected static function foldAsciiUnit(string $unit): string
    {
        if (\strlen($unit) !== 4) {
            return $unit;
        }

        $significant = trim($unit, "\0");

        if (\strlen($significant) !== 1) {
            return $unit;
        }

        $code = \ord($significant);

        // not within "A".."Z"
        if ($code < 0x41 || $code > 0x5A) {
            return $unit;
        }

        if ($unit[0] !== $significant && $unit[3] !== $significant) {
            return $unit;
        }

        return str_replace($significant, \chr($code | 0x20), $unit);
    }

    /**
     * Convert the output string to its original encoding.
     *
     * @param string $str The string
     */
    protected function outputConv(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // the header is put back so the converter sees the byte order it produced
        return static::convEncoding(static::$utf32Header . $str, 'UTF-32', $this->encoding);
    }

    /**
     * Convert the input string to UTF-32 without header.
     *
     * @param string $str The string
     */
    protected function inputConv(string $str): string
    {
        if ($str === '') {
            return '';
        }

        return substr(static::convEncoding($str, $this->encoding, 'UTF-32'), \strlen(static::$utf32Header));
    }
}