first commit

This commit is contained in:
2025-06-17 11:53:18 +02:00
commit 9f0f7ba12b
8804 changed files with 1369176 additions and 0 deletions

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018-2023 Jack Cherng <jfcherng@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,400 @@
<?php
declare(strict_types=1);
namespace Jfcherng\Utility;
/**
 * An internal UTF-32 multi-byte string class.
 *
 * UTF-8 is a variable-width encoding, so mb_*() functions have to decode the
 * text to locate a character. Converting the text to UTF-32 once with iconv()
 * (or mbstring) and then working on it with the byte-oriented str*()
 * functions can possibly be faster.
 *
 * UTF-32 is a fixed-width encoding (1 character = 4 bytes), therefore a
 * character index is turned into a byte offset with "<< 2" and a byte offset
 * is turned back into a character index with ">> 2".
 *
 * Note that the first 4 bytes of a converted UTF-32 string may be a header
 * (the endian bytes). The header is stripped from the internal string and
 * prepended again right before converting back to the original encoding.
 *
 * @author Jack Cherng <jfcherng@gmail.com>
 */
class MbString extends \ArrayObject implements \Stringable
{
    public const MBSTRING_CONVMETHOD_ICONV = 1;
    public const MBSTRING_CONVMETHOD_MBSTRING = 2;

    /**
     * The way to convert text encoding (one of the MBSTRING_CONVMETHOD_* constants).
     *
     * It is detected lazily by the first constructed instance.
     *
     * @var null|int
     */
    public static $convMethod;

    /**
     * UTF-32 string without endian bytes.
     *
     * @var string
     */
    protected $str;

    /**
     * The original encoding.
     *
     * @var string
     */
    protected $encoding;

    /**
     * The endian bytes for UTF-32 (an empty string if the converter emits none).
     *
     * @var null|string
     */
    protected static $utf32Header;

    /**
     * The constructor.
     *
     * @param string $str      the string
     * @param string $encoding the encoding of $str, also used for every returned string
     *
     * @throws \RuntimeException if no usable conversion extension is available
     */
    public function __construct(string $str = '', string $encoding = 'UTF-8')
    {
        // the conversion method must be known before the header can be probed
        static::$convMethod ??= static::detectConvEncoding();
        static::$utf32Header ??= static::getUtf32Header();

        $this->encoding = $encoding;
        $this->set($str);
    }

    /**
     * Returns a string representation of the object.
     *
     * @return string the string in its original encoding
     */
    public function __toString(): string
    {
        return $this->get();
    }

    /**
     * The string setter.
     *
     * @param string $str the string (in the original encoding)
     */
    public function set(string $str): self
    {
        $this->str = $this->inputConv($str);

        return $this;
    }

    /**
     * Sets the character at the given index.
     *
     * If the index lies beyond the end of the string, the gap is filled with
     * spaces before the character is appended.
     *
     * @param int    $idx  the character index
     * @param string $char the character (only its first code point is used)
     */
    public function setAt(int $idx, string $char): self
    {
        // keep at most one code point (= 4 bytes)
        $char = substr($this->inputConv($char), 0, 4);
        $padding = $idx - $this->strlen();

        if ($padding > 0) {
            // out of bound: pad with spaces, then append
            $this->str .= $this->inputConv(str_repeat(' ', $padding)) . $char;
        } else {
            // in bound (or exactly at the end): overwrite one 4-byte slot
            $this->str = substr_replace($this->str, $char, $idx << 2, 4);
        }

        return $this;
    }

    /**
     * The string getter.
     *
     * @return string the string in its original encoding
     */
    public function get(): string
    {
        return $this->outputConv($this->str);
    }

    /**
     * The raw string getter.
     *
     * @return string the UTF-32-encoded raw string (without endian bytes)
     */
    public function getRaw(): string
    {
        return $this->str;
    }

    /**
     * Gets the character at the given index.
     *
     * @param int $idx the character index
     *
     * @return string the character in the original encoding
     */
    public function getAt(int $idx): string
    {
        return $this->outputConv($this->getAtRaw($idx));
    }

    /**
     * Gets the raw character at the given index.
     *
     * @param int $idx the character index
     *
     * @return string the UTF-32-encoded character (4 bytes)
     */
    public function getAtRaw(int $idx): string
    {
        return substr($this->str, $idx << 2, 4);
    }

    /**
     * Splits the string into characters.
     *
     * @return string[] the characters in the original encoding
     */
    public function toArray(): array
    {
        // fast path: strToChars() relies on a UTF-8 regex
        if (\in_array(strtoupper($this->encoding), ['UTF-8', 'UTF8'], true)) {
            return self::strToChars($this->get());
        }

        // any other encoding: convert each 4-byte unit on its own
        return array_map(
            fn (string $unit): string => $this->outputConv($unit),
            $this->toArrayRaw(),
        );
    }

    /**
     * Splits the string (in its original encoding) by a regular expression.
     *
     * @param string $regex the pattern handed to preg_split()
     * @param int    $limit the limit handed to preg_split()
     * @param int    $flags the flags handed to preg_split()
     *
     * @return array the pieces, or an empty array if the string is empty or splitting fails
     */
    public function toArraySplit(string $regex, int $limit = -1, $flags = 0): array
    {
        if ($this->str === '') {
            return [];
        }

        $pieces = preg_split($regex, $this->get(), $limit, $flags);

        // preg_split() yields false on failure, which must not leak through the array return type
        return $pieces === false ? [] : $pieces;
    }

    /**
     * Splits the raw string into characters.
     *
     * @return string[] the UTF-32-encoded characters (4 bytes each)
     */
    public function toArrayRaw(): array
    {
        if ($this->str === '') {
            return [];
        }

        return str_split($this->str, 4);
    }

    /**
     * Splits a UTF-8 string into characters.
     *
     * @param string $str the UTF-8 string
     *
     * @return string[] the characters, or an empty array if splitting fails
     */
    public static function strToChars(string $str): array
    {
        return preg_split('//uS', $str, -1, \PREG_SPLIT_NO_EMPTY) ?: [];
    }

    // /////////////////////////////////
    // string manipulation functions //
    // /////////////////////////////////

    /**
     * Finds the first occurrence of a needle, ignoring the case of ASCII letters.
     *
     * @param string $needle the needle (in the original encoding)
     * @param int    $offset the character index to start searching from
     *
     * @return false|int the character index, or false if not found
     */
    public function stripos(string $needle, int $offset = 0)
    {
        $bytePos = $this->findAligned($this->inputConv($needle), $offset << 2, true);

        return $bytePos === false ? false : $bytePos >> 2;
    }

    /**
     * Gets the length of the string.
     *
     * @return int the number of characters
     */
    public function strlen(): int
    {
        return \strlen($this->str) >> 2;
    }

    /**
     * Finds the first occurrence of a needle.
     *
     * @param string $needle the needle (in the original encoding)
     * @param int    $offset the character index to start searching from
     *
     * @return false|int the character index, or false if not found
     */
    public function strpos(string $needle, int $offset = 0)
    {
        $bytePos = $this->findAligned($this->inputConv($needle), $offset << 2);

        return $bytePos === false ? false : $bytePos >> 2;
    }

    /**
     * Gets a part of the string.
     *
     * @param int      $start  the character index to start from
     * @param null|int $length the number of characters, null means "up to the end"
     *
     * @return string the part in the original encoding
     */
    public function substr(int $start = 0, ?int $length = null): string
    {
        return $this->outputConv(
            substr($this->str, $start << 2, self::toByteLength($length)),
        );
    }

    /**
     * Gets a copy of the string with a part of it replaced.
     *
     * The object itself is not modified.
     *
     * @param string   $replacement the replacement (in the original encoding)
     * @param int      $start       the character index to start from
     * @param null|int $length      the number of characters to replace, null means "up to the end"
     *
     * @return string the result in the original encoding
     */
    public function substr_replace(string $replacement, int $start = 0, ?int $length = null): string
    {
        return $this->outputConv(
            substr_replace(
                $this->str,
                $this->inputConv($replacement),
                $start << 2,
                self::toByteLength($length),
            ),
        );
    }

    /**
     * Gets the string lowercased by PHP's byte-oriented strtolower().
     *
     * NOTE(review): strtolower() works on the bytes of the original encoding,
     * so presumably only ASCII letters are converted reliably.
     */
    public function strtolower(): string
    {
        return strtolower($this->get());
    }

    /**
     * Gets the string uppercased by PHP's byte-oriented strtoupper().
     *
     * NOTE(review): strtoupper() works on the bytes of the original encoding,
     * so presumably only ASCII letters are converted reliably.
     */
    public function strtoupper(): string
    {
        return strtoupper($this->get());
    }

    // //////////////////////////////
    // non-manipulative functions //
    // //////////////////////////////

    /**
     * Determines whether the string contains the needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function has(string $needle): bool
    {
        return $this->findAligned($this->inputConv($needle), 0) !== false;
    }

    /**
     * Determines whether the string starts with the needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function startsWith(string $needle): bool
    {
        // a prefix always starts at byte 0, hence it is character-aligned
        return str_starts_with($this->str, $this->inputConv($needle));
    }

    /**
     * Determines whether the string ends with the needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function endsWith(string $needle): bool
    {
        // both lengths are multiples of 4, hence a suffix is character-aligned
        return str_ends_with($this->str, $this->inputConv($needle));
    }

    // ////////////////////////////////////////////////////////////
    // in-place functions: they modify the string, return $this //
    // ////////////////////////////////////////////////////////////

    /**
     * Inserts a string at the given position (in place).
     *
     * @param string $insert   the string to insert (in the original encoding)
     * @param int    $position the character index to insert at
     */
    public function str_insert_i(string $insert, int $position): self
    {
        $this->str = substr_replace($this->str, $this->inputConv($insert), $position << 2, 0);

        return $this;
    }

    /**
     * Encloses a part of the string with an opening and a closing string (in place).
     *
     * @param string[] $closures the opening and closing strings, e.g. ['{', '}'];
     *                           a single element is used for both sides
     * @param int      $start    the character index the enclosed part starts at
     * @param null|int $length   the number of enclosed characters, null means "up to the end"
     */
    public function str_enclose_i(array $closures, int $start = 0, ?int $length = null): self
    {
        // array_values() makes the lookup independent of the caller's array keys
        $closures = array_values(array_map(
            fn ($closure) => $this->inputConv($closure),
            $closures,
        ));

        $opening = $closures[0] ?? '';
        $closing = $closures[1] ?? $opening;

        $byteStart = $start << 2;
        $byteLength = self::toByteLength($length);

        $this->str = substr_replace(
            $this->str,
            $opening . substr($this->str, $byteStart, $byteLength) . $closing,
            $byteStart,
            $byteLength,
        );

        return $this;
    }

    /**
     * Replaces all occurrences of the search string (in place).
     *
     * Only occurrences starting on a character boundary are replaced, so a
     * byte sequence spanning two neighbouring characters is never touched.
     *
     * @param string $search  the string to search for (in the original encoding)
     * @param string $replace the replacement (in the original encoding)
     */
    public function str_replace_i(string $search, string $replace): self
    {
        $search = $this->inputConv($search);
        $replace = $this->inputConv($replace);

        // nothing can be replaced with an empty search string
        if ($search === '') {
            return $this;
        }

        $searchLength = \strlen($search);
        $result = '';
        $cursor = 0;

        while (($bytePos = $this->findAligned($search, $cursor)) !== false) {
            $result .= substr($this->str, $cursor, $bytePos - $cursor) . $replace;
            $cursor = $bytePos + $searchLength;
        }

        $this->str = $result . substr($this->str, $cursor);

        return $this;
    }

    /**
     * Replaces a part of the string (in place).
     *
     * @param string   $replacement the replacement (in the original encoding)
     * @param int      $start       the character index to start from
     * @param null|int $length      the number of characters to replace, null means "up to the end"
     */
    public function substr_replace_i(string $replacement, int $start = 0, ?int $length = null): self
    {
        $this->str = substr_replace(
            $this->str,
            $this->inputConv($replacement),
            $start << 2,
            self::toByteLength($length),
        );

        return $this;
    }

    // ///////////////
    // ArrayObject //
    // ///////////////

    /**
     * Sets the character at the given index, "$obj[] = $char" appends.
     *
     * @param mixed $idx  the character index, or null to append
     * @param mixed $char the character
     */
    public function offsetSet(mixed $idx, mixed $char): void
    {
        // "$obj[] = ..." arrives here with a null index
        if ($idx === null) {
            $this->append($char);

            return;
        }

        $this->setAt($idx, $char);
    }

    /**
     * Gets the character at the given index.
     *
     * @param mixed $idx the character index
     */
    public function offsetGet(mixed $idx): string
    {
        return $this->getAt($idx);
    }

    /**
     * Determines whether the given index is below the length of the string.
     *
     * @param mixed $idx the character index, non-integers never exist
     */
    public function offsetExists(mixed $idx): bool
    {
        return \is_int($idx) ? $this->strlen() > $idx : false;
    }

    /**
     * Appends a string to the end.
     *
     * @param mixed $str the string (in the original encoding)
     */
    public function append(mixed $str): void
    {
        $this->str .= $this->inputConv($str);
    }

    /**
     * Gets the number of characters.
     */
    public function count(): int
    {
        return $this->strlen();
    }

    // //////////////////
    // misc functions //
    // //////////////////

    /**
     * Gets the UTF-32 header.
     *
     * @return string the UTF-32 header or empty string
     */
    protected static function getUtf32Header(): string
    {
        // just use any string to get the endian header, here we use "A"
        $probe = static::convEncoding('A', 'UTF-8', 'UTF-32');

        // some distributions like "php alpine" docker image won't generate the header
        return \strlen($probe) > 4 ? substr($probe, 0, 4) : '';
    }

    /**
     * Detects which extension can convert text to UTF-32.
     *
     * @return int one of the MBSTRING_CONVMETHOD_* constants
     *
     * @throws \RuntimeException if neither extension is usable
     */
    protected static function detectConvEncoding(): int
    {
        if (\function_exists('iconv') && iconv('UTF-8', 'UTF-32', 'A') !== false) {
            return static::MBSTRING_CONVMETHOD_ICONV;
        }

        if (\function_exists('mb_convert_encoding') && mb_convert_encoding('A', 'UTF-32', 'UTF-8') !== false) {
            return static::MBSTRING_CONVMETHOD_MBSTRING;
        }

        throw new \RuntimeException('Either "iconv" or "mbstring" extension is required.');
    }

    /**
     * Converts a string from one encoding to another with the detected method.
     *
     * @param string $str  the string
     * @param string $from the source encoding
     * @param string $to   the target encoding
     *
     * @throws \RuntimeException if the method is unknown or the conversion fails
     */
    protected static function convEncoding(string $str, string $from, string $to): string
    {
        $converted = match (static::$convMethod) {
            static::MBSTRING_CONVMETHOD_ICONV => iconv($from, $to, $str),
            static::MBSTRING_CONVMETHOD_MBSTRING => mb_convert_encoding($str, $to, $from),
            default => throw new \RuntimeException('Unknown conversion method.'),
        };

        // both converters signal failure with false; report it explicitly
        if (!\is_string($converted)) {
            throw new \RuntimeException(
                sprintf('Failed to convert the string from "%s" to "%s".', $from, $to),
            );
        }

        return $converted;
    }

    /**
     * Convert the output string to its original encoding.
     *
     * @param string $str the UTF-32 string without endian bytes
     */
    protected function outputConv(string $str): string
    {
        if ($str === '') {
            return '';
        }

        return static::convEncoding(static::$utf32Header . $str, 'UTF-32', $this->encoding);
    }

    /**
     * Convert the input string to UTF-32 without header.
     *
     * @param string $str the string in the original encoding
     */
    protected function inputConv(string $str): string
    {
        if ($str === '') {
            return '';
        }

        return substr(static::convEncoding($str, $this->encoding, 'UTF-32'), \strlen(static::$utf32Header));
    }

    /**
     * Converts a character count into a byte count.
     *
     * @param null|int $length the number of characters, or null
     *
     * @return null|int the number of bytes, or null if $length is null
     */
    protected static function toByteLength(?int $length): ?int
    {
        return $length === null ? null : $length << 2;
    }

    /**
     * Finds the first character-aligned occurrence of a raw needle.
     *
     * A byte-oriented search may report a hit that starts in the middle of a
     * 4-byte character; such hits are skipped.
     *
     * @param string $needle          the UTF-32 needle without endian bytes
     * @param int    $byteOffset      the byte offset to start searching from
     * @param bool   $caseInsensitive whether the case of ASCII letters is ignored
     *
     * @return false|int the byte offset (a multiple of 4), or false if not found
     */
    protected function findAligned(string $needle, int $byteOffset, bool $caseInsensitive = false): int|false
    {
        $haystackLength = \strlen($this->str);
        $needleLength = \strlen($needle);

        while (true) {
            $bytePos = $caseInsensitive
                ? stripos($this->str, $needle, $byteOffset)
                : strpos($this->str, $needle, $byteOffset);

            if ($bytePos === false) {
                return false;
            }

            if (($bytePos & 3) !== 0) {
                // the hit straddles two characters: resume at the next boundary
                $byteOffset = ($bytePos | 3) + 1;
            } elseif (
                !$caseInsensitive
                || self::isAsciiCaseMatch(substr($this->str, $bytePos, $needleLength), $needle)
            ) {
                return $bytePos;
            } else {
                // bytes of a non-ASCII character were case-folded: not a real match
                $byteOffset = $bytePos + 4;
            }

            if ($byteOffset > $haystackLength) {
                return false;
            }
        }
    }

    /**
     * Determines whether two raw strings, already equal under byte-wise
     * case-folding, differ only in ASCII characters.
     *
     * @param string $candidate the UTF-32 text found in the haystack
     * @param string $needle    the UTF-32 needle of the same byte length
     */
    protected static function isAsciiCaseMatch(string $candidate, string $needle): bool
    {
        if ($candidate === $needle) {
            return true;
        }

        // which of the 4 bytes carries an ASCII code point depends on the endianness
        $probe = substr(static::convEncoding('A', 'UTF-8', 'UTF-32'), \strlen(static::$utf32Header));
        $asciiByteIdx = strpos($probe, 'A');

        if ($asciiByteIdx === false) {
            return false;
        }

        foreach (str_split($candidate, 4) as $i => $unit) {
            if ($unit === substr($needle, $i << 2, 4)) {
                continue;
            }

            // an ASCII code point: one byte below 0x80 in the ASCII slot, three NUL bytes
            $isAscii = $unit[$asciiByteIdx] !== "\0"
                && \ord($unit[$asciiByteIdx]) < 0x80
                && substr_count($unit, "\0") === 3;

            if (!$isAscii) {
                return false;
            }
        }

        return true;
    }
}