216 lines
4.2 KiB
PHP
216 lines
4.2 KiB
PHP
<?php
|
|
|
|
/**
|
|
* @author Tassos.gr <info@tassos.gr>
|
|
* @link https://www.tassos.gr
|
|
* @copyright Copyright © 2024 Tassos All Rights Reserved
|
|
* @license GNU GPLv3 <http://www.gnu.org/licenses/gpl.html> or later
|
|
*/
|
|
|
|
namespace NRFramework\Parser;
|
|
|
|
defined('_JEXEC') or die;
|
|
|
|
use NRFramework\Parser\Tokens;
|
|
|
|
/**
 * Lexer base class.
 *
 * Keeps the input string together with a cursor (index + current character)
 * and offers the primitive operations (consume, mark/reset, whitespace
 * skipping) on top of which concrete lexers implement nextToken().
 *
 * TODO: Rename to Tokenizer??
 */
abstract class Lexer
{
    /**
     * Sentinel stored in $cur once the end of the input has been reached.
     */
    const EOF = -1;

    /**
     * Tokens instance
     *
     * @var \NRFramework\Parser\Tokens
     */
    protected $tokens = null;

    /**
     * Input string
     *
     * @var string
     */
    protected $input;

    /**
     * Input string length (in bytes, as reported by strlen())
     *
     * @var integer
     */
    protected $length;

    /**
     * The index of the current character
     * in the input string
     *
     * @var integer
     */
    protected $index = 0;

    /**
     * Current character in input string, or Lexer::EOF when exhausted
     *
     * @var string|integer
     */
    protected $cur;

    /**
     * A Mark(position) inside the input string.
     * Used when matching ahead of the 'current' character
     *
     * @var integer
     */
    protected $mark = 0;

    /**
     * Holds the Lexer's state.
     * Carries the boolean flags 'skip_whitespace' and 'tokenize_content'.
     *
     * @var \stdClass
     */
    protected $state;

    /**
     * Lexer constructor
     *
     * @param string $input The text to tokenize
     */
    public function __construct($input)
    {
        // Normalise to string so that null/scalar input cannot break
        // strlen() or the character offset access below.
        $this->input  = (string) $input;
        $this->length = strlen($this->input);
        $this->cur    = $this->length >= 1 ? $this->input[0] : self::EOF;
        $this->tokens = new Tokens();

        // initialize state
        $this->state = new \stdClass();
        $this->state->skip_whitespace  = true;
        $this->state->tokenize_content = true;
    }

    /**
     * Returns the next token from the input string.
     *
     * @return \NRFramework\Parser\Token
     */
    abstract public function nextToken();

    /**
     * Moves up to n characters ahead in the input string and returns the
     * characters that were passed over.
     *
     * When the last character is consumed, $cur becomes Lexer::EOF while
     * $index stays on that last character. Consuming at EOF is a no-op and
     * contributes nothing to the returned string.
     *
     * @param  integer $n  Number of characters to advance
     * @return string      The consumed characters (fewer than n if the end of input is hit)
     */
    public function consume($n = 1)
    {
        $prev = '';

        for ($i = 0; $i < $n; $i++)
        {
            // Already at the end: never leak the EOF sentinel ("-1") into the result.
            if ($this->cur === self::EOF)
            {
                break;
            }

            $prev .= $this->cur;

            if (($this->index + 1) >= $this->length)
            {
                $this->cur = self::EOF;
                break;
            }

            $this->index++;
            $this->cur = $this->input[$this->index];
        }

        return $prev;
    }

    /**
     * Sets the skip_whitespace state
     *
     * @param  boolean $skip
     * @return void
     */
    public function setSkipWhitespaceState($skip = true)
    {
        $this->state->skip_whitespace = $skip;
    }

    /**
     * Sets the tokenize_content state
     *
     * @param  boolean $state
     * @return void
     */
    public function setTokenizeContentState($state = true)
    {
        $this->state->tokenize_content = $state;
    }

    /**
     * Gets the tokenize_content state
     *
     * @return boolean
     */
    public function getTokenizeContentState()
    {
        return $this->state->tokenize_content;
    }

    /**
     * Marks the current index
     *
     * @return void
     */
    public function mark()
    {
        $this->mark = $this->index;
    }

    /**
     * Reset index to previously marked position (or at the start of the stream if not marked).
     * The mark itself is cleared back to 0.
     *
     * NOTE(review): consume() never moves $index past the last character, so a
     * mark taken while $cur is EOF restores the last character, not EOF.
     *
     * @return void
     */
    public function reset()
    {
        $this->index = $this->mark;

        // Guard the offset access: with an empty input there is no character 0.
        $this->cur  = $this->index < $this->length ? $this->input[$this->index] : self::EOF;
        $this->mark = 0;
    }

    /**
     * Get the token types from the Tokens instance
     *
     * @return mixed The value returned by Tokens::getTypes() (presumably the token types array)
     */
    public function getTokensTypes()
    {
        return $this->tokens->getTypes();
    }

    /**
     * Returns the current position in the input stream
     *
     * @return integer
     */
    public function getStreamPosition()
    {
        return $this->index;
    }

    /**
     * whitespace : any character matched by PCRE \s (' ', '\t', '\n', '\r', ...)
     * Ignores any whitespace while advancing
     *
     * @return void
     */
    protected function whitespace()
    {
        // Stop explicitly at EOF instead of relying on the integer sentinel
        // being coerced to the string "-1" by preg_match().
        while ($this->cur !== self::EOF && preg_match('/\s/', $this->cur))
        {
            $this->consume();
        }
    }
}
|