Files
conservatorio-tomadini/plugins/system/nrframework/NRFramework/Parser/ConditionLexer.php
2024-12-31 11:07:09 +01:00

613 lines
16 KiB
PHP

<?php
/**
* @author Tassos.gr <info@tassos.gr>
* @link https://www.tassos.gr
* @copyright Copyright © 2024 Tassos All Rights Reserved
* @license GNU GPLv3 <http://www.gnu.org/licenses/gpl.html> or later
*/
namespace NRFramework\Parser;
defined('_JEXEC') or die;
use NRFramework\Parser\Lexer;
/**
* ConditionLexer
*
* Tokens:
* -------
* and : 'AND'
* or : 'OR'
* quotedvalue : quotes ~(quotes)* quotes
* literal : ~(whitespace | quotes)+
* ident : ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '0'..'9' | '_' | '\-' | '\.')*
* quotes : '\'' | '\"'
* comma : ','
* l_paren : '('
* r_paren : ')'
*
* negate_op : '!'
* equals : '=' | 'equals'
* contains : '*=' | 'contains'
* contains_any : 'containsAny'
* contains_all : 'containsAll'
* contains_only : 'containsOnly'
* ends_with : '$=' | 'endsWith'
* starts_with : '^=' | 'startsWith'
* lt : '<' | 'lt' | 'lowerThan'
* lte : '<=' | 'lte' | 'lowerThanEqual'
* gt : '>' | 'gt' | 'greaterThan'
* gte : '>=' | 'gte' | 'greaterThanEqual'
* empty : 'empty'
*
* param : '--' . ident
* whitespace : ' ' | '\r' | '\n' | '\t'
*/
class ConditionLexer extends Lexer
{
/**
 * Builds the lexer for the given input and registers every token type
 * this lexer refers to.
 *
 * @param string $input The condition expression handed to the parent Lexer
 */
public function __construct($input)
{
    parent::__construct($input);

    // The registration order below is kept exactly as it was, in case the
    // token registry derives anything from insertion order.
    $types = [
        // single char tokens
        'comma',
        'quote',
        'dquote',
        'l_paren',
        'r_paren',
        // operators
        'negate_op',
        'equals',
        'contains',
        'contains_all',
        'contains_any',
        'contains_only',
        'ends_with',
        'starts_with',
        'lt',
        'gt',
        'lte',
        'gte',
        'empty',
        // logical operators
        'and',
        'or',
        // values/literals/identifiers/parameters
        'quotedvalue',
        'literal',
        'ident',
        'param',
    ];

    foreach ($types as $type)
    {
        $this->tokens->addType($type);
    }
}
/**
 * Returns the next token from the input string.
 *
 * Whitespace handling is delegated to whitespace(). Punctuation and symbolic
 * operators are predicted from the current character; anything else is
 * tried, in order, as a literal operator keyword, 'and', 'or' and finally a
 * plain literal, which is re-created as an 'ident' token when its text
 * passes isValidIdentifier().
 *
 * @return Token The next token, or the EOF token once the input is exhausted
 * @throws Exceptions\SyntaxErrorException On an invalid character, on an
 *         operator prefix ('*', '$', '^') that is not followed by '=', or on
 *         a '--' that is not followed by a valid parameter name
 */
public function nextToken()
{
    while ($this->cur !== Lexer::EOF)
    {
        if (preg_match('/\s+/', $this->cur))
        {
            $this->whitespace();
            continue;
        }

        switch ($this->cur)
        {
            // match tokens from single char predictions
            case ',':
                return $this->comma();
            case "'":
                return $this->quotedValue("'");
            case '"':
                return $this->quotedValue('"');
            case '=':
                return $this->equals();
            case '!':
                return $this->negate_op();
            // The two-character operators return nothing when the '=' is
            // missing; surface that as a syntax error instead of handing a
            // null "token" to the caller.
            case '*':
                return $this->tokenOrFail($this->contains(), 'Invalid character: *');
            case '$':
                return $this->tokenOrFail($this->ends_with(), 'Invalid character: $');
            case '^':
                return $this->tokenOrFail($this->starts_with(), 'Invalid character: ^');
            case '<':
                return $this->lt_or_lte();
            case '>':
                return $this->gt_or_gte();
            case '(':
                return $this->l_paren();
            case ')':
                return $this->r_paren();
            case '-':
                // peek at the next two characters, then rewind in every case
                $this->mark();
                $next_chars = $this->consume(2);
                $this->reset();

                if ($next_chars === '--')
                {
                    return $this->tokenOrFail($this->param(), 'Invalid parameter name after: --');
                }
                // a single '-' starts an ordinary literal: fall through

            // match other tokens
            default:
                if (!$this->isValidChar())
                {
                    throw new Exceptions\SyntaxErrorException('Invalid character: ' . $this->cur);
                }

                // try the literal operators first, then the boolean operators
                $token = $this->literal_ops();

                if (!$token)
                {
                    $token = $this->_and();
                }

                if (!$token)
                {
                    $token = $this->_or();
                }

                if ($token)
                {
                    return $token;
                }

                // if we get here the token is certainly a literal
                $pos   = $this->index;
                $token = $this->literal();

                if (!$token)
                {
                    // isValidChar() accepted the current character above, so
                    // literal() is expected to match; guard anyway rather
                    // than return null.
                    throw new Exceptions\SyntaxErrorException('Invalid character: ' . $this->cur);
                }

                // check if the literal also qualifies to be an identifier
                if ($this->isValidIdentifier($token->text))
                {
                    $token = $this->tokens->create('ident', $token->text, $pos);
                }

                return $token;
        }
    }

    return $this->tokens->create('EOF', '<EOF>', -1);
}

/**
 * Returns the given token, or raises a syntax error when a sub-lexer matched nothing.
 *
 * @param Token|null $token   Result of a sub-lexer call
 * @param string     $message Error message used when $token is empty
 *
 * @return Token
 * @throws Exceptions\SyntaxErrorException
 */
private function tokenOrFail($token, $message)
{
    if (!$token)
    {
        throw new Exceptions\SyntaxErrorException($message);
    }

    return $token;
}
/**
 * Checks if a string qualifies to be an identifier.
 *
 * An identifier starts with a letter or an underscore and continues with
 * word characters (letters, digits, underscore), dashes or dots only.
 *
 * @param string $text The candidate text
 *
 * @return bool
 */
protected function isValidIdentifier($text)
{
    // Anchored at both ends: the previous pattern left its multi-character
    // alternative open-ended, so any text that merely *started* with a letter
    // or underscore was accepted. \z (rather than $) also rejects a trailing
    // newline.
    return preg_match('/^[a-zA-Z_][\w\-\.]*\z/', $text) === 1;
}
/**
 * Tells whether the current character may appear in a bare word
 * (used by the and, or, literal, ident and param rules).
 *
 * Whitespace, both quote characters and , = ! ( ) ~ * < > $ ^ are rejected.
 *
 * @return int|false The raw preg_match() result (1 when the character is allowed)
 */
protected function isValidChar()
{
    return preg_match('/[^\s\'\",=\!\(\)\~\*\<\>\$\^]/', $this->cur);
}
/**
 * literal : one or more consecutive characters accepted by isValidChar()
 *
 * @return Token|null A 'literal' token, or null when nothing could be read
 */
protected function literal()
{
    $start = $this->index;
    $text  = '';

    // Collect characters until one is rejected or the input ends.
    while ($this->isValidChar())
    {
        $text .= $this->cur;
        $this->consume();

        if ($this->cur === Lexer::EOF)
        {
            break;
        }
    }

    if ($text === '')
    {
        return null;
    }

    return $this->tokens->create('literal', $text, $start);
}
/**
 * and : 'AND' (case-insensitive)
 *
 * The keyword only matches as a whole word: it must be followed by the end
 * of the input or by a character that cannot continue a bare word, so that
 * words such as "android" are left for the literal rule.
 *
 * @return Token|null An 'and' token, or null (with the position restored)
 */
protected function _and()
{
    $pos = $this->index;
    $this->mark();

    // Read up to three characters, never consuming past the end of input.
    $buf = '';

    for ($i = 0; $i < 3 && $this->cur !== Lexer::EOF; $i++)
    {
        $buf .= $this->cur;
        $this->consume();
    }

    $atWordEnd = $this->cur === Lexer::EOF || !$this->isValidChar();

    if (strtolower($buf) === 'and' && $atWordEnd)
    {
        return $this->tokens->create('and', $buf, $pos);
    }

    $this->reset();

    return null;
}
/**
 * or : 'OR' (case-insensitive)
 *
 * The keyword only matches as a whole word: it must be followed by the end
 * of the input or by a character that cannot continue a bare word, so that
 * words such as "order" are left for the literal rule.
 *
 * @return Token|null An 'or' token, or null (with the position restored)
 */
public function _or()
{
    $pos = $this->index;
    $this->mark();

    // Read up to two characters, never consuming past the end of input.
    $buf = '';

    for ($i = 0; $i < 2 && $this->cur !== Lexer::EOF; $i++)
    {
        $buf .= $this->cur;
        $this->consume();
    }

    $atWordEnd = $this->cur === Lexer::EOF || !$this->isValidChar();

    if (strtolower($buf) === 'or' && $atWordEnd)
    {
        return $this->tokens->create('or', $buf, $pos);
    }

    $this->reset();

    return null;
}
/**
 * quotedvalue : quotes ~(quotes)* quotes
 *
 * Reads a quoted value that starts at the current character. A quote of
 * the other kind opens a nested level that must be closed by the same
 * kind; nested quotes are kept in the value, the outermost pair is not.
 *
 * @param string $q The opening quote character (' or ")
 *
 * @return Token A 'quotedvalue' token holding the text between the outer quotes
 * @throws Exceptions\SyntaxErrorException When the input ends before every quote is closed
 */
protected function quotedValue($q)
{
    $start    = $this->index;
    $opposite = ($q === '"') ? "'" : '"';
    $value    = '';

    // Stack of currently open quotes; the opening quote is the bottom entry.
    $open = [$q];
    $this->consume();

    do
    {
        if ($this->cur === Lexer::EOF)
        {
            throw new Exceptions\SyntaxErrorException('Missing quote at: ' . $value);
        }

        $char         = $this->cur;
        $closesOutest = false;

        if ($char === $open[count($open) - 1])
        {
            // closes the innermost open quote
            array_pop($open);
            $closesOutest = count($open) === 0;
        }
        elseif ($char === $opposite)
        {
            // opens a nested quote of the other kind
            $open[] = $opposite;
        }

        // Everything is part of the value except the outermost closing quote.
        if (!$closesOutest)
        {
            $value .= $char;
        }

        $this->consume();
    }
    while (count($open) > 0);

    return $this->tokens->create('quotedvalue', $value, $start);
}
/**
 * param : '--' . ident
 *
 * @return Token|null A 'param' token carrying the name without the leading
 *                    dashes, or null (with the position restored)
 */
protected function param()
{
    $start = $this->index;
    $this->mark();

    // The two leading characters must be the '--' prefix.
    $prefix = $this->cur;
    $this->consume();
    $prefix .= $this->cur;
    $this->consume();

    if ($prefix !== '--')
    {
        $this->reset();

        return null;
    }

    // Collect the parameter name up to the first rejected character or EOF.
    $name = '';

    while ($this->isValidChar())
    {
        $name .= $this->cur;
        $this->consume();

        if ($this->cur === Lexer::EOF)
        {
            break;
        }
    }

    if ($name !== '' && $this->isValidIdentifier($name))
    {
        return $this->tokens->create('param', $name, $start);
    }

    $this->reset();

    return null;
}
/**
 * equals : '='
 *
 * Consumes the current character and emits an 'equals' token for it.
 *
 * @return Token
 */
protected function equals()
{
    $start = $this->index;
    $this->consume();

    return $this->tokens->create('equals', '=', $start);
}
/**
 * negate_op : '!'
 *
 * Consumes the current character and emits a 'negate_op' token for it.
 *
 * @return Token
 */
protected function negate_op()
{
    $start = $this->index;
    $this->consume();

    return $this->tokens->create('negate_op', '!', $start);
}
/**
 * comma : ','
 *
 * Consumes the current character and emits a 'comma' token for it.
 *
 * @return Token
 */
protected function comma()
{
    $start = $this->index;
    $this->consume();

    return $this->tokens->create('comma', ',', $start);
}
/**
 * l_paren : '('
 *
 * Consumes the current character and emits an 'l_paren' token for it.
 *
 * @return Token
 */
protected function l_paren()
{
    $start = $this->index;
    $this->consume();

    return $this->tokens->create('l_paren', '(', $start);
}
/**
 * r_paren : ')'
 *
 * Consumes the current character and emits an 'r_paren' token for it.
 *
 * @return Token
 */
protected function r_paren()
{
    $start = $this->index;
    $this->consume();

    return $this->tokens->create('r_paren', ')', $start);
}
/**
 * contains : '*='
 *
 * @return Token|null A 'contains' token, or null (with the position
 *                    restored) when the next two characters are not '*='
 */
protected function contains()
{
    $start = $this->index;
    $this->mark();

    // read the two-character operator candidate
    $pair = $this->cur;
    $this->consume();
    $pair .= $this->cur;
    $this->consume();

    if ($pair !== '*=')
    {
        $this->reset();

        return null;
    }

    return $this->tokens->create('contains', '*=', $start);
}
/**
 * contains_word : '~='
 *
 * NOTE(review): nothing in this class calls this method and the constructor
 * does not register a 'contains_word' type — confirm whether it is still needed.
 *
 * @return Token|null A 'contains_word' token, or null (with the position
 *                    restored) when the next two characters are not '~='
 */
protected function contains_word()
{
    $start = $this->index;
    $this->mark();

    // read the two-character operator candidate
    $pair = $this->cur;
    $this->consume();
    $pair .= $this->cur;
    $this->consume();

    if ($pair !== '~=')
    {
        $this->reset();

        return null;
    }

    return $this->tokens->create('contains_word', '~=', $start);
}
/**
 * ends_with : '$='
 *
 * @return Token|null An 'ends_with' token, or null (with the position
 *                    restored) when the next two characters are not '$='
 */
protected function ends_with()
{
    $start = $this->index;
    $this->mark();

    // read the two-character operator candidate
    $pair = $this->cur;
    $this->consume();
    $pair .= $this->cur;
    $this->consume();

    if ($pair !== '$=')
    {
        $this->reset();

        return null;
    }

    return $this->tokens->create('ends_with', '$=', $start);
}
/**
 * starts_with : '^='
 *
 * @return Token|null A 'starts_with' token, or null (with the position
 *                    restored) when the next two characters are not '^='
 */
protected function starts_with()
{
    $start = $this->index;
    $this->mark();

    // read the two-character operator candidate
    $pair = $this->cur;
    $this->consume();
    $pair .= $this->cur;
    $this->consume();

    if ($pair !== '^=')
    {
        $this->reset();

        return null;
    }

    return $this->tokens->create('starts_with', '^=', $start);
}
/**
 * lt_or_lte : '<' | '<='
 *
 * Called with '<' as the current character; always produces a token.
 *
 * @return Token An 'lte' token for '<=', otherwise an 'lt' token for '<'
 */
protected function lt_or_lte()
{
    $pos = $this->index;
    $this->mark();

    // look at the current and the following character
    $buf = $this->cur;
    $this->consume();
    $buf .= $this->cur;
    $this->consume();

    if ($buf === '<=')
    {
        return $this->tokens->create('lte', '<=', $pos);
    }

    // Not '<=': rewind and consume only the single '<'.
    $this->reset();
    $this->consume();

    return $this->tokens->create('lt', '<', $pos);
}
/**
 * gt_or_gte : '>' | '>='
 *
 * Called with '>' as the current character; always produces a token.
 *
 * @return Token A 'gte' token for '>=', otherwise a 'gt' token for '>'
 */
protected function gt_or_gte()
{
    $pos = $this->index;
    $this->mark();

    // look at the current and the following character
    $buf = $this->cur;
    $this->consume();
    $buf .= $this->cur;
    $this->consume();

    if ($buf === '>=')
    {
        return $this->tokens->create('gte', '>=', $pos);
    }

    // Not '>=': rewind and consume only the single '>'.
    $this->reset();
    $this->consume();

    return $this->tokens->create('gt', '>', $pos);
}
/**
 * Literal Operators predictor
 *
 * Reads a literal and, when its lower-cased text is one of the operator
 * keywords, returns the matching operator token carrying the text as it was
 * written. Otherwise the position is restored.
 *
 * @return Token|null The operator token, or null when the literal is not an operator keyword
 */
protected function literal_ops()
{
    // lower-cased keyword => token type
    $keywords = [
        'equals'            => 'equals',
        'startswith'        => 'starts_with',
        'endswith'          => 'ends_with',
        'contains'          => 'contains',
        'containsall'       => 'contains_all',
        'containsany'       => 'contains_any',
        'containsonly'      => 'contains_only',
        'lt'                => 'lt',
        'lowerthan'         => 'lt',
        'lte'               => 'lte',
        'lowerthanequal'    => 'lte',
        'gt'                => 'gt',
        'greaterthan'       => 'gt',
        'gte'               => 'gte',
        'greaterthanequal'  => 'gte',
        // misspelling that used to be the only accepted long form of 'gte';
        // kept so expressions relying on it keep working
        'greaterthantequal' => 'gte',
        'empty'             => 'empty',
    ];

    $pos = $this->index;
    $this->mark();

    $lit = $this->literal();

    if ($lit)
    {
        $keyword = strtolower($lit->text);

        if (isset($keywords[$keyword]))
        {
            return $this->tokens->create($keywords[$keyword], $lit->text, $pos);
        }
    }

    $this->reset();

    return null;
}
}