613 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			613 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| /**
 | |
|  *  @author          Tassos.gr <info@tassos.gr>
 | |
|  *  @link            https://www.tassos.gr
 | |
|  *  @copyright       Copyright © 2024 Tassos All Rights Reserved
 | |
|  *  @license         GNU GPLv3 <http://www.gnu.org/licenses/gpl.html> or later
 | |
| */
 | |
| 
 | |
| namespace NRFramework\Parser;
 | |
| 
 | |
| defined('_JEXEC') or die;
 | |
| 
 | |
| use NRFramework\Parser\Lexer;
 | |
| 
 | |
/**
 *  ConditionLexer
 *
 *  Splits a condition expression into tokens for the condition parser.
 *
 *  Tokens:
 *  -------
 *  and                     : 'AND' (case-insensitive, must end at a word boundary)
 *  or                      : 'OR'  (case-insensitive, must end at a word boundary)
 *  quotedvalue             : quotes ~(quotes)* quotes
 *  literal                 : one or more characters accepted by isValidChar()
 *  ident                   : a literal whose first character is 'a'..'z' | 'A'..'Z' | '_'
 *  quotes                  : '\'' | '\"'
 *  comma                   : ','
 *  l_paren                 : '('
 *  r_paren                 : ')'
 *
 *  negate_op               : '!'
 *  equals                  : '='  | 'equals'
 *  contains                : '*=' | 'contains'
 *  contains_any            : 'containsAny'
 *  contains_all            : 'containsAll'
 *  contains_only           : 'containsOnly'
 *  ends_with               : '$=' | 'endsWith'
 *  starts_with             : '^=' | 'startsWith'
 *  lt                      : '<'  | 'lt'  | 'lowerThan'
 *  lte                     : '<=' | 'lte' | 'lowerThanEqual'
 *  gt                      : '>'  | 'gt'  | 'greaterThan'
 *  gte                     : '>=' | 'gte' | 'greaterThanEqual'
 *  empty                   : 'empty'
 *
 *  param                   : '--' . ident
 *  whitespace              : ' ' | '\r' | '\n' | '\t'
 *
 *  Keyword operators ('equals', 'containsAny', ...) are matched case-insensitively.
 *  The 'quote' and 'dquote' types are registered but never emitted by this class.
 */
class ConditionLexer extends Lexer
{
    /**
     *  ConditionLexer constructor
     *
     *  Registers every token type this lexer can emit.
     *
     *  @param string $input
     */
    public function __construct($input)
    {
        parent::__construct($input);

        // NOTE(review): keep this order stable; the token collection may derive
        // type ids from registration order (not verifiable from this file).
        $types = [
            // single char tokens
            'comma',
            'quote',
            'dquote',
            'l_paren',
            'r_paren',
            // operators
            'negate_op',
            'equals',
            'contains',
            'contains_all',
            'contains_any',
            'contains_only',
            'ends_with',
            'starts_with',
            'lt',
            'gt',
            'lte',
            'gte',
            'empty',
            // logical operators
            'and',
            'or',
            // values/literals/identifiers/parameters
            'quotedvalue',
            'literal',
            'ident',
            'param',
        ];

        foreach ($types as $type)
        {
            $this->tokens->addType($type);
        }
    }

    /**
     *  Returns the next token from the input string
     *
     *  Whitespace is skipped. Once the input is exhausted an 'EOF' token is returned.
     *  Null is returned when a symbolic operator prefix ('*', '$', '^') is not
     *  followed by '='.
     *
     *  @return NRFramework\Parser\Token|null
     *  @throws Exceptions\SyntaxErrorException  on a character no rule accepts, or an unterminated quoted value
     */
    public function nextToken()
    {
        while ($this->cur !== Lexer::EOF)
        {
            if (preg_match('/\s+/', $this->cur))
            {
                $this->whitespace();
                continue;
            }

            switch ($this->cur)
            {
                // match tokens from single char predictions
                case ',':
                    return $this->comma();
                case "'":
                    return $this->quotedValue("'");
                case '"':
                    return $this->quotedValue('"');
                case '=':
                    return $this->equals();
                case '!':
                    return $this->negate_op();
                case '*':
                    return $this->contains();
                case '$':
                    return $this->ends_with();
                case '^':
                    return $this->starts_with();
                case '<':
                    return $this->lt_or_lte();
                case '>':
                    return $this->gt_or_gte();
                case '(':
                    return $this->l_paren();
                case ')':
                    return $this->r_paren();
                case '-':
                    // param() checks for the '--' prefix itself and restores the
                    // position when it cannot produce a token.
                    $token = $this->param();
                    if ($token)
                    {
                        return $token;
                    }
                    // Not a parameter: deliberately fall through so the text is
                    // lexed as a literal instead of returning null mid-input.

                // match other tokens
                default:
                    if (!$this->isValidChar())
                    {
                        throw new Exceptions\SyntaxErrorException('Invalid character: ' . $this->cur);
                    }

                    // keyword operators first, then the boolean operators
                    $token = $this->literal_ops();
                    if (!$token)
                    {
                        $token = $this->_and();
                    }
                    if (!$token)
                    {
                        $token = $this->_or();
                    }
                    if ($token)
                    {
                        return $token;
                    }

                    // anything left is a literal, promoted to an identifier when it qualifies
                    $pos   = $this->index;
                    $token = $this->literal();
                    if ($token && $this->isValidIdentifier($token->text))
                    {
                        $token = $this->tokens->create('ident', $token->text, $pos);
                    }

                    return $token;
            }
        }

        return $this->tokens->create('EOF', '<EOF>', -1);
    }

    /**
     * Checks if a string qualifies to be an identifier
     *
     * A string qualifies when its first character is an ASCII letter or an underscore.
     *
     * NOTE(review): the pattern has never been anchored at the end, so only the first
     * character is constrained. This is exactly equivalent to the previous, longer
     * expression; confirm with the parser before tightening it to the full
     * [\w\-\.]* form, because that would reclassify some 'ident' tokens as 'literal'.
     *
     * @param  string $text
     *
     * @return bool
     */
    protected function isValidIdentifier($text)
    {
        return preg_match('/^[a-zA-Z_]/', $text) === 1;
    }

    /**
     *  Check if the current character is valid for
     *  some matching rules (and, or, literal, ident)
     *
     *  Whitespace, quotes and the characters , = ! ( ) ~ * < > $ ^ are rejected.
     *
     *  @return boolean
     */
    protected function isValidChar()
    {
        $r = '/[^\s\'\",=\!\(\)\~\*\<\>\$\^]/';

        return preg_match($r, $this->cur) === 1;
    }

    /**
     *  literal : one or more consecutive characters accepted by isValidChar()
     *
     *  @return Token|null  null when no character could be read
     */
    protected function literal()
    {
        $pos = $this->index;
        $buf = '';

        // Test for EOF before isValidChar() so the EOF sentinel is never appended.
        while ($this->cur !== Lexer::EOF && $this->isValidChar())
        {
            $buf .= $this->cur;
            $this->consume();
        }

        if ($buf === '')
        {
            return null;
        }

        return $this->tokens->create('literal', $buf, $pos);
    }

    /**
     *  and : 'AND'
     *
     *  Case-insensitive. The keyword must be followed by the end of input or by a
     *  character that cannot be part of a literal, so "android" is not split into
     *  'and' + "roid".
     *
     *  @return Token|null
     */
    protected function _and()
    {
        return $this->matchKeyword('and', 'and');
    }

    /**
     *  or : 'OR'
     *
     *  Case-insensitive. The keyword must be followed by the end of input or by a
     *  character that cannot be part of a literal, so "order" is not split into
     *  'or' + "der".
     *
     *  Public (unlike _and()) for backward compatibility with existing callers.
     *
     *  @return Token|null
     */
    public function _or()
    {
        return $this->matchKeyword('or', 'or');
    }

    /**
     *  quotedvalue : quotes ~(quotes)* quotes
     *
     *  Reads a value delimited by $q and returns it without the delimiters. The other
     *  quote character opens a nested pair which is copied into the value and has to
     *  be closed before the outer quote can end the value.
     *
     *  @param  string $q  The opening quote character (' or ")
     *
     *  @return Token
     *  @throws Exceptions\SyntaxErrorException  when the input ends before every quote is closed
     */
    protected function quotedValue($q)
    {
        $pos        = $this->index;
        $otherQuote = $q === '"' ? "'" : '"';
        $open       = [$q]; // stack of quotes that still need closing
        $buf        = '';

        // skip the opening quote
        $this->consume();

        while (!empty($open))
        {
            if ($this->cur === Lexer::EOF)
            {
                throw new Exceptions\SyntaxErrorException('Missing quote at: ' . $buf);
            }

            if ($this->cur === end($open))
            {
                array_pop($open);

                // a nested closing quote belongs to the value; the outermost one does not
                if (!empty($open))
                {
                    $buf .= $this->cur;
                }
            }
            elseif ($this->cur === $otherQuote)
            {
                $open[] = $otherQuote;
                $buf   .= $otherQuote;
            }
            else
            {
                $buf .= $this->cur;
            }

            $this->consume();
        }

        return $this->tokens->create('quotedvalue', $buf, $pos);
    }

    /**
     * param : '--' . ident
     *
     * The token text is the parameter name without the leading dashes. The position
     * is restored when the input does not form a valid parameter.
     *
     * @return Token|null
     */
    protected function param()
    {
        $pos = $this->index;
        $this->mark();

        $prefix = '';
        $prefix .= $this->cur;
        $this->consume();
        $prefix .= $this->cur;
        $this->consume();

        if ($prefix === '--')
        {
            $name = '';
            while ($this->cur !== Lexer::EOF && $this->isValidChar())
            {
                $name .= $this->cur;
                $this->consume();
            }

            if ($name !== '' && $this->isValidIdentifier($name))
            {
                return $this->tokens->create('param', $name, $pos);
            }
        }

        $this->reset();

        return null;
    }

    /**
     *  equals : '='
     *
     *  @return Token
     */
    protected function equals()
    {
        return $this->singleCharToken('equals', '=');
    }

    /**
     *  negate_op : '!'
     *
     *  @return Token
     */
    protected function negate_op()
    {
        return $this->singleCharToken('negate_op', '!');
    }

    /**
     *  comma : ','
     *
     *  @return Token
     */
    protected function comma()
    {
        return $this->singleCharToken('comma', ',');
    }

    /**
     * l_paren : '('
     *
     * @return Token
     */
    protected function l_paren()
    {
        return $this->singleCharToken('l_paren', '(');
    }

    /**
     * r_paren : ')'
     *
     * @return Token
     */
    protected function r_paren()
    {
        return $this->singleCharToken('r_paren', ')');
    }

    /**
     * contains: '*='
     *
     * @return Token|null  null (position restored) when '*' is not followed by '='
     */
    protected function contains()
    {
        return $this->matchTwoCharOperator('contains', '*=');
    }

    /**
     * contains_word: '~='
     *
     * Currently unused: nextToken() never dispatches to it, '~' is rejected by
     * isValidChar(), and the 'contains_word' type is not registered in the constructor.
     *
     * @return Token|null  null (position restored) when the input is not '~='
     */
    protected function contains_word()
    {
        return $this->matchTwoCharOperator('contains_word', '~=');
    }

    /**
     * ends_with: '$='
     *
     * @return Token|null  null (position restored) when '$' is not followed by '='
     */
    protected function ends_with()
    {
        return $this->matchTwoCharOperator('ends_with', '$=');
    }

    /**
     * starts_with: '^='
     *
     * @return Token|null  null (position restored) when '^' is not followed by '='
     */
    protected function starts_with()
    {
        return $this->matchTwoCharOperator('starts_with', '^=');
    }

    /**
     * lt_or_lte: '<' | '<='
     *
     * @return Token
     */
    protected function lt_or_lte()
    {
        $pos = $this->index;
        $this->consume();

        // one character of lookahead decides between '<' and '<='
        if ($this->cur === '=')
        {
            $this->consume();

            return $this->tokens->create('lte', '<=', $pos);
        }

        return $this->tokens->create('lt', '<', $pos);
    }

    /**
     * gt_or_gte: '>' | '>='
     *
     * @return Token
     */
    protected function gt_or_gte()
    {
        $pos = $this->index;
        $this->consume();

        // one character of lookahead decides between '>' and '>='
        if ($this->cur === '=')
        {
            $this->consume();

            return $this->tokens->create('gte', '>=', $pos);
        }

        return $this->tokens->create('gt', '>', $pos);
    }

    /**
     * Literal Operators predictor
     *
     * Reads a whole literal and, when it spells one of the keyword operators
     * (case-insensitive), returns the matching operator token carrying the text as
     * written. Otherwise the position is restored.
     *
     * @return Token|null
     */
    protected function literal_ops()
    {
        // lower-cased keyword => token type
        $operators = [
            'equals'            => 'equals',
            'startswith'        => 'starts_with',
            'endswith'          => 'ends_with',
            'contains'          => 'contains',
            'containsall'       => 'contains_all',
            'containsany'       => 'contains_any',
            'containsonly'      => 'contains_only',
            'lt'                => 'lt',
            'lowerthan'         => 'lt',
            'lte'               => 'lte',
            'lowerthanequal'    => 'lte',
            'gt'                => 'gt',
            'greaterthan'       => 'gt',
            'gte'               => 'gte',
            'greaterthanequal'  => 'gte',
            // misspelt variant that used to be the only accepted long form; kept as an alias
            'greaterthantequal' => 'gte',
            'empty'             => 'empty',
        ];

        $pos = $this->index;
        $this->mark();
        $lit = $this->literal();

        if ($lit)
        {
            $keyword = strtolower($lit->text);

            if (isset($operators[$keyword]))
            {
                return $this->tokens->create($operators[$keyword], $lit->text, $pos);
            }
        }

        $this->reset();

        return null;
    }

    /**
     * Consumes the current character and returns a token of the given type for it.
     *
     * @param  string $type  Registered token type
     * @param  string $text  Token text
     *
     * @return Token
     */
    private function singleCharToken($type, $text)
    {
        $pos = $this->index;
        $this->consume();

        return $this->tokens->create($type, $text, $pos);
    }

    /**
     * Matches a two-character operator at the current position.
     *
     * @param  string $type  Token type to create
     * @param  string $text  The exact two characters to match
     *
     * @return Token|null  null (position restored) when the input differs
     */
    private function matchTwoCharOperator($type, $text)
    {
        $pos = $this->index;
        $this->mark();

        $buf = $this->cur;
        $this->consume();
        $buf .= $this->cur;
        $this->consume();

        if ($buf === $text)
        {
            return $this->tokens->create($type, $text, $pos);
        }

        $this->reset();

        return null;
    }

    /**
     * Matches a case-insensitive keyword that ends at a word boundary.
     *
     * @param  string $type     Token type to create
     * @param  string $keyword  Lower-cased keyword
     *
     * @return Token|null  null (position restored) when the keyword is not present
     */
    private function matchKeyword($type, $keyword)
    {
        $pos = $this->index;
        $this->mark();

        $buf = '';
        for ($i = 0, $length = strlen($keyword); $i < $length; $i++)
        {
            $buf .= $this->cur;
            $this->consume();
        }

        // The keyword must not run straight into more literal characters.
        $atBoundary = $this->cur === Lexer::EOF || !$this->isValidChar();

        if ($atBoundary && strtolower($buf) === $keyword)
        {
            return $this->tokens->create($type, $buf, $pos);
        }

        $this->reset();

        return null;
    }
}
 |