520 lines
14 KiB
PHP
520 lines
14 KiB
PHP
<?php
|
|
namespace JmesPath;
|
|
|
|
use JmesPath\Lexer as T;
|
|
|
|
/**
 * JMESPath Pratt parser
 *
 * Converts the token stream produced by the lexer into an array based AST
 * using top-down operator precedence parsing. Each token type may have a
 * "nud_<type>" method (the token starts an expression) and/or a "led_<type>"
 * method (the token continues an expression that has already been parsed);
 * self::$bp decides how tightly a token binds to the expression on its left.
 *
 * @link http://hall.org.ua/halls/wizzard/pdf/Vaughan.Pratt.TDOP.pdf
 */
class Parser
{
    /** @var Lexer */
    private $lexer;

    /** @var array Tokens returned by the lexer for the current expression */
    private $tokens;

    /** @var array Token currently being examined */
    private $token;

    /** @var int Index of the current token inside $tokens */
    private $tpos;

    /** @var string Expression being parsed (kept for error reporting) */
    private $expression;

    /** @var array Token used once the end of the token list has been passed */
    private static $nullToken = ['type' => T::T_EOF];

    /** @var array AST node that refers to the current node */
    private static $currentNode = ['type' => T::T_CURRENT];

    /** @var array Left binding power of each token type */
    private static $bp = [
        T::T_EOF               => 0,
        T::T_QUOTED_IDENTIFIER => 0,
        T::T_IDENTIFIER        => 0,
        T::T_RBRACKET          => 0,
        T::T_RPAREN            => 0,
        T::T_COMMA             => 0,
        T::T_RBRACE            => 0,
        T::T_NUMBER            => 0,
        T::T_CURRENT           => 0,
        T::T_EXPREF            => 0,
        T::T_COLON             => 0,
        T::T_PIPE              => 1,
        T::T_OR                => 2,
        T::T_AND               => 3,
        T::T_COMPARATOR        => 5,
        T::T_FLATTEN           => 9,
        T::T_STAR              => 20,
        T::T_FILTER            => 21,
        T::T_DOT               => 40,
        T::T_NOT               => 45,
        T::T_LBRACE            => 50,
        T::T_LBRACKET          => 55,
        T::T_LPAREN            => 60,
    ];

    /** @var array Acceptable tokens after a dot token */
    private static $afterDot = [
        T::T_IDENTIFIER        => true, // foo.bar
        T::T_QUOTED_IDENTIFIER => true, // foo."bar"
        T::T_STAR              => true, // foo.*
        T::T_LBRACE            => true, // foo.{a: 0}
        T::T_LBRACKET          => true, // foo.[a, b]
        T::T_FILTER            => true, // foo.[?bar==10]
    ];

    /**
     * @param Lexer|null $lexer Lexer used to tokenize expressions. A new
     *                          Lexer is created when none is provided.
     */
    public function __construct(Lexer $lexer = null)
    {
        $this->lexer = $lexer ?: new Lexer();
    }

    /**
     * Parses a JMESPath expression into an AST
     *
     * @param string $expression JMESPath expression to compile
     *
     * @return array Returns an array based AST
     * @throws SyntaxErrorException
     */
    public function parse($expression)
    {
        $this->expression = $expression;
        $this->tokens = $this->lexer->tokenize($expression);
        // Start just before the first token so that next() loads token 0.
        $this->tpos = -1;
        $this->next();
        $result = $this->expr();

        if ($this->token['type'] === T::T_EOF) {
            return $result;
        }

        throw $this->syntax('Did not reach the end of the token stream');
    }

    /**
     * Parses an expression while rbp < lbp.
     *
     * @param int $rbp Right bound precedence
     *
     * @return array
     */
    private function expr($rbp = 0)
    {
        // Methods that do not exist for a token type are routed to __call(),
        // which converts them into a syntax error.
        $left = $this->{"nud_{$this->token['type']}"}();
        while ($rbp < self::bindingPower($this->token['type'])) {
            $left = $this->{"led_{$this->token['type']}"}($left);
        }

        return $left;
    }

    /**
     * Returns the left binding power of a token type.
     *
     * Token types that are missing from self::$bp (presumably the literal
     * token handled by nud_literal(), which has no entry in the table) get a
     * power of 0 so that they end the current expression instead of
     * triggering an undefined index error.
     *
     * @param string $type Token type
     *
     * @return int
     */
    private static function bindingPower($type)
    {
        return isset(self::$bp[$type]) ? self::$bp[$type] : 0;
    }

    /**
     * Parses an unquoted identifier into a field node.
     *
     * @return array
     */
    private function nud_identifier()
    {
        $token = $this->token;
        $this->next();

        return ['type' => 'field', 'value' => $token['value']];
    }

    /**
     * Parses a quoted identifier into a field node.
     *
     * @return array
     * @throws SyntaxErrorException if the identifier is followed by "("
     */
    private function nud_quoted_identifier()
    {
        $token = $this->token;
        $this->next();
        // A quoted identifier cannot be used as a function name.
        $this->assertNotToken(T::T_LPAREN);

        return ['type' => 'field', 'value' => $token['value']];
    }

    /**
     * Parses the current node token.
     *
     * @return array
     */
    private function nud_current()
    {
        $this->next();

        return self::$currentNode;
    }

    /**
     * Parses a literal token into a literal node.
     *
     * @return array
     */
    private function nud_literal()
    {
        $token = $this->token;
        $this->next();

        return ['type' => 'literal', 'value' => $token['value']];
    }

    /**
     * Parses an expression reference; the referenced expression becomes the
     * only child of the node.
     *
     * @return array
     */
    private function nud_expref()
    {
        $this->next();

        return ['type' => T::T_EXPREF, 'children' => [$this->expr(self::$bp[T::T_EXPREF])]];
    }

    /**
     * Parses a negation; the negated expression becomes the only child of the
     * node.
     *
     * @return array
     */
    private function nud_not()
    {
        $this->next();

        return ['type' => T::T_NOT, 'children' => [$this->expr(self::$bp[T::T_NOT])]];
    }

    /**
     * Parses a parenthesized expression and returns the inner expression.
     *
     * @return array
     * @throws SyntaxErrorException if the closing parenthesis is missing
     */
    private function nud_lparen()
    {
        $this->next();
        $result = $this->expr(0);
        if ($this->token['type'] !== T::T_RPAREN) {
            throw $this->syntax('Unclosed `(`');
        }
        $this->next();

        return $result;
    }

    /**
     * Parses a multi-select hash (e.g., {a: foo, b: bar}).
     *
     * @return array
     */
    private function nud_lbrace()
    {
        static $validKeys = [T::T_QUOTED_IDENTIFIER => true, T::T_IDENTIFIER => true];
        $this->next($validKeys);
        $pairs = [];

        do {
            $pairs[] = $this->parseKeyValuePair();
            if ($this->token['type'] === T::T_COMMA) {
                // A comma must be followed by the key of the next pair.
                $this->next($validKeys);
            }
        } while ($this->token['type'] !== T::T_RBRACE);

        // Consume the closing brace
        $this->next();

        return ['type' => 'multi_select_hash', 'children' => $pairs];
    }

    /**
     * Parses a flatten token that starts an expression by applying it to the
     * current node.
     *
     * @return array
     */
    private function nud_flatten()
    {
        return $this->led_flatten(self::$currentNode);
    }

    /**
     * Parses a filter that starts an expression by applying it to the current
     * node.
     *
     * @return array
     */
    private function nud_filter()
    {
        return $this->led_filter(self::$currentNode);
    }

    /**
     * Parses a wildcard that starts an expression as an object projection of
     * the current node.
     *
     * @return array
     */
    private function nud_star()
    {
        return $this->parseWildcardObject(self::$currentNode);
    }

    /**
     * Parses a "[" that starts an expression: an index or slice, an array
     * wildcard ("[*]"), or a multi-select list.
     *
     * @return array
     */
    private function nud_lbracket()
    {
        $this->next();
        $type = $this->token['type'];
        if ($type === T::T_NUMBER || $type === T::T_COLON) {
            return $this->parseArrayIndexExpression();
        } elseif ($type === T::T_STAR && $this->lookahead() === T::T_RBRACKET) {
            return $this->parseWildcardArray();
        } else {
            return $this->parseMultiSelectList();
        }
    }

    /**
     * Parses a "[" that follows an expression: an index or slice of $left, or
     * an array wildcard projection of $left.
     *
     * @param array $left Node found to the left of the bracket
     *
     * @return array
     * @throws SyntaxErrorException if "[" is not followed by a number, a
     *                              colon, or a star
     */
    private function led_lbracket(array $left)
    {
        static $nextTypes = [T::T_NUMBER => true, T::T_COLON => true, T::T_STAR => true];
        $this->next($nextTypes);
        switch ($this->token['type']) {
            case T::T_NUMBER:
            case T::T_COLON:
                return [
                    'type'     => 'subexpression',
                    'children' => [$left, $this->parseArrayIndexExpression()]
                ];
            default:
                // Only a star can reach this branch because of $nextTypes.
                return $this->parseWildcardArray($left);
        }
    }

    /**
     * Parses a flatten token into an array projection whose source is the
     * flattened $left node.
     *
     * @param array $left Node to flatten
     *
     * @return array
     */
    private function led_flatten(array $left)
    {
        $this->next();

        return [
            'type'     => 'projection',
            'from'     => 'array',
            'children' => [
                ['type' => T::T_FLATTEN, 'children' => [$left]],
                $this->parseProjection(self::$bp[T::T_FLATTEN])
            ]
        ];
    }

    /**
     * Parses a dot: either an object wildcard projection ("foo.*") or a
     * subexpression made of $left and whatever follows the dot.
     *
     * @param array $left Node found to the left of the dot
     *
     * @return array
     * @throws SyntaxErrorException if the dot is followed by a token that is
     *                              not listed in self::$afterDot
     */
    private function led_dot(array $left)
    {
        $this->next(self::$afterDot);

        if ($this->token['type'] === T::T_STAR) {
            return $this->parseWildcardObject($left);
        }

        return [
            'type'     => 'subexpression',
            'children' => [$left, $this->parseDot(self::$bp[T::T_DOT])]
        ];
    }

    /**
     * Parses an "or" expression with $left as its first child.
     *
     * @param array $left Left hand side of the expression
     *
     * @return array
     */
    private function led_or(array $left)
    {
        $this->next();

        return [
            'type'     => T::T_OR,
            'children' => [$left, $this->expr(self::$bp[T::T_OR])]
        ];
    }

    /**
     * Parses an "and" expression with $left as its first child.
     *
     * @param array $left Left hand side of the expression
     *
     * @return array
     */
    private function led_and(array $left)
    {
        $this->next();

        return [
            'type'     => T::T_AND,
            'children' => [$left, $this->expr(self::$bp[T::T_AND])]
        ];
    }

    /**
     * Parses a pipe expression with $left as its first child.
     *
     * @param array $left Left hand side of the expression
     *
     * @return array
     */
    private function led_pipe(array $left)
    {
        $this->next();

        return [
            'type'     => T::T_PIPE,
            'children' => [$left, $this->expr(self::$bp[T::T_PIPE])]
        ];
    }

    /**
     * Parses a function call. $left provides the function name and the
     * expressions found between the parentheses become the arguments.
     *
     * @param array $left Field node that names the function
     *
     * @return array
     * @throws SyntaxErrorException if $left is not a field node
     */
    private function led_lparen(array $left)
    {
        // Only a field node carries a usable function name. Other nodes
        // (e.g., the left side of `foo[0](bar)` or `@(bar)`) either have no
        // "value" key at all or hold a value that is not a name.
        if ($left['type'] !== 'field') {
            throw $this->syntax('Function calls must be preceded by an identifier');
        }

        $args = [];
        $this->next();

        while ($this->token['type'] !== T::T_RPAREN) {
            $args[] = $this->expr(0);
            if ($this->token['type'] === T::T_COMMA) {
                $this->next();
            }
        }

        // Consume the closing parenthesis
        $this->next();

        return [
            'type'     => 'function',
            'value'    => $left['value'],
            'children' => $args
        ];
    }

    /**
     * Parses a filter expression into an array projection of $left whose
     * right hand side is a condition node.
     *
     * @param array $left Node that is being filtered
     *
     * @return array
     * @throws SyntaxErrorException if the filter is not closed with "]"
     */
    private function led_filter(array $left)
    {
        $this->next();
        $expression = $this->expr();
        if ($this->token['type'] !== T::T_RBRACKET) {
            throw $this->syntax('Expected a closing rbracket for the filter');
        }

        $this->next();
        $rhs = $this->parseProjection(self::$bp[T::T_FILTER]);

        return [
            'type'     => 'projection',
            'from'     => 'array',
            'children' => [
                $left ?: self::$currentNode,
                [
                    'type'     => 'condition',
                    'children' => [$expression, $rhs]
                ]
            ]
        ];
    }

    /**
     * Parses a comparison. The comparator token's value is stored as the
     * value of the node.
     *
     * @param array $left Left hand side of the comparison
     *
     * @return array
     */
    private function led_comparator(array $left)
    {
        $token = $this->token;
        $this->next();

        return [
            'type'     => T::T_COMPARATOR,
            'value'    => $token['value'],
            'children' => [$left, $this->expr(self::$bp[T::T_COMPARATOR])]
        ];
    }

    /**
     * Parses the right hand side of a projection.
     *
     * @param int $bp Binding power used to parse the right hand side
     *
     * @return array
     * @throws SyntaxErrorException if the token after the projection cannot
     *                              continue it
     */
    private function parseProjection($bp)
    {
        $type = $this->token['type'];
        if (self::bindingPower($type) < 10) {
            // Tokens that bind this weakly end the projection, so each
            // projected element is returned as is.
            return self::$currentNode;
        } elseif ($type === T::T_DOT) {
            $this->next(self::$afterDot);
            return $this->parseDot($bp);
        } elseif ($type === T::T_LBRACKET || $type === T::T_FILTER) {
            return $this->expr($bp);
        }

        throw $this->syntax('Syntax error after projection');
    }

    /**
     * Parses the expression that follows a dot. A "[" in this position always
     * starts a multi-select list.
     *
     * @param int $bp Binding power used to parse the expression
     *
     * @return array
     */
    private function parseDot($bp)
    {
        if ($this->token['type'] === T::T_LBRACKET) {
            $this->next();
            return $this->parseMultiSelectList();
        }

        return $this->expr($bp);
    }

    /**
     * Parses one "key: expression" pair of a multi-select hash. The current
     * token is expected to be the key.
     *
     * @return array
     * @throws SyntaxErrorException if the key is not followed by a colon
     */
    private function parseKeyValuePair()
    {
        static $validColon = [T::T_COLON => true];
        $key = $this->token['value'];
        $this->next($validColon);
        // Skip over the colon
        $this->next();

        return [
            'type'     => 'key_val_pair',
            'value'    => $key,
            'children' => [$this->expr()]
        ];
    }

    /**
     * Parses an object wildcard into an object projection.
     *
     * @param array|null $left Node being projected. The current node is used
     *                         when omitted.
     *
     * @return array
     */
    private function parseWildcardObject(array $left = null)
    {
        // Consume the star
        $this->next();

        return [
            'type'     => 'projection',
            'from'     => 'object',
            'children' => [
                $left ?: self::$currentNode,
                $this->parseProjection(self::$bp[T::T_STAR])
            ]
        ];
    }

    /**
     * Parses an array wildcard into an array projection. The current token is
     * expected to be the star.
     *
     * @param array|null $left Node being projected. The current node is used
     *                         when omitted.
     *
     * @return array
     * @throws SyntaxErrorException if the star is not followed by "]"
     */
    private function parseWildcardArray(array $left = null)
    {
        static $getRbracket = [T::T_RBRACKET => true];
        $this->next($getRbracket);
        // Consume the closing bracket
        $this->next();

        return [
            'type'     => 'projection',
            'from'     => 'array',
            'children' => [
                $left ?: self::$currentNode,
                $this->parseProjection(self::$bp[T::T_STAR])
            ]
        ];
    }

    /**
     * Parses an array index expression (e.g., [0], [1:2:3])
     *
     * @return array Returns an index node when no colon is present, or an
     *               array projection over a slice node otherwise.
     * @throws SyntaxErrorException
     */
    private function parseArrayIndexExpression()
    {
        static $matchNext = [
            T::T_NUMBER   => true,
            T::T_COLON    => true,
            T::T_RBRACKET => true
        ];

        $pos = 0;
        $parts = [null, null, null];
        $expected = $matchNext;

        do {
            if ($this->token['type'] === T::T_COLON) {
                // A colon moves on to the next part of the slice
                $pos++;
                $expected = $matchNext;
            } elseif ($this->token['type'] === T::T_NUMBER) {
                $parts[$pos] = $this->token['value'];
                // A number cannot be directly followed by another number
                $expected = [T::T_COLON => true, T::T_RBRACKET => true];
            }
            $this->next($expected);
        } while ($this->token['type'] !== T::T_RBRACKET);

        // Consume the closing bracket
        $this->next();

        if ($pos === 0) {
            // No colons were found so this is a simple index extraction
            return ['type' => 'index', 'value' => $parts[0]];
        }

        if ($pos > 2) {
            throw $this->syntax('Invalid array slice syntax: too many colons');
        }

        // At least one colon was found, so this is a slice (e.g., [2:])
        return [
            'type'     => 'projection',
            'from'     => 'array',
            'children' => [
                ['type' => 'slice', 'value' => $parts],
                $this->parseProjection(self::$bp[T::T_STAR])
            ]
        ];
    }

    /**
     * Parses the elements of a multi-select list (e.g., [foo, bar]). The
     * opening bracket must already have been consumed.
     *
     * @return array
     * @throws SyntaxErrorException if a comma is directly followed by "]"
     */
    private function parseMultiSelectList()
    {
        $nodes = [];

        do {
            $nodes[] = $this->expr();
            if ($this->token['type'] === T::T_COMMA) {
                $this->next();
                $this->assertNotToken(T::T_RBRACKET);
            }
        } while ($this->token['type'] !== T::T_RBRACKET);

        // Consume the closing bracket
        $this->next();

        return ['type' => 'multi_select_list', 'children' => $nodes];
    }

    /**
     * Creates (but does not throw) a syntax error for the current token.
     *
     * @param mixed $msg Error message. next() passes its array of expected
     *                   token types here instead of a message.
     *
     * @return SyntaxErrorException
     */
    private function syntax($msg)
    {
        return new SyntaxErrorException($msg, $this->token, $this->expression);
    }

    /**
     * Returns the type of the token after the current one without advancing.
     *
     * @return string Token type, or T::T_EOF when there are no more tokens
     */
    private function lookahead()
    {
        return (!isset($this->tokens[$this->tpos + 1]))
            ? T::T_EOF
            : $this->tokens[$this->tpos + 1]['type'];
    }

    /**
     * Advances to the next token. self::$nullToken becomes the current token
     * once the end of the token list has been passed.
     *
     * @param array|null $match Optional hash of acceptable token types
     *                          (token type => true)
     *
     * @throws SyntaxErrorException if $match is provided and does not contain
     *                              the type of the new token
     */
    private function next(array $match = null)
    {
        if (!isset($this->tokens[$this->tpos + 1])) {
            $this->token = self::$nullToken;
        } else {
            $this->token = $this->tokens[++$this->tpos];
        }

        if ($match && !isset($match[$this->token['type']])) {
            throw $this->syntax($match);
        }
    }

    /**
     * Ensures that the current token is not of the given type.
     *
     * @param string $type Token type that is not allowed
     *
     * @throws SyntaxErrorException if the current token has that type
     */
    private function assertNotToken($type)
    {
        if ($this->token['type'] === $type) {
            throw $this->syntax("Token {$this->tpos} not allowed to be $type");
        }
    }

    /**
     * @internal Handles undefined tokens without paying the cost of validation
     *
     * expr() calls "nud_<type>" and "led_<type>" methods without checking
     * that they exist. Calls to missing ones end up here and are reported as
     * a syntax error that lists the token types the class does handle.
     *
     * @param string $method Name of the method that was called
     * @param array  $args   Arguments of the call (unused)
     *
     * @throws SyntaxErrorException    for missing nud_ and led_ methods
     * @throws \BadMethodCallException for any other method
     */
    public function __call($method, $args)
    {
        $prefix = substr($method, 0, 4);
        if ($prefix === 'nud_' || $prefix === 'led_') {
            $token = substr($method, 4);
            // Existing methods that share the prefix of the missing method
            $handlers = array_filter(
                get_class_methods($this),
                function ($i) use ($prefix) {
                    return strpos($i, $prefix) === 0;
                }
            );
            // Strip the prefix and quote each token type
            $expected = array_map(function ($i) {
                return '"' . substr($i, 4) . '"';
            }, $handlers);
            $message = "Unexpected \"$token\" token ($method). Expected one of"
                . " the following tokens: "
                . implode(', ', $expected);
            throw $this->syntax($message);
        }

        throw new \BadMethodCallException("Call to undefined method $method");
    }
}
|