block delimiter represents a block of the given type.
	 *
	 * Since the "core" namespace may be implicit, it's allowable to pass
	 * either the fully-qualified block type with namespace and block name
	 * as well as the shorthand version only containing the block name, if
	 * the desired block is in the "core" namespace.
	 *
	 * Example:
	 *
	 *     $is_core_paragraph = $scanner->is_block_type( 'paragraph' );
	 *     $is_core_paragraph = $scanner->is_block_type( 'core/paragraph' );
	 *     $is_formula        = $scanner->is_block_type( 'math-block/formula' );
	 *
	 * @param string $block_type Block type name for the desired block.
	 *                           E.g. "paragraph", "core/paragraph", "math-block/formula".
	 * @return bool Whether this delimiter represents a block of the given type.
	 */
	public function is_block_type( string $block_type ): bool {
		// A delimiter without a name is handled as core/freeform, which
		// can be requested with or without its namespace.
		if ( 0 === $this->name_length ) {
			if ( 'freeform' === $block_type ) {
				return true;
			}

			return 'core/freeform' === $block_type;
		}

		// Split the requested type at the first slash; without a slash the
		// whole string is the block name and the namespace defaults to "core".
		// @todo Get lengths but avoid the allocation, use substr_compare below.
		$separator_at           = strpos( $block_type, '/' );
		$has_explicit_namespace = false !== $separator_at;
		$wanted_namespace       = $has_explicit_namespace ? substr( $block_type, 0, $separator_at ) : 'core';
		$wanted_name            = $has_explicit_namespace ? substr( $block_type, $separator_at + 1 ) : $block_type;

		if ( 0 === $this->namespace_length ) {
			// The delimiter omits its namespace, which only "core" may do.
			if ( 'core' !== $wanted_namespace ) {
				return false;
			}
		} else {
			// The delimiter names its namespace: compare it against the source
			// text in place, checking the length first so the byte comparison
			// only runs on equally-long candidates.
			$namespace_matches = (
				strlen( $wanted_namespace ) === $this->namespace_length &&
				0 === substr_compare( $this->source_text, $wanted_namespace, $this->namespace_at, $this->namespace_length )
			);

			if ( ! $namespace_matches ) {
				return false;
			}
		}

		// Finally the block name itself has to match, byte for byte.
		if ( strlen( $wanted_name ) !== $this->name_length ) {
			return false;
		}

		return 0 === substr_compare( $this->source_text, $wanted_name, $this->name_at, $this->name_length );
	}

	/**
	 * Indicates if the matched delimiter is an opening or void delimiter
	 * (i.e. it opens the block) of the given type, if a type is provided.
*
	 * This is a helper method to ease handling of code inspecting where
	 * blocks start, and of checking if the blocks are of a given type.
	 * The function is variadic to allow for checking if the delimiter
	 * opens one of many possible block types.
	 *
	 * Only delimiters of type {@see self::OPENER} or {@see self::VOID} open
	 * a block. Closers never do, and neither does a scanner that has not
	 * matched a delimiter yet or that stopped on incomplete input.
	 *
	 * Example:
	 *
	 *     $scanner = Block_Scanner::create( $html );
	 *     while ( $scanner->next_delimiter() ) {
	 *         if ( $scanner->opens_block( 'core/code', 'syntaxhighlighter/code' ) ) {
	 *             echo "Found code!";
	 *             continue;
	 *         }
	 *
	 *         if ( $scanner->opens_block( 'core/image' ) ) {
	 *             echo "Found an image!";
	 *             continue;
	 *         }
	 *
	 *         if ( $scanner->opens_block() ) {
	 *             echo "Found a new block!";
	 *         }
	 *     }
	 *
	 * @see self::is_block_type
	 *
	 * @param string ...$block_type Optional. Is the matched block type one of these?
	 *                              If none are provided, will not test block type.
	 * @return bool Whether the matched block delimiter opens a block, and whether it
	 *              opens a block of one of the given block types, if provided.
	 */
	public function opens_block( ...$block_type ): bool {
		// Test for the two delimiter kinds that open a block instead of only
		// excluding closers; otherwise the uninitialized and incomplete-input
		// states would be reported as opening a block.
		if ( static::OPENER !== $this->type && static::VOID !== $this->type ) {
			return false;
		}

		// Without any requested types, every opening delimiter qualifies.
		if ( count( $block_type ) === 0 ) {
			return true;
		}

		// Otherwise a single matching type is enough.
		foreach ( $block_type as $block ) {
			if ( $this->is_block_type( $block ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Indicates if the matched delimiter is implied due to top-level
	 * non-block content in the post.
	 *
	 * Such a delimiter carries no block name, which is what this checks.
	 *
	 * @see self::is_non_whitespace_freeform
	 *
	 * @return bool Whether or not the matched delimiter is implied as `core/freeform`.
	 */
	public function is_freeform(): bool {
		return 0 === $this->name_length;
	}

	/**
	 * Indicates if the matched delimiter is implicit and surrounding
	 * top-level non-block content that contains non-whitespace text.
	 *
	 * Many block serializers introduce newlines between block delimiters,
	 * so the presence of top-level non-block content does not imply that
	 * there are “real” freeform HTML blocks. Checking if there is content
	 * beyond whitespace is a more certain check, such as for determining
	 * whether to load CSS for the freeform or fallback block type.
*
	 * @see self::is_freeform
	 *
	 * @return bool Currently always `false`: detecting non-whitespace freeform
	 *              content is not implemented yet.
	 */
	public function is_non_whitespace_freeform(): bool {
		// A delimiter with a block name is not freeform content at all.
		$is_named_block = 0 !== $this->name_length;
		if ( $is_named_block ) {
			return false;
		}

		// For now, return false as this method is not yet fully implemented.
		// @todo Implement logic to check if freeform content contains non-whitespace text.
		return false;
	}

	/**
	 * Allocates a substring for the block type and returns the
	 * fully-qualified name, including the namespace.
	 *
	 * This function allocates a substring for the given block type. This
	 * allocation will be small and likely fine in most cases, but it's
	 * preferable to call {@link self::is_block_type} if only needing
	 * to know whether the delimiter is for a given block type, as that
	 * function is more efficient for this purpose and avoids the allocation.
	 *
	 * Example:
	 *
	 *     // Avoid.
	 *     'core/paragraph' === $scanner->get_block_type();
	 *
	 *     // Prefer.
	 *     $scanner->is_block_type( 'core/paragraph' );
	 *     $scanner->is_block_type( 'paragraph' );
	 *
	 * @return string Fully-qualified block namespace and type, e.g. "core/paragraph".
	 */
	public function get_block_type(): string {
		$has_name      = 0 !== $this->name_length;
		$has_namespace = 0 !== $this->namespace_length;

		// A delimiter without a name is reported as core/freeform.
		if ( ! $has_name ) {
			return 'core/freeform';
		}

		if ( $has_namespace ) {
			// Read namespace, separator and name as a single run of the source
			// text: the extra byte accounts for the "/" between the two parts.
			$qualified_length = $this->namespace_length + $this->name_length + 1;

			return substr( $this->source_text, $this->namespace_at, $qualified_length );
		}

		// The namespace was omitted, so the block is implicitly in "core".
		return 'core/' . substr( $this->source_text, $this->name_at, $this->name_length );
	}

	/**
	 * Returns a lazy wrapper around the block attributes, which can be used
	 * for efficiently interacting with the JSON attributes.
	 *
	 * @throws Exception This function is not yet implemented.
	 *
	 * @todo Create a lazy JSON wrapper so specific attributes can be
	 *       efficiently queried without parsing everything and loading
	 *       the entire object into memory.
* @todo After realistic benchmarking, see if JsonStreamingParser\Parser
	 *       could be used — it would need to be fast enough for the reduction
	 *       in memory use to be worth it, compared to {@see \json_decode}.
	 *
	 * @see \JsonStreamingParser\Parser
	 *
	 * @return never
	 */
	public function get_attributes(): void {
		// Placeholder: this method unconditionally throws and never returns.
		$message = 'Lazy attribute parsing not yet supported';

		throw new Exception( $message );
	}

	/**
	 * Attempts to parse and return the entire JSON attributes from the delimiter,
	 * allocating memory and processing the JSON span in the process.
	 *
	 * This does not return any parsed attributes for a closing block delimiter
	 * even if there is a span of JSON content; this JSON is a parsing error.
	 *
	 * Consider calling {@link self::get_attributes} instead if it's not
	 * necessary to read all the attributes at the same time, as that provides
	 * a more efficient mechanism for typical use cases.
	 *
	 * Since the JSON span inside the comment delimiter may not be valid JSON,
	 * this function will return `null` if it cannot parse the span and set the
	 * {@see self::get_last_json_error} to the appropriate JSON_ERROR_ constant.
	 *
	 * If the delimiter contains no JSON span, it will also return `null`,
	 * but the last error will be set to {@see JSON_ERROR_NONE}.
*
	 * Example:
	 *
	 *     $scanner = Block_Scanner::create( '<!-- wp:image {"url": "https://wordpress.org/favicon.ico"} -->' );
	 *     $scanner->next_delimiter();
	 *     $memory_hungry_and_slow_attributes = $scanner->allocate_and_return_parsed_attributes();
	 *     $memory_hungry_and_slow_attributes === array( 'url' => 'https://wordpress.org/favicon.ico' );
	 *
	 *     $scanner = Block_Scanner::create( '<!-- /wp:image {"url": "https://wordpress.org/favicon.ico"} -->' );
	 *     $scanner->next_delimiter();
	 *     null            = $scanner->allocate_and_return_parsed_attributes();
	 *     JSON_ERROR_NONE = $scanner->get_last_json_error();
	 *
	 *     $scanner = Block_Scanner::create( '<!-- wp:separator {} /-->' );
	 *     $scanner->next_delimiter();
	 *     array() === $scanner->allocate_and_return_parsed_attributes();
	 *
	 *     $scanner = Block_Scanner::create( '<!-- wp:separator /-->' );
	 *     $scanner->next_delimiter();
	 *     null = $scanner->allocate_and_return_parsed_attributes();
	 *
	 *     $scanner = Block_Scanner::create( '<!-- wp:image {"url} -->' );
	 *     $scanner->next_delimiter();
	 *     null                 = $scanner->allocate_and_return_parsed_attributes();
	 *     JSON_ERROR_CTRL_CHAR = $scanner->get_last_json_error();
	 *
	 * @return array|null Parsed JSON attributes, if present and valid, otherwise `null`.
	 */
	public function allocate_and_return_parsed_attributes(): ?array {
		// Reset first so that the early exits below report "no error".
		$this->last_json_error = JSON_ERROR_NONE;

		// Closers never yield attributes, and neither does an empty JSON span.
		$is_closer = static::CLOSER === $this->type;
		if ( $is_closer || 0 === $this->json_length ) {
			return null;
		}

		// Passing `null` for the associative argument leaves the decision to
		// the flags, where JSON_OBJECT_AS_ARRAY requests associative arrays.
		$decoded = json_decode(
			substr( $this->source_text, $this->json_at, $this->json_length ),
			null,
			512,
			JSON_OBJECT_AS_ARRAY | JSON_INVALID_UTF8_SUBSTITUTE
		);

		// Record the outcome so it can be inspected after this call.
		$error_code            = json_last_error();
		$this->last_json_error = $error_code;

		if ( JSON_ERROR_NONE !== $error_code ) {
			return null;
		}

		// Valid JSON that is not an object or list (e.g. a bare string or
		// number) is not a set of attributes.
		return is_array( $decoded ) ? $decoded : null;
	}

	/**
	 * Returns the span representing the currently-matched delimiter,
	 * if matched, otherwise `null`.
	 *
	 * Note that for freeform blocks this will return a span of length
	 * zero, since there is no explicit block delimiter.
*
	 * Example:
	 *
	 *     $scanner = Block_Scanner::create( '<!-- wp:void /-->' );
	 *     null === $scanner->get_span();
	 *
	 *     $scanner->next_delimiter();
	 *     WP_HTML_Span( 0, 17 ) === $scanner->get_span();
	 *
	 * @return WP_HTML_Span|null Span of text in source text spanning matched delimiter.
	 */
	public function get_span(): ?WP_HTML_Span {
		// A span only exists once a delimiter has been matched, which is the
		// case when the type is one of the three delimiter kinds. In any other
		// state (uninitialized, incomplete input) there is nothing to report.
		$has_match = (
			static::OPENER === $this->type ||
			static::CLOSER === $this->type ||
			static::VOID === $this->type
		);

		if ( ! $has_match ) {
			return null;
		}

		return new WP_HTML_Span( $this->delimiter_at, $this->delimiter_length );
	}

	// Debugging methods not meant for production use.

	/**
	 * Prints a debugging message showing the structure of the parsed delimiter.
	 *
	 * This is not meant to be used in production!
	 *
	 * NOTE(review): both `echo` statements below only emit the color prefix
	 * and a newline; none of the pieces computed in this method are
	 * interpolated into them. Presumably the delimiter markup was lost from
	 * the strings — confirm against the intended output before relying on it.
	 *
	 * @access private
	 */
	public function debug_print_structure(): void {
		// Colorize when STDOUT is not defined, or when it is a terminal. The
		// terminal probe is skipped if `posix_isatty()` is unavailable, so
		// that the call cannot be fatal; no color codes are emitted then.
		$use_color = ! defined( 'STDOUT' ) || ( function_exists( 'posix_isatty' ) && posix_isatty( STDOUT ) );

		// `$c` passes a color code through, or swallows it when not colorizing.
		$c = $use_color
			? function ( $color = null ) { return $color; } // phpcs:ignore
			: function ( $color ) { return ''; }; // phpcs:ignore

		if ( $this->is_block_type( 'core/freeform' ) ) {
			$closer = static::CLOSER === $this->get_delimiter_type() ? '/' : '';
			echo "{$c( "\e[90m" )}\n"; // phpcs:ignore
			return;
		}

		// Pieces of the delimiter, sliced out of the source text.
		$namespace  = substr( $this->source_text, $this->namespace_at, $this->namespace_length );
		$slash      = 0 === $this->namespace_length ? '' : '/';
		$block_name = substr( $this->source_text, $this->name_at, $this->name_length );
		$closer     = static::CLOSER === $this->type ? '/' : '';
		$json       = substr( $this->source_text, $this->json_at, $this->json_length );

		// Whitespace runs between those pieces. The fixed offsets (4 and 3)
		// presumably skip the comment opener, the "wp:" prefix and the comment
		// closer — TODO confirm against the delimiter syntax. A closer has one
		// extra byte before the namespace, which is subtracted here.
		$opener_whitespace_at     = $this->delimiter_at + 4;
		$opener_whitespace_length = $this->namespace_at - 3 - $opener_whitespace_at - ( static::CLOSER === $this->type ? 1 : 0 );

		$after_name_whitespace_at     = $this->name_at + $this->name_length;
		$after_name_whitespace_length = $this->json_at - $after_name_whitespace_at;

		$closing_whitespace_at     = $this->json_at + $this->json_length;
		$closing_whitespace_length = $this->delimiter_at + $this->delimiter_length - 3 - $closing_whitespace_at;

		// A "/" four bytes before the end of the delimiter is treated as the
		// void flag, and takes one byte away from the closing whitespace.
		if ( '/' === $this->source_text[ $this->delimiter_at + $this->delimiter_length - 4 ] ) {
			$void_flag = '/';
			--$closing_whitespace_length;
		} else {
			$void_flag = '';
		}

		// `$w` makes whitespace visible by swapping it for symbol glyphs.
		$w = function ( $whitespace ) use ( $c ) {
			return $c( "\e[2;90m" ) . str_replace( array( ' ', "\t", "\f", "\r", "\n" ), array( '␣', '␉', '␌', '␍', '␤' ), $whitespace );
		};

		echo "{$c( "\e[90m" )}\n"; // phpcs:ignore
	}

	// Constant declarations that would otherwise pollute the top of the class.

	/**
	 * Indicates that the block comment delimiter closes an open block.
	 */
	const CLOSER = 'closer';

	/**
	 * Indicates that the parser started parsing a block comment delimiter, but
	 * the input document ended before it could finish. The document was likely truncated.
	 */
	const INCOMPLETE_INPUT = 'incomplete-input';

	/**
	 * Indicates that the block comment delimiter opens a block.
	 */
	const OPENER = 'opener';

	/**
	 * Indicates that the parser has not yet attempted to parse a block comment delimiter.
	 */
	const UNINITIALIZED = 'uninitialized';

	/**
	 * Indicates that the block comment delimiter represents a void block
	 * with no inner content of any kind.
	 */
	const VOID = 'void';
}