Skip to content

Commit 5adee8a

Browse files
committed
HTML API: Add support for list elements.
From https://github.com/html5lib/html5lib-tests/blob/a9f44960a9fedf265093d22b2aa3c7ca123727b9/tree-construction/webkit01.dat#L468-L482 Co-authored-by: Jon Surrell <sirreal@users.noreply.github.com> Add docblocks to tests, expand comments in class docblock.
1 parent eff1a3d commit 5adee8a

9 files changed

+704
-44
lines changed

phpcs.xml.dist

+9
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,15 @@
250250
<exclude-pattern>/wp-tests-config-sample\.php</exclude-pattern>
251251
</rule>
252252

253+
<!-- Exempt the HTML Processor from the rule discouraging goto: it mimics algorithms that are written
254+
this way in the HTML specification, and these particular algorithms are complex and
255+
highly imperative. Avoiding the goto introduces a number of risks that could make it
256+
more difficult to maintain the relationship to the standard, lead to subtle differences
257+
in the parsing, and distance the code from its standard. -->
258+
<rule ref="Generic.PHP.DiscourageGoto.Found">
259+
<exclude-pattern>/wp-includes/html-api/class-wp-html-processor\.php</exclude-pattern>
260+
</rule>
261+
253262
<!-- Exclude sample config from modernization to prevent breaking CI workflows based on WP-CLI scaffold.
254263
See: https://core.trac.wordpress.org/ticket/48082#comment:16 -->
255264
<rule ref="Modernize.FunctionCalls.Dirname.FileConstant">

src/wp-includes/html-api/class-wp-html-open-elements.php

+24-8
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ public function has_element_in_specific_scope( $tag_name, $termination_list ) {
129129
}
130130

131131
if ( in_array( $node->node_name, $termination_list, true ) ) {
132-
return true;
132+
return false;
133133
}
134134
}
135135

@@ -166,18 +166,22 @@ public function has_element_in_scope( $tag_name ) {
166166
* Returns whether a particular element is in list item scope.
167167
*
168168
* @since 6.4.0
169+
* @since 6.5.0 Implemented: no longer throws on every invocation.
169170
*
170171
* @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope
171172
*
172-
* @throws WP_HTML_Unsupported_Exception Always until this function is implemented.
173-
*
174173
* @param string $tag_name Name of tag to check.
175174
* @return bool Whether given element is in scope.
176175
*/
177176
public function has_element_in_list_item_scope( $tag_name ) {
178-
throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on list item scope.' );
179-
180-
return false; // The linter requires this unreachable code until the function is implemented and can return.
177+
return $this->has_element_in_specific_scope(
178+
$tag_name,
179+
array(
180+
// There are more elements that belong here which aren't currently supported.
181+
'OL',
182+
'UL',
183+
)
184+
);
181185
}
182186

183187
/**
@@ -375,10 +379,22 @@ public function walk_down() {
375379
* see WP_HTML_Open_Elements::walk_down().
376380
*
377381
* @since 6.4.0
382+
* @since 6.5.0 Accepts $above_this_node to start traversal above a given node, if it exists.
383+
*
384+
* @param ?WP_HTML_Token $above_this_node Start traversing above this node, if provided and if the node exists.
378385
*/
379-
public function walk_up() {
386+
public function walk_up( $above_this_node = null ) {
387+
$has_found_node = null === $above_this_node;
388+
380389
for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) {
381-
yield $this->stack[ $i ];
390+
$node = $this->stack[ $i ];
391+
392+
if ( ! $has_found_node ) {
393+
$has_found_node = $node === $above_this_node;
394+
continue;
395+
}
396+
397+
yield $node;
382398
}
383399
}
384400

src/wp-includes/html-api/class-wp-html-processor.php

+114-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
106106
* - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
107107
* - Links: A.
108-
* - Lists: DL.
108+
* - Lists: DD, DL, DT, LI, OL, UL.
109109
* - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
110110
* - Paragraph: P.
111111
* - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
@@ -648,10 +648,12 @@ private function step_in_body() {
648648
case '+MAIN':
649649
case '+MENU':
650650
case '+NAV':
651+
case '+OL':
651652
case '+P':
652653
case '+SEARCH':
653654
case '+SECTION':
654655
case '+SUMMARY':
656+
case '+UL':
655657
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
656658
$this->close_a_p_element();
657659
}
@@ -685,9 +687,11 @@ private function step_in_body() {
685687
case '-MAIN':
686688
case '-MENU':
687689
case '-NAV':
690+
case '-OL':
688691
case '-SEARCH':
689692
case '-SECTION':
690693
case '-SUMMARY':
694+
case '-UL':
691695
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) {
692696
// @todo Report parse error.
693697
// Ignore the token.
@@ -755,6 +759,109 @@ private function step_in_body() {
755759
$this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
756760
return true;
757761

762+
/*
763+
* > A start tag whose tag name is "li"
764+
* > A start tag whose tag name is one of: "dd", "dt"
765+
*/
766+
case '+DD':
767+
case '+DT':
768+
case '+LI':
769+
$this->state->frameset_ok = false;
770+
$node = $this->state->stack_of_open_elements->current_node();
771+
$is_li = 'LI' === $tag_name;
772+
773+
in_body_list_loop:
774+
/*
775+
* The logic for LI and DT/DD is the same except for one point: LI elements _only_
776+
* close other LI elements, but a DT or DD element closes _any_ open DT or DD element.
777+
*/
778+
if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) {
779+
$node_name = $is_li ? 'LI' : $node->node_name;
780+
$this->generate_implied_end_tags( $node_name );
781+
if ( $node_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
782+
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
783+
}
784+
785+
$this->state->stack_of_open_elements->pop_until( $node_name );
786+
goto in_body_list_done;
787+
}
788+
789+
if (
790+
'ADDRESS' !== $node->node_name &&
791+
'DIV' !== $node->node_name &&
792+
'P' !== $node->node_name &&
793+
$this->is_special( $node->node_name )
794+
) {
795+
/*
796+
* > If node is in the special category, but is not an address, div,
797+
* > or p element, then jump to the step labeled done below.
798+
*/
799+
goto in_body_list_done;
800+
} else {
801+
/*
802+
* > Otherwise, set node to the previous entry in the stack of open elements
803+
* > and return to the step labeled loop.
804+
*/
805+
foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
806+
$node = $item;
807+
break;
808+
}
809+
goto in_body_list_loop;
810+
}
811+
812+
in_body_list_done:
813+
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
814+
$this->close_a_p_element();
815+
}
816+
817+
$this->insert_html_element( $this->state->current_token );
818+
return true;
819+
820+
/*
821+
* > An end tag whose tag name is "li"
822+
* > An end tag whose tag name is one of: "dd", "dt"
823+
*/
824+
case '-DD':
825+
case '-DT':
826+
case '-LI':
827+
if (
828+
/*
829+
* An end tag whose tag name is "li":
830+
* If the stack of open elements does not have an li element in list item scope,
831+
* then this is a parse error; ignore the token.
832+
*/
833+
(
834+
'LI' === $tag_name &&
835+
! $this->state->stack_of_open_elements->has_element_in_list_item_scope( 'LI' )
836+
) ||
837+
/*
838+
* An end tag whose tag name is one of: "dd", "dt":
839+
* If the stack of open elements does not have an element in scope that is an
840+
* HTML element with the same tag name as that of the token, then this is a
841+
* parse error; ignore the token.
842+
*/
843+
(
844+
'LI' !== $tag_name &&
845+
! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name )
846+
)
847+
) {
848+
/*
849+
* This is a parse error, ignore the token.
850+
*
851+
* @todo Indicate a parse error once it's possible.
852+
*/
853+
return $this->step();
854+
}
855+
856+
$this->generate_implied_end_tags( $tag_name );
857+
858+
if ( $tag_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
859+
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
860+
}
861+
862+
$this->state->stack_of_open_elements->pop_until( $tag_name );
863+
return true;
864+
758865
/*
759866
* > An end tag whose tag name is "p"
760867
*/
@@ -1223,6 +1330,9 @@ private function close_a_p_element() {
12231330
*/
12241331
private function generate_implied_end_tags( $except_for_this_element = null ) {
12251332
$elements_with_implied_end_tags = array(
1333+
'DD',
1334+
'DT',
1335+
'LI',
12261336
'P',
12271337
);
12281338

@@ -1248,6 +1358,9 @@ private function generate_implied_end_tags( $except_for_this_element = null ) {
12481358
*/
12491359
private function generate_implied_end_tags_thoroughly() {
12501360
$elements_with_implied_end_tags = array(
1361+
'DD',
1362+
'DT',
1363+
'LI',
12511364
'P',
12521365
);
12531366

tests/phpunit/tests/html-api/wpHtmlProcessor.php

-5
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,6 @@ public function data_unsupported_special_in_body_tags() {
168168
'CAPTION' => array( 'CAPTION' ),
169169
'COL' => array( 'COL' ),
170170
'COLGROUP' => array( 'COLGROUP' ),
171-
'DD' => array( 'DD' ),
172-
'DT' => array( 'DT' ),
173171
'EMBED' => array( 'EMBED' ),
174172
'FORM' => array( 'FORM' ),
175173
'FRAME' => array( 'FRAME' ),
@@ -180,7 +178,6 @@ public function data_unsupported_special_in_body_tags() {
180178
'IFRAME' => array( 'IFRAME' ),
181179
'INPUT' => array( 'INPUT' ),
182180
'KEYGEN' => array( 'KEYGEN' ),
183-
'LI' => array( 'LI' ),
184181
'LINK' => array( 'LINK' ),
185182
'LISTING' => array( 'LISTING' ),
186183
'MARQUEE' => array( 'MARQUEE' ),
@@ -191,7 +188,6 @@ public function data_unsupported_special_in_body_tags() {
191188
'NOFRAMES' => array( 'NOFRAMES' ),
192189
'NOSCRIPT' => array( 'NOSCRIPT' ),
193190
'OBJECT' => array( 'OBJECT' ),
194-
'OL' => array( 'OL' ),
195191
'OPTGROUP' => array( 'OPTGROUP' ),
196192
'OPTION' => array( 'OPTION' ),
197193
'PARAM' => array( 'PARAM' ),
@@ -218,7 +214,6 @@ public function data_unsupported_special_in_body_tags() {
218214
'TITLE' => array( 'TITLE' ),
219215
'TR' => array( 'TR' ),
220216
'TRACK' => array( 'TRACK' ),
221-
'UL' => array( 'UL' ),
222217
'WBR' => array( 'WBR' ),
223218
'XMP' => array( 'XMP' ),
224219
);

0 commit comments

Comments
 (0)