Skip to content

Commit b25c823

Browse files
dmsnell and sirreal committed
HTML API: Add support for list elements.
From https://github.com/html5lib/html5lib-tests/blob/a9f44960a9fedf265093d22b2aa3c7ca123727b9/tree-construction/webkit01.dat#L468-L482
Co-authored-by: Jon Surrell <sirreal@users.noreply.github.com>
1 parent cc64516 commit b25c823

9 files changed

+618
-43
lines changed

phpcs.xml.dist

+9
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,15 @@
250250
<exclude-pattern>/wp-tests-config-sample\.php</exclude-pattern>
251251
</rule>
252252

253+
<!-- Allow goto in the HTML Processor, which mimics algorithms that are written
254+
this way in the HTML specification, and these particular algorithms are complex and
255+
highly imperative. Avoiding the goto introduces a number of risks that could make it
256+
more difficult to maintain the relationship to the standard, lead to subtle differences
257+
in the parsing, and distance the code from its standard. -->
258+
<rule ref="Generic.PHP.DiscourageGoto.Found">
259+
<exclude-pattern>/wp-includes/html-api/class-wp-html-processor\.php</exclude-pattern>
260+
</rule>
261+
253262
<!-- Exclude sample config from modernization to prevent breaking CI workflows based on WP-CLI scaffold.
254263
See: https://core.trac.wordpress.org/ticket/48082#comment:16 -->
255264
<rule ref="Modernize.FunctionCalls.Dirname.FileConstant">

src/wp-includes/html-api/class-wp-html-open-elements.php

+24-8
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ public function has_element_in_specific_scope( $tag_name, $termination_list ) {
129129
}
130130

131131
if ( in_array( $node->node_name, $termination_list, true ) ) {
132-
return true;
132+
return false;
133133
}
134134
}
135135

@@ -166,18 +166,22 @@ public function has_element_in_scope( $tag_name ) {
166166
* Returns whether a particular element is in list item scope.
167167
*
168168
* @since 6.4.0
169+
* @since 6.5.0 Implemented: no longer throws on every invocation.
169170
*
170171
* @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope
171172
*
172-
* @throws WP_HTML_Unsupported_Exception Always until this function is implemented.
173-
*
174173
* @param string $tag_name Name of tag to check.
175174
* @return bool Whether given element is in scope.
176175
*/
177176
public function has_element_in_list_item_scope( $tag_name ) {
178-
throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on list item scope.' );
179-
180-
return false; // The linter requires this unreachable code until the function is implemented and can return.
177+
return $this->has_element_in_specific_scope(
178+
$tag_name,
179+
array(
180+
// There are more elements that belong here which aren't currently supported.
181+
'OL',
182+
'UL',
183+
)
184+
);
181185
}
182186

183187
/**
@@ -375,10 +379,22 @@ public function walk_down() {
375379
* see WP_HTML_Open_Elements::walk_down().
376380
*
377381
* @since 6.4.0
382+
* @since 6.5.0 Accepts $above_this_node to start traversal above a given node, if it exists.
383+
*
384+
* @param ?WP_HTML_Token $above_this_node Optional. Start traversing above this node. If the node is not on the stack, nothing is yielded.
378385
*/
379-
public function walk_up() {
386+
public function walk_up( $above_this_node = null ) {
387+
$has_found_node = null === $above_this_node;
388+
380389
for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) {
381-
yield $this->stack[ $i ];
390+
$node = $this->stack[ $i ];
391+
392+
if ( ! $has_found_node ) {
393+
$has_found_node = $node === $above_this_node;
394+
continue;
395+
}
396+
397+
yield $node;
382398
}
383399
}
384400

src/wp-includes/html-api/class-wp-html-processor.php

+113
Original file line numberDiff line numberDiff line change
@@ -648,10 +648,12 @@ private function step_in_body() {
648648
case '+MAIN':
649649
case '+MENU':
650650
case '+NAV':
651+
case '+OL':
651652
case '+P':
652653
case '+SEARCH':
653654
case '+SECTION':
654655
case '+SUMMARY':
656+
case '+UL':
655657
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
656658
$this->close_a_p_element();
657659
}
@@ -685,9 +687,11 @@ private function step_in_body() {
685687
case '-MAIN':
686688
case '-MENU':
687689
case '-NAV':
690+
case '-OL':
688691
case '-SEARCH':
689692
case '-SECTION':
690693
case '-SUMMARY':
694+
case '-UL':
691695
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) {
692696
// @todo Report parse error.
693697
// Ignore the token.
@@ -755,6 +759,109 @@ private function step_in_body() {
755759
$this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
756760
return true;
757761

762+
/*
763+
* > A start tag whose tag name is "li"
764+
* > A start tag whose tag name is one of: "dd", "dt"
765+
*/
766+
case '+DD':
767+
case '+DT':
768+
case '+LI':
769+
$this->state->frameset_ok = false;
770+
$node = $this->state->stack_of_open_elements->current_node();
771+
$is_li = 'LI' === $tag_name;
772+
773+
in_body_list_loop:
774+
/*
775+
* The logic for LI and DT/DD is the same except for one point: LI elements _only_
776+
* close other LI elements, but a DT or DD element closes _any_ open DT or DD element.
777+
*/
778+
if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) {
779+
$node_name = $is_li ? 'LI' : $node->node_name;
780+
$this->generate_implied_end_tags( $node_name );
781+
if ( $node_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
782+
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
783+
}
784+
785+
$this->state->stack_of_open_elements->pop_until( $node_name );
786+
goto in_body_list_done;
787+
}
788+
789+
if (
790+
'ADDRESS' !== $node->node_name &&
791+
'DIV' !== $node->node_name &&
792+
'P' !== $node->node_name &&
793+
$this->is_special( $node->node_name )
794+
) {
795+
/*
796+
* > If node is in the special category, but is not an address, div,
797+
* > or p element, then jump to the step labeled done below.
798+
*/
799+
goto in_body_list_done;
800+
} else {
801+
/*
802+
* > Otherwise, set node to the previous entry in the stack of open elements
803+
* > and return to the step labeled loop.
804+
*/
805+
foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
806+
$node = $item;
807+
break;
808+
}
809+
goto in_body_list_loop;
810+
}
811+
812+
in_body_list_done:
813+
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
814+
$this->close_a_p_element();
815+
}
816+
817+
$this->insert_html_element( $this->state->current_token );
818+
return true;
819+
820+
/*
821+
* > An end tag whose tag name is "li"
822+
* > An end tag whose tag name is one of: "dd", "dt"
823+
*/
824+
case '-DD':
825+
case '-DT':
826+
case '-LI':
827+
if (
828+
/*
829+
* An end tag whose tag name is "li":
830+
* If the stack of open elements does not have an li element in list item scope,
831+
* then this is a parse error; ignore the token.
832+
*/
833+
(
834+
'LI' === $tag_name &&
835+
! $this->state->stack_of_open_elements->has_element_in_list_item_scope( 'LI' )
836+
) ||
837+
/*
838+
* An end tag whose tag name is one of: "dd", "dt":
839+
* If the stack of open elements does not have an element in scope that is an
840+
* HTML element with the same tag name as that of the token, then this is a
841+
* parse error; ignore the token.
842+
*/
843+
(
844+
'LI' !== $tag_name &&
845+
! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name )
846+
)
847+
) {
848+
/*
849+
* This is a parse error, ignore the token.
850+
*
851+
* @todo Indicate a parse error once it's possible.
852+
*/
853+
return $this->step();
854+
}
855+
856+
$this->generate_implied_end_tags( $tag_name );
857+
858+
if ( $tag_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
859+
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
860+
}
861+
862+
$this->state->stack_of_open_elements->pop_until( $tag_name );
863+
return true;
864+
758865
/*
759866
* > An end tag whose tag name is "p"
760867
*/
@@ -1223,6 +1330,9 @@ private function close_a_p_element() {
12231330
*/
12241331
private function generate_implied_end_tags( $except_for_this_element = null ) {
12251332
$elements_with_implied_end_tags = array(
1333+
'DD',
1334+
'DT',
1335+
'LI',
12261336
'P',
12271337
);
12281338

@@ -1248,6 +1358,9 @@ private function generate_implied_end_tags( $except_for_this_element = null ) {
12481358
*/
12491359
private function generate_implied_end_tags_thoroughly() {
12501360
$elements_with_implied_end_tags = array(
1361+
'DD',
1362+
'DT',
1363+
'LI',
12511364
'P',
12521365
);
12531366

tests/phpunit/tests/html-api/wpHtmlProcessor.php

-5
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,6 @@ public function data_unsupported_special_in_body_tags() {
168168
'CAPTION' => array( 'CAPTION' ),
169169
'COL' => array( 'COL' ),
170170
'COLGROUP' => array( 'COLGROUP' ),
171-
'DD' => array( 'DD' ),
172-
'DT' => array( 'DT' ),
173171
'EMBED' => array( 'EMBED' ),
174172
'FORM' => array( 'FORM' ),
175173
'FRAME' => array( 'FRAME' ),
@@ -180,7 +178,6 @@ public function data_unsupported_special_in_body_tags() {
180178
'IFRAME' => array( 'IFRAME' ),
181179
'INPUT' => array( 'INPUT' ),
182180
'KEYGEN' => array( 'KEYGEN' ),
183-
'LI' => array( 'LI' ),
184181
'LINK' => array( 'LINK' ),
185182
'LISTING' => array( 'LISTING' ),
186183
'MARQUEE' => array( 'MARQUEE' ),
@@ -191,7 +188,6 @@ public function data_unsupported_special_in_body_tags() {
191188
'NOFRAMES' => array( 'NOFRAMES' ),
192189
'NOSCRIPT' => array( 'NOSCRIPT' ),
193190
'OBJECT' => array( 'OBJECT' ),
194-
'OL' => array( 'OL' ),
195191
'OPTGROUP' => array( 'OPTGROUP' ),
196192
'OPTION' => array( 'OPTION' ),
197193
'PARAM' => array( 'PARAM' ),
@@ -218,7 +214,6 @@ public function data_unsupported_special_in_body_tags() {
218214
'TITLE' => array( 'TITLE' ),
219215
'TR' => array( 'TR' ),
220216
'TRACK' => array( 'TRACK' ),
221-
'UL' => array( 'UL' ),
222217
'WBR' => array( 'WBR' ),
223218
'XMP' => array( 'XMP' ),
224219
);

tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php

+23-17
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public function data_single_tag_of_supported_elements() {
3838
$supported_elements = array(
3939
'A',
4040
'ABBR',
41-
'ACRONYM', // Neutralized
41+
'ACRONYM', // Neutralized.
4242
'ADDRESS',
4343
'ARTICLE',
4444
'ASIDE',
@@ -47,13 +47,14 @@ public function data_single_tag_of_supported_elements() {
4747
'BDI',
4848
'BDO',
4949
'BIG',
50-
'BLINK', // Deprecated
50+
'BLINK', // Deprecated.
5151
'BUTTON',
5252
'CANVAS',
53-
'CENTER', // Neutralized
53+
'CENTER', // Neutralized.
5454
'CITE',
5555
'CODE',
5656
'DATA',
57+
'DD',
5758
'DATALIST',
5859
'DFN',
5960
'DEL',
@@ -62,6 +63,7 @@ public function data_single_tag_of_supported_elements() {
6263
'DIR',
6364
'DIV',
6465
'DL',
66+
'DT',
6567
'EM',
6668
'FIELDSET',
6769
'FIGCAPTION',
@@ -79,6 +81,7 @@ public function data_single_tag_of_supported_elements() {
7981
'I',
8082
'IMG',
8183
'INS',
84+
'LI',
8285
'ISINDEX', // Deprecated
8386
'KBD',
8487
'LABEL',
@@ -91,6 +94,7 @@ public function data_single_tag_of_supported_elements() {
9194
'MULTICOL', // Deprecated
9295
'NAV',
9396
'NEXTID', // Deprecated
97+
'OL',
9498
'OUTPUT',
9599
'P',
96100
'PICTURE',
@@ -112,6 +116,7 @@ public function data_single_tag_of_supported_elements() {
112116
'TIME',
113117
'TT',
114118
'U',
119+
'UL',
115120
'VAR',
116121
'VIDEO',
117122
);
@@ -156,7 +161,7 @@ public function test_fails_when_encountering_unsupported_tag( $html ) {
156161
*/
157162
public function data_unsupported_elements() {
158163
$unsupported_elements = array(
159-
'APPLET', // Deprecated
164+
'APPLET', // Deprecated.
160165
'AREA',
161166
'BASE',
162167
'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal.
@@ -165,8 +170,6 @@ public function data_unsupported_elements() {
165170
'CAPTION',
166171
'COL',
167172
'COLGROUP',
168-
'DD',
169-
'DT',
170173
'EMBED',
171174
'FORM',
172175
'FRAME',
@@ -176,27 +179,25 @@ public function data_unsupported_elements() {
176179
'HTML',
177180
'IFRAME',
178181
'INPUT',
179-
'KEYGEN', // Deprecated; void
180-
'LI',
182+
'KEYGEN', // Deprecated; void.
181183
'LINK',
182184
'LISTING', // Deprecated, use PRE instead.
183-
'MARQUEE', // Deprecated
185+
'MARQUEE', // Deprecated.
184186
'MATH',
185187
'META',
186-
'NOBR', // Neutralized
187-
'NOEMBED', // Neutralized
188-
'NOFRAMES', // Neutralized
188+
'NOBR', // Neutralized.
189+
'NOEMBED', // Neutralized.
190+
'NOFRAMES', // Neutralized.
189191
'NOSCRIPT',
190192
'OBJECT',
191-
'OL',
192193
'OPTGROUP',
193194
'OPTION',
194-
'PLAINTEXT', // Neutralized
195+
'PLAINTEXT', // Neutralized.
195196
'PRE',
196-
'RB', // Neutralized
197+
'RB', // Neutralized.
197198
'RP',
198199
'RT',
199-
'RTC', // Neutralized
200+
'RTC', // Neutralized.
200201
'SCRIPT',
201202
'SELECT',
202203
'SOURCE',
@@ -213,7 +214,6 @@ public function data_unsupported_elements() {
213214
'TITLE',
214215
'TR',
215216
'TRACK',
216-
'UL',
217217
'WBR',
218218
'XMP', // Deprecated, use PRE instead.
219219
);
@@ -348,6 +348,12 @@ public function data_html_target_with_breadcrumbs() {
348348
),
349349
'MAIN inside MAIN inside SPAN' => array( '<span><main><main target>', array( 'HTML', 'BODY', 'SPAN', 'MAIN', 'MAIN' ), 1 ),
350350
'MAIN next to unclosed P' => array( '<p><main target>', array( 'HTML', 'BODY', 'MAIN' ), 1 ),
351+
'LI after unclosed LI' => array( '<li>one<li>two<li target>three', array( 'HTML', 'BODY', 'LI' ), 3 ),
352+
'LI in UL in LI' => array( '<ul><li>one<ul><li target>two', array( 'HTML', 'BODY', 'UL', 'LI', 'UL', 'LI' ), 1 ),
353+
'DD and DT mutually close, LI self-closes (dt 2)' => array( '<dd><dd><dt><dt target><dd><li><li>', array( 'HTML', 'BODY', 'DT' ), 2 ),
354+
'DD and DT mutually close, LI self-closes (dd 3)' => array( '<dd><dd><dt><dt><dd target><li><li>', array( 'HTML', 'BODY', 'DD' ), 3 ),
355+
'DD and DT mutually close, LI self-closes (li 1)' => array( '<dd><dd><dt><dt><dd><li target><li>', array( 'HTML', 'BODY', 'DD', 'LI' ), 1 ),
356+
'DD and DT mutually close, LI self-closes (li 2)' => array( '<dd><dd><dt><dt><dd><li><li target>', array( 'HTML', 'BODY', 'DD', 'LI' ), 2 ),
351357

352358
// H1 - H6 close out _any_ H1 - H6 when encountering _any_ of H1 - H6, making this section surprising.
353359
'EM inside H3 after unclosed P' => array( '<p><h3><em target>Important Message</em></h3>', array( 'HTML', 'BODY', 'H3', 'EM' ), 1 ),

0 commit comments

Comments
 (0)